diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000000..00f58d7b08f --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +*.bat text eol=crlf +*.cmd text eol=crlf +*.bin binary diff --git a/.github/ISSUE_TEMPLATE/bug-issue.md b/.github/ISSUE_TEMPLATE/bug-issue.md new file mode 100644 index 00000000000..fc8c9f8086d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-issue.md @@ -0,0 +1,60 @@ +--- +name: Bug Issue +about: Use this template for reporting a bug +labels: 'bug' +title: '[BUG]' + +--- + +## Bug + +#### Which Delta project/connector is this regarding? + + +- [ ] Spark +- [ ] Standalone +- [ ] Flink +- [ ] Kernel +- [ ] Other (fill in here) + +### Describe the problem + +#### Steps to reproduce + + + +#### Observed results + + + +#### Expected results + + + +#### Further details + + + +### Environment information + +* Delta Lake version: +* Spark version: +* Scala version: + +### Willingness to contribute + +The Delta Lake Community encourages bug fix contributions. Would you or another member of your organization be willing to contribute a fix for this bug to the Delta Lake code base? + +- [ ] Yes. I can contribute a fix for this bug independently. +- [ ] Yes. I would be willing to contribute a fix for this bug with guidance from the Delta Lake community. +- [ ] No. I cannot contribute a bug fix at this time. diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 00000000000..5d6537af359 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,44 @@ +--- +name: Feature Request +about: Use this template for raising a feature request +labels: 'enhancement' +title: '[Feature Request]' + +--- + +## Feature request + +#### Which Delta project/connector is this regarding? + + +- [ ] Spark +- [ ] Standalone +- [ ] Flink +- [ ] Kernel +- [ ] Other (fill in here) + +### Overview + + + +### Motivation + + + +### Further details + + + +### Willingness to contribute + +The Delta Lake Community encourages new feature contributions. Would you or another member of your organization be willing to contribute an implementation of this feature? + +- [ ] Yes. I can contribute this feature independently. +- [ ] Yes. I would be willing to contribute this feature with guidance from the Delta Lake community. +- [ ] No. I cannot contribute this feature at this time. \ No newline at end of file diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000000..97d818dfa57 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,46 @@ + + +#### Which Delta project/connector is this regarding? + + +- [ ] Spark +- [ ] Standalone +- [ ] Flink +- [ ] Kernel +- [ ] Other (fill in here) + +## Description + + + +## How was this patch tested? + + + +## Does this PR introduce _any_ user-facing changes? 
+ + diff --git a/.github/workflows/connectors_test.yaml b/.github/workflows/connectors_test.yaml new file mode 100644 index 00000000000..1624efc64cf --- /dev/null +++ b/.github/workflows/connectors_test.yaml @@ -0,0 +1,45 @@ +name: "Delta Connectors Tests" +on: [push, pull_request] +jobs: + build: + name: "Run tests" + runs-on: ubuntu-20.04 + strategy: + matrix: + # These Scala versions must match those in the build.sbt + scala: [2.13.8, 2.12.17] + steps: + - uses: actions/checkout@v2 + - name: install java + uses: actions/setup-java@v2 + with: + distribution: 'zulu' + java-version: '8' + - name: Cache Scala, SBT + uses: actions/cache@v2 + with: + path: | + ~/.sbt + ~/.ivy2 + ~/.cache/coursier + ~/.m2 + key: build-cache-3-with-scala_${{ matrix.scala }} + - name: Run Scala Style tests on test sources (Scala 2.12 only) + run: build/sbt "++ ${{ matrix.scala }}" testScalastyle + if: startsWith(matrix.scala, '2.12.') + - name: Run sqlDeltaImport tests (Scala 2.12 and 2.13 only) + run: build/sbt "++ ${{ matrix.scala }}" sqlDeltaImport/test + if: ${{ !startsWith(matrix.scala, '2.11.') }} + # These tests are not working yet + # - name: Run Delta Standalone Compatibility tests (Scala 2.12 only) + # run: build/sbt "++ ${{ matrix.scala }}" compatibility/test + # if: startsWith(matrix.scala, '2.12.') + - name: Run Delta Standalone tests + run: build/sbt "++ ${{ matrix.scala }}" standalone/test testStandaloneCosmetic/test standaloneParquet/test testParquetUtilsWithStandaloneCosmetic/test + - name: Run Hive 3 tests + run: build/sbt "++ ${{ matrix.scala }}" hiveMR/test hiveTez/test + - name: Run Hive 2 tests + run: build/sbt "++ ${{ matrix.scala }}" hive2MR/test hive2Tez/test + - name: Run Flink tests (Scala 2.12 only) + run: build/sbt -mem 3000 "++ ${{ matrix.scala }}" flink/test + if: ${{ startsWith(matrix.scala, '2.12.') }} diff --git a/.github/workflows/kernel_docs.yaml b/.github/workflows/kernel_docs.yaml new file mode 100644 index 00000000000..27e397df56f --- /dev/null +++ b/.github/workflows/kernel_docs.yaml @@ -0,0 +1,51 @@ +# Simple workflow for deploying static content to GitHub Pages +name: Deploy static content to Pages + +on: + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Single deploy job since we're just deploying + deploy_docs: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: install java + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: "8" + - name: Generate docs + run: | + build/sbt kernelGroup/unidoc + mkdir -p kernel/docs/snapshot/kernel-api/java + mkdir -p kernel/docs/snapshot/kernel-defaults/java + cp -r kernel/kernel-api/target/javaunidoc/. kernel/docs/snapshot/kernel-api/java/ + cp -r kernel/kernel-defaults/target/javaunidoc/. 
kernel/docs/snapshot/kernel-defaults/java/ + - name: Setup Pages + uses: actions/configure-pages@v3 + - name: Upload artifact + uses: actions/upload-pages-artifact@v1 + with: + # Upload kernel docs + path: kernel/docs + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 diff --git a/.github/workflows/kernel_test.yaml b/.github/workflows/kernel_test.yaml new file mode 100644 index 00000000000..ffe3b0ed78c --- /dev/null +++ b/.github/workflows/kernel_test.yaml @@ -0,0 +1,20 @@ +name: "Delta Kernel Tests" +on: [push, pull_request] +jobs: + test: + runs-on: ubuntu-20.04 + env: + SCALA_VERSION: 2.12.17 + steps: + - uses: actions/checkout@v3 + - name: install java + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: "8" + - name: Run tests + run: | + python run-tests.py --group kernel --coverage + - name: Run integration tests + run: | + cd kernel/examples && python run-kernel-examples.py --use-local diff --git a/.github/workflows/new_pull_request.yaml b/.github/workflows/new_pull_request.yaml new file mode 100644 index 00000000000..5e3532bd150 --- /dev/null +++ b/.github/workflows/new_pull_request.yaml @@ -0,0 +1,16 @@ +name: Add new pull requests to Backlog (External) + +on: + pull_request_target: + types: [opened, reopened] + +jobs: + automate-new-pull-requests: + if: ${{ !contains('allisonport-db dennyglee scottsand-db tdas vkorukanti zsxwing', github.event.sender.login) }} + runs-on: ubuntu-latest + steps: + - uses: alex-page/github-project-automation-plus@v0.8.1 + with: + project: oss-delta-prs + column: Needs Review + repo-token: ${{ secrets.PROJECT_BOARD_AUTOMATION_TOKEN }} diff --git a/.github/workflows/new_updated_issue.yaml b/.github/workflows/new_updated_issue.yaml new file mode 100644 index 00000000000..9470c4e7b9c --- /dev/null +++ b/.github/workflows/new_updated_issue.yaml @@ -0,0 +1,32 @@ +name: Add new and updated issues to Needs Review + +env: + eng_usernames: allisonport-db scottsand-db tdas vkorukanti zsxwing + devrel_usernames: dennyglee MrPowers nkarpov vinijaiswal + +on: + issues: + types: [opened, reopened] + issue_comment: + types: [created] + +jobs: + automate-new-updated-issues: + if: ${{ !github.event.issue.pull_request && + !contains('allisonport-db scottsand-db tdas vkorukanti zsxwing dennyglee MrPowers nkarpov vinijaiswal', github.event.sender.login) }} + runs-on: ubuntu-latest + steps: + - name: Move to Needs Review (ENG) + if: ${{ github.event.issue.assignee != null && contains(env.eng_usernames, github.event.issue.assignee.login) }} + uses: alex-page/github-project-automation-plus@v0.8.1 + with: + project: oss-delta-issues + column: Needs Review (ENG) + repo-token: ${{ secrets.PROJECT_BOARD_AUTOMATION_TOKEN }} + - name: Move to Needs Review + if: ${{ github.event.issue.assignee == null || contains(env.devrel_usernames, github.event.issue.assignee.login) }} + uses: alex-page/github-project-automation-plus@v0.8.1 + with: + project: oss-delta-issues + column: Needs Review + repo-token: ${{ secrets.PROJECT_BOARD_AUTOMATION_TOKEN }} diff --git a/.github/workflows/spark_test.yaml b/.github/workflows/spark_test.yaml new file mode 100644 index 00000000000..40476509ce1 --- /dev/null +++ b/.github/workflows/spark_test.yaml @@ -0,0 +1,69 @@ +name: "Delta Spark Tests" +on: [push, pull_request] +jobs: + test: + runs-on: ubuntu-20.04 + strategy: + matrix: + # These Scala versions must match those in the build.sbt + scala: [2.12.17, 2.13.8] + env: + SCALA_VERSION: ${{ matrix.scala }} + steps: + - uses: actions/checkout@v3 + 
- uses: technote-space/get-diff-action@v4 + id: git-diff + with: + PATTERNS: | + ** + !kernel/** + !connectors/** + - name: install java + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: "8" + - name: Cache Scala, SBT + uses: actions/cache@v3 + with: + path: | + ~/.sbt + ~/.ivy2 + ~/.cache/coursier + # Change the key if dependencies are changed. For each key, GitHub Actions will cache the + # the above directories when we use the key for the first time. After that, each run will + # just use the cache. The cache is immutable so we need to use a new key when trying to + # cache new stuff. + key: delta-sbt-cache-spark3.2-scala${{ matrix.scala }} + - name: Install Job dependencies + run: | + sudo apt-get update + sudo apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev python-openssl git + sudo apt install libedit-dev + sudo apt install python3-pip --fix-missing + sudo pip3 install pipenv==2021.5.29 + curl https://pyenv.run | bash + export PATH="~/.pyenv/bin:$PATH" + eval "$(pyenv init -)" + eval "$(pyenv virtualenv-init -)" + pyenv install 3.8.18 + pyenv global system 3.8.18 + pipenv --python 3.8 install + pipenv run pip install pyspark==3.5.0 + pipenv run pip install flake8==3.5.0 pypandoc==1.3.3 + pipenv run pip install importlib_metadata==3.10.0 + pipenv run pip install mypy==0.982 + pipenv run pip install cryptography==37.0.4 + pipenv run pip install twine==4.0.1 + pipenv run pip install wheel==0.33.4 + pipenv run pip install setuptools==41.0.1 + pipenv run pip install pydocstyle==3.0.0 + pipenv run pip install pandas==1.0.5 + pipenv run pip install pyarrow==8.0.0 + pipenv run pip install numpy==1.20.3 + if: steps.git-diff.outputs.diff + - name: Run Scala/Java and Python tests + run: | + pipenv run python run-tests.py --group spark + cd examples/scala && build/sbt "++ $SCALA_VERSION compile" + if: steps.git-diff.outputs.diff diff --git a/.github/workflows/updated_pull_request.yaml b/.github/workflows/updated_pull_request.yaml new file mode 100644 index 00000000000..035f0223836 --- /dev/null +++ b/.github/workflows/updated_pull_request.yaml @@ -0,0 +1,22 @@ +name: Move updated pull requests to Needs Review + +on: + issue_comment: + types: [created] + pull_request_target: + types: [synchronize] + pull_request_review_comment: + types: [created] + +jobs: + automate-updated-pull-requests: + if: ${{ (github.event.issue.pull_request || github.event.pull_request) && + !contains('allisonport-db scottsand-db tdas vkorukanti zsxwing dennyglee MrPowers nkarpov vinijaiswal', github.event.sender.login) && + (github.event.pull_request.state == 'open' || github.event.issue.state == 'open') }} + runs-on: ubuntu-latest + steps: + - uses: alex-page/github-project-automation-plus@2af3cf061aeca8ac6ab40a960eee1968a7f9ce0e # TODO: update to use a version after fixes are merged & released + with: + project: oss-delta-prs + column: Needs Review + repo-token: ${{ secrets.PROJECT_BOARD_AUTOMATION_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..7efa6f0810a --- /dev/null +++ b/.gitignore @@ -0,0 +1,131 @@ +*#*# +*.#* +*.iml +*.ipr +*.iws +*.pyc +*.pyo +*.swp +*~ +.DS_Store +.ammonite +.bloop +.bsp +.cache +.classpath +.ensime +.ensime_cache/ +.ensime_lucene +.generated-mima* +.idea/ +.idea_modules/ +.metals +.project +.pydevproject +.scala_dependencies +.settings +/lib/ +R-unit-tests.log +R/unit-tests.out 
+R/cran-check.out
+R/pkg/vignettes/sparkr-vignettes.html
+R/pkg/tests/fulltests/Rplots.pdf
+build/*.jar
+build/apache-maven*
+build/scala*
+build/zinc*
+cache
+checkpoint
+conf/*.cmd
+conf/*.conf
+conf/*.properties
+conf/*.sh
+conf/*.xml
+conf/java-opts
+dependency-reduced-pom.xml
+derby.log
+dev/create-release/*final
+dev/create-release/*txt
+dev/pr-deps/
+dist/
+docs/_site
+docs/api
+sql/docs
+sql/site
+lib_managed/
+lint-r-report.log
+log/
+logs/
+metals.sbt
+out/
+project/boot/
+project/build/target/
+project/plugins/lib_managed/
+project/plugins/project/build.properties
+project/plugins/src_managed/
+project/plugins/target/
+python/lib/pyspark.zip
+python/deps
+docs/python/_static/
+docs/python/_templates/
+docs/python/_build/
+python/test_coverage/coverage_data
+python/test_coverage/htmlcov
+python/pyspark/python
+reports/
+scalastyle-on-compile.generated.xml
+scalastyle-output.xml
+scalastyle.txt
+spark-*-bin-*.tgz
+spark-tests.log
+src_managed/
+streaming-tests.log
+target/
+unit-tests.log
+work/
+docs/.jekyll-metadata
+
+# For Hive
+TempStatsStore/
+metastore/
+metastore_db/
+sql/hive-thriftserver/test_warehouses
+warehouse/
+spark-warehouse/
+
+# For R session data
+.RData
+.RHistory
+.Rhistory
+*.Rproj
+*.Rproj.*
+
+.Rproj.user
+
+**/src/main/resources/js
+
+# For SBT
+.jvmopts
+sbt-launch-*.jar
+
+# For Python linting
+pep8*.py
+pycodestyle*.py
+
+# For IDE settings
+.vscode
+
+# For Terraform
+**/.terraform/*
+*.tfstate
+*.tfstate.*
+crash.log
+crash.*.log
+*.tfvars
+*.tfvars.json
+override.tf
+override.tf.json
+*_override.tf
+*_override.tf.json
+.terraformrc
+.terraform.rc
diff --git a/.sbtopts b/.sbtopts
new file mode 100644
index 00000000000..2b63e3b2d85
--- /dev/null
+++ b/.sbtopts
@@ -0,0 +1 @@
+-J-Xmx4G
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 00000000000..8e0cbb9a2f1
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,79 @@
+# Delta Lake Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+ +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the Technical Steering Committee defined [here](https://github.com/delta-io/delta/blob/master/CONTRIBUTING.md#governance). All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq + +## Linux Foundation Code of Conduct +Your use is additionally subject to the [Linux Foundation Code of Conduct](https://lfprojects.org/policies/code-of-conduct/) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..8a57e00c8cc --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,75 @@ +We happily welcome contributions to Delta Lake. We use [GitHub Issues](/../../issues/) to track community reported issues and [GitHub Pull Requests ](/../../pulls/) for accepting changes. + +# Governance +Delta Lake is an independent open-source project and not controlled by any single company. To emphasize this we joined the [Delta Lake Project](https://community.linuxfoundation.org/delta-lake/) in 2019, which is a sub-project of the Linux Foundation Projects. Within the project, we make decisions based on [these rules](https://delta.io/pdfs/delta-charter.pdf). + +Delta Lake is supported by a wide set of developers from over 50 organizations across multiple repositories. Since 2019, more than 190 developers have contributed to Delta Lake! The Delta Lake community is growing by leaps and bounds with more than 6000 members in the [Delta Users slack](https://go.delta.io/slack)). + +For more information, please refer to the [founding technical charter](https://delta.io/pdfs/delta-charter.pdf). + +# Communication +- Before starting work on a major feature, please reach out to us via [GitHub](https://github.com/delta-io/delta/issues), [Slack](https://go.delta.io/slack), [email](https://groups.google.com/g/delta-users), etc. 
We will make sure no one else is already working on it and ask you to open a GitHub issue. +- A "major feature" is defined as any change that is > 100 LOC altered (not including tests), or changes any user-facing behavior. +- We will use the GitHub issue to discuss the feature and come to agreement. +- This is to prevent your time being wasted, as well as ours. +- The GitHub review process for major features is also important so that organizations with commit access can come to agreement on design. +- If it is appropriate to write a design document, the document must be hosted either in the GitHub tracking issue, or linked to from the issue and hosted in a world-readable location. Examples of design documents include [sample 1](https://docs.google.com/document/d/16S7xoAmXpSax7W1OWYYHo5nZ71t5NvrQ-F79pZF6yb8), [sample 2](https://docs.google.com/document/d/1MJhmW_H7doGWY2oty-I78vciziPzBy_nzuuB-Wv5XQ8), and [sample 3](https://docs.google.com/document/d/19CU4eJuBXOwW7FC58uSqyCbcLTsgvQ5P1zoPOPgUSpI). +- Specifically, if the goal is to add a new extension, please read the extension policy. +- Small patches and bug fixes don't need prior communication. If you have identified a bug and have ways to solve it, please create an [issue](https://github.com/delta-io/delta/issues) or create a [pull request](https://github.com/delta-io/delta/pulls). +- If you have an example code that explains a use case or a feature, create a pull request to post under [examples](https://github.com/delta-io/delta/tree/master/examples). + + +# Coding style +We generally follow the [Apache Spark Scala Style Guide](https://spark.apache.org/contributing.html). + +# Sign your work +The sign-off is a simple line at the end of the explanation for the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify the below (from developercertificate.org): + +``` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +1 Letterman Drive +Suite D4700 +San Francisco, CA, 94129 + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + +Then you just add a line to every git commit message: + +``` +Signed-off-by: Jane Smith +Use your real name (sorry, no pseudonyms or anonymous contributions.) 
+``` + +If you set your `user.name` and `user.email` git configs, you can sign your commit automatically with `git commit -s`. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000000..947f8a94c14 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,60 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +FROM ubuntu:focal-20221019 + +ENV DEBIAN_FRONTEND noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN true + +RUN apt-get update +RUN apt-get install -y software-properties-common +RUN apt-get install -y curl +RUN apt-get install -y wget +RUN apt-get install -y openjdk-8-jdk +RUN apt-get install -y python3.8 +RUN apt-get install -y python3-pip +RUN apt-get install -y git + +# Upgrade pip. This is needed to use prebuilt wheels for packages cffi (dep of cryptography) and +# cryptography. Otherwise, building wheels for these packages fails. +RUN pip3 install --upgrade pip + +RUN pip3 install pyspark==3.5.0 + +RUN pip3 install mypy==0.982 + +RUN pip3 install pydocstyle==3.0.0 + +RUN pip3 install pandas==1.0.5 + +RUN pip3 install pyarrow==8.0.0 + +RUN pip3 install numpy==1.20.3 + +RUN pip3 install importlib_metadata==3.10.0 + +RUN pip3 install cryptography==37.0.4 + +# We must install cryptography before twine. Else, twine will pull a newer version of +# cryptography that requires a newer version of Rust and may break tests. +RUN pip3 install twine==4.0.1 + +RUN pip3 install wheel==0.33.4 + +RUN pip3 install setuptools==41.0.1 + +# Do not add any non-deterministic changes (e.g., copy from files +# from repo) in this Dockerfile, so that the docker image +# generated from this can be reused across builds. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000000..a04059199fb --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,388 @@ +Copyright (2021) The Delta Lake Project Authors. All rights reserved. + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + + +------------------------------------------------------------------------- +This project includes code derived from the Apache Spark project. +The individual files containing this code carry the original Apache Spark +license, which is reproduced here as well: + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/NOTICE.txt b/NOTICE.txt new file mode 100644 index 00000000000..4e84d594faf --- /dev/null +++ b/NOTICE.txt @@ -0,0 +1,24 @@ +Delta Lake +Copyright (2021) The Delta Lake Project Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +This project includes software licensed by the Apache Software Foundation (Apache 2.0) +from the Apache Spark project (www.github.com/apache/spark) + +---------------------------------------------------------- +Apache Spark +Copyright 2014 and onwards The Apache Software Foundation. + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). diff --git a/PROTOCOL.md b/PROTOCOL.md new file mode 100644 index 00000000000..d1d8f1c677f --- /dev/null +++ b/PROTOCOL.md @@ -0,0 +1,2088 @@ + + +# Delta Transaction Log Protocol + +- [Overview](#overview) +- [Delta Table Specification](#delta-table-specification) + - [File Types](#file-types) + - [Data Files](#data-files) + - [Deletion Vector Files](#deletion-vector-files) + - [Change Data Files](#change-data-files) + - [Delta Log Entries](#delta-log-entries) + - [Checkpoints](#checkpoints) + - [Sidecar Files](#sidecar-files) + - [Log Compaction Files](#log-compaction-files) + - [Last Checkpoint File](#last-checkpoint-file) + - [Actions](#actions) + - [Change Metadata](#change-metadata) + - [Format Specification](#format-specification) + - [Add File and Remove File](#add-file-and-remove-file) + - [Add CDC File](#add-cdc-file) + - [Writer Requirements for AddCDCFile](#writer-requirements-for-addcdcfile) + - [Reader Requirements for AddCDCFile](#reader-requirements-for-addcdcfile) + - [Transaction Identifiers](#transaction-identifiers) + - [Protocol Evolution](#protocol-evolution) + - [Commit Provenance Information](#commit-provenance-information) + - [Domain Metadata](#domain-metadata) + - [Reader Requirements for Domain Metadata](#reader-requirements-for-domain-metadata) + - [Writer Requirements for Domain Metadata](#writer-requirements-for-domain-metadata) + - [Sidecar File Information](#sidecar-file-information) + - [Checkpoint Metadata](#checkpoint-metadata) +- [Action Reconciliation](#action-reconciliation) +- [Table Features](#table-features) + - [Table Features for New and Existing Tables](#table-features-for-new-and-existing-tables) + - [Supported Features](#supported-features) + - [Active Features](#active-features) +- [Column Mapping](#column-mapping) + - [Writer Requirements for Column Mapping](#writer-requirements-for-column-mapping) + - [Reader Requirements for Column Mapping](#reader-requirements-for-column-mapping) +- [Deletion Vectors](#deletion-vectors) + - [Deletion Vector Descriptor Schema](#deletion-vector-descriptor-schema) + - [Derived Fields](#derived-fields) + - [JSON Example 1 — On Disk with Relative Path (with Random Prefix)](#json-example-1--on-disk-with-relative-path-with-random-prefix) + - [JSON Example 2 — On Disk with Absolute Path](#json-example-2--on-disk-with-absolute-path) + - [JSON Example 3 — Inline](#json-example-3--inline) + - [Reader Requirements for Deletion Vectors](#reader-requirements-for-deletion-vectors) + - [Writer Requirement for Deletion Vectors](#writer-requirement-for-deletion-vectors) +- [Iceberg Compatibility V1](#iceberg-compatibility-v1) + - [Writer Requirements for IcebergCompatV1](#writer-requirements-for-icebergcompatv1) +- [Iceberg Compatibility V2](#iceberg-compatibility-v2) + - [Writer Requirement for IcebergCompatV2](#iceberg-compatibility-v2) +- [Timestamp without timezone (TimestampNtz)](#timestamp-without-timezone-timestampntz) +- [V2 Checkpoint Table Feature](#v2-checkpoint-table-feature) +- [Row Tracking](#row-tracking) + - [Row IDs](#row-ids) + - [Row Commit Versions](#row-commit-versions) + - [Reader Requirements for Row 
Tracking](#reader-requirements-for-row-tracking) + - [Writer Requirements for Row Tracking](#writer-requirements-for-row-tracking) +- [Clustered Table](#clustered-table) + - [Writer Requirements for Clustered Table](#writer-requirements-for-clustered-table) +- [Requirements for Writers](#requirements-for-writers) + - [Creation of New Log Entries](#creation-of-new-log-entries) + - [Consistency Between Table Metadata and Data Files](#consistency-between-table-metadata-and-data-files) + - [Delta Log Entries](#delta-log-entries-1) + - [Checkpoints](#checkpoints-1) + - [Checkpoint Specs](#checkpoint-specs) + - [V2 Spec](#v2-spec) + - [V1 Spec](#v1-spec) + - [Checkpoint Naming Scheme](#checkpoint-naming-scheme) + - [UUID-named checkpoint](#uuid-named-checkpoint) + - [Classic checkpoint](#classic-checkpoint) + - [Multi-part checkpoint](#multi-part-checkpoint) + - [Problems with multi-part checkpoints](#problems-with-multi-part-checkpoints) + - [Handling Backward compatibility while moving to UUID-named v2 Checkpoints](#handling-backward-compatibility-while-moving-to-uuid-named-v2-checkpoints) + - [Allowed combinations for `checkpoint spec` <-> `checkpoint file naming`](#allowed-combinations-for-checkpoint-spec---checkpoint-file-naming) + - [Metadata Cleanup](#metadata-cleanup) + - [Data Files](#data-files-1) + - [Append-only Tables](#append-only-tables) + - [Column Invariants](#column-invariants) + - [CHECK Constraints](#check-constraints) + - [Generated Columns](#generated-columns) + - [Default Columns](#default-columns) + - [Identity Columns](#identity-columns) + - [Writer Version Requirements](#writer-version-requirements) +- [Requirements for Readers](#requirements-for-readers) + - [Reader Version Requirements](#reader-version-requirements) +- [Appendix](#appendix) + - [Valid Feature Names in Table Features](#valid-feature-names-in-table-features) + - [Deletion Vector Format](#deletion-vector-format) + - [Deletion Vector File Storage Format](#deletion-vector-file-storage-format) + - [Per-file Statistics](#per-file-statistics) + - [Partition Value Serialization](#partition-value-serialization) + - [Schema Serialization Format](#schema-serialization-format) + - [Primitive Types](#primitive-types) + - [Struct Type](#struct-type) + - [Struct Field](#struct-field) + - [Array Type](#array-type) + - [Map Type](#map-type) + - [Column Metadata](#column-metadata) + - [Example](#example) + - [Checkpoint Schema](#checkpoint-schema) + - [Last Checkpoint File Schema](#last-checkpoint-file-schema) + - [JSON checksum](#json-checksum) + - [How to URL encode keys and string values](#how-to-url-encode-keys-and-string-values) + + + +# Overview +This document is a specification for the Delta Transaction Protocol, which brings [ACID](https://en.wikipedia.org/wiki/ACID) properties to large collections of data, stored as files, in a distributed file system or object store. The protocol was designed with the following goals in mind: + +- **Serializable ACID Writes** - multiple writers can concurrently modify a Delta table while maintaining ACID semantics. +- **Snapshot Isolation for Reads** - readers can read a consistent snapshot of a Delta table, even in the face of concurrent writes. +- **Scalability to billions of partitions or files** - queries against a Delta table can be planned on a single machine or in parallel. +- **Self describing** - all metadata for a Delta table is stored alongside the data. 
This design eliminates the need to maintain a separate metastore just to read the data and also allows static tables to be copied or moved using standard filesystem tools.
+- **Support for incremental processing** - readers can tail the Delta log to determine what data has been added in a given period of time, allowing for efficient streaming.
+
+Delta's transactions are implemented using multi-version concurrency control (MVCC).
+As a table changes, Delta's MVCC algorithm keeps multiple copies of the data around rather than immediately replacing files that contain records that are being updated or removed.
+
+Readers of the table ensure that they only see one consistent _snapshot_ of a table at a time by using the _transaction log_ to selectively choose which _data files_ to process.
+
+Writers modify the table in two phases:
+First, they optimistically write out new data files or updated copies of existing ones.
+Then, they _commit_, creating the latest _atomic version_ of the table by adding a new entry to the log.
+In this log entry they record which data files to logically add and remove, along with changes to other metadata about the table.
+
+Data files that are no longer present in the latest version of the table can be lazily deleted by the vacuum command after a user-specified retention period (default 7 days).
+
+# Delta Table Specification
+A table has a single serial history of atomic versions, which are named using contiguous, monotonically-increasing integers.
+The state of a table at a given version is called a _snapshot_ and is defined by the following properties:
+ - **Delta log protocol** consists of two **protocol versions**, and if applicable, corresponding **table features**, that are required to correctly read or write the table
+   - **Reader features** only exist when Reader Version is 3
+   - **Writer features** only exist when Writer Version is 7
+ - **Metadata** of the table (e.g., the schema, a unique identifier, partition columns, and other configuration properties)
+ - **Set of files** present in the table, along with metadata about those files
+ - **Set of tombstones** for files that were recently deleted
+ - **Set of application-specific transactions** that have been successfully committed to the table
+
+## File Types
+A Delta table is stored within a directory and is composed of the following different types of files.
+
+Here is an example of a Delta table with three entries in the commit log, stored in the directory `mytable`.
+```
+/mytable/_delta_log/00000000000000000000.json
+/mytable/_delta_log/00000000000000000001.json
+/mytable/_delta_log/00000000000000000003.json
+/mytable/_delta_log/00000000000000000003.checkpoint.parquet
+/mytable/_delta_log/_last_checkpoint
+/mytable/_change_data/cdc-00000-924d9ac7-21a9-4121-b067-a0a6517aa8ed.c000.snappy.parquet
+/mytable/part-00000-3935a07c-416b-4344-ad97-2a38342ee2fc.c000.snappy.parquet
+/mytable/deletion_vector-0c6cbaaf-5e04-4c9d-8959-1088814f58ef.bin
+```
+
+### Data Files
+Data files can be stored in the root directory of the table or in any non-hidden subdirectory (i.e., one whose name does not start with an `_`).
+By default, the reference implementation stores data files in directories that are named based on the partition values for data in that file (i.e. `part1=value1/part2=value2/...`).
+This directory format is only used to follow existing conventions and is not required by the protocol.
+Actual partition values for a file must be read from the transaction log.
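+
+As a non-normative illustration of the last point, the Python sketch below uses a hypothetical `add` action to show a reader taking partition values from the log entry rather than parsing them out of the file path.
+
+```python
+# Non-normative illustration: partition values come from the `add` action in
+# the log, not from the directory layout. The JSON below is a made-up example.
+import json
+
+entry = json.loads(
+    '{"add": {"path": "date=2021-01-01/part-00000.c000.snappy.parquet", '
+    '"partitionValues": {"date": "2021-01-01"}, "size": 1024, '
+    '"modificationTime": 1612345678000, "dataChange": true}}'
+)
+
+add = entry["add"]
+# Trust the recorded values, even though this particular path also encodes them.
+print(add["partitionValues"])  # {'date': '2021-01-01'}
+```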
+ +### Deletion Vector Files +Deletion Vector (DV) files are stored in the root directory of the table alongside the data files. A DV file contains one or more serialised DVs, each describing the set of *invalidated* (or "soft deleted") rows for a particular data file it is associated with. +For data with partition values, DV files are *not* kept in the same directory hierarchy as data files, as each one can contain DVs for files from multiple partitions. +DV files store DVs in a [binary format](#deletion-vector-format). + +### Change Data Files +Change data files are stored in a directory at the root of the table named `_change_data`, and represent the changes for the table version they are in. For data with partition values, it is recommended that the change data files are stored within the `_change_data` directory in their respective partitions (i.e. `_change_data/part1=value1/...`). Writers can _optionally_ produce these change data files as a consequence of operations that change underlying data, like `UPDATE`, `DELETE`, and `MERGE` operations to a Delta Lake table. If an operation only adds new data or removes existing data without updating any existing rows, a writer can write only data files and commit them in `add` or `remove` actions without duplicating the data into change data files. When available, change data readers should use the change data files instead of computing changes from the underlying data files. + +In addition to the data columns, change data files contain additional columns that identify the type of change event: + +Field Name | Data Type | Description +-|-|- +_change_type|`String`| `insert`, `update_preimage`, `update_postimage`, `delete` __(1)__ + +__(1)__ `preimage` is the value before the update, `postimage` is the value after the update. + +### Delta Log Entries +Delta files are stored as JSON in a directory at the root of the table named `_delta_log`, and together with checkpoints make up the log of all changes that have occurred to a table. + +Delta files are the unit of atomicity for a table, and are named using the next available version number, zero-padded to 20 digits. + +For example: + +``` +./_delta_log/00000000000000000000.json +``` +Delta files use new-line delimited JSON format, where every action is stored as a single line JSON document. +A delta file, `n.json`, contains an atomic set of [_actions_](#Actions) that should be applied to the previous table state, `n-1.json`, in order to construct the `n`th snapshot of the table. +An action changes one aspect of the table's state, for example, adding or removing a file. + +### Checkpoints +Checkpoints are also stored in the `_delta_log` directory, and can be created at any time, for any committed version of the table. +For performance reasons, readers should prefer to use the newest complete checkpoint possible. +For time travel, the checkpoint used must not be newer than the time travel version. + +A checkpoint contains the complete replay of all actions, up to and including the checkpointed table version, with invalid actions removed. +Invalid actions are those that have been canceled out by subsequent ones (for example removing a file that has been added), using the [rules for reconciliation](#Action-Reconciliation). +In addition to the above, a checkpoint also contains the [_remove tombstones_](#add-file-and-remove-file) until they have expired.
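+Because a checkpoint already reflects everything up to its version, a reader only needs to replay the delta files that come after it. A minimal, hedged sketch (assuming the checkpoint contents themselves have already been loaded) of choosing which commit files still need replaying:
+
+```python
+def commits_to_replay(checkpoint_version, target_version):
+    """Return the zero-padded delta file names to apply on top of a checkpoint."""
+    return [
+        f"_delta_log/{v:020d}.json"
+        for v in range(checkpoint_version + 1, target_version + 1)
+    ]
+
+# A checkpoint at version 10 plus commits 11 and 12 reconstructs snapshot 12.
+print(commits_to_replay(10, 12))
+```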
+Checkpoints allow readers to short-cut the cost of reading the log up to a given point in order to reconstruct a snapshot, and they also allow [Metadata cleanup](#metadata-cleanup) to delete expired JSON Delta log entries. + +Readers SHOULD NOT make any assumptions about the existence or frequency of checkpoints, with one exception: +[Metadata cleanup](#metadata-cleanup) MUST provide a checkpoint for the oldest kept table version, to cover all deleted [Delta log entries](#delta-log-entries). +That said, writers are encouraged to checkpoint reasonably frequently, so that readers do not pay excessive log replay costs due to reading large numbers of delta files. + +The checkpoint file name is based on the version of the table that the checkpoint contains. + +Delta supports three kinds of checkpoints: + +1. UUID-named Checkpoints: These follow the [V2 spec](#v2-spec), which uses the following file name: `n.checkpoint.u.{json/parquet}`, where `u` is a UUID and `n` is the +snapshot version that this checkpoint represents. The UUID-named V2 Checkpoint may be in json or parquet format, and references zero or more checkpoint sidecars +in the `_delta_log/_sidecars` directory. A checkpoint sidecar is a uniquely-named parquet file: `{unique}.parquet` where `unique` is some unique +string such as a UUID. + +For example: + +``` +00000000000000000010.checkpoint.80a083e8-7026-4e79-81be-64bd76c43a11.json +_sidecars/3a0d65cd-4056-49b8-937b-95f9e3ee90e5.parquet +_sidecars/016ae953-37a9-438e-8683-9a9a4a79a395.parquet +_sidecars/7d17ac10-5cc3-401b-bd1a-9c82dd2ea032.parquet +``` + +2. A [classic checkpoint](#classic-checkpoint) for version `n` of the table consists of a file named `n.checkpoint.parquet`. +These could follow either the [V1 spec](#v1-spec) or the [V2 spec](#v2-spec). +For example: + +``` +00000000000000000010.checkpoint.parquet +``` + + +3. A [multi-part checkpoint](#multi-part-checkpoint) for version `n` consists of `p` "part" files (`p > 1`), where +part `o` of `p` is named `n.checkpoint.o.p.parquet`. These are always [V1 checkpoints](#v1-spec). +For example: + +``` +00000000000000000010.checkpoint.0000000001.0000000003.parquet +00000000000000000010.checkpoint.0000000002.0000000003.parquet +00000000000000000010.checkpoint.0000000003.0000000003.parquet +``` + +A writer can choose to write checkpoints with the following constraints: +- Writers are always allowed to create a [classic checkpoint](#classic-checkpoint) following the [v1 spec](#v1-spec). +- Writers are forbidden to create [multi-part checkpoints](#multi-part-checkpoint) if [v2 checkpoints](#v2-checkpoint-table-feature) are enabled. +- Writers are allowed to create v2 spec checkpoints (either [classic](#classic-checkpoint) or [uuid-named](#uuid-named-checkpoint)) if the [v2 checkpoint table feature](#v2-checkpoint-table-feature) is enabled. + +Multi-part checkpoints are [deprecated](#problems-with-multi-part-checkpoints), and writers should avoid creating them. Use uuid-named [V2 spec](#v2-spec) checkpoints instead. + +Multiple checkpoints could exist for the same table version, e.g. if two clients race to create checkpoints at the same time, but with different formats. +In such cases, a client can choose which checkpoint to use. + +Because a multi-part checkpoint cannot be created atomically (e.g. it is vulnerable to slow and/or failed writes), readers must ignore multi-part checkpoints with missing parts. + +Checkpoints for a given version must only be created after the associated delta file has been successfully written.
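+The three naming schemes above are distinguishable purely from the file name. A minimal, hedged sketch (a regular-expression classifier written for illustration, not a normative part of the protocol) of telling them apart:
+
+```python
+import re
+
+CLASSIC = re.compile(r"^(\d{20})\.checkpoint\.parquet$")
+MULTI_PART = re.compile(r"^(\d{20})\.checkpoint\.(\d{10})\.(\d{10})\.parquet$")
+UUID_V2 = re.compile(r"^(\d{20})\.checkpoint\.([0-9a-fA-F-]{36})\.(json|parquet)$")
+
+def classify_checkpoint(file_name):
+    """Return which checkpoint naming scheme a _delta_log file name follows, if any."""
+    if CLASSIC.match(file_name):
+        return "classic"
+    if MULTI_PART.match(file_name):
+        return "multi-part"
+    if UUID_V2.match(file_name):
+        return "uuid-named (V2 spec)"
+    return None
+
+print(classify_checkpoint("00000000000000000010.checkpoint.80a083e8-7026-4e79-81be-64bd76c43a11.json"))
+```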
+ +#### Sidecar Files + +A sidecar file contains file actions. These files are in parquet format and they must have unique names. +These are then [linked](#sidecar-file-information) to checkpoints. Refer to the [V2 checkpoint spec](#v2-spec) +for more detail. As of now, sidecar files can only contain [add file and remove file](#Add-File-and-Remove-File) entries. The add and remove file actions are stored as individual struct-field columns in parquet. + +These files reside in the `_delta_log/_sidecars` directory. + +### Log Compaction Files + +Log compaction files reside in the `_delta_log` directory. A log compaction file from a start version `x` to an end version `y` will have the following name: +`<x>.<y>.compact.json`. This contains the aggregated +actions for commit range `[x, y]`. Similar to commits, each row in the log +compaction file represents an [action](#actions). +The log compaction file for a given range is created by doing [Action Reconciliation](#action-reconciliation) +of the corresponding commits. +Instead of reading the individual commit files in range `[x, y]`, an implementation could choose to read +the log compaction file `<x>.<y>.compact.json` to speed up the snapshot construction. + +Example: +Suppose we have `4.json` as: +``` +{"commitInfo":{...}} +{"add":{"path":"f2",...}} +{"remove":{"path":"f1",...}} +``` +`5.json` as: +``` +{"commitInfo":{...}} +{"add":{"path":"f3",...}} +{"add":{"path":"f4",...}} +{"txn":{"appId":"3ae45b72-24e1-865a-a211-34987ae02f2a","version":4389}} +``` +`6.json` as: +``` +{"commitInfo":{...}} +{"remove":{"path":"f3",...}} +{"txn":{"appId":"3ae45b72-24e1-865a-a211-34987ae02f2a","version":4390}} +``` + +Then `4.6.compact.json` will have the following content: +``` +{"add":{"path":"f2",...}} +{"add":{"path":"f4",...}} +{"remove":{"path":"f1",...}} +{"remove":{"path":"f3",...}} +{"txn":{"appId":"3ae45b72-24e1-865a-a211-34987ae02f2a","version":4390}} +``` + +Writers: +- Can optionally produce log compactions for any given commit range + +Readers: +- Can optionally consume log compactions, if available +- The compaction replaces the corresponding commits during action reconciliation + +### Last Checkpoint File +The Delta transaction log will often contain many (e.g. 10,000+) files. +Listing such a large directory can be prohibitively expensive. +The last checkpoint file can help reduce the cost of constructing the latest snapshot of the table by providing a pointer to near the end of the log. + +Rather than list the entire directory, readers can locate a recent checkpoint by looking at the `_delta_log/_last_checkpoint` file. +Due to the zero-padded encoding of the files in the log, the version id of this recent checkpoint can be used on storage systems that support lexicographically-sorted, paginated directory listing to enumerate any delta files or newer checkpoints that comprise more recent versions of the table. + +## Actions +Actions modify the state of the table and they are stored both in delta files and in checkpoints. +This section lists the space of available actions as well as their schema. + +### Change Metadata +The `metaData` action changes the current metadata of the table. +The first version of a table must contain a `metaData` action. +Subsequent `metaData` actions completely overwrite the current metadata of the table. + +There can be at most one metadata action in a given version of the table. + +Every metadata action **must** include required fields at a minimum.
+ +The schema of the `metaData` action is as follows: + +Field Name | Data Type | Description | optional/required +-|-|-|- +id|`GUID`|Unique identifier for this table | required +name|`String`| User-provided identifier for this table | optional +description|`String`| User-provided description for this table | optional +format|[Format Struct](#Format-Specification)| Specification of the encoding for the files stored in the table | required +schemaString|[Schema Struct](#Schema-Serialization-Format)| Schema of the table | required +partitionColumns|`Array[String]`| An array containing the names of columns by which the data should be partitioned | required +createdTime|`Option[Long]`| The time when this metadata action is created, in milliseconds since the Unix epoch | optional +configuration|`Map[String, String]`| A map containing configuration options for the metadata action | required + +#### Format Specification +Field Name | Data Type | Description +-|-|- +provider|`String`|Name of the encoding for files in this table +options|`Map[String, String]`|A map containing configuration options for the format + +In the reference implementation, the provider field is used to instantiate a Spark SQL [`FileFormat`](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala). As of Spark 2.4.3 there is built-in `FileFormat` support for `parquet`, `csv`, `orc`, `json`, and `text`. + +As of Delta Lake 0.3.0, user-facing APIs only allow the creation of tables where `format = 'parquet'` and `options = {}`. Support for reading other formats is present both for legacy reasons and to enable possible support for other formats in the future (See [#87](https://github.com/delta-io/delta/issues/87)). + +The following is an example `metaData` action: +```json +{ + "metaData":{ + "id":"af23c9d7-fff1-4a5a-a2c8-55c59bd782aa", + "format":{"provider":"parquet","options":{}}, + "schemaString":"...", + "partitionColumns":[], + "configuration":{ + "appendOnly": "true" + } + } +} +``` + + + +### Add File and Remove File +The `add` and `remove` actions are used to modify the data in a table by adding or removing individual _logical files_ respectively. + +Every _logical file_ of the table is represented by a path to a data file, combined with an optional Deletion Vector (DV) that indicates which rows of the data file are no longer in the table. Deletion Vectors are an optional feature, see their [reader requirements](#deletion-vectors) for details. + +When an `add` action is encountered for a logical file that is already present in the table, statistics and other information from the latest version should replace that from any previous version. +The primary key for the entry of a logical file in the set of files is a tuple of the data file's `path` and a unique id describing the DV. If no DV is part of this logical file, then its primary key is `(path, NULL)` instead. + +The `remove` action includes a timestamp that indicates when the removal occurred. +Physical deletion of physical files can happen lazily after some user-specified expiration time threshold. +This delay allows concurrent readers to continue to execute against a stale snapshot of the data. +A `remove` action should remain in the state of the table as a _tombstone_ until it has expired. +A tombstone expires when *current time* (according to the node performing the cleanup) exceeds the expiration threshold added to the `remove` action timestamp. 
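+As a minimal, hedged sketch of that expiry rule (the seven-day retention below is the default mentioned earlier, not a protocol requirement):
+
+```python
+import time
+
+DEFAULT_RETENTION_MS = 7 * 24 * 60 * 60 * 1000  # assumed default retention period
+
+def tombstone_expired(remove_action, retention_ms=DEFAULT_RETENTION_MS, now_ms=None):
+    """A `remove` tombstone expires once current time exceeds its timestamp plus the retention."""
+    if now_ms is None:
+        now_ms = int(time.time() * 1000)
+    deletion_ts = remove_action.get("deletionTimestamp", 0)
+    return now_ms > deletion_ts + retention_ms
+
+print(tombstone_expired({"path": "part-00001.snappy.parquet", "deletionTimestamp": 1515488792485}))
+```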
+ +In the following statements, `dvId` can refer to either the unique id of a specific Deletion Vector (`deletionVector.uniqueId`) or to `NULL`, indicating that no rows are invalidated. Since actions within a given Delta commit are not guaranteed to be applied in order, a **valid** version is restricted to contain at most one file action *of the same type* (i.e. `add`/`remove`) for any one combination of `path` and `dvId`. Moreover, for simplicity it is required that there is at most one file action of the same type for any `path` (regardless of `dvId`). +That means specifically that for any commit… + + - it is **legal** for the same `path` to occur in an `add` action and a `remove` action, but with two different `dvId`s. + - it is **legal** for the same `path` to be added and/or removed and also occur in a `cdc` action. + - it is **illegal** for the same `path` to occur twice with different `dvId`s within each set of `add` or `remove` actions. + +The `dataChange` flag on either an `add` or a `remove` can be set to `false` to indicate that an action, when combined with other actions in the same atomic version, only rearranges existing data or adds new statistics. +For example, streaming queries that are tailing the transaction log can use this flag to skip actions that would not affect the final results. + +The schema of the `add` action is as follows: + +Field Name | Data Type | Description | optional/required +-|-|-|- +path| String | A relative path to a data file from the root of the table or an absolute path to a file that should be added to the table. The path is a URI as specified by [RFC 2396 URI Generic Syntax](https://www.ietf.org/rfc/rfc2396.txt), which needs to be decoded to get the data file path. | required +partitionValues| Map[String, String] | A map from partition column to value for this logical file. See also [Partition Value Serialization](#Partition-Value-Serialization) | required +size| Long | The size of this data file in bytes | required +modificationTime | Long | The time this logical file was created, as milliseconds since the epoch | required +dataChange | Boolean | When `false` the logical file must already be present in the table or the records in the added file must be contained in one or more `remove` actions in the same version | required +stats | [Statistics Struct](#Per-file-Statistics) | Contains statistics (e.g., count, min/max values for columns) about the data in this logical file | optional +tags | Map[String, String] | Map containing metadata about this logical file | optional +deletionVector | [DeletionVectorDescriptor Struct](#Deletion-Vectors) | Either null (or absent in JSON) when no DV is associated with this data file, or a struct (described below) that contains necessary information about the DV that is part of this logical file. | optional +baseRowId | Long | Default generated Row ID of the first row in the file. The default generated Row IDs of the other rows in the file can be reconstructed by adding the physical index of the row within the file to the base Row ID. See also [Row IDs](#row-ids) | optional +defaultRowCommitVersion | Long | First commit version in which an `add` action with the same `path` was committed to the table. | optional +clusteringProvider | String | The name of the clustering implementation.
See also [Clustered Table](#clustered-table)| optional + +The following is an example `add` action for a partitioned table: +```json +{ + "add": { + "path": "date=2017-12-10/part-000...c000.gz.parquet", + "partitionValues": {"date": "2017-12-10"}, + "size": 841454, + "modificationTime": 1512909768000, + "dataChange": true, + "baseRowId": 4071, + "defaultRowCommitVersion": 41, + "stats": "{\"numRecords\":1,\"minValues\":{\"val..." + } +} +``` + +The following is an example `add` action for a clustered table: +```json +{ + "add": { + "path": "date=2017-12-10/part-000...c000.gz.parquet", + "partitionValues": {}, + "size": 841454, + "modificationTime": 1512909768000, + "dataChange": true, + "baseRowId": 4071, + "defaultRowCommitVersion": 41, + "clusteringProvider": "liquid", + "stats": "{\"numRecords\":1,\"minValues\":{\"val..." + } +} +``` + +The schema of the `remove` action is as follows: + +Field Name | Data Type | Description | optional/required +-|-|-|- +path| String | A relative path to a file from the root of the table or an absolute path to a file that should be removed from the table. The path is a URI as specified by [RFC 2396 URI Generic Syntax](https://www.ietf.org/rfc/rfc2396.txt), which needs to be decoded to get the data file path. | required +deletionTimestamp | Option[Long] | The time the deletion occurred, represented as milliseconds since the epoch | optional +dataChange | Boolean | When `false` the records in the removed file must be contained in one or more `add` file actions in the same version | required +extendedFileMetadata | Boolean | When `true` the fields `partitionValues`, `size`, and `tags` are present | optional +partitionValues| Map[String, String] | A map from partition column to value for this file. See also [Partition Value Serialization](#Partition-Value-Serialization) | optional +size| Long | The size of this data file in bytes | optional +stats | [Statistics Struct](#Per-file-Statistics) | Contains statistics (e.g., count, min/max values for columns) about the data in this logical file | optional +tags | Map[String, String] | Map containing metadata about this file | optional +deletionVector | [DeletionVectorDescriptor Struct](#Deletion-Vectors) | Either null (or absent in JSON) when no DV is associated with this data file, or a struct (described below) that contains necessary information about the DV that is part of this logical file. | optional +baseRowId | Long | Default generated Row ID of the first row in the file. The default generated Row IDs of the other rows in the file can be reconstructed by adding the physical index of the row within the file to the base Row ID. See also [Row IDs](#row-ids) | optional +defaultRowCommitVersion | Long | First commit version in which an `add` action with the same `path` was committed to the table | optional + +The following is an example `remove` action. +```json +{ + "remove": { + "path": "part-00001-9…..snappy.parquet", + "deletionTimestamp": 1515488792485, + "baseRowId": 4071, + "defaultRowCommitVersion": 41, + "dataChange": true + } +} +``` + +### Add CDC File +The `cdc` action is used to add a [file](#change-data-files) containing only the data that was changed as part of the transaction. When change data readers encounter a `cdc` action in a particular Delta table version, they must read the changes made in that version exclusively using the `cdc` files. If a version has no `cdc` action, then the data in `add` and `remove` actions are read as inserted and deleted rows, respectively. 
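+A minimal, hedged sketch of that fallback rule (actions represented as plain dictionaries; this is illustrative, not normative):
+
+```python
+def change_files_for_version(actions):
+    """If a commit has `cdc` actions, change-data readers use only those; otherwise
+    `add` and `remove` actions are treated as inserted and deleted rows."""
+    cdc = [a["cdc"] for a in actions if "cdc" in a]
+    if cdc:
+        return {"cdc": cdc, "add": [], "remove": []}
+    return {
+        "cdc": [],
+        "add": [a["add"] for a in actions if "add" in a],
+        "remove": [a["remove"] for a in actions if "remove" in a],
+    }
+```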
+ +The schema of the `cdc` action is as follows: + +Field Name | Data Type | Description +-|-|- +path| String | A relative path to a change data file from the root of the table or an absolute path to a change data file that should be added to the table. The path is a URI as specified by [RFC 2396 URI Generic Syntax](https://www.ietf.org/rfc/rfc2396.txt), which needs to be decoded to get the file path. +partitionValues| Map[String, String] | A map from partition column to value for this file. See also [Partition Value Serialization](#Partition-Value-Serialization) +size| Long | The size of this file in bytes +dataChange | Boolean | Should always be set to `false` for `cdc` actions because they _do not_ change the underlying data of the table +tags | Map[String, String] | Map containing metadata about this file + +The following is an example of a `cdc` action: + +```json +{ + "cdc": { + "path": "_change_data/cdc-00001-c…..snappy.parquet", + "partitionValues": {}, + "size": 1213, + "dataChange": false + } +} +``` + +#### Writer Requirements for AddCDCFile + +For [Writer Versions 4 up to 6](#Writer-Version-Requirements), all writers must respect the `delta.enableChangeDataFeed` configuration flag in the metadata of the table. When `delta.enableChangeDataFeed` is `true`, writers must produce the relevant `AddCDCFile`'s for any operation that changes data, as specified in [Change Data Files](#change-data-files). + +For Writer Version 7, all writers must respect the `delta.enableChangeDataFeed` configuration flag in the metadata of the table only if the feature `changeDataFeed` exists in the table `protocol`'s `writerFeatures`. + +#### Reader Requirements for AddCDCFile + +When available, change data readers should use the `cdc` actions in a given table version instead of computing changes from the underlying data files referenced by the `add` and `remove` actions. +Specifically, to read the row-level changes made in a version, the following strategy should be used: +1. If there are `cdc` actions in this version, then read only those to get the row-level changes, and skip the remaining `add` and `remove` actions in this version. +2. Otherwise, if there are no `cdc` actions in this version, read and treat all the rows in the `add` and `remove` actions as inserted and deleted rows, respectively. +3. Change data readers should return the following extra columns: + + Field Name | Data Type | Description + -|-|- + _commit_version|`Long`| The table version containing the change. This can be derived from the name of the Delta log file that contains actions. + _commit_timestamp|`Timestamp`| The timestamp at which the commit was created. This can be derived from the file modification time of the Delta log file that contains actions. + +##### Note for non-change data readers + +In a table with Change Data Feed enabled, the data Parquet files referenced by `add` and `remove` actions are allowed to contain an extra column `_change_type`. This column is not present in the table's schema and will consistently have a `null` value. When accessing these files, readers should disregard this column and only process columns defined within the table's schema. + +### Transaction Identifiers +Incremental processing systems (e.g., streaming systems) that track progress using their own application-specific versions need to record what progress has been made, in order to avoid duplicating data in the face of failures and retries during a write.
+Transaction identifiers allow this information to be recorded atomically in the transaction log of a delta table along with the other actions that modify the contents of the table. + +Transaction identifiers are stored in the form of `appId` `version` pairs, where `appId` is a unique identifier for the process that is modifying the table and `version` is an indication of how much progress has been made by that application. +The atomic recording of this information along with modifications to the table enables these external systems to make their writes into a Delta table _idempotent_. + +For example, the [Delta Sink for Apache Spark's Structured Streaming](https://github.com/delta-io/delta/blob/master/core/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSink.scala) ensures exactly-once semantics when writing a stream into a table using the following process: + 1. Record in a write-ahead log the data that will be written, along with a monotonically increasing identifier for this batch. + 2. Check the current version of the transaction with `appId = streamId` in the target table. If this value is greater than or equal to the batch being written, then this data has already been added to the table and processing can skip to the next batch. + 3. Write the data optimistically into the table. + 4. Attempt to commit the transaction containing both the addition of the data written out and an updated `appId` `version` pair. + +The semantics of the application-specific `version` are left up to the external system. +Delta only ensures that the latest `version` for a given `appId` is available in the table snapshot. +The Delta transaction protocol does not, for example, assume monotonicity of the `version` and it would be valid for the `version` to decrease, possibly representing a "rollback" of an earlier transaction. + +The schema of the `txn` action is as follows: + +Field Name | Data Type | Description | optional/required +-|-|-|- +appId | String | A unique identifier for the application performing the transaction | required +version | Long | An application-specific numeric identifier for this transaction | required +lastUpdated | Option[Long] | The time when this transaction action is created, in milliseconds since the Unix epoch | optional + +The following is an example `txn` action: +```json +{ + "txn": { + "appId":"3ba13872-2d47-4e17-86a0-21afd2a22395", + "version":364475 + } +} +``` + +### Protocol Evolution +The `protocol` action is used to increase the version of the Delta protocol that is required to read or write a given table. +Protocol versioning allows a newer client to exclude older readers and/or writers that are missing features required to correctly interpret the transaction log. +The _protocol version_ will be increased whenever non-forward-compatible changes are made to this specification. +In the case where a client is running an invalid protocol version, an error should be thrown instructing the user to upgrade to a newer version of their Delta client library. + +Since breaking changes must be accompanied by an increase in the protocol version recorded in a table or by the addition of a table feature, clients can assume that unrecognized actions, fields, and/or metadata domains are never required in order to correctly interpret the transaction log. Clients must ignore such unrecognized fields, and should not produce an error when reading a table that contains unrecognized fields.
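+A minimal, hedged sketch of that tolerance rule (the set of recognized action types here is illustrative, not exhaustive):
+
+```python
+import json
+
+KNOWN_ACTIONS = {"metaData", "protocol", "add", "remove", "txn", "cdc", "commitInfo", "domainMetadata"}
+
+def parse_commit(lines):
+    """Parse a commit file's JSON lines, keeping known action types and silently skipping the rest."""
+    recognized = []
+    for line in lines:
+        action = json.loads(line)
+        kind = next(iter(action))
+        if kind in KNOWN_ACTIONS:
+            recognized.append(action)
+        # Unrecognized action types (and unrecognized fields inside them) must not raise an error.
+    return recognized
+```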
+ +Reader Version 3 and Writer Version 7 add two lists of table features to the protocol action. The capability for readers and writers to operate on such a table is not only dependent on their supported protocol versions, but also on whether they support all features listed in `readerFeatures` and `writerFeatures`. See [Table Features](#table-features) section for more information. + +The schema of the `protocol` action is as follows: + +Field Name | Data Type | Description | optional/required +-|-|-|- +minReaderVersion | Int | The minimum version of the Delta read protocol that a client must implement in order to correctly *read* this table | required +minWriterVersion | Int | The minimum version of the Delta write protocol that a client must implement in order to correctly *write* this table | required +readerFeatures | Array[String] | A collection of features that a client must implement in order to correctly read this table (exist only when `minReaderVersion` is set to `3`) | optional +writerFeatures | Array[String] | A collection of features that a client must implement in order to correctly write this table (exist only when `minWriterVersion` is set to `7`) | optional + +Some example Delta protocols: +```json +{ + "protocol":{ + "minReaderVersion":1, + "minWriterVersion":2 + } +} +``` + +A table that is using table features only for writers: +```json +{ + "protocol":{ + "readerVersion":2, + "writerVersion":7, + "writerFeatures":["columnMapping","identityColumns"] + } +} +``` +Reader version 2 in the above example does not support listing reader features but supports Column Mapping. This example is equivalent to the next one, where Column Mapping is represented as a reader table feature. + +A table that is using table features for both readers and writers: +```json +{ + "protocol": { + "readerVersion":3, + "writerVersion":7, + "readerFeatures":["columnMapping"], + "writerFeatures":["columnMapping","identityColumns"] + } +} +``` + +### Commit Provenance Information +A delta file can optionally contain additional provenance information about what higher-level operation was being performed as well as who executed it. + +Implementations are free to store any valid JSON-formatted data via the `commitInfo` action. + +An example of storing provenance information related to an `INSERT` operation: +```json +{ + "commitInfo":{ + "timestamp":1515491537026, + "userId":"100121", + "userName":"michael@databricks.com", + "operation":"INSERT", + "operationParameters":{"mode":"Append","partitionBy":"[]"}, + "notebook":{ + "notebookId":"4443029", + "notebookPath":"Users/michael@databricks.com/actions"}, + "clusterId":"1027-202406-pooh991" + } +} +``` + +### Domain Metadata +The domain metadata action contains a configuration (string) for a named metadata domain. Two overlapping transactions conflict if they both contain a domain metadata action for the same metadata domain. + +There are two types of metadata domains: +1. **User-controlled metadata domains** have names that start with anything other than the `delta.` prefix. Any Delta client implementation or user application can modify these metadata domains, and can allow users to modify them arbitrarily. Delta clients and user applications are encouraged to use a naming convention designed to avoid conflicts with other clients' or users' metadata domains (e.g. `com.databricks.*` or `org.apache.*`). +2. **System-controlled metadata domains** have names that start with the `delta.` prefix. 
This prefix is reserved for metadata domains defined by the Delta spec, and Delta client implementations must not allow users to modify the metadata for system-controlled domains. A Delta client implementation should only update metadata for system-controlled domains that it knows about and understands. System-controlled metadata domains are used by various table features and each table feature may impose additional semantics on the metadata domains it uses. + +The schema of the `domainMetadata` action is as follows: + +Field Name | Data Type | Description +-|-|- +domain | String | Identifier for this domain (system- or user-provided) +configuration | String | String containing configuration for the metadata domain +removed | Boolean | When `true`, the action serves as a tombstone to logically delete a metadata domain. Writers should preserve an accurate pre-image of the configuration. + +To support this feature: +- The table must be on Writer Version 7. +- A feature name `domainMetadata` must exist in the table's `writerFeatures`. + +#### Reader Requirements for Domain Metadata +- Readers are not required to support domain metadata. +- Readers who choose not to support domain metadata should ignore metadata domain actions as unrecognized (see [Protocol Evolution](#protocol-evolution)) and snapshots should not include any metadata domains. +- Readers who choose to support domain metadata must apply [Action Reconciliation](#action-reconciliation) to all metadata domains and snapshots must include them -- even if the reader does not understand them. +- Any system-controlled domain that imposes any requirements on readers is a [breaking change](#protocol-evolution), and must be part of a reader-writer table feature that specifies the desired behavior. + +#### Writer Requirements for Domain Metadata +- Writers must preserve all domains even if they don't understand them. +- Writers must not allow users to modify or delete system-controlled domains. +- Writers must only modify or delete system-controlled domains they understand. +- Any system-controlled domain that imposes additional requirements on the writer is a [breaking change](#protocol-evolution), and must be part of a writer table feature that specifies the desired behavior. + +The following is an example `domainMetadata` action: +```json +{ + "domainMetadata": { + "domain": "delta.deltaTableFeatureX", + "configuration": "{\"key1\":\"value1\"}", + "removed": false + } +} +``` + +### Sidecar File Information +The `sidecar` action references a [sidecar file](#sidecar-files) which provides some of the checkpoint's file actions. +This action is only allowed in checkpoints following [V2 spec](#v2-spec). +The schema of `sidecar` action is as follows: + +Field Name | Data Type | Description | optional/required +-|-|-|- +path | String | URI-encoded path to the sidecar file. Because sidecar files must always reside in the table's own _delta_log/_sidecars directory, implementations are encouraged to store only the file's name (without scheme or parent directories). | required +sizeInBytes | Long | Size of the sidecar file. | required +modificationTime | Long | The time this logical file was created, as milliseconds since the epoch. | required +tags|`Map[String, String]`|Map containing any additional metadata about the checkpoint sidecar file. 
| optional + +The following is an example `sidecar` action: +```json +{ + "sidecar":{ + "path": "016ae953-37a9-438e-8683-9a9a4a79a395.parquet", + "sizeInBytes": 2304522, + "modificationTime": 1512909768000, + "tags": {} + } +} +``` + +#### Checkpoint Metadata +This action is only allowed in checkpoints following [V2 spec](#v2-spec). +It describes the details about the checkpoint. It has the following schema: + +Field Name | Data Type | Description | optional/required +-|-|-|- +version|`Long`|The checkpoint version.| required +tags|`Map[String, String]`|Map containing any additional metadata about the v2 spec checkpoint.| optional + +E.g. +```json +{ + "checkpointMetadata":{ + "version":1, + "tags":{} + } +} +``` + +# Action Reconciliation +A given snapshot of the table can be computed by replaying the events committed to the table in ascending order by commit version. A given snapshot of a Delta table consists of: + + - A single `protocol` action + - A single `metaData` action + - A collection of `txn` actions with unique `appId`s + - A collection of `domainMetadata` actions with unique `domain`s. + - A collection of `add` actions with unique `(path, deletionVector.uniqueId)` keys. + - A collection of `remove` actions with unique `(path, deletionVector.uniqueId)` keys. The intersection of the primary keys in the `add` collection and `remove` collection must be empty. That means a logical file cannot exist in both the `remove` and `add` collections at the same time; however, the same *data file* can exist with *different* DVs in the `remove` collection, as logically they represent different content. The `remove` actions act as _tombstones_, and only exist for the benefit of the VACUUM command. Snapshot reads only return `add` actions on the read path. + +To achieve the requirements above, related actions from different delta files need to be reconciled with each other: + + - The latest `protocol` action seen wins + - The latest `metaData` action seen wins + - For `txn` actions, the latest `version` seen for a given `appId` wins + - For `domainMetadata`, the latest `domainMetadata` seen for a given `domain` wins. The actions with `removed=true` act as tombstones to suppress earlier versions. Snapshot reads do _not_ return removed `domainMetadata` actions. + - Logical files in a table are identified by their `(path, deletionVector.uniqueId)` primary key. File actions (`add` or `remove`) reference logical files, and a log can contain any number of references to a single file. + - To replay the log, scan all file actions and keep only the newest reference for each logical file. + - `add` actions in the result identify logical files currently present in the table (for queries). `remove` actions in the result identify tombstones of logical files no longer present in the table (for VACUUM). + - [v2 checkpoint spec](#v2-spec) actions are not allowed in normal commit files, and do not participate in log replay. + +# Table Features +Table features must only exist on tables that have a supported protocol version. When the table's Reader Version is 3, `readerFeatures` must exist in the `protocol` action, and when the Writer Version is 7, `writerFeatures` must exist in the `protocol` action. `readerFeatures` and `writerFeatures` define the features that readers and writers must implement in order to read and write this table. 
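+As a minimal, hedged sketch of how a client might decide whether it can read a table (the supported versions and feature names below are illustrative choices for this example, not a normative list):
+
+```python
+SUPPORTED_READER_VERSIONS = {1, 2, 3}
+SUPPORTED_READER_FEATURES = {"columnMapping", "deletionVectors", "timestampNtz", "v2Checkpoint"}
+
+def can_read(protocol):
+    """A client may read only if it implements minReaderVersion and every listed reader feature."""
+    if protocol["minReaderVersion"] not in SUPPORTED_READER_VERSIONS:
+        return False
+    required = set(protocol.get("readerFeatures", []))
+    return required <= SUPPORTED_READER_FEATURES
+
+print(can_read({"minReaderVersion": 3, "minWriterVersion": 7, "readerFeatures": ["columnMapping"]}))
+```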
+ +Readers and writers must not ignore table features when they are present: + - to read a table, readers must implement and respect all features listed in `readerFeatures`; + - to write a table, writers must implement and respect all features listed in `writerFeatures`. Because writers have to read the table (or only the Delta log) before a write, they must implement and respect all reader features as well. + +## Table Features for New and Existing Tables +It is possible to create a new table or upgrade an existing table to the protocol versions that support the use of table features. A table must support either the use of writer features or both reader and writer features. It is illegal to support reader but not writer features. + +When a new table is created with a Reader Version up to 2 and Writer Version 7, its `protocol` action must only contain `writerFeatures`. When a new table is created with Reader Version 3 and Writer Version 7, its `protocol` action must contain both `readerFeatures` and `writerFeatures`. Creating a table with a Reader Version 3 and Writer Version less than 7 is not allowed. + +When upgrading an existing table to Reader Version 3 and/or Writer Version 7, the client should, on a best effort basis, determine which features supported by the original protocol version are used in any historical version of the table, and add only used features to reader and/or writer feature sets. The client must assume a feature has been used, unless it can prove that the feature is *definitely* not used in any historical version of the table that is reachable by time travel. + +For example, given a table on Reader Version 1 and Writer Version 4, along with four versions: + 1. Table property change: set `delta.enableChangeDataFeed` to `true`. + 2. Data change: three rows updated. + 3. Table property change: unset `delta.enableChangeDataFeed`. + 4. Table protocol change: upgrade protocol to Reader Version 3 and Writer Version 7. + +To produce Version 4, a writer could look at only Version 3 and discover that Change Data Feed has not been used. But in fact, this feature has been used and the table does contain some Change Data Files for Version 2. This means that, to determine all features that have ever been used by the table, a writer must either scan the whole history (which is very time-consuming) or assume the worst case: all features supported by protocol `(1, 4)` have been used. + +## Supported Features +A feature is supported by a table when its name is in the `protocol` action’s `readerFeatures` and/or `writerFeatures`. Subsequent read and/or write operations on this table must respect the feature. Clients must not remove the feature from the `protocol` action. + +Writers are allowed to add support of a feature to the table by adding its name to `readerFeatures` or `writerFeatures`. Reader features should be listed in both `readerFeatures` and `writerFeatures` simultaneously, while writer features should be listed only in `writerFeatures`. It is not allowed to list a feature only in `readerFeatures` but not in `writerFeatures`. + +A feature being supported does not imply that it is active. For example, a table may have the [Append-only Tables](#append-only-tables) feature (feature name `appendOnly`) listed in `writerFeatures`, but it does not have a table property `delta.appendOnly` that is set to `true`. In such a case the table is not append-only, and writers are allowed to change, remove, and rearrange data.
However, writers must know that the table property `delta.appendOnly` should be checked before writing the table. + +## Active Features +A feature is active on a table when it is supported *and* its metadata requirements are satisfied. Each feature defines its own metadata requirements, as stated in the corresponding sections of this document. For example, the Append-only feature is active when the `appendOnly` feature name is present in a `protocol`'s `writerFeatures` *and* a table property `delta.appendOnly` set to `true`. + +# Column Mapping +Delta can use column mapping to avoid any column naming restrictions, and to support the renaming and dropping of columns without having to rewrite all the data. There are two modes of column mapping, by `name` and by `id`. In both modes, every column - nested or leaf - is assigned a unique _physical_ name, and a unique 32-bit integer as an id. The physical name is stored as part of the column metadata with the key `delta.columnMapping.physicalName`. The column id is stored within the metadata with the key `delta.columnMapping.id`. + +The column mapping is governed by the table property `delta.columnMapping.mode` being one of `none`, `id`, and `name`. The table property should only be honored if the table's protocol has reader and writer versions and/or table features that support the `columnMapping` table feature. For readers this is Reader Version 2, or Reader Version 3 with the `columnMapping` table feature listed as supported. For writers this is Writer Version 5 or 6, or Writer Version 7 with the `columnMapping` table feature supported. + +The following is an example for the column definition of a table that leverages column mapping. See the [appendix](#schema-serialization-format) for a more complete schema definition. +```json +{ + "name" : "e", + "type" : { + "type" : "array", + "elementType" : { + "type" : "struct", + "fields" : [ { + "name" : "d", + "type" : "integer", + "nullable" : false, + "metadata" : { + "delta.columnMapping.id": 5, + "delta.columnMapping.physicalName": "col-a7f4159c-53be-4cb0-b81a-f7e5240cfc49" + } + } ] + }, + "containsNull" : true + }, + "nullable" : true, + "metadata" : { + "delta.columnMapping.id": 4, + "delta.columnMapping.physicalName": "col-5f422f40-de70-45b2-88ab-1d5c90e94db1" + } + } +``` + +## Writer Requirements for Column Mapping +In order to support column mapping, writers must: + - Write `protocol` and `metaData` actions when Column Mapping is turned on for the first time: + - If the table is on Writer Version 5 or 6: write a `metaData` action to add the `delta.columnMapping.mode` table property; + - If the table is on Writer Version 7: + - write a `protocol` action to add the feature `columnMapping` to both `readerFeatures` and `writerFeatures`, and + - write a `metaData` action to add the `delta.columnMapping.mode` table property. + - Write data files by using the _physical name_ that is chosen for each column. The physical name of the column is static and can be different than the _display name_ of the column, which is changeable. + - Write the 32 bit integer column identifier as part of the `field_id` field of the `SchemaElement` struct in the [Parquet Thrift specification](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift). + - Track partition values and column level statistics with the physical name of the column in the transaction log. + - Assign a globally unique identifier as the physical name for each new column that is added to the schema. 
This is especially important for supporting cheap column deletions in `name` mode. In addition, column identifiers need to be assigned to each column. The maximum id that is assigned to a column is tracked as the table property `delta.columnMapping.maxColumnId`. This is an internal table property that cannot be configured by users. This value must increase monotonically as new columns are introduced and committed to the table alongside the introduction of the new columns to the schema. + +## Reader Requirements for Column Mapping +If the table is on Reader Version 2, or if the table is on Reader Version 3 and the feature `columnMapping` is present in `readerFeatures`, readers and writers must read the table property `delta.columnMapping.mode` and do one of the following. + +In `none` mode, or if the table property is not present, readers must read the parquet files by using the display names (the `name` field of the column definition) of the columns in the schema. + +In `id ` mode, readers must resolve columns by using the `field_id` in the parquet metadata for each file, as given by the column metadata property `delta.columnMapping.id` in the Delta schema. Partition values and column level statistics must be resolved by their *physical names* for each `add` entry in the transaction log. If a data file does not contain field ids, readers must refuse to read that file or return nulls for each column. For ids that cannot be found in a file, readers must return `null` values for those columns. + +In `name` mode, readers must resolve columns in the data files by their physical names as given by the column metadata property `delta.columnMapping.physicalName` in the Delta schema. Partition values and column level statistics will also be resolved by their physical names. For columns that are not found in the files, `null`s need to be returned. Column ids are not used in this mode for resolution purposes. + +# Deletion Vectors +To support this feature: + - To support Deletion Vectors, a table must have Reader Version 3 and Writer Version 7. A feature name `deletionVectors` must exist in the table's `readerFeatures` and `writerFeatures`. + +When supported: + - A table may have a metadata property `delta.enableDeletionVectors` in the Delta schema set to `true`. Writers must only write new Deletion Vectors (DVs) when this property is set to `true`. + - A table's `add` and `remove` actions can optionally include a DV that provides information about logically deleted rows, that are however still physically present in the underlying data file and must thus be skipped during processing. Readers must read the table considering the existence of DVs, even when the `delta.enableDeletionVectors` table property is not set. + +DVs can be stored and accessed in different ways, indicated by the `storageType` field. The Delta protocol currently supports inline or on-disk storage, where the latter can be accessed either by a relative path derived from a UUID or an absolute path. + +## Deletion Vector Descriptor Schema + +The schema of the `DeletionVectorDescriptor` struct is as follows: + +Field Name | Data Type | Description +-|-|- +storageType | String | A single character to indicate how to access the DV. Legal options are: `['u', 'i', 'p']`. +pathOrInlineDv | String | Three format options are currently proposed:
  • If `storageType = 'u'` then `<random prefix - optional><base85 encoded uuid>`: The deletion vector is stored in a file with a path relative to the data directory of this Delta table, and the file name can be reconstructed from the UUID. See Derived Fields for how to reconstruct the file name. The random prefix is recovered as the extra characters before the (20 characters fixed length) uuid.
  • If `storageType = 'i'` then `<base85 encoded bytes>`: The deletion vector is stored inline in the log. The format used is the `RoaringBitmapArray` format also used when the DV is stored on disk and described in [Deletion Vector Format](#Deletion-Vector-Format).
  • If `storageType = 'p'` then `<absolute path>`: The DV is stored in a file with an absolute path given by this path, which has the same format as the `path` field in the `add`/`remove` actions.
+offset | Option[Int] | Start of the data for this DV in number of bytes from the beginning of the file it is stored in. Always `None` (absent in JSON) when `storageType = 'i'`. +sizeInBytes | Int | Size of the serialized DV in bytes (raw data size, i.e. before base85 encoding, if inline). +cardinality | Long | Number of rows the given DV logically removes from the file. + +The concrete Base85 variant used is [Z85](https://rfc.zeromq.org/spec/32/), because it is JSON-friendly. + +### Derived Fields + +Some fields that are necessary to use the DV are not stored explicitly but can be derived in code from the stored fields. + +Field Name | Data Type | Description | Computed As +-|-|-|- +uniqueId | String | Uniquely identifies a DV for a given file. This is used for snapshot reconstruction to differentiate the same file with different DVs in successive versions. | If `offset` is `None` then `<pathOrInlineDv>`.
 Otherwise `<pathOrInlineDv>@<offset>`. +absolutePath | String/URI/Path | The absolute path of the DV file. Can be calculated for relative path DVs by providing a parent directory path. | If `storageType='p'`, just use the already absolute path. If `storageType='u'`, the DV is stored at `<parent path>/<random prefix>/deletion_vector_<uuid>.bin`. This is not a legal field if `storageType='i'`, as an inline DV has no absolute path. + +### JSON Example 1 — On Disk with Relative Path (with Random Prefix) +```json +{ + "storageType" : "u", + "pathOrInlineDv" : "ab^-aqEH.-t@S}K{vb[*k^", + "offset" : 4, + "sizeInBytes" : 40, + "cardinality" : 6 +} +``` +Assuming that this DV is stored relative to an `s3://mytable/` directory, the absolute path to be resolved here would be: `s3://mytable/ab/deletion_vector_d2c639aa-8816-431a-aaf6-d3fe2512ff61.bin`. + +### JSON Example 2 — On Disk with Absolute Path +```json +{ + "storageType" : "p", + "pathOrInlineDv" : "s3://mytable/deletion_vector_d2c639aa-8816-431a-aaf6-d3fe2512ff61.bin", + "offset" : 4, + "sizeInBytes" : 40, + "cardinality" : 6 +} +``` + +### JSON Example 3 — Inline +```json +{ + "storageType" : "i", + "pathOrInlineDv" : "wi5b=000010000siXQKl0rr91000f55c8Xg0@@D72lkbi5=-{L", + "sizeInBytes" : 40, + "cardinality" : 6 +} +``` +The row indexes encoded in this DV are: 3, 4, 7, 11, 18, 29. + +## Reader Requirements for Deletion Vectors +If a snapshot contains logical files with records that are invalidated by a DV, then these records *must not* be returned in the output. + +## Writer Requirement for Deletion Vectors +When adding a logical file with a deletion vector, that logical file must have correct `numRecords` information for the data file in the `stats` field. + +# Iceberg Compatibility V1 + +This table feature (`icebergCompatV1`) ensures that Delta tables can be converted to Apache Iceberg™ format, though this table feature does not implement or specify that conversion. + +To support this feature: +- Since this table feature depends on Column Mapping, the table must be on Reader Version = 2, or it must be on Reader Version >= 3 and the feature `columnMapping` must exist in the `protocol`'s `readerFeatures`. +- The table must be on Writer Version 7. +- The feature `icebergCompatV1` must exist in the table `protocol`'s `writerFeatures`. + +This table feature is enabled when the table property `delta.enableIcebergCompatV1` is set to `true`. + +## Writer Requirements for IcebergCompatV1 + +When supported and active, writers must: +- Require that Column Mapping be enabled and set to either `name` or `id` mode +- Require that Deletion Vectors are not supported (and, consequently, not active, either), i.e., the `deletionVectors` table feature is not present in the table `protocol`. +- Require that partition column values are materialized into any Parquet data file that is present in the table, placed *after* the data columns in the parquet schema +- Require that all `AddFile`s committed to the table have the `numRecords` statistic populated in their `stats` field +- Block adding `Map`/`Array`/`Void` types to the table schema (and, thus, block writing them, too) +- Block replacing partitioned tables with a differently-named partition spec + - e.g. replacing a table partitioned by `part_a INT` with partition spec `part_b INT` must be blocked + - e.g.
replacing a table partitioned by `part_a INT` with partition spec `part_a LONG` is allowed + +# Iceberg Compatibility V2 + +This table feature (`icebergCompatV2`) ensures that Delta tables can be converted to Apache Iceberg™ format, though this table feature does not implement or specify that conversion. + +To support this feature: +- Since this table feature depends on Column Mapping, the table must be on Reader Version = 2, or it must be on Reader Version >= 3 and the feature `columnMapping` must exist in the `protocol`'s `readerFeatures`. +- The table must be on Writer Version 7. +- The feature `icebergCompatV2` must exist in the table protocol's `writerFeatures`. + +This table feature is enabled when the table property `delta.enableIcebergCompatV2` is set to `true`. + +## Writer Requirements for IcebergCompatV2 + +When this feature is supported and enabled, writers must: +- Require that Column Mapping be enabled and set to either `name` or `id` mode +- Require that the nested `element` field of ArrayTypes and the nested `key` and `value` fields of MapTypes be assigned 32 bit integer identifiers. These identifiers must be unique and different from those used in [Column Mapping](#column-mapping), and must be stored in the metadata of their nearest ancestor [StructField](#struct-field) of the Delta table schema. Identifiers belonging to the same `StructField` must be organized as a `Map[String, Long]` and stored in metadata with key `parquet.field.nested.ids`. The keys of the map are "element", "key", or "value", prefixed by the name of the nearest ancestor StructField, separated by dots. The values are the identifiers. The keys for fields in nested arrays or nested maps are prefixed by their parents' key, separated by dots. An [example](#example-of-storing-identifiers-for-nested-fields-in-arraytype-and-maptype) is provided below to demonstrate how the identifiers are stored. These identifiers must be also written to the `field_id` field of the `SchemaElement` struct in the [Parquet Thrift specification](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift) when writing parquet files. +- Require that IcebergCompatV1 is not active, which means either the `icebergCompatV1` table feature is not present in the table protocol or the table property `delta.enableIcebergCompatV1` is not set to `true` +- Require that Deletion Vectors are not active, which means either the `deletionVectors` table feature is not present in the table protocol or the table property `delta.enableDeletionVectors` is not set to `true` +- Require that partition column values be materialized when writing Parquet data files +- Require that all new `AddFile`s committed to the table have the `numRecords` statistic populated in their `stats` field +- Require writing timestamp columns as int64 +- Require that the table schema contains only data types in the following allow-list: [`byte`, `short`, `integer`, `long`, `float`, `double`, `decimal`, `string`, `binary`, `boolean`, `timestamp`, `timestampNTZ`, `date`, `array`, `map`, `struct`]. +- Block replacing partitioned tables with a differently-named partition spec + - e.g. replacing a table partitioned by `part_a INT` with partition spec `part_b INT` must be blocked + - e.g. 
replacing a table partitioned by `part_a INT` with partition spec `part_a LONG` is allowed + +### Example of storing identifiers for nested fields in ArrayType and MapType +The following is an example of storing the identifiers for nested fields in `ArrayType` and `MapType`, of a table with the following schema, +``` +|-- col1: array[array[int]] +|-- col2: map[int, array[int]] +|-- col3: map[int, struct] + |-- subcol1: array[int] +``` +The identifiers for the nested fields are stored in the metadata as follows: +```json +[ + { + "name": "col1", + "type": { + "type": "array", + "elementType": { + "type": "array", + "elementType": "int" + } + }, + "metadata": { + "parquet.field.nested.ids": { + "col1.element": 100, + "col1.element.element": 101 + } + } + }, + { + "name": "col2", + "type": { + "type": "map", + "keyType": "int", + "valueType": { + "type": "array", + "elementType": "int" + } + }, + "metadata": { + "parquet.field.nested.ids": { + "col2.key": 102, + "col2.value": 103, + "col2.value.element": 104 + } + } + }, + { + "name": "col3", + "type": { + "type": "map", + "keyType": "int", + "valueType": { + "type": "struct", + "fields": [ + { + "name": "subcol1", + "type": { + "type": "array", + "elementType": "int" + }, + "metadata": { + "parquet.field.nested.ids": { + "subcol1.element": 107 + } + } + } + ] + } + }, + "metadata": { + "parquet.field.nested.ids": { + "col3.key": 105, + "col3.value": 106 + } + } + } +] +``` +# Timestamp without timezone (TimestampNtz) +This feature introduces a new data type to support timestamps without timezone information. For example: `1970-01-01 00:00:00`, or `1970-01-01 00:00:00.123456`. +The serialization method is described in Sections [Partition Value Serialization](#partition-value-serialization) and [Schema Serialization Format](#schema-serialization-format). + +To support this feature: +- To have a column of TimestampNtz type in a table, the table must have Reader Version 3 and Writer Version 7. A feature name `timestampNtz` must exist in the table's `readerFeatures` and `writerFeatures`. + + +# V2 Checkpoint Table Feature +To support this feature: +- To add [V2 Checkpoints](#v2-spec) support to a table, the table must have Reader Version 3 and Writer Version 7. A feature name `v2Checkpoint` must exist in the table's `readerFeatures` and `writerFeatures`. + +When supported: +- A table could use [uuid-named](#uuid-named-checkpoint) [V2 spec Checkpoints](#v2-spec) which must have [checkpoint metadata](#checkpoint-metadata) and may have [sidecar files](#sidecar-files) OR +- A table could use [classic](#classic-checkpoint) checkpoints which can be follow [V1](#v1-spec) or [V2](#v2-spec) spec. +- A table must not use [multi-part checkpoints](#multi-part-checkpoint) + +# Row Tracking + +Row Tracking is a feature that allows the tracking of rows across multiple versions of a Delta table. +It enables this by exposing two metadata columns: Row IDs, which uniquely identify a row across multiple versions of a table, +and Row Commit Versions, which make it possible to check whether two rows with the same ID in two different versions of the table represent the same version of the row. + +Row Tracking is defined to be **supported** or **enabled** on a table as follows: +- When the feature `rowTracking` exists in the table `protocol`'s `writerFeatures`, then we say that Row Tracking is **supported**. + In this situation, writers must assign Row IDs and Commit Versions, but they cannot yet be relied upon to be present in the table. 
+ When Row Tracking is supported but not yet enabled writers cannot preserve Row IDs and Commit Versions. +- When additionally the table property `delta.enableRowTracking` is set to `true`, then we say that Row Tracking is **enabled**. + In this situation, Row IDs and Row Commit versions can be relied upon to be present in the table for all rows. + When Row Tracking is enabled writers are expected to preserve Row IDs and Commit Versions. + +Enablement: +- The table must be on Writer Version 7. +- The feature `rowTracking` must exist in the table `protocol`'s `writerFeatures`. +- The table property `delta.enableRowTracking` must be set to `true`. + +## Row IDs + +Delta provides Row IDs. Row IDs are integers that are used to uniquely identify rows within a table. +Every row has two Row IDs: + +- A **fresh** or unstable **Row ID**. + This ID uniquely identifies the row within one version of the table. + The fresh ID of a row may change every time the table is updated, even for rows that are not modified. E.g. when a row is copied unchanged during an update operation, it will get a new fresh ID. Fresh IDs can be used to identify rows within one version of the table, e.g. for identifying matching rows in self joins. +- A **stable Row ID**. + This ID uniquely identifies the row across versions of the table and across updates. + When a row is inserted, it is assigned a new stable Row ID that is equal to the fresh Row ID. + When a row is updated or copied, the stable Row ID for this row is preserved. + When a row is restored (i.e. the table is restored to an earlier version), its stable Row ID is restored as well. + +The fresh and stable Row IDs are not required to be equal. + +Row IDs are stored in two ways: + +- **Default generated Row IDs** use the `baseRowId` field stored in `add` and `remove` actions to generate fresh Row IDs. + The default generated Row IDs for data files are calculated by adding the `baseRowId` of the file in which a row is contained to the (physical) position (index) of the row within the file. + Default generated Row IDs require little storage overhead but are reassigned every time a row is updated or moved to a different file (for instance when a row is contained in a file that is compacted by OPTIMIZE). + +- **Materialized Row IDs** are stored in a column in the data files. + This column is hidden from readers and writers, i.e. it is not part of the `schemaString` in the table's `metaData`. + Instead, the name of this column can be found in the value for the `delta.rowTracking.materializedRowIdColumnName` key in the `configuration` of the table's `metaData` action. + This column may contain `null` values meaning that the corresponding row has no materialized Row ID. This column may be omitted if all its values are `null` in the file. + Materialized Row IDs provide a mechanism for writers to preserve stable Row IDs for rows that are updated or copied. + +The fresh Row ID of a row is equal to the default generated Row ID. The stable Row ID of a row is equal to the materialized Row ID of the row when that column is present and the value is not NULL, otherwise it is equal to the default generated Row ID. + +When Row Tracking is enabled: +- Default generated Row IDs must be assigned to all existing rows. + This means in particular that all files that are part of the table version that sets the table property `delta.enableRowTracking` to `true` must have `baseRowId` set. 
+ A backfill operation may be required to commit `add` and `remove` actions with the `baseRowId` field set for all data files before the table property `delta.enableRowTracking` can be set to `true`. + +## Row Commit Versions + +Row Commit Versions provide versioning of rows. + +- **Fresh** or unstable **Row Commit Versions** can be used to identify the first commit version in which the `add` action containing the row was committed. + The fresh Commit Version of a row may change every time the table is updated, even for rows that are not modified. E.g. when a row is copied unchanged during an update operation, it will get a new fresh Commit Version. +- **Stable Row Commit Versions** identify the last commit version in which the row (with the same ID) was either inserted or updated. + When a row is inserted or updated, it is assigned the commit version number of the log entry containing the `add` entry with the new row. + When a row is copied, the stable Row Commit Version for this row is preserved. + When a row is restored (i.e. the table is restored to an earlier version), its stable Row Commit Version is restored as well. + +The fresh and stable Row Commit Versions are not required to be equal. + +Commit Versions are stored in two ways: + +- **Default generated Row Commit Versions** use the `defaultRowCommitVersion` field in `add` and `remove` actions. + Default generated Row Commit Versions require little storage overhead but are reassigned every time a row is updated or moved to a different file (for instance when a row is contained in a file that is compacted by OPTIMIZE). + +- **Materialized Row Commit Versions** are stored in a column in the data files. + This column is hidden from readers and writers, i.e. it is not part of the `schemaString` in the table's `metaData`. + Instead, the name of this column can be found in the value for the `delta.rowTracking.materializedRowCommitVersionColumnName` key in the `configuration` of the table's `metaData` action. + This column may contain `null` values meaning that the corresponding row has no materialized Row Commit Version. This column may be omitted if all its values are `null` in the file. + Materialized Row Commit Versions provide a mechanism for writers to preserve Row Commit Versions for rows that are copied. + +The fresh Row Commit Version of a row is equal to the default generated Row Commit version. +The stable Row Commit Version of a row is equal to the materialized Row Commit Version of the row when that column is present and the value is not NULL, otherwise it is equal to the default generated Commit Version. + +## Reader Requirements for Row Tracking + +When Row Tracking is enabled (when the table property `delta.enableRowTracking` is set to `true`), then: +- When Row IDs are requested, readers must reconstruct stable Row IDs as follows: + 1. Readers must use the materialized Row ID if the column determined by `delta.rowTracking.materializedRowIdColumnName` is present in the data file and the column contains a non `null` value for a row. + 2. Otherwise, readers must use the default generated Row ID of the `add` or `remove` action containing the row in all other cases. + I.e. readers must add the index of the row in the file to the `baseRowId` of the `add` or `remove` action for the file containing the row. +- When Row Commit Versions are requested, readers must reconstruct them as follows: + 1. 
Readers must use the materialized Row Commit Versions if the column determined by `delta.rowTracking.materializedRowCommitVersionColumnName` is present in the data file and the column contains a non `null` value for a row.
  2. Otherwise, readers must use the default generated Row Commit Versions of the `add` or `remove` action containing the row in all other cases.
     I.e. readers must use the `defaultRowCommitVersion` of the `add` or `remove` action for the file containing the row.
- Readers cannot read Row IDs and Row Commit Versions while reading change data files from `cdc` actions.

## Writer Requirements for Row Tracking

When Row Tracking is supported (when the `writerFeatures` field of a table's `protocol` action contains `rowTracking`), then:
- Writers must assign unique fresh Row IDs to all rows that they commit.
  - Writers must set the `baseRowId` field in all `add` actions that they commit so that all default generated Row IDs are unique in the table version.
    Writers must never commit duplicate Row IDs in the table in any version.
  - Writers must set the `baseRowId` field in recommitted and checkpointed `add` actions and `remove` actions to the `baseRowId` value (if present) of the last committed `add` action with the same `path`.
  - Writers must track the high water mark, i.e. the highest fresh row id assigned.
    - The high water mark must be stored in a `domainMetadata` action with `delta.rowTracking` as the `domain`
      and a `configuration` containing a single key-value pair with `highWaterMark` as the key and the highest assigned fresh row id as the value.
    - Writers must include a `domainMetadata` for `delta.rowTracking` whenever they assign new fresh Row IDs that are higher than the `highWaterMark` value of the current `domainMetadata` for `delta.rowTracking`.
      The `highWaterMark` value in the `configuration` of this `domainMetadata` action must always be equal to or greater than the highest fresh Row ID committed so far.
      Writers can either commit this `domainMetadata` in the same commit, or they can reserve the fresh Row IDs in an earlier commit.
    - Writers must set the `baseRowId` field to a value that is higher than the row id high water mark.
- Writers must assign fresh Row Commit Versions to all rows that they commit.
  - Writers must set the `defaultRowCommitVersion` field in new `add` actions to the version number of the log entry containing the `add` action.
  - Writers must set the `defaultRowCommitVersion` field in recommitted and checkpointed `add` actions and `remove` actions to the `defaultRowCommitVersion` of the last committed `add` action with the same `path`.

Writers can enable Row Tracking by setting `delta.enableRowTracking` to `true` in the `configuration` of the table's `metaData`.
This is only allowed if the following requirements are satisfied:
- The feature `rowTracking` has been added to the `writerFeatures` field of a table's `protocol` action either in the same version of the table or in an earlier version of the table.
- The column names for the materialized Row IDs and Row Commit Versions have been assigned and added to the `configuration` in the table's `metaData` action using the keys `delta.rowTracking.materializedRowIdColumnName` and `delta.rowTracking.materializedRowCommitVersionColumnName` respectively.
  - The assigned column names must be unique. They must not be equal to the name of any other column in the table's schema.
    The assigned column names must remain unique in all future versions of the table.
+ If [Column Mapping](#column-mapping) is enabled, then the assigned column name must be distinct from the physical column names of the table. +- The `baseRowId` and `defaultRowCommitVersion` fields are set for all active `add` actions in the version of the table in which `delta.enableRowTracking` is set to `true`. +- If the `baseRowId` and `defaultRowCommitVersion` fields are not set in some active `add` action in the table, then writers must first commit new `add` actions that set these fields to replace the `add` actions that do not have these fields set. + This can be done in the commit that sets `delta.enableRowTracking` to `true` or in an earlier commit. + The assigned `baseRowId` and `defaultRowCommitVersion` values must satisfy the same requirements as when assigning fresh Row IDs and fresh Row Commit Versions respectively. + +When Row Tracking is enabled (when the table property `delta.enableRowTracking` is set to `true`), then: +- Writers must assign stable Row IDs to all rows. + - Stable Row IDs must be unique within a version of the table and must not be equal to the fresh Row IDs of other rows in the same version of the table. + - Writers should preserve the stable Row IDs of rows that are updated or copied using materialized Row IDs. + - The preserved stable Row ID (i.e. a stable Row ID that is not equal to the fresh Row ID of the same physical row) should be equal to the stable Row ID of the same logical row before it was updated or copied. + - Materialized Row IDs must be written to the column determined by `delta.rowTracking.materializedRowIdColumnName` in the `configuration` of the table's `metaData` action. + The value in this column must be set to `NULL` for stable Row IDs that are not preserved. +- Writers must assign stable Row Commit Versions to all rows. + - Writers should preserve the stable Row Commit Versions of rows that are copied (but not updated) using materialized Row Commit Versions. + - The preserved stable Row Commit Version (i.e. a stable Row Commit Version that is not equal to the fresh Row Commit Version of the same physical row) should be equal to the stable Commit Version of the same logical row before it was copied. + - Materialized Row Commit Versions must be written to the column determined by `delta.rowTracking.materializedRowCommitVersionColumnName` in the `configuration` of the table's `metaData` action. + The value in this column must be set to `NULL` for stable Row Commit Versions that are not preserved (i.e. that are equal to the fresh Row Commit Version). +- Writers should set `delta.rowTracking.preserved` in the `tags` of the `commitInfo` action to `true` whenever all the stable Row IDs of rows that are updated or copied and all the stable Row Commit Versions of rows that are copied were preserved. + In particular, writers should set `delta.rowTracking.preserved` in the `tags` of the `commitInfo` action to `true` if no rows are updated or copied. + Writers should set that flag to false otherwise. + +# Clustered Table + +The Clustered Table feature facilitates the physical clustering of rows that share similar values on a predefined set of clustering columns. +This enhances query performance when selective filters are applied to these clustering columns through data skipping. +Clustering columns can be specified during the initial creation of a table, or they can be added later, provided that the table doesn't have partition columns. 

A table is defined as a clustered table through the following criteria:
- When the feature `clustering` exists in the table `protocol`'s `writerFeatures`, then we say that the table is a clustered table.
  The feature `domainMetadata` is required in the table `protocol`'s `writerFeatures`.

Enablement:
- The table must be on Writer Version 7.
- The feature `clustering` must exist in the table `protocol`'s `writerFeatures`, either during its creation or at a later stage, provided the table does not have partition columns.

## Writer Requirements for Clustered Table

When the Clustered Table feature is supported (when the `writerFeatures` field of a table's `protocol` action contains `clustering`), then:
- Writers must track clustering column names in a `domainMetadata` action with `delta.clustering` as the `domain` and a `configuration` containing all clustering column names.
  If [Column Mapping](#column-mapping) is enabled, the physical column names should be used.
- Writers must write out [per-file statistics](#per-file-statistics) and per-column statistics for the clustering columns in `add` actions.
  If a new column is included in the clustering columns list, all table files are required to have statistics for these added columns.
- When a clustering implementation clusters files, writers must set the name of the clustering implementation in the `clusteringProvider` field when adding `add` actions for clustered files.
  - By default, a clustering implementation must only recluster files that have the field `clusteringProvider` set to the name of the same clustering implementation, or to the names of other clustering implementations that are superseded by the current clustering implementation. In addition, a clustering implementation may cluster any files with an unset `clusteringProvider` field (i.e., unclustered files).
  - Writers are not required to cluster any specific file at any specific moment.
  - A clustering implementation is free to add additional information, such as a new user-controlled metadata domain, to keep track of its metadata.
- Writers must not define a table as both clustered and partitioned at the same time.

The following is an example of the `domainMetadata` action definition for a table that leverages column mapping.
```json
{
  "domainMetadata": {
    "domain": "delta.clustering",
    "configuration": "{\"clusteringColumns\":[\"col-daadafd7-7c20-4697-98f8-bff70199b1f9\", \"col-5abe0e80-cf57-47ac-9ffc-a861a3d1077e\"]}",
    "removed": false
  }
}
```
In the example above, the `configuration` field is a JSON-encoded string with escaped characters. For readability, here is its content in plain JSON.
```json
{
  "clusteringColumns": [
    "col-daadafd7-7c20-4697-98f8-bff70199b1f9",
    "col-5abe0e80-cf57-47ac-9ffc-a861a3d1077e"
  ]
}
```

# Requirements for Writers
This section documents additional requirements that writers must follow in order to preserve some of the higher level guarantees that Delta provides.

## Creation of New Log Entries
 - Writers MUST never overwrite an existing log entry. Whenever possible, they should use atomic primitives of the underlying filesystem to ensure concurrent writers do not overwrite each other's entries.

## Consistency Between Table Metadata and Data Files
 - Any column that exists in a data file present in the table MUST also be present in the metadata of the table.
 - Values for all partition columns present in the schema MUST be present for all files in the table.
 - Columns present in the schema of the table MAY be missing from data files. Readers SHOULD fill these missing columns in with `null`.

## Delta Log Entries
- A single log entry MUST NOT include multiple actions that reconcile with each other:
  - Add / Remove actions with the same `(path, DV)` tuple.
  - More than one Metadata action
  - More than one protocol action
  - More than one SetTransaction with the same `appId`

## Checkpoints
Each row in the checkpoint corresponds to a single action. The checkpoint **must** contain all information regarding the following actions:
 * The [protocol version](#Protocol-Evolution)
 * The [metadata](#Change-Metadata) of the table
 * Files that have been [added](#Add-File-and-Remove-File) and not yet removed
 * Files that were recently [removed](#Add-File-and-Remove-File) and have not yet expired
 * [Transaction identifiers](#Transaction-Identifiers)
 * [Domain Metadata](#Domain-Metadata)
 * [Checkpoint Metadata](#checkpoint-metadata) - Requires [V2 checkpoints](#v2-spec)
 * [Sidecar File](#sidecar-files) - Requires [V2 checkpoints](#v2-spec)

All of these actions are stored as their individual columns in parquet as struct fields. Any missing column should be treated as null.

Checkpoints must not preserve [commit provenance information](#commit-provenance-information) nor [change data](#add-cdc-file) actions.

Within the checkpoint, the `add` struct may or may not contain the following columns based on the configuration of the table:
 - partitionValues_parsed: In this struct, the column names correspond to the partition columns and the values are stored in their corresponding data type. This is a required field when the table is partitioned and the table property `delta.checkpoint.writeStatsAsStruct` is set to `true`. If the table is not partitioned, this column can be omitted. For example, for partition columns `year`, `month` and `event` with data types `int`, `int` and `string` respectively, the schema for this field will look like:

 ```
|-- add: struct
| |-- partitionValues_parsed: struct
| | |-- year: int
| | |-- month: int
| | |-- event: string
 ```

 - stats: Column level statistics can be stored as a JSON string in the checkpoint. This field needs to be written when statistics are available and the table property `delta.checkpoint.writeStatsAsJson` is set to `true` (which is the default). When this property is set to `false`, this field should be omitted from the checkpoint.
 - stats_parsed: The stats can be stored in their [original format](#Per-file-Statistics). This field needs to be written when statistics are available and the table property `delta.checkpoint.writeStatsAsStruct` is set to `true`. When this property is set to `false` (which is the default), this field should be omitted from the checkpoint.

Within the checkpoint, the `remove` struct does not contain the `stats` and `tags` fields because the `remove` actions stored in checkpoints act only as tombstones for VACUUM operations, and VACUUM tombstones do not require `stats` or `tags`. These fields are only stored in Delta JSON commit files.

Refer to the [appendix](#checkpoint-schema) for an example of the checkpoint schema.

Delta supports two checkpoint specs and three kinds of checkpoint naming schemes.

### Checkpoint Specs
Delta supports the following two checkpoint specs:

#### V2 Spec
This checkpoint spec allows putting [add and remove file](#Add-File-and-Remove-File) actions in the
[sidecar files](#sidecar-files).
This spec can be used only when [v2 checkpoint table feature](#v2-checkpoint-table-feature) is enabled. +Checkpoints following V2 spec have the following structure: +- Each v2 spec checkpoint includes exactly one [Checkpoint Metadata](#checkpoint-metadata) action. +- Remaining rows in the V2 spec checkpoint refer to the other actions mentioned [here](#checkpoints-1) +- All the non-file actions i.e. all actions except [add and remove file](#Add-File-and-Remove-File) +must be part of the v2 spec checkpoint itself. +- A writer could choose to include the [add and remove file](#Add-File-and-Remove-File) action in the +V2 spec Checkpoint or they could write the [add and remove file](#Add-File-and-Remove-File) actions in +separate [sidecar files](#sidecar-files). These sidecar files will then be referenced in the V2 spec checkpoint. +All sidecar files reside in the `_delta_log/_sidecars` directory. +- A V2 spec Checkpoint could reference zero or more [sidecar file actions](#sidecar-file-information). + +Note: A V2 spec Checkpoint can either have all the [add and remove file](#Add-File-and-Remove-File) actions +embedded inside itself or all of them should be in [sidecar files](#sidecar-files). Having partial +add and remove file actions in V2 Checkpoint and partial entries in sidecar files is not allowed. + +After producing a V2 spec checkpoint, a writer can choose to embed some or all of the V2 spec checkpoint in +the `_last_checkpoint` file, so that readers don't have to read the V2 Checkpoint. + +E.g. showing the content of V2 spec checkpoint: +``` +{"checkpointMetadata":{"version":364475,"tags":{}}} +{"metaData":{...}} +{"protocol":{...}} +{"txn":{"appId":"3ba13872-2d47-4e17-86a0-21afd2a22395","version":364475}} +{"txn":{"appId":"3ae45b72-24e1-865a-a211-34987ae02f2a","version":4389}} +{"sidecar":{"path":"3a0d65cd-4056-49b8-937b-95f9e3ee90e5.parquet","sizeInBytes":2341330,"modificationTime":1512909768000,"tags":{}} +{"sidecar":{"path":"016ae953-37a9-438e-8683-9a9a4a79a395.parquet","sizeInBytes":8468120,"modificationTime":1512909848000,"tags":{}} +``` + +Another example of a v2 spec checkpoint without sidecars: +``` +{"checkpointMetadata":{"version":364475,"tags":{}}} +{"metaData":{...}} +{"protocol":{...}} +{"txn":{"appId":"3ba13872-2d47-4e17-86a0-21afd2a22395","version":364475}} +{"add":{"path":"date=2017-12-10/part-000...c000.gz.parquet",...} +{"add":{"path":"date=2017-12-09/part-000...c000.gz.parquet",...} +{"remove":{"path":"date=2017-12-08/part-000...c000.gz.parquet",...} +``` + +#### V1 Spec + +The V1 Spec does not support [sidecar files](#sidecar-files) and [checkpoint metadata](#checkpoint-metadata). +These are flat checkpoints which contains all actions mentioned [here](#checkpoints-1). + +### Checkpoint Naming Scheme +Delta supports three checkpoint naming schemes: UUID-named, classic, and multi-part. + +#### UUID-named checkpoint +This naming scheme represents a [V2 spec checkpoint](#v2-spec) with following file name: `n.checkpoint.u.{json/parquet}`, +where `u` is a UUID and `n` is the snapshot version that this checkpoint represents. +The UUID-named checkpoints may be in JSON or parquet format. Since these are following [V2 spec](#v2-spec), they must +have a [checkpoint metadata](#checkpoint-metadata) action and may reference zero or more checkpoint [sidecar files](#sidecar-files). 
+ +Example-1: Json UUID-named checkpoint with sidecars + +``` +00000000000000000010.checkpoint.80a083e8-7026-4e79-81be-64bd76c43a11.json +_sidecars/016ae953-37a9-438e-8683-9a9a4a79a395.parquet +_sidecars/3a0d65cd-4056-49b8-937b-95f9e3ee90e5.parquet +_sidecars/7d17ac10-5cc3-401b-bd1a-9c82dd2ea032.parquet +``` + +Example-2: Parquet UUID-named checkpoint with sidecars + +``` +00000000000000000020.checkpoint.80a083e8-7026-4e79-81be-64bd76c43a11.parquet +_sidecars/016ae953-37a9-438e-8683-9a9a4a79a395.parquet +_sidecars/3a0d65cd-4056-49b8-937b-95f9e3ee90e5.parquet +``` + +Example-3: Json UUID-named checkpoint without sidecars + +``` +00000000000000000112.checkpoint.80a083e8-7026-4e79-81be-64bd76c43a11.json +``` + +#### Classic checkpoint + +A classic checkpoint for version `n` uses the file name `n.checkpoint.parquet`. For example: + +``` +00000000000000000010.checkpoint.parquet +``` + +If two checkpoint writers race to create the same classic checkpoint, the latest writer wins. +However, this should not matter because both checkpoints should contain the same information and a +reader could safely use either one. + +A classic checkpoint could: +1. Either follow [V1 spec](#v1-spec) or +2. Could follow [V2 spec](#v2-spec). This is possible only when +[V2 Checkpoint table feature](#v2-checkpoint-table-feature) is enabled. In this case it must include +[checkpoint metadata](#checkpoint-metadata) and may or may not have [sidecar files](#sidecar-file-information). + +#### Multi-part checkpoint +Multi-part checkpoint uses parquet format. +This checkpoint type is [deprecated](#problems-with-multi-part-checkpoints) and writers should avoid using it. + +A multi-part checkpoint for version `n` consists of `p` "part" files (`p > 1`), where part `o` of `p` is named `n.checkpoint.o.p.parquet`. For example: + +``` +00000000000000000010.checkpoint.0000000001.0000000003.parquet +00000000000000000010.checkpoint.0000000002.0000000003.parquet +00000000000000000010.checkpoint.0000000003.0000000003.parquet +``` + +For [safety reasons](#problems-with-multi-part-checkpoints), multi-part checkpoints MUST be clustered by +spark-style hash partitioning. If the table supports [Deletion Vectors](#deletion-vectors), the partitioning +key is the logical file identifier `(path, dvId)`; otherwise the key is just `path` (not `(path, NULL)`). This +ensures deterministic content in each part file in case of multiple attempts to write the files -- even when +older and newer Delta clients race. + +##### Problems with multi-part checkpoints + +Because they cannot be written atomically, multi-part checkpoints have several weaknesses: + +1. A writer cannot validate the content of the just-written checkpoint before readers could start using it. + +2. Two writers who race to produce the same checkpoint (same version, same number of parts) can overwrite each other, producing an arbitrary mix of checkpoint part files. If an overwrite changes the content of a file in any way, the resulting checkpoint may not produce an accurate snapshot. + +3. Not amenable to performance and scalability optimizations. For example, there is no way to store skipping stats for checkpoint parts, nor to reuse checkpoint part files across multiple checkpoints. + +4. Multi-part checkpoints also bloat the _delta_log dir and slow down LIST operations. + +The [UUID-named](#uuid-named-checkpoint) checkpoint (which follows [V2 spec](#v2-spec)) solves all +of these problems and should be preferred over multi-part checkpoints. 
For this reason, multi-part
checkpoints are forbidden when the [V2 Checkpoints table feature](#v2-checkpoint-table-feature) is enabled.

### Handling Backward compatibility while moving to UUID-named v2 Checkpoints

A UUID-named v2 Checkpoint should only be created by clients if the [v2 checkpoint table feature](#v2-checkpoint-table-feature) is enabled.
When UUID-named v2 checkpoints are enabled, writers should occasionally create a v2 [Classic Checkpoint](#classic-checkpoint)
to maintain compatibility with older clients which do not support the [v2 checkpoint table feature](#v2-checkpoint-table-feature) and
so do not recognize UUID-named checkpoints. These classic checkpoints have the same content as the UUID-named v2 checkpoint, but older
clients will recognize the classic file name, allowing them to extract the [Protocol](#protocol-evolution) and fail gracefully with an
invalid protocol version error on v2-checkpoint-enabled tables. Writers should create classic checkpoints often enough to allow older
clients to discover them and fail gracefully.

### Allowed combinations for `checkpoint spec` <-> `checkpoint file naming`

Checkpoint Spec | [UUID-named](#uuid-named-checkpoint) | [classic](#classic-checkpoint) | [multi-part](#multi-part-checkpoint)
-|-|-|-
[V1](#v1-spec) | Invalid | Valid | Valid
[V2](#v2-spec) | Valid | Valid | Invalid

### Metadata Cleanup

The _delta_log directory grows over time as more and more commits and checkpoints are accumulated.
Implementations are recommended to delete expired commits and checkpoints in order to reduce the directory size.
The following steps could be used to clean up the _delta_log directory:
1. Identify a threshold (in days) up to which we want to preserve the delta log. Let's refer to
midnight UTC of that day as `cutOffTimestamp`. The newest commit that is not newer than the `cutOffTimestamp` is
the `cutOffCommit`; a commit exactly at midnight is an acceptable cutoff. We want to retain everything at and after the `cutOffCommit`.
2. Identify the newest checkpoint that is not newer than the `cutOffCommit`. A checkpoint at the `cutOffCommit` is ideal, but an older one will do. Let's call it `cutOffCheckpoint`.
We need to preserve the `cutOffCheckpoint` and all commits after it, because we need them to enable
time travel for commits between the `cutOffCheckpoint` and the next available checkpoint.
3. Delete all [delta log entries](#delta-log-entries) and [checkpoint files](#checkpoints) before the
`cutOffCheckpoint` checkpoint. Also delete all the [log compaction files](#log-compaction-files) having
startVersion <= `cutOffCheckpoint`'s version.
4. Now read all the available [checkpoints](#checkpoints-1) in the _delta_log directory and identify
the corresponding [sidecar files](#sidecar-files). These sidecar files need to be protected.
5. List all the files in the `_delta_log/_sidecars` directory and preserve files that are less than a day
old (as of midnight UTC), so as not to break in-progress checkpoints. Also preserve the referenced sidecar files
identified in Step 4 above. Delete everything else.

## Data Files
 - Data files MUST be uniquely named and MUST NOT be overwritten. The reference implementation uses a GUID in the name to ensure this property.

## Append-only Tables
To support this feature:
 - The table must be on a Writer Version starting from 2 up to 7.
 - If the table is on Writer Version 7, the feature `appendOnly` must exist in the table `protocol`'s `writerFeatures`.
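
For example, a table on Writer Version 7 that supports this feature and has it switched on might carry a `protocol` and a `metaData` action similar to the following (a minimal sketch; fields that are not relevant to this feature are elided):

```
{"protocol":{"minReaderVersion":1,"minWriterVersion":7,"writerFeatures":["appendOnly"]}}
{"metaData":{...,"configuration":{"delta.appendOnly":"true"}}}
```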
+ +When supported, and if the table has a property `delta.appendOnly` set to `true`: + - New log entries MUST NOT change or remove data from the table. + - New log entries may rearrange data (i.e. `add` and `remove` actions where `dataChange=false`). + +To remove the append-only restriction, the table property `delta.appendOnly` must be set to `false`, or it must be removed. + +## Column Invariants +To support this feature + - If the table is on a Writer Version starting from 2 up to 6, Column Invariants are always enabled. + - If the table is on Writer Version 7, the feature `invariants` must exist in the table `protocol`'s `writerFeatures`. + +When supported: + - The `metadata` for a column in the table schema MAY contain the key `delta.invariants`. + - The value of `delta.invariants` SHOULD be parsed as a JSON string containing a boolean SQL expression at the key `expression.expression` (that is, `{"expression": {"expression": ""}}`). + - Writers MUST abort any transaction that adds a row to the table, where an invariant evaluates to `false` or `null`. + +For example, given the schema string (pretty printed for readability. The entire schema string in the log should be a single JSON line): + +```json +{ + "type": "struct", + "fields": [ + { + "name": "x", + "type": "integer", + "nullable": true, + "metadata": { + "delta.invariants": "{\"expression\": { \"expression\": \"x > 3\"} }" + } + } + ] +} +``` + +Writers should reject any transaction that contains data where the expression `x > 3` returns `false` or `null`. + +## CHECK Constraints + +To support this feature: +- If the table is on a Writer Version starting from 3 up to 6, CHECK Constraints are always supported. +- If the table is on Writer Version 7, a feature name `checkConstraints` must exist in the table `protocol`'s `writerFeatures`. + +CHECK constraints are stored in the map of the `configuration` field in [Metadata](#change-metadata). Each CHECK constraint has a name and is stored as a key value pair. The key format is `delta.constraints.{name}`, and the value is a SQL expression string whose return type must be `Boolean`. Columns referred by the SQL expression must exist in the table schema. + +Rows in a table must satisfy CHECK constraints. In other words, evaluating the SQL expressions of CHECK constraints must return `true` for each row in a table. + +For example, a key value pair (`delta.constraints.birthDateCheck`, `birthDate > '1900-01-01'`) means there is a CHECK constraint called `birthDateCheck` in the table and the value of the `birthDate` column in each row must be greater than `1900-01-01`. + +Hence, a writer must follow the rules below: +- CHECK Constraints may not be added to a table unless the above "to support this feature" rules are satisfied. When adding a CHECK Constraint to a table for the first time, writers are allowed to submit a `protocol` change in the same commit to add support of this feature. +- When adding a CHECK constraint to a table, a writer must validate the existing data in the table and ensure every row satisfies the new CHECK constraint before committing the change. Otherwise, the write operation must fail and the table must stay unchanged. +- When writing to a table that contains CHECK constraints, every new row being written to the table must satisfy CHECK constraints in the table. Otherwise, the write operation must fail and the table must stay unchanged. 
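
For illustration, the `birthDateCheck` constraint above would be carried in the table's `metaData` action roughly as follows (a sketch; all `metaData` fields other than `configuration` are elided):

```
{"metaData":{...,"configuration":{"delta.constraints.birthDateCheck":"birthDate > '1900-01-01'"}}}
```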

## Generated Columns

To support this feature:
 - If the table is on a Writer Version starting from 4 up to 6, Generated Columns are always supported.
 - If the table is on Writer Version 7, a feature name `generatedColumns` must exist in the table `protocol`'s `writerFeatures`.

When supported:
 - The `metadata` for a column in the table schema MAY contain the key `delta.generationExpression`.
 - The value of `delta.generationExpression` SHOULD be parsed as a SQL expression.
 - Writers MUST enforce that any data written to the table satisfies the condition `(<value> <=> <generation expression>) IS TRUE`. `<=>` is the NULL-safe equal operator which performs an equality comparison like the `=` operator but returns `TRUE` rather than NULL if both operands are `NULL`.

## Default Columns

Delta supports defining default expressions for columns on Delta tables. Delta will generate default values for columns when users do not explicitly provide values for them when writing to such tables, or when the user explicitly specifies the `DEFAULT` SQL keyword for any such column.

Semantics for write and read operations:
- Note that this metadata only applies to write operations, not read operations.
- Table write operations (such as SQL INSERT, UPDATE, and MERGE commands) will use the default values. For example, this SQL command will use default values: `INSERT INTO t VALUES (42, DEFAULT);`
- Table operations that add new columns (such as SQL ALTER TABLE ... ADD COLUMN commands) MUST NOT specify a default value for any column in the same command in which the column is created. For example, this SQL command is not supported in Delta Lake: `ALTER TABLE t ADD COLUMN c INT DEFAULT 42;`
- It is acceptable, however, to assign or update default values for columns that were already created in previous commands. For example, this SQL command is valid: `ALTER TABLE t ALTER COLUMN c SET DEFAULT 42;`

Enablement:
- The table must be on Writer Version 7, and a feature name `allowColumnDefaults` must exist in the table `protocol`'s `writerFeatures`.

When enabled:
- The `metadata` for the column in the table schema MAY contain the key `CURRENT_DEFAULT`.
- The value of `CURRENT_DEFAULT` SHOULD be parsed as a SQL expression.
- Writers MUST enforce that before writing any rows to the table, for each such requested row that lacks any explicit value (including NULL) for columns with default values, the writing system will assign the result of evaluating the default value expression for each such column as the value for that column in the row. By the same token, if the engine specified the explicit `DEFAULT` SQL keyword for any column, the expression result must be substituted in the same way.

## Identity Columns

Delta supports defining Identity columns on Delta tables. Delta will generate unique values for Identity columns when users do not explicitly provide values for them when writing to such tables. To support Identity Columns:
 - The table must be on Writer Version 6, or
 - The table must be on Writer Version 7, and a feature name `identityColumns` must exist in the table `protocol`'s `writerFeatures`.

When supported, the `metadata` for a column in the table schema MAY contain the following keys for Identity Column properties:
- `delta.identity.start`: Starting value for the Identity column. This is a long type value. It should not be changed after table creation.
- `delta.identity.step`: Increment to the next Identity value. This is a long type value. It cannot be set to 0. It should not be changed after table creation.
- `delta.identity.highWaterMark`: The highest value generated for the Identity column. This is a long type value. When `delta.identity.step` is positive (negative), this should be the largest (smallest) value in the column.
- `delta.identity.allowExplicitInsert`: True if this column allows explicitly inserted values. This is a boolean type value. It should not be changed after table creation.

When `delta.identity.allowExplicitInsert` is true, writers should meet the following requirements:
- Users should be allowed to provide their own values for Identity columns.

When `delta.identity.allowExplicitInsert` is false, writers should meet the following requirements:
- Users should not be allowed to provide their own values for Identity columns.
- Delta should generate values that satisfy the following requirements:
  - The new value does not already exist in the column.
  - The new value should satisfy `value = start + k * step` where k is a non-negative integer.
  - The new value should be beyond `delta.identity.highWaterMark`: when `delta.identity.step` is positive (negative), the new value should be greater (smaller) than `delta.identity.highWaterMark`.
- Overflow when calculating generated Identity values should be detected and such writes should not be allowed.
- `delta.identity.highWaterMark` should be updated to the new highest value when the write operation commits.

## Writer Version Requirements

The requirements of the writers according to the protocol versions are summarized in the table below. Each row inherits the requirements from the preceding row.

 | Requirements
-|-
Writer Version 2 | - Respect [Append-only Tables](#append-only-tables)<br>- Respect [Column Invariants](#column-invariants)
Writer Version 3 | - Enforce `delta.checkpoint.writeStatsAsJson`<br>- Enforce `delta.checkpoint.writeStatsAsStruct`<br>- Respect [`CHECK` constraints](#check-constraints)
Writer Version 4 | - Respect [Change Data Feed](#add-cdc-file)<br>- Respect [Generated Columns](#generated-columns)
Writer Version 5 | Respect [Column Mapping](#column-mapping)
Writer Version 6 | Respect [Identity Columns](#identity-columns)
Writer Version 7 | Respect [Table Features](#table-features) for writers

# Requirements for Readers

This section documents additional requirements that readers must respect in order to produce correct scans of a Delta table.

## Reader Version Requirements

The requirements of the readers according to the protocol versions are summarized in the table below. Each row inherits the requirements from the preceding row.

 | Requirements
-|-
Reader Version 2 | Respect [Column Mapping](#column-mapping)
Reader Version 3 | Respect [Table Features](#table-features) for readers<br>- Writer Version must be 7

# Appendix

## Valid Feature Names in Table Features

Feature | Name | Readers or Writers?
-|-|-
[Append-only Tables](#append-only-tables) | `appendOnly` | Writers only
[Column Invariants](#column-invariants) | `invariants` | Writers only
[`CHECK` constraints](#check-constraints) | `checkConstraints` | Writers only
[Generated Columns](#generated-columns) | `generatedColumns` | Writers only
[Default Columns](#default-columns) | `allowColumnDefaults` | Writers only
[Change Data Feed](#add-cdc-file) | `changeDataFeed` | Writers only
[Column Mapping](#column-mapping) | `columnMapping` | Readers and writers
[Identity Columns](#identity-columns) | `identityColumns` | Writers only
[Deletion Vectors](#deletion-vectors) | `deletionVectors` | Readers and writers
[Row Tracking](#row-tracking) | `rowTracking` | Writers only
[Timestamp without Timezone](#timestamp-without-timezone-timestampNtz) | `timestampNtz` | Readers and writers
[Domain Metadata](#domain-metadata) | `domainMetadata` | Writers only
[V2 Checkpoint](#v2-checkpoint-table-feature) | `v2Checkpoint` | Readers and writers
[Iceberg Compatibility V1](#iceberg-compatibility-v1) | `icebergCompatV1` | Writers only
[Iceberg Compatibility V2](#iceberg-compatibility-v2) | `icebergCompatV2` | Writers only
[Clustered Table](#clustered-table) | `clustering` | Writers only

## Deletion Vector Format

Deletion Vectors are basically sets of row indexes, that is 64-bit integers that describe the position (index) of a row in a parquet file starting from zero. We store these sets in a compressed format. The fundamental building block for this is the open source [RoaringBitmap](https://roaringbitmap.org/) library. RoaringBitmap is a flexible format for storing 32-bit integers that automatically switches between three different encodings at the granularity of a 16-bit block (64K values):

- Simple integer array, when the number of values in the block is small.
- Bitmap-compressed, when the number of values in the block is large and scattered.
- Run-length encoded, when the number of values in the block is large, but clustered.

The serialization format is [standardized](https://github.com/RoaringBitmap/RoaringFormatSpec), and both [Java](https://github.com/lemire/RoaringBitmap/) and [C/C++](https://github.com/RoaringBitmap/CRoaring) implementations are available (among others).

The above description only applies to 32-bit bitmaps, but Deletion Vectors use 64-bit integers. In order to extend coverage from 32 to 64 bits, RoaringBitmap defines a "portable" serialization format in the [RoaringBitmaps Specification](https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations). This format essentially splits the space into an outer part with the most significant 32-bit "keys" indexing the least significant 32-bit RoaringBitmaps in ascending sequence. The spec calls these least significant 32-bit RoaringBitmaps "buckets".

Bytes | Name | Description
-|-|-
0 – 7 | numBuckets | The number of distinct 32-bit buckets in this bitmap.
`repeat for each bucket b` | | For each bucket in ascending order of keys.
`<start of bucket b>` – `<start of bucket b> + 3` | key | The most significant 32 bits of all the values in this bucket.
`<start of bucket b> + 4` – `<end of bucket b>` | bucketData | A serialized 32-bit RoaringBitmap with all the least significant 32-bit entries in this bucket.

The 32-bit serialization format then consists of a header that describes all the (least significant) 16-bit containers, their types (see above), and their key (most significant 16 bits).
This is followed by the data for each individual container in a container-specific format.

Reference Implementations of the Roaring format:

- [32-bit Java RoaringBitmap](https://github.com/RoaringBitmap/RoaringBitmap/blob/c7993318d7224cd3cc0244dcc99c8bbc5ddb0c87/RoaringBitmap/src/main/java/org/roaringbitmap/RoaringArray.java#L905-L949)
- [64-bit Java RoaringNavigableBitmap](https://github.com/RoaringBitmap/RoaringBitmap/blob/c7993318d7224cd3cc0244dcc99c8bbc5ddb0c87/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1253-L1260)

Delta uses the format described above as a black box, but with two additions:

1. We prepend a "magic number", which can be used to make sure we are reading the correct format and also retains the ability to evolve the format in the future.
2. We require that every "key" (see above) in the bitmap has a 0 as its most significant bit. This ensures that in Java, where values are read signed, we never read negative keys.

The concrete serialization format is as follows (all numerical values are written in little endian byte order):

Bytes | Name | Description
-|-|-
0 — 3 | magicNumber | 1681511377; Indicates that the following bytes are serialized in this exact format. Future alternative—but related—formats must have a different magic number, for example by incrementing this one.
4 — end | bitmap | A serialized 64-bit bitmap in the portable standard format as defined in the [RoaringBitmaps Specification](https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations). This can be treated as a black box by any Delta implementation that has a native, standard-compliant RoaringBitmap library available to pass these bytes to.

### Deletion Vector File Storage Format

Deletion Vectors can be stored in files in cloud storage or inline in the Delta log.
The format for storing DVs in file storage is one (or more) DV per file, each using the 64-bit RoaringBitmaps described in the previous section, together with a checksum for each DV.
The concrete format is as follows, with all numerical values written in big endian byte order:

Bytes | Name | Description
-|-|-
0 — 1 | version | The format version of this file: `1` for the format described here.
`repeat for each DV i` | | For each DV
`<start of DV i>` — `<start of DV i> + 3` | dataSize | Size of this DV’s data (without the checksum)
`<start of DV i> + 4` — `<start of DV i> + 4 + dataSize - 1` | bitmapData | One 64-bit RoaringBitmap serialized as described above.
`<start of DV i> + 4 + dataSize` — `<start of DV i> + 4 + dataSize + 3` | checksum | CRC-32 checksum of `bitmapData`

## Per-file Statistics
`add` and `remove` actions can optionally contain statistics about the data in the file being added or removed from the table.
These statistics can be used for eliminating files based on query predicates or as inputs to query optimization.

Global statistics record information about the entire file.
The following global statistics are currently supported:

Name | Description
-|-
numRecords | The number of records in this data file.
tightBounds | Whether per-column statistics are currently **tight** or **wide** (see below).

For any logical file where `deletionVector` is not `null`, the `numRecords` statistic *must* be present and accurate. That is, it must equal the number of records in the data file, not the valid records in the logical file.
In the presence of [Deletion Vectors](#Deletion-Vectors) the statistics may be somewhat outdated, i.e. not reflecting deleted rows yet.
The flag `stats.tightBounds` indicates whether we have **tight bounds** (i.e. the min/maxValue exists[^1] in the valid state of the file) or **wide bounds** (i.e. the minValue is <= all valid values in the file, and the maxValue >= all valid values in the file). These upper/lower bounds are sufficient information for data skipping.

Per-column statistics record information for each column in the file and they are encoded, mirroring the schema of the actual data.
For example, given the following data schema:
```
|-- a: struct
| |-- b: struct
| | |-- c: long
```

Statistics could be stored with the following schema:
```
|-- stats: struct
| |-- numRecords: long
| |-- tightBounds: boolean
| |-- minValues: struct
| | |-- a: struct
| | | |-- b: struct
| | | | |-- c: long
| |-- maxValues: struct
| | |-- a: struct
| | | |-- b: struct
| | | | |-- c: long
```

The following per-column statistics are currently supported:

Name | Description (`stats.tightBounds=true`) | Description (`stats.tightBounds=false`)
-|-|-
nullCount | The number of `null` values for this column | <br>If the `nullCount` for a column equals the physical number of records (`stats.numRecords`) then **all** valid rows for this column must have `null` values (the reverse is not necessarily true).<br><br>If the `nullCount` for a column equals 0 then **all** valid rows are non-`null` in this column (the reverse is not necessarily true).<br><br>If the `nullCount` for a column is any value other than these two special cases, the value carries no information and should be treated as if absent.
+minValues | A value that is equal to the smallest valid value[^1] present in the file for this column. If all valid rows are null, this carries no information. | A value that is less than or equal to all valid values[^1] present in this file for this column. If all valid rows are null, this carries no information. +maxValues | A value that is equal to the largest valid value[^1] present in the file for this column. If all valid rows are null, this carries no information. | A value that is greater than or equal to all valid values[^1] present in this file for this column. If all valid rows are null, this carries no information. + +[^1]: String columns are cut off at a fixed prefix length. Timestamp columns are truncated down to milliseconds. + +## Partition Value Serialization + +Partition values are stored as strings, using the following formats. An empty string for any type translates to a `null` partition value. + +Type | Serialization Format +-|- +string | No translation required +numeric types | The string representation of the number +date | Encoded as `{year}-{month}-{day}`. For example, `1970-01-01` +timestamp | Encoded as `{year}-{month}-{day} {hour}:{minute}:{second}` or `{year}-{month}-{day} {hour}:{minute}:{second}.{microsecond}` For example: `1970-01-01 00:00:00`, or `1970-01-01 00:00:00.123456` +timestamp without timezone | Encoded as `{year}-{month}-{day} {hour}:{minute}:{second}` or `{year}-{month}-{day} {hour}:{minute}:{second}.{microsecond}` For example: `1970-01-01 00:00:00`, or `1970-01-01 00:00:00.123456` To use this type, a table must support a feature `timestampNtz`. See section [Timestamp without timezone (TimestampNtz)](#timestamp-without-timezone-timestampNtz) for more information. +boolean | Encoded as the string "true" or "false" +binary | Encoded as a string of escaped binary values. For example, `"\u0001\u0002\u0003"` + +Note: A `timestamp` value in a partition value doesn't store the time zone due to historical reasons. +It means its behavior looks similar to `timestamp without time zone` when it is used in a partition column. + +## Schema Serialization Format + +Delta uses a subset of Spark SQL's JSON Schema representation to record the schema of a table in the transaction log. +A reference implementation can be found in [the catalyst package of the Apache Spark repository](https://github.com/apache/spark/tree/master/sql/catalyst/src/main/scala/org/apache/spark/sql/types). + +### Primitive Types + +Type Name | Description +-|- +string| UTF-8 encoded string of characters +long| 8-byte signed integer. Range: -9223372036854775808 to 9223372036854775807 +integer|4-byte signed integer. Range: -2147483648 to 2147483647 +short| 2-byte signed integer numbers. Range: -32768 to 32767 +byte| 1-byte signed integer number. Range: -128 to 127 +float| 4-byte single-precision floating-point numbers +double| 8-byte double-precision floating-point numbers +decimal| signed decimal number with fixed precision (maximum number of digits) and scale (number of digits on right side of dot). The precision and scale can be up to 38. +boolean| `true` or `false` +binary| A sequence of binary data. +date| A calendar date, represented as a year-month-day triple without a timezone. +timestamp| Microsecond precision timestamp elapsed since the Unix epoch, 1970-01-01 00:00:00 UTC. When this is stored in a parquet file, its `isAdjustedToUTC` must be set to `true`. 
timestamp without time zone | Microsecond precision timestamp in a local timezone elapsed since the Unix epoch, 1970-01-01 00:00:00. It doesn't have the timezone information, and a value of this type can map to multiple physical time instants. It should always be displayed in the same way, regardless of the local time zone in effect. When this is stored in a parquet file, its `isAdjustedToUTC` must be set to `false`. To use this type, a table must support a feature `timestampNtz`. See section [Timestamp without timezone (TimestampNtz)](#timestamp-without-timezone-timestampNtz) for more information.

See Parquet [timestamp type](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#timestamp) for more details about timestamp and `isAdjustedToUTC`.

Note: Existing tables may have `void` data type columns. Behavior is undefined for `void` data type columns but it is recommended to drop any `void` data type columns on reads (as is implemented by the Spark connector).

### Struct Type

A struct is used to represent both the top-level schema of the table as well as struct columns that contain nested columns. A struct is encoded as a JSON object with the following fields:

Field Name | Description
-|-
type | Always the string "struct"
fields | An array of fields

### Struct Field

A struct field represents a top-level or nested column.

Field Name | Description
-|-
name| Name of this (possibly nested) column
type| String containing the name of a primitive type, a struct definition, an array definition or a map definition
nullable| Boolean denoting whether this field can be null
metadata| A JSON map containing information about this column. Keys prefixed with `Delta` are reserved for the implementation. See [Column Metadata](#column-metadata) for more information on column level metadata that clients must handle when writing to a table.

### Array Type

An array stores a variable length collection of items of some type.

Field Name | Description
-|-
type| Always the string "array"
elementType| The type of element stored in this array, represented as a string containing the name of a primitive type, a struct definition, an array definition or a map definition
containsNull| Boolean denoting whether this array can contain one or more null values

### Map Type

A map stores an arbitrary length collection of key-value pairs with a single `keyType` and a single `valueType`.

Field Name | Description
-|-
type| Always the string "map".
keyType| The type of element used for the key of this map, represented as a string containing the name of a primitive type, a struct definition, an array definition or a map definition
valueType| The type of element used for the value of this map, represented as a string containing the name of a primitive type, a struct definition, an array definition or a map definition

### Column Metadata
Column metadata stores various information about the column.
For example, this MAY contain some keys like [`delta.columnMapping`](#column-mapping) or [`delta.generationExpression`](#generated-columns) or [`CURRENT_DEFAULT`](#default-columns).

Field Name | Description
-|-
delta.columnMapping.*| These keys are used to store information about the mapping from the logical column name to the physical name. See [Column Mapping](#column-mapping) for details.
delta.identity.*| These keys are for defining identity columns. See [Identity Columns](#identity-columns) for details.
+delta.invariants| JSON string contains SQL expression information. See [Column Invariants](#column-invariants) for details. +delta.generationExpression| SQL expression string. See [Generated Columns](#generated-columns) for details. + + +### Example + +Example Table Schema: +``` +|-- a: integer (nullable = false) +|-- b: struct (nullable = true) +| |-- d: integer (nullable = false) +|-- c: array (nullable = true) +| |-- element: integer (containsNull = false) +|-- e: array (nullable = true) +| |-- element: struct (containsNull = true) +| | |-- d: integer (nullable = false) +|-- f: map (nullable = true) +| |-- key: string +| |-- value: string (valueContainsNull = true) +``` + +JSON Encoded Table Schema: +``` +{ + "type" : "struct", + "fields" : [ { + "name" : "a", + "type" : "integer", + "nullable" : false, + "metadata" : { } + }, { + "name" : "b", + "type" : { + "type" : "struct", + "fields" : [ { + "name" : "d", + "type" : "integer", + "nullable" : false, + "metadata" : { } + } ] + }, + "nullable" : true, + "metadata" : { } + }, { + "name" : "c", + "type" : { + "type" : "array", + "elementType" : "integer", + "containsNull" : false + }, + "nullable" : true, + "metadata" : { } + }, { + "name" : "e", + "type" : { + "type" : "array", + "elementType" : { + "type" : "struct", + "fields" : [ { + "name" : "d", + "type" : "integer", + "nullable" : false, + "metadata" : { } + } ] + }, + "containsNull" : true + }, + "nullable" : true, + "metadata" : { } + }, { + "name" : "f", + "type" : { + "type" : "map", + "keyType" : "string", + "valueType" : "string", + "valueContainsNull" : true + }, + "nullable" : true, + "metadata" : { } + } ] +} +``` + +## Checkpoint Schema +The following examples uses a table with two partition columns: "date" and "region" of types date and string, respectively, and three data columns: "asset", "quantity", and "is_available" with data types string, double, and boolean. The checkpoint schema will look as follows: + +``` +|-- metaData: struct +| |-- id: string +| |-- name: string +| |-- description: string +| |-- format: struct +| | |-- provider: string +| | |-- options: map +| |-- schemaString: string +| |-- partitionColumns: array +| |-- createdTime: long +| |-- configuration: map +|-- protocol: struct +| |-- minReaderVersion: int +| |-- minWriterVersion: int +| |-- readerFeatures: array[string] +| |-- writerFeatures: array[string] +|-- txn: struct +| |-- appId: string +| |-- version: long +|-- add: struct +| |-- path: string +| |-- partitionValues: map +| |-- size: long +| |-- modificationTime: long +| |-- dataChange: boolean +| |-- stats: string +| |-- tags: map +| |-- baseRowId: long +| |-- defaultRowCommitVersion: long +| |-- partitionValues_parsed: struct +| | |-- date: date +| | |-- region: string +| |-- stats_parsed: struct +| | |-- numRecords: long +| | |-- minValues: struct +| | | |-- asset: string +| | | |-- quantity: double +| | |-- maxValues: struct +| | | |-- asset: string +| | | |-- quantity: double +| | |-- nullCounts: struct +| | | |-- asset: long +| | | |-- quantity: long +|-- remove: struct +| |-- path: string +| |-- deletionTimestamp: long +| |-- dataChange: boolean +|-- checkpointMetadata: struct +| |-- version: long +| |-- tags: map +|-- sidecar: struct +| |-- path: string +| |-- sizeInBytes: long +| |-- modificationTime: long +| |-- tags: map +``` + +Observe that `readerFeatures` and `writerFeatures` fields should comply with: +- If a table has Reader Version 3, then a writer must write checkpoints with a not-null `readerFeatures` in the schema. 
+- If a table has Writer Version 7, then a writer must write checkpoints with a not-null `writerFeatures` in the schema. +- If a table has neither of the above, then a writer chooses whether to write `readerFeatures` and/or `writerFeatures` into the checkpoint schema. But if it does, their values must be null. + +Note that `remove` actions in the checkpoint are tombstones used only by VACUUM, and do not contain the `stats` and `tags` fields. + +For a table that uses column mapping, whether in `id` or `name` mode, the schema of the `add` column will look as follows. + +Schema definition: +``` +{ + "type" : "struct", + "fields" : [ { + "name" : "asset", + "type" : "string", + "nullable" : true, + "metadata" : { + "delta.columnMapping.id": 1, + "delta.columnMapping.physicalName": "col-b96921f0-2329-4cb3-8d79-184b2bdab23b" + } + }, { + "name" : "quantity", + "type" : "double", + "nullable" : true, + "metadata" : { + "delta.columnMapping.id": 2, + "delta.columnMapping.physicalName": "col-04ee4877-ee53-4cb9-b1fb-1a4eb74b508c" + } + }, { + "name" : "date", + "type" : "date", + "nullable" : true, + "metadata" : { + "delta.columnMapping.id": 3, + "delta.columnMapping.physicalName": "col-798f4abc-c63f-444c-9a04-e2cf1ecba115" + } + }, { + "name" : "region", + "type" : "string", + "nullable" : true, + "metadata" : { + "delta.columnMapping.id": 4, + "delta.columnMapping.physicalName": "col-19034dc3-8e3d-4156-82fc-8e05533c088e" + } + } ] +} +``` + +Checkpoint schema (just the `add` column): +``` +|-- add: struct +| |-- path: string +| |-- partitionValues: map +| |-- size: long +| |-- modificationTime: long +| |-- dataChange: boolean +| |-- stats: string +| |-- tags: map +| |-- baseRowId: long +| |-- defaultRowCommitVersion: long +| |-- partitionValues_parsed: struct +| | |-- col-798f4abc-c63f-444c-9a04-e2cf1ecba115: date +| | |-- col-19034dc3-8e3d-4156-82fc-8e05533c088e: string +| |-- stats_parsed: struct +| | |-- numRecords: long +| | |-- minValues: struct +| | | |-- col-b96921f0-2329-4cb3-8d79-184b2bdab23b: string +| | | |-- col-04ee4877-ee53-4cb9-b1fb-1a4eb74b508c: double +| | |-- maxValues: struct +| | | |-- col-b96921f0-2329-4cb3-8d79-184b2bdab23b: string +| | | |-- col-04ee4877-ee53-4cb9-b1fb-1a4eb74b508c: double +| | |-- nullCounts: struct +| | | |-- col-b96921f0-2329-4cb3-8d79-184b2bdab23b: long +| | | |-- col-04ee4877-ee53-4cb9-b1fb-1a4eb74b508c: long +``` + +## Last Checkpoint File Schema + +This last checkpoint file is encoded as JSON and contains the following information: + +Field | Description +-|- +version | The version of the table when the last checkpoint was made. +size | The number of actions that are stored in the checkpoint. +parts | The number of fragments if the last checkpoint was written in multiple parts. This field is optional. +sizeInBytes | The number of bytes of the checkpoint. This field is optional. +numOfAddFiles | The number of AddFile actions in the checkpoint. This field is optional. +checkpointSchema | The schema of the checkpoint file. This field is optional. +tags | String-string map containing any additional metadata about the last checkpoint. This field is optional. +checksum | The checksum of the last checkpoint JSON. This field is optional. + +The checksum field is an optional field which contains the MD5 checksum for fields of the last checkpoint json file. +Last checkpoint file readers are encouraged to validate the checksum, if present, and writers are encouraged to write the checksum +while overwriting the file. 
Refer to [this section](#json-checksum) for rules around calculating the checksum field +for the last checkpoint JSON. + +### JSON checksum +To generate the checksum for the last checkpoint JSON, firstly, the checksum JSON is canonicalized and converted to a string. Then +the 32 character MD5 digest is calculated on the resultant string to get the checksum. Rules for [JSON](https://datatracker.ietf.org/doc/html/rfc8259) canonicalization are: + +1. Literal values (`true`, `false`, and `null`) are their own canonical form +2. Numeric values (e.g. `42` or `3.14`) are their own canonical form +3. String values (e.g. `"hello world"`) are canonicalized by preserving the surrounding quotes and [URL-encoding](#how-to-url-encode-keys-and-string-values) +their content, e.g. `"hello%20world"` +4. Object values (e.g. `{"a": 10, "b": {"y": null, "x": "https://delta.io"} }` are canonicalized by: + * Canonicalize each scalar (leaf) value following the rule for its type (literal, numeric, string) + * Canonicalize each (string) name along the path to that value + * Connect path segments by `+`, e.g. `"b"+"y"` + * Connect path and value pairs by `=`, e.g. `"b"+"y"=null` + * Sort canonicalized path/value pairs using a byte-order sort on paths. The byte-order sort can be done by converting paths to byte array using UTF-8 charset\ + and then comparing them, e.g. `"a" < "b"+"x" < "b"+"y"` + * Separate ordered pairs by `,`, e.g. `"a"=10,"b"+"x"="https%3A%2F%2Fdelta.io","b"+"y"=null` + +5. Array values (e.g. `[null, "hi ho", 2.71]`) are canonicalized as if they were objects, except the "name" has numeric type instead of string type, and gives the (0-based) +position of the corresponding array element, e.g. `0=null,1="hi%20ho",2=2.71` + +6. Top level `checksum` key is ignored in the canonicalization process. e.g. +`{"k1": "v1", "checksum": "", "k3": 23}` is canonicalized to `"k1"="v1","k3"=23` + +7. Duplicate keys are not allowed in the last checkpoint JSON and such JSON is considered invalid. + +Given the following test sample JSON, a correct implementation of JSON canonicalization should produce the corresponding canonicalized form and checksum value: +e.g. +Json: `{"k0":"'v 0'", "checksum": "adsaskfljadfkjadfkj", "k1":{"k2": 2, "k3": ["v3", [1, 2], {"k4": "v4", "k5": ["v5", "v6", "v7"]}]}}`\ +Canonicalized form: `"k0"="%27v%200%27","k1"+"k2"=2,"k1"+"k3"+0="v3","k1"+"k3"+1+0=1,"k1"+"k3"+1+1=2,"k1"+"k3"+2+"k4"="v4","k1"+"k3"+2+"k5"+0="v5","k1"+"k3"+2+"k5"+1="v6","k1"+"k3"+2+"k5"+2="v7"`\ +Checksum is `6a92d155a59bf2eecbd4b4ec7fd1f875` + +#### How to URL encode keys and string values +The [URL Encoding](https://datatracker.ietf.org/doc/html/rfc3986) spec is a bit flexible to give a reliable encoding. e.g. the spec allows both +uppercase and lowercase as part of percent-encoding. Thus, we require a stricter set of rules for encoding: + +1. The string to be encoded must be represented as octets according to the UTF-8 character encoding +2. All octets except a-z / A-Z / 0-9 / "-" / "." / "_" / "~" are reserved +3. Always [percent-encode](https://datatracker.ietf.org/doc/html/rfc3986#section-2) reserved octets +4. Never percent-encode non-reserved octets +5. A percent-encoded octet consists of three characters: `%` followed by its 2-digit hexadecimal value in uppercase letters, e.g. 
`>` encodes to `%3E` diff --git a/README.md b/README.md new file mode 100644 index 00000000000..353ca26b6de --- /dev/null +++ b/README.md @@ -0,0 +1,183 @@ +Delta Lake Logo + +[![Test](https://github.com/delta-io/delta/actions/workflows/test.yaml/badge.svg)](https://github.com/delta-io/delta/actions/workflows/test.yaml) +[![License](https://img.shields.io/badge/license-Apache%202-brightgreen.svg)](https://github.com/delta-io/delta/blob/master/LICENSE.txt) +[![PyPI](https://img.shields.io/pypi/v/delta-spark.svg)](https://pypi.org/project/delta-spark/) +[![PyPI - Downloads](https://img.shields.io/pypi/dm/delta-spark)](https://pypistats.org/packages/delta-spark) + +Delta Lake is an open-source storage framework that enables building a [Lakehouse architecture](http://cidrdb.org/cidr2021/papers/cidr2021_paper17.pdf) with compute engines including Spark, PrestoDB, Flink, Trino, and Hive and APIs for Scala, Java, Rust, Ruby, and Python. +* See the [Delta Lake Documentation](https://docs.delta.io) for details. +* See the [Quick Start Guide](https://docs.delta.io/latest/quick-start.html) to get started with Scala, Java and Python. +* Note, this repo is one of many Delta Lake repositories in the [delta.io](https://github.com/delta-io) organizations including +[delta](https://github.com/delta-io/delta), +[delta-rs](https://github.com/delta-io/delta-rs), +[delta-sharing](https://github.com/delta-io/delta-sharing), +[kafka-delta-ingest](https://github.com/delta-io/kafka-delta-ingest), and +[website](https://github.com/delta-io/website). + +The following are some of the more popular Delta Lake integrations, refer to [delta.io/integrations](https://delta.io/integrations/) for the complete list: + +* [Apache Spark™](https://docs.delta.io/): This connector allows Apache Spark™ to read from and write to Delta Lake. +* [Apache Flink (Preview)](https://github.com/delta-io/delta/tree/master/connectors/flink): This connector allows Apache Flink to write to Delta Lake. +* [PrestoDB](https://prestodb.io/docs/current/connector/deltalake.html): This connector allows PrestoDB to read from Delta Lake. +* [Trino](https://trino.io/docs/current/connector/delta-lake.html): This connector allows Trino to read from and write to Delta Lake. +* [Delta Standalone](https://docs.delta.io/latest/delta-standalone.html): This library allows Scala and Java-based projects (including Apache Flink, Apache Hive, Apache Beam, and PrestoDB) to read from and write to Delta Lake. +* [Apache Hive](https://docs.delta.io/latest/hive-integration.html): This connector allows Apache Hive to read from Delta Lake. +* [Delta Rust API](https://docs.rs/deltalake/latest/deltalake/): This library allows Rust (with Python and Ruby bindings) low level access to Delta tables and is intended to be used with data processing frameworks like datafusion, ballista, rust-dataframe, vega, etc. + +
+ +
+Table of Contents + +* [Latest binaries](#latest-binaries) +* [API Documentation](#api-documentation) +* [Compatibility](#compatibility) + * [API Compatibility](#api-compatibility) + * [Data Storage Compatibility](#data-storage-compatibility) +* [Roadmap](#roadmap) +* [Building](#building) +* [Transaction Protocol](#transaction-protocol) +* [Requirements for Underlying Storage Systems](#requirements-for-underlying-storage-systems) +* [Concurrency Control](#concurrency-control) +* [Reporting issues](#reporting-issues) +* [Contributing](#contributing) +* [License](#license) +* [Community](#community) +
+ + +## Latest Binaries + +See the [online documentation](https://docs.delta.io/latest/) for the latest release. + +## API Documentation + +* [Scala API docs](https://docs.delta.io/latest/delta-apidoc.html) +* [Java API docs](https://docs.delta.io/latest/api/java/index.html) +* [Python API docs](https://docs.delta.io/latest/api/python/index.html) + +## Compatibility +[Delta Standalone](https://docs.delta.io/latest/delta-standalone.html) library is a single-node Java library that can be used to read from and write to Delta tables. Specifically, this library provides APIs to interact with a table’s metadata in the transaction log, implementing the Delta Transaction Log Protocol to achieve the transactional guarantees of the Delta Lake format. + + +### API Compatibility + +There are two types of APIs provided by the Delta Lake project. + +- Direct Java/Scala/Python APIs - The classes and methods documented in the [API docs](https://docs.delta.io/latest/delta-apidoc.html) are considered as stable public APIs. All other classes, interfaces, methods that may be directly accessible in code are considered internal, and they are subject to change across releases. +- Spark-based APIs - You can read Delta tables through the `DataFrameReader`/`Writer` (i.e. `spark.read`, `df.write`, `spark.readStream` and `df.writeStream`). Options to these APIs will remain stable within a major release of Delta Lake (e.g., 1.x.x). +- See the [online documentation](https://docs.delta.io/latest/releases.html) for the releases and their compatibility with Apache Spark versions. + + +### Data Storage Compatibility + +Delta Lake guarantees backward compatibility for all Delta Lake tables (i.e., newer versions of Delta Lake will always be able to read tables written by older versions of Delta Lake). However, we reserve the right to break forward compatibility as new features are introduced to the transaction protocol (i.e., an older version of Delta Lake may not be able to read a table produced by a newer version). + +Breaking changes in the protocol are indicated by incrementing the minimum reader/writer version in the `Protocol` [action](https://github.com/delta-io/delta/blob/master/core/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala). + +## Roadmap + +* For the high-level Delta Lake roadmap, see [Delta Lake 2022H1 roadmap](http://delta.io/roadmap). +* For the detailed timeline, see the [project roadmap](https://github.com/delta-io/delta/milestones). + +## Transaction Protocol + +[Delta Transaction Log Protocol](PROTOCOL.md) document provides a specification of the transaction protocol. + +## Requirements for Underlying Storage Systems + +Delta Lake ACID guarantees are predicated on the atomicity and durability guarantees of the storage system. Specifically, we require the storage system to provide the following. + +1. **Atomic visibility**: There must be a way for a file to be visible in its entirety or not visible at all. +2. **Mutual exclusion**: Only one writer must be able to create (or rename) a file at the final destination. +3. **Consistent listing**: Once a file has been written in a directory, all future listings for that directory must return that file. + +See the [online documentation on Storage Configuration](https://docs.delta.io/latest/delta-storage.html) for details. + +## Concurrency Control + +Delta Lake ensures _serializability_ for concurrent reads and writes. Please see [Delta Lake Concurrency Control](https://docs.delta.io/latest/delta-concurrency.html) for more details. 
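+As a quick illustration of the Spark-based APIs mentioned under [API Compatibility](#api-compatibility), the following sketch writes and reads a Delta table through the stable `DataFrameReader`/`Writer` surface. The table path and application name are placeholders, and the [Quick Start Guide](https://docs.delta.io/latest/quick-start.html) covers the full setup; this is a minimal sketch, not part of the build.
+
+```python
+from pyspark.sql import SparkSession
+
+# Placeholder location; any local path or object-store URI works.
+table_path = "/tmp/delta-table"
+
+# These two session confs are the standard way to enable Delta's SQL support;
+# they match the confs used by the benchmarks elsewhere in this repository.
+spark = (
+    SparkSession.builder.appName("delta-quickstart")
+    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
+    .config("spark.sql.catalog.spark_catalog",
+            "org.apache.spark.sql.delta.catalog.DeltaCatalog")
+    .getOrCreate()
+)
+
+# Write a small Delta table, then read it back with df.write / spark.read.
+spark.range(0, 5).write.format("delta").mode("overwrite").save(table_path)
+spark.read.format("delta").load(table_path).show()
+```
+
+Note that the Delta Lake artifacts must be on the Spark classpath for this to run, for example via `--packages io.delta:delta-core_2.12:<version>`.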
+
+## Reporting issues
+
+We use [GitHub Issues](https://github.com/delta-io/delta/issues) to track community reported issues. You can also [contact](#community) the community to get answers.
+
+## Contributing
+
+We welcome contributions to Delta Lake. See our [CONTRIBUTING.md](https://github.com/delta-io/delta/blob/master/CONTRIBUTING.md) for more details.
+
+We also adhere to the [Delta Lake Code of Conduct](https://github.com/delta-io/delta/blob/master/CODE_OF_CONDUCT.md).
+
+## Building
+
+Delta Lake is compiled using [SBT](https://www.scala-sbt.org/1.x/docs/Command-Line-Reference.html).
+
+To compile, run
+
+    build/sbt compile
+
+To generate artifacts, run
+
+    build/sbt package
+
+To execute tests, run
+
+    build/sbt test
+
+To execute a single test suite, run
+
+    build/sbt 'testOnly org.apache.spark.sql.delta.optimize.OptimizeCompactionSuite'
+
+To execute a single test within a single test suite, run
+
+    build/sbt 'testOnly *.OptimizeCompactionSuite -- -z "optimize command: on partitioned table - all partitions"'
+
+Refer to the [SBT docs](https://www.scala-sbt.org/1.x/docs/Command-Line-Reference.html) for more commands.
+
+## IntelliJ Setup
+
+IntelliJ is the recommended IDE to use when developing Delta Lake. To import Delta Lake as a new project:
+1. Clone Delta Lake into, for example, `~/delta`.
+2. In IntelliJ, select `File` > `New Project` > `Project from Existing Sources...` and select `~/delta`.
+3. Under `Import project from external model` select `sbt`. Click `Next`.
+4. Under `Project JDK` specify a valid Java `1.8` JDK and opt to use SBT shell for `project reload` and `builds`.
+5. Click `Finish`.
+
+### Setup Verification
+
+After waiting for IntelliJ to index, verify your setup by running a test suite in IntelliJ.
+1. Search for and open `DeltaLogSuite`.
+2. Next to the class declaration, right click on the two green arrows and select `Run 'DeltaLogSuite'`.
+
+### Troubleshooting
+
+If you see errors of the form
+
+```
+Error:(46, 28) object DeltaSqlBaseParser is not a member of package io.delta.sql.parser
+import io.delta.sql.parser.DeltaSqlBaseParser._
+...
+Error:(91, 22) not found: type DeltaSqlBaseParser
+  val parser = new DeltaSqlBaseParser(tokenStream)
+```
+
+then follow these steps:
+1. Compile using the SBT CLI: `build/sbt compile`.
+2. Go to `File` > `Project Structure...` > `Modules` > `delta-spark`.
+3. In the right panel under `Source Folders` remove any `target` folders, e.g. `target/scala-2.12/src_managed/main [generated]`.
+4. Click `Apply` and then re-run your test.
+
+## License
+Apache License 2.0, see [LICENSE](https://github.com/delta-io/delta/blob/master/LICENSE.txt).
+
+## Community
+
+There are several channels of communication within the Delta Lake community.
+
+* Public Slack Channel
+  - [Register here](https://go.delta.io/slack)
+  - [Login here](https://delta-users.slack.com/)
+* [LinkedIn page](https://www.linkedin.com/company/deltalake)
+* [YouTube channel](https://www.youtube.com/c/deltalake)
+* Public [Mailing list](https://groups.google.com/forum/#!forum/delta-users)
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 00000000000..56ce892a438
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,310 @@
+# Benchmarks
+
+## Overview
+This is a basic framework for writing benchmarks to measure Delta's performance. It is currently designed to run benchmarks on Spark running in an EMR or Dataproc cluster. However, it can be easily extended for other Spark-based benchmarks.
To get started, first download/clone this repository on your local machine. Then set up a cluster and run the benchmark scripts in this directory. See the next section for more details.
+
+## Running TPC-DS benchmark
+
+This TPC-DS benchmark is constructed such that you have to run the following two steps.
+1. *Load data*: You have to create the TPC-DS database with all the Delta tables. For this, the raw TPC-DS data is provided as Apache Parquet files. In this step you will use your EMR or Dataproc cluster to read the Parquet files and rewrite them as Delta tables.
+2. *Query data*: Then, using the table definitions in the Hive Metastore, you can run the 99 benchmark queries.
+
+The next section provides the detailed steps for setting up the necessary Hive Metastore and a cluster, testing the setup with small-scale data, and finally running the full-scale benchmark.
+
+### Configure cluster with Amazon Web Services
+
+#### Prerequisites
+- An AWS account with the necessary permissions to do the following:
+  - Manage RDS instances for creating an external Hive Metastore
+  - Manage EMR clusters for running the benchmark
+  - Read and write to an S3 bucket from the EMR cluster
+- An S3 bucket which will be used to generate the TPC-DS data.
+- A machine which has access to the AWS setup and where this repository has been downloaded or cloned.
+
+There are two ways to create the infrastructure required for benchmarks: using the provided [Terraform template](infrastructure/aws/terraform/README.md) or manually (described below).
+
+#### Create external Hive Metastore using Amazon RDS
+Create an external Hive Metastore in a MySQL database using Amazon RDS with the following specifications:
+- MySQL 8.x on a `db.m5.large`.
+- General purpose SSDs, and no autoscaling storage.
+- A non-empty password for the admin user.
+- Same region, VPC, and subnet as those where you will run the EMR cluster. See the AWS docs for more guidance.
+  - *Note:* Use region us-west-2, since that is what this benchmark has been tested with the most.
+
+After the database is ready, note the JDBC connection details, the username, and the password; we will need them for the next step. Note that this step needs to be done just once: all EMR clusters can connect to and reuse this Hive Metastore.
+
+#### Create EMR cluster
+Create an EMR cluster that connects to the external Hive Metastore. Here are the specifications of the EMR cluster required for running benchmarks.
+- EMR with Spark and Hive (needed for writing to the Hive Metastore). Choose the EMR version based on the Spark version compatible with the format. For example:
+  - For Delta 2.0 on Spark 3.2 - EMR 6.6.0
+  - For Delta 1.0 on Spark 3.1 - EMR 6.5.0
+- Master - i3.2xlarge
+- Workers - 16 x i3.2xlarge (or just 1 worker if you are only testing by running the 1GB benchmark).
+- Hive-site configuration to connect to the Hive Metastore. See [Using an external MySQL database or Amazon Aurora](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hive-metastore-external.html) for more details.
+- Same region, VPC, and subnet as those of the Hive Metastore.
+  - *Note:* Use region us-west-2, since that is what this benchmark has been tested with the most.
+- No autoscaling, and default EBS storage.
+
+Once the EMR cluster is ready, note the following:
+- Hostname of the EMR cluster master node.
+- PEM file for SSH into the master node.
+These will be needed to run the workloads in this framework.
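+If you script the manual EMR setup above instead of using the Terraform template, a cluster with roughly these specifications can also be created programmatically. The following is only an illustrative sketch using `boto3` (not part of this repository); the key pair, subnet, RDS endpoint, and credentials are placeholders you must substitute.
+
+```python
+import boto3
+
+emr = boto3.client("emr", region_name="us-west-2")
+
+response = emr.run_job_flow(
+    Name="delta-benchmarks",
+    ReleaseLabel="emr-6.6.0",  # e.g. Delta 2.0 on Spark 3.2
+    Applications=[{"Name": "Spark"}, {"Name": "Hive"}],
+    Instances={
+        "MasterInstanceType": "i3.2xlarge",
+        "SlaveInstanceType": "i3.2xlarge",
+        "InstanceCount": 17,  # 1 master + 16 workers
+        "Ec2KeyName": "<your-key-pair>",
+        "Ec2SubnetId": "<same-subnet-as-the-metastore>",
+        "KeepJobFlowAliveWhenNoSteps": True,
+    },
+    # Point hive-site at the external RDS Hive Metastore created above.
+    Configurations=[{
+        "Classification": "hive-site",
+        "Properties": {
+            "javax.jdo.option.ConnectionURL":
+                "jdbc:mysql://<rds-endpoint>:3306/hive?createDatabaseIfNotExist=true",
+            "javax.jdo.option.ConnectionDriverName": "org.mariadb.jdbc.Driver",
+            "javax.jdo.option.ConnectionUserName": "<admin-user>",
+            "javax.jdo.option.ConnectionPassword": "<admin-password>",
+        },
+    }],
+    JobFlowRole="EMR_EC2_DefaultRole",
+    ServiceRole="EMR_DefaultRole",
+)
+print("Cluster id:", response["JobFlowId"])
+```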
+ +#### Prepare S3 bucket +Create a new S3 bucket (or use an existing one) which is in the same region as your EMR cluster. + +_________________ + +### Configure cluster with Google Cloud Platform + +#### Prerequisites +- A GCP account with necessary permissions to do the following: + - Manage Dataproc clusters for running the benchmark + - Manage Dataproc Metastore instances + - Read and write to a GCS bucket from the Dataproc cluster +- A GCS bucket which will be used to generate the TPC-DS data. +- A machine which has access to the GCP setup and where this repository has been downloaded or cloned. +- SSH keys for a user which will be used to access the master node. The user's SSH key can be either [a project-wide key](https://cloud.google.com/compute/docs/connect/add-ssh-keys#add_ssh_keys_to_project_metadata) + or assigned to the [master node](https://cloud.google.com/compute/docs/connect/add-ssh-keys#after-vm-creation) only. +- Ideally, all GCP components used in benchmark should be in the same location (Storage bucket, Dataproc Metastore service and Dataproc cluster). + +There are two ways to create infrastructure required for benchmarks - using provided [Terraform template](infrastructure/gcp/terraform/README.md) or manually (described below). + +#### Prepare GCS bucket +Create a new GCS bucket (or use an existing one) which is in the same region as your Dataproc cluster. + +#### Create Dataproc Metastore +You can create [Dataproc metastore](https://cloud.google.com/dataproc-metastore/docs/create-service) +either via Web Console or gcloud command. + +Sample create command: +```bash +gcloud metastore services create dataproc-metastore-for-benchmarks \ + --location= \ + --tier=enterprise +``` + +#### Create Dataproc cluster +Here are the specifications of the Dataproc cluster required for running benchmarks. +- Image version >= 2.0 having Apache Spark 3.1 +- Master - n2-highmem-8 (8 vCPU, 64 GB memory) +- Workers - 16 x n2-highmem-8 (or just 2 workers if you are just testing by running the 1GB benchmark). +- The cluster connects to the Dataproc Metastore. +- Same region and subnet as those of the Dataproc Metastore and GCS bucket. +- No autoscaling. + +Sample create command: +```bash +gcloud dataproc clusters create delta-performance-benchmarks-cluster \ + --project \ + --enable-component-gateway \ + --region \ + --zone \ + --subnet default \ + --master-machine-type n2-highmem-8 \ + --master-boot-disk-type pd-ssd \ + --master-boot-disk-size 100 \ + --num-master-local-ssds 4 \ + --master-local-ssd-interface NVME \ + --num-workers 16 \ + --worker-machine-type n2-highmem-8 \ + --worker-boot-disk-type pd-ssd \ + --worker-boot-disk-size 100 \ + --num-worker-local-ssds 4 \ + --worker-local-ssd-interface NVME \ + --dataproc-metastore projects//locations//services/dataproc-metastore-for-benchmarks \ + --enable-component-gateway \ + --image-version 2.0-debian10 +``` + +#### Input data +The benchmark is run using the raw TPC-DS data which has been provided as Apache Parquet files. There are two +predefined datasets of different size, 1GB and 3TB, located in `s3://devrel-delta-datasets/tpcds-2.13/tpcds_sf1_parquet/` +and `s3://devrel-delta-datasets/tpcds-2.13/tpcds_sf3000_parquet/`, respectively. 
Please keep in mind that +`devrel-delta-datasets` bucket is configured as [Requester Pays](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ObjectsinRequesterPaysBuckets.html) bucket, +so [access requests have to be configured properly](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ObjectsinRequesterPaysBuckets.html). + +Unfortunately, Hadoop in versions available in Dataproc does not support *Requester Pays* feature. It will be available +as of Hadoop 3.3.4 ([HADOOP-14661](https://issues.apache.org/jira/browse/HADOOP-14661)). + +In consequence, one need to copy the datasets to Google Storage manually before running benchmarks. The simplest +solution is to copy the data in two steps: first to a S3 bucket with *Requester Pays* disabled, then copy the data +using [Cloud Storage Transfer Service](https://cloud.google.com/storage-transfer/docs/how-to). + +_________________ + +### Test the cluster setup +Navigate to your local copy of this repository and this benchmark directory. Then run the following steps. + +#### Run simple test workload +Verify that you have the following information + - : Cluster master node host name + - : Local path to your PEM file for SSH into the master node. + - : The username that will be used to SSH into the master node. The username is tied to the SSH key you + have imported into the cloud. It defaults to `hadoop`. + - : Path where tables will be created. Make sure your credentials have read/write permission to that path. + - : Currently either `gcp` or `aws`. For each storage type, different Delta properties might be added. + +Then run a simple table write-read test: Run the following in your shell. + +```sh +./run-benchmark.py \ + --cluster-hostname \ + -i \ + --ssh-user \ + --benchmark-path \ + --cloud-provider \ + --benchmark test +``` + +If this works correctly, then you should see an output that look like this. + +```text +>>> Benchmark script generated and uploaded + +... +There is a screen on: +12001..ip-172-31-21-247 (Detached) + +Files for this benchmark: +20220126-191336-test-benchmarks.jar +20220126-191336-test-cmd.sh +20220126-191336-test-out.txt +>>> Benchmark script started in a screen. Stdout piped into 20220126-191336-test-out.txt.Final report will be generated on completion in 20220126-191336-test-report.json. +``` + +The test workload launched in a `screen` is going to run the following: +- Spark jobs to run a simple SQL query +- Create a Delta table in the given location +- Read it back + +To see whether they worked correctly, SSH into the node and check the output of 20220126-191336-test-out.txt. Once the workload terminates, the last few lines should be something like the following: +```text +RESULT: +{ + "benchmarkSpecs" : { + "benchmarkPath" : ..., + "benchmarkId" : "20220126-191336-test" + }, + "queryResults" : [ { + "name" : "sql-test", + "durationMs" : 11075 + }, { + "name" : "db-list-test", + "durationMs" : 208 + }, { + "name" : "db-create-test", + "durationMs" : 4070 + }, { + "name" : "db-use-test", + "durationMs" : 41 + }, { + "name" : "table-drop-test", + "durationMs" : 74 + }, { + "name" : "table-create-test", + "durationMs" : 33812 + }, { + "name" : "table-query-test", + "durationMs" : 4795 + } ] +} +FILE UPLOAD: Uploaded /home/hadoop/20220126-191336-test-report.json to s3:// ... +SUCCESS +``` + +The above metrics are also written to a json file and uploaded to the given path. Please verify that both the table and report are generated in that path. 
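+After the report has been downloaded (or fetched from the benchmark path), the per-query durations can be summarized with a few lines of Python. This is a minimal sketch; the file name below is just the example benchmark id from the sample output above, so substitute your own `<benchmark-id>-report.json`:
+
+```python
+import json
+
+# Example file name taken from the sample output above.
+with open("20220126-191336-test-report.json") as f:
+    report = json.load(f)
+
+# Print each query name with its duration in seconds.
+for result in report["queryResults"]:
+    print(f'{result["name"]:>20}: {result["durationMs"] / 1000.0:6.1f} s')
+```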
+ +#### Run 1GB TPC-DS +Now that you are familiar with how the framework runs the workload, you can try running the small scale TPC-DS benchmark. + + +1. Load data as Delta tables: + ```bash + ./run-benchmark.py \ + --cluster-hostname \ + -i \ + --ssh-user \ + --benchmark-path \ + --cloud-provider \ + --benchmark tpcds-1gb-delta-load + ``` + If you run the benchmark in GCP you should provide `--source-path ` parameter, where `` is the location of the raw parquet input data files (see *Input data* section). + ```bash + ./run-benchmark.py \ + --cluster-hostname \ + -i \ + --ssh-user \ + --benchmark-path \ + --source-path \ + --cloud-provider gcp \ + --benchmark tpcds-1gb-delta-load + ``` + +3. Run queries on Delta tables: + ```bash + ./run-benchmark.py \ + --cluster-hostname \ + -i \ + --ssh-user \ + --benchmark-path \ + --cloud-provider \ + --benchmark tpcds-1gb-delta + ``` + +### Run 3TB TPC-DS +Finally, you are all set up to run the full scale benchmark. Similar to the 1GB benchmark, run the following + +1. Load data as Delta tables: + ```bash + ./run-benchmark.py \ + --cluster-hostname \ + -i \ + --ssh-user \ + --benchmark-path \ + --cloud-provider \ + --benchmark tpcds-3tb-delta-load + ``` + If you run the benchmark in GCP you should provide `--source-path ` parameter, where `` is the location of the raw parquet input data files (see *Input data* section). + ```bash + ./run-benchmark.py \ + --cluster-hostname \ + -i \ + --ssh-user \ + --benchmark-path \ + --source-path \ + --cloud-provider gcp \ + --benchmark tpcds-3tb-delta-load + ``` + +2. Run queries on Delta tables: + ```bash + ./run-benchmark.py \ + --cluster-hostname \ + -i \ + --ssh-user \ + --benchmark-path \ + --cloud-provider \ + --benchmark tpcds-3tb-delta + ``` + +Compare the results using the generated JSON files. + +_________________ + +## Internals of the framework + +Structure of this framework's code +- `build.sbt`, `project/`, `src/` form the SBT project which contains the Scala code that define the benchmark workload. + - `Benchmark.scala` is the basic interface, and `TestBenchmark.scala` is a sample implementation. +- `run-benchmark.py` contains the specification of the benchmarks defined by name (e.g. `tpcds-3tb-delta`). Each benchmark specification is defined by the following: + - Fully qualified name of the main Scala class to be started. + - Command line argument for the main function. + - Additional Maven artifact to load (example `io.delta:delta-core_2.12:1.0.0`). + - Spark configurations to use. +- `scripts` has the core python scripts that are called by `run-benchmark.py` + +The script `run-benchmark.py` does the following: +- Compile the Scala code into a uber jar. +- Upload it to the given hostname. +- Using ssh to the hostname, it will launch a screen and start the main class with spark-submit. diff --git a/benchmarks/build.sbt b/benchmarks/build.sbt new file mode 100644 index 00000000000..56718877d2c --- /dev/null +++ b/benchmarks/build.sbt @@ -0,0 +1,32 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +name := "benchmarks" +scalaVersion := "2.12.17" + +lazy val root = (project in file(".")) + .settings( + name := "benchmarks", + libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.5.0" % "provided", + libraryDependencies += "com.github.scopt" %% "scopt" % "4.0.1", + libraryDependencies += "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.13.1", + + assemblyMergeStrategy in assembly := { + case PathList("META-INF", xs @ _*) => MergeStrategy.discard + case x => MergeStrategy.first + } + ) + diff --git a/benchmarks/build/sbt b/benchmarks/build/sbt new file mode 100755 index 00000000000..044a2929bde --- /dev/null +++ b/benchmarks/build/sbt @@ -0,0 +1,183 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This file contains code from the Apache Spark project (original license above). +# It contains modifications, which are licensed as follows: +# + +# +# Copyright (2021) The Delta Lake Project Authors. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so +# that we can run Hive to generate the golden answer. This is not required for normal development +# or testing. +if [ -n "$HIVE_HOME" ]; then + for i in "$HIVE_HOME"/lib/* + do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i" + done + export HADOOP_CLASSPATH +fi + +realpath () { +( + TARGET_FILE="$1" + + cd "$(dirname "$TARGET_FILE")" + TARGET_FILE="$(basename "$TARGET_FILE")" + + COUNT=0 + while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ] + do + TARGET_FILE="$(readlink "$TARGET_FILE")" + cd $(dirname "$TARGET_FILE") + TARGET_FILE="$(basename $TARGET_FILE)" + COUNT=$(($COUNT + 1)) + done + + echo "$(pwd -P)/"$TARGET_FILE"" +) +} + +if [[ "$JENKINS_URL" != "" ]]; then + # Make Jenkins use Google Mirror first as Maven Central may ban us + SBT_REPOSITORIES_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories" + export SBT_OPTS="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_CONFIG" +fi + +. 
"$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash + + +declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" +declare -r sbt_opts_file=".sbtopts" +declare -r etc_sbt_opts_file="/etc/sbt/sbtopts" + +usage() { + cat < path to global settings/plugins directory (default: ~/.sbt) + -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) + -ivy path to local Ivy repository (default: ~/.ivy2) + -mem set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem)) + -no-share use all local caches; no sharing + -no-global uses global caches, but does not use global ~/.sbt directory. + -jvm-debug Turn on JVM debugging, open at the given port. + -batch Disable interactive mode + + # sbt version (default: from project/build.properties if present, else latest release) + -sbt-version use the specified version of sbt + -sbt-jar use the specified jar as the sbt launcher + -sbt-rc use an RC version of sbt + -sbt-snapshot use a snapshot version of sbt + + # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) + -java-home alternate JAVA_HOME + + # jvm options and output control + JAVA_OPTS environment variable, if unset uses "$java_opts" + SBT_OPTS environment variable, if unset uses "$default_sbt_opts" + .sbtopts if this file exists in the current directory, it is + prepended to the runner args + /etc/sbt/sbtopts if this file exists, it is prepended to the runner args + -Dkey=val pass -Dkey=val directly to the java runtime + -J-X pass option -X directly to the java runtime + (-J is stripped) + -S-X add -X to sbt's scalacOptions (-S is stripped) + -PmavenProfiles Enable a maven profile for the build. + +In the case of duplicated or conflicting options, the order above +shows precedence: JAVA_OPTS lowest, command line options highest. +EOM +} + +process_my_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; + -no-share) addJava "$noshare_opts" && shift ;; + -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;; + -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; + -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;; + -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; + -batch) exec /dev/null) + if [[ ! $? ]]; then + saved_stty="" + fi +} + +saveSttySettings +trap onExit INT + +run "$@" + +exit_status=$? +onExit diff --git a/benchmarks/build/sbt-launch-lib.bash b/benchmarks/build/sbt-launch-lib.bash new file mode 100755 index 00000000000..8f77812a0e1 --- /dev/null +++ b/benchmarks/build/sbt-launch-lib.bash @@ -0,0 +1,189 @@ +#!/usr/bin/env bash +# + +# A library to simplify using the SBT launcher from other packages. +# Note: This should be used by tools like giter8/conscript etc. + +# TODO - Should we merge the main SBT script with this library? + +if test -z "$HOME"; then + declare -r script_dir="$(dirname "$script_path")" +else + declare -r script_dir="$HOME/.sbt" +fi + +declare -a residual_args +declare -a java_args +declare -a scalac_args +declare -a sbt_commands +declare -a maven_profiles + +if test -x "$JAVA_HOME/bin/java"; then + echo -e "Using $JAVA_HOME as default JAVA_HOME." + echo "Note, this will be overridden by -java-home if it is set." 
+ declare java_cmd="$JAVA_HOME/bin/java" +else + declare java_cmd=java +fi + +echoerr () { + echo 1>&2 "$@" +} +vlog () { + [[ $verbose || $debug ]] && echoerr "$@" +} +dlog () { + [[ $debug ]] && echoerr "$@" +} + +acquire_sbt_jar () { + SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties` + URL1=${DEFAULT_ARTIFACT_REPOSITORY:-https://repo1.maven.org/maven2/}org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar + JAR=build/sbt-launch-${SBT_VERSION}.jar + + sbt_jar=$JAR + + if [[ ! -f "$sbt_jar" ]]; then + # Download sbt launch jar if it hasn't been downloaded yet + if [ ! -f "${JAR}" ]; then + # Download + printf "Attempting to fetch sbt\n" + JAR_DL="${JAR}.part" + if [ $(command -v curl) ]; then + curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\ + mv "${JAR_DL}" "${JAR}" + elif [ $(command -v wget) ]; then + wget --quiet ${URL1} -O "${JAR_DL}" &&\ + mv "${JAR_DL}" "${JAR}" + else + printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" + exit -1 + fi + fi + if [ ! -f "${JAR}" ]; then + # We failed to download + printf "Our attempt to download sbt locally to ${JAR} failed. Please install sbt manually from http://www.scala-sbt.org/\n" + exit -1 + fi + printf "Launching sbt from ${JAR}\n" + fi +} + +execRunner () { + # print the arguments one to a line, quoting any containing spaces + [[ $verbose || $debug ]] && echo "# Executing command line:" && { + for arg; do + if printf "%s\n" "$arg" | grep -q ' '; then + printf "\"%s\"\n" "$arg" + else + printf "%s\n" "$arg" + fi + done + echo "" + } + + "$@" +} + +addJava () { + dlog "[addJava] arg = '$1'" + java_args=( "${java_args[@]}" "$1" ) +} + +enableProfile () { + dlog "[enableProfile] arg = '$1'" + maven_profiles=( "${maven_profiles[@]}" "$1" ) + export SBT_MAVEN_PROFILES="${maven_profiles[@]}" +} + +addSbt () { + dlog "[addSbt] arg = '$1'" + sbt_commands=( "${sbt_commands[@]}" "$1" ) +} +addResidual () { + dlog "[residual] arg = '$1'" + residual_args=( "${residual_args[@]}" "$1" ) +} +addDebugger () { + addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1" +} + +# a ham-fisted attempt to move some memory settings in concert +# so they need not be dicked around with individually. +get_mem_opts () { + local mem=${1:-1000} + local perm=$(( $mem / 4 )) + (( $perm > 256 )) || perm=256 + (( $perm < 4096 )) || perm=4096 + local codecache=$(( $perm / 2 )) + + echo "-Xms${mem}m -Xmx${mem}m -XX:ReservedCodeCacheSize=${codecache}m" +} + +require_arg () { + local type="$1" + local opt="$2" + local arg="$3" + if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then + echo "$opt requires <$type> argument" 1>&2 + exit 1 + fi +} + +is_function_defined() { + declare -f "$1" > /dev/null +} + +process_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -h|-help) usage; exit 1 ;; + -v|-verbose) verbose=1 && shift ;; + -d|-debug) debug=1 && shift ;; + + -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;; + -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;; + -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;; + -batch) exec -i --ssh-user --cloud-provider --benchmark test + + """ + + +def parse_args(): + # Parse cmd line arguments + parser = argparse.ArgumentParser() + parser.add_argument( + "--benchmark", "-b", + required=True, + help="Run the given benchmark. 
See this " + + "python file for the list of predefined benchmark names and definitions.") + parser.add_argument( + "--cluster-hostname", + required=True, + help="Hostname or public IP of the cluster driver") + parser.add_argument( + "--ssh-id-file", "-i", + required=True, + help="SSH identity file") + parser.add_argument( + "--spark-conf", + action="append", + help="Run benchmark with given spark conf. Use separate --spark-conf for multiple confs.") + parser.add_argument( + "--resume-benchmark", + help="Resume waiting for the given running benchmark.") + parser.add_argument( + "--use-local-delta-dir", + help="Local path to delta repository which will be used for running the benchmark " + + "instead of the version specified in the specification. Make sure that new delta" + + " version is compatible with version in the spec.") + parser.add_argument( + "--cloud-provider", + choices=delta_log_store_classes.keys(), + help="Cloud where the benchmark will be executed.") + parser.add_argument( + "--ssh-user", + default="hadoop", + help="The user which is used to communicate with the master via SSH.") + + parsed_args, parsed_passthru_args = parser.parse_known_args() + return parsed_args, parsed_passthru_args + + +def run_single_benchmark(benchmark_name, benchmark_spec, other_args): + benchmark_spec.append_spark_confs(other_args.spark_conf) + benchmark_spec.append_spark_conf(delta_log_store_classes.get(other_args.cloud_provider)) + benchmark_spec.append_main_class_args(passthru_args) + print("------") + print("Benchmark spec to run:\n" + str(vars(benchmark_spec))) + print("------") + + benchmark = Benchmark(benchmark_name, benchmark_spec, + use_spark_shell=True, local_delta_dir=other_args.use_local_delta_dir) + benchmark_dir = os.path.dirname(os.path.abspath(__file__)) + with WorkingDirectory(benchmark_dir): + benchmark.run(other_args.cluster_hostname, other_args.ssh_id_file, other_args.ssh_user) + + +if __name__ == "__main__": + """ + Run benchmark on a cluster using ssh. + + Example usage: + + ./run-benchmark.py --cluster-hostname -i --ssh-user --cloud-provider --benchmark test + + """ + args, passthru_args = parse_args() + + if args.resume_benchmark is not None: + Benchmark.wait_for_completion( + args.cluster_hostname, args.ssh_id_file, args.resume_benchmark, args.ssh_user) + exit(0) + + benchmark_names = args.benchmark.split(",") + for benchmark_name in benchmark_names: + # Create and run the benchmark + if benchmark_name in benchmarks: + run_single_benchmark(benchmark_name, benchmarks[benchmark_name], args) + else: + raise Exception("Could not find benchmark spec for '" + benchmark_name + "'." + + "Must provide one of the predefined benchmark names:\n" + + "\n".join(benchmarks.keys()) + + "\nSee this python file for more details.") diff --git a/benchmarks/scripts/benchmarks.py b/benchmarks/scripts/benchmarks.py new file mode 100644 index 00000000000..95fa31aff09 --- /dev/null +++ b/benchmarks/scripts/benchmarks.py @@ -0,0 +1,494 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from scripts.utils import * +from datetime import datetime +import time + + +class BenchmarkSpec: + """ + Specifications of a benchmark. + + :param format_name: Spark format name + :param maven_artifacts: Maven artifact name in x:y:z format + :param spark_confs: list of spark conf strings in key=value format + :param benchmark_main_class: Name of main Scala class from the JAR to run + :param main_class_args: command line args for the main class + """ + def __init__( + self, format_name, maven_artifacts, spark_confs, + benchmark_main_class, main_class_args, extra_spark_shell_args=None, **kwargs): + if main_class_args is None: + main_class_args = [] + if extra_spark_shell_args is None: + extra_spark_shell_args = [] + self.format_name = format_name + self.maven_artifacts = maven_artifacts + self.spark_confs = spark_confs + self.benchmark_main_class = benchmark_main_class + self.benchmark_main_class_args = main_class_args + self.extra_spark_shell_args = extra_spark_shell_args + + def append_spark_conf(self, new_conf): + if isinstance(new_conf, str): + self.spark_confs.append(new_conf) + + def append_spark_confs(self, new_confs): + if new_confs is not None and isinstance(new_confs, list): + self.spark_confs.extend(new_confs) + + def append_main_class_args(self, new_args): + if new_args is not None and isinstance(new_args, list): + self.benchmark_main_class_args.extend(new_args) + + def get_sparksubmit_cmd(self, benchmark_jar_path): + spark_conf_str = "" + for conf in self.spark_confs: + print(f"conf={conf}") + spark_conf_str += f"""--conf "{conf}" """ + main_class_args = ' '.join(self.benchmark_main_class_args) + spark_shell_args_str = ' '.join(self.extra_spark_shell_args) + spark_submit_cmd = ( + f"spark-submit {spark_shell_args_str} " + + (f"--packages {self.maven_artifacts} " if self.maven_artifacts else "") + + f"{spark_conf_str} --class {self.benchmark_main_class} " + + f"{benchmark_jar_path} {main_class_args}" + ) + print(spark_submit_cmd) + return spark_submit_cmd + + def get_sparkshell_cmd(self, benchmark_jar_path, benchmark_init_file_path): + spark_conf_str = "" + for conf in self.spark_confs: + print(f"conf={conf}") + spark_conf_str += f"""--conf "{conf}" """ + spark_shell_args_str = ' '.join(self.extra_spark_shell_args) + spark_shell_cmd = ( + f"spark-shell {spark_shell_args_str} " + + (f"--packages {self.maven_artifacts} " if self.maven_artifacts else "") + + f"{spark_conf_str} --jars {benchmark_jar_path} -I {benchmark_init_file_path}" + ) + print(spark_shell_cmd) + return spark_shell_cmd + + +class TPCDSDataLoadSpec(BenchmarkSpec): + """ + Specifications of TPC-DS data load process. + Always mixin in this first before the base benchmark class. + """ + def __init__(self, scale_in_gb, exclude_nulls=True, **kwargs): + # forward all keyword args to next constructor + super().__init__(benchmark_main_class="benchmark.TPCDSDataLoad", **kwargs) + self.benchmark_main_class_args.extend([ + "--format", self.format_name, + "--scale-in-gb", str(scale_in_gb), + "--exclude-nulls", str(exclude_nulls), + ]) + # To access the public TPCDS parquet files on S3 + self.spark_confs.extend(["spark.hadoop.fs.s3.useRequesterPaysHeader=true"]) + + +class TPCDSBenchmarkSpec(BenchmarkSpec): + """ + Specifications of TPC-DS benchmark. 
+ """ + def __init__(self, scale_in_gb, **kwargs): + # forward all keyword args to next constructor + super().__init__(benchmark_main_class="benchmark.TPCDSBenchmark", **kwargs) + # after init of super class, use the format to add main class args + self.benchmark_main_class_args.extend([ + "--format", self.format_name, + "--scale-in-gb", str(scale_in_gb) + ]) + + +class MergeDataLoadSpec(BenchmarkSpec): + """ + Specifications of Merge data load process. + Always mixin in this first before the base benchmark class. + """ + def __init__(self, scale_in_gb, exclude_nulls=True, **kwargs): + # forward all keyword args to next constructor + super().__init__(benchmark_main_class="benchmark.MergeDataLoad", **kwargs) + self.benchmark_main_class_args.extend([ + "--scale-in-gb", str(scale_in_gb), + ]) + # To access the public TPCDS parquet files on S3 + self.spark_confs.extend(["spark.hadoop.fs.s3.useRequesterPaysHeader=true"]) + + +class MergeBenchmarkSpec(BenchmarkSpec): + """ + Specifications of Merge benchmark. + """ + def __init__(self, scale_in_gb, **kwargs): + # forward all keyword args to next constructor + super().__init__(benchmark_main_class="benchmark.MergeBenchmark", **kwargs) + # after init of super class, use the format to add main class args + self.benchmark_main_class_args.extend([ + "--scale-in-gb", str(scale_in_gb) + ]) + + + + +# ============== Delta benchmark specifications ============== + + +class DeltaBenchmarkSpec(BenchmarkSpec): + """ + Specification of a benchmark using the Delta format. + """ + def __init__(self, delta_version, benchmark_main_class, main_class_args=None, scala_version="2.12", **kwargs): + delta_spark_confs = [ + "spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension", + "spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog" + ] + self.scala_version = scala_version + + if "spark_confs" in kwargs and isinstance(kwargs["spark_confs"], list): + kwargs["spark_confs"].extend(delta_spark_confs) + else: + kwargs["spark_confs"] = delta_spark_confs + + super().__init__( + format_name="delta", + maven_artifacts=self.delta_maven_artifacts(delta_version, self.scala_version), + benchmark_main_class=benchmark_main_class, + main_class_args=main_class_args, + **kwargs + ) + + def update_delta_version(self, new_delta_version): + self.maven_artifacts = \ + DeltaBenchmarkSpec.delta_maven_artifacts(new_delta_version, self.scala_version) + + @staticmethod + def delta_maven_artifacts(delta_version, scala_version): + return f"io.delta:delta-core_{scala_version}:{delta_version},io.delta:delta-contribs_{scala_version}:{delta_version},io.delta:delta-hive_{scala_version}:0.2.0" + + +class DeltaTPCDSDataLoadSpec(TPCDSDataLoadSpec, DeltaBenchmarkSpec): + def __init__(self, delta_version, scale_in_gb=1): + super().__init__(delta_version=delta_version, scale_in_gb=scale_in_gb) + + +class DeltaTPCDSBenchmarkSpec(TPCDSBenchmarkSpec, DeltaBenchmarkSpec): + def __init__(self, delta_version, scale_in_gb=1): + super().__init__(delta_version=delta_version, scale_in_gb=scale_in_gb) + + +class DeltaMergeDataLoadSpec(MergeDataLoadSpec, DeltaBenchmarkSpec): + def __init__(self, delta_version, scale_in_gb=1): + super().__init__(delta_version=delta_version, scale_in_gb=scale_in_gb) + + +class DeltaMergeBenchmarkSpec(MergeBenchmarkSpec, DeltaBenchmarkSpec): + def __init__(self, delta_version, scale_in_gb=1): + super().__init__(delta_version=delta_version, scale_in_gb=scale_in_gb) + + + +# ============== Parquet benchmark specifications ============== + + +class 
ParquetBenchmarkSpec(BenchmarkSpec): + """ + Specification of a benchmark using the Parquet format. + """ + def __init__(self, benchmark_main_class, main_class_args=None, **kwargs): + super().__init__( + format_name="parquet", + maven_artifacts=None, + spark_confs=[], + benchmark_main_class=benchmark_main_class, + main_class_args=main_class_args, + **kwargs + ) + +class ParquetTPCDSDataLoadSpec(TPCDSDataLoadSpec, ParquetBenchmarkSpec): + def __init__(self, scale_in_gb=1): + super().__init__(scale_in_gb=scale_in_gb) + + +class ParquetTPCDSBenchmarkSpec(TPCDSBenchmarkSpec, ParquetBenchmarkSpec): + def __init__(self, scale_in_gb=1): + super().__init__(scale_in_gb=scale_in_gb) + + +# ============== General benchmark execution ============== + + +class Benchmark: + """ + Represents a benchmark that can be run on a remote Spark cluster + :param benchmark_name: A name to be used for uniquely identifying this benchmark. + Added to file names generated by this benchmark. + :param benchmark_spec: Specification of the benchmark. See BenchmarkSpec. + """ + def __init__(self, benchmark_name, benchmark_spec, use_spark_shell, local_delta_dir=None): + now = datetime.now() + self.benchmark_id = now.strftime("%Y%m%d-%H%M%S") + "-" + benchmark_name + self.benchmark_spec = benchmark_spec + + # Add benchmark id as a spark conf so that it get transferred automatically to scala code + self.benchmark_spec.append_spark_confs([f"spark.benchmarkId={self.benchmark_id}"]) + self.output_file = Benchmark.output_file(self.benchmark_id) + self.json_report_file = Benchmark.json_report_file(self.benchmark_id) + self.completed_file = Benchmark.completed_file(self.benchmark_id) + self.use_spark_shell = use_spark_shell + self.local_delta_dir = local_delta_dir + + def run(self, cluster_hostname, ssh_id_file, ssh_user): + if self.local_delta_dir and isinstance(self.benchmark_spec, DeltaBenchmarkSpec): + # Upload new Delta jar to cluster and update spec to use the jar's version + delta_version_to_use = \ + self.upload_delta_jars_to_cluster_and_get_version(cluster_hostname, ssh_id_file, ssh_user) + self.benchmark_spec.update_delta_version(delta_version_to_use) + + jar_path_in_cluster = self.upload_jar_to_cluster(cluster_hostname, ssh_id_file, ssh_user) + self.install_dependencies_via_ssh(cluster_hostname, ssh_id_file, ssh_user) + self.start_benchmark_via_ssh(cluster_hostname, ssh_id_file, jar_path_in_cluster, ssh_user) + Benchmark.wait_for_completion(cluster_hostname, ssh_id_file, self.benchmark_id, ssh_user) + + def spark_submit_script_content(self, jar_path): + return f""" +#!/bin/bash +jps | grep "Spark" | cut -f 1 -d ' ' | xargs kill -9 +set -e +{self.benchmark_spec.get_sparksubmit_cmd(jar_path)} 2>&1 | tee {self.output_file} +""".strip() + + def spark_shell_script_content(self, jar_path): + shell_init_file_name = f"{self.benchmark_id}_shell_init.scala" + benchmark_cmd_line_params_str = \ + ', '.join(f'"{w}"' for w in self.benchmark_spec.benchmark_main_class_args) + call_main_with_args = \ + f"{self.benchmark_spec.benchmark_main_class}.main(Array[String]({benchmark_cmd_line_params_str}))" + shell_init_file_content = \ + "try { %s } catch { case t => println(t); println(\"FAILED\"); System.exit(1) } ; System.exit(0)" % call_main_with_args + shell_cmd = self.benchmark_spec.get_sparkshell_cmd(jar_path, shell_init_file_name) + return f""" +#!/bin/bash +jps | grep "Spark" | cut -f 1 -d ' ' | xargs kill -9 +echo '{shell_init_file_content}' > {shell_init_file_name} +{shell_cmd} 2>&1 | tee {self.output_file} +touch 
{self.completed_file} +""".strip() + + def upload_jar_to_cluster(self, cluster_hostname, ssh_id_file, ssh_user, delta_version_to_use=None): + # Compile JAR + # Note: Deleting existing JARs instead of sbt clean is faster + if os.path.exists("target"): + run_cmd("""find target -name "*.jar" -type f -delete""", stream_output=True) + run_cmd("build/sbt assembly", stream_output=True) + (_, out, _) = run_cmd("find target -name *.jar") + print(">>> Benchmark JAR compiled\n") + + # Upload JAR + jar_local_path = out.decode("utf-8").strip() + jar_remote_path = f"{self.benchmark_id}-benchmarks.jar" + scp_cmd = \ + f"scp -C -i {ssh_id_file} {jar_local_path} {ssh_user}@{cluster_hostname}:{jar_remote_path}" + print(scp_cmd) + run_cmd(scp_cmd, stream_output=True) + print(">>> Benchmark JAR uploaded to cluster\n") + return f"~/{jar_remote_path}" + + def install_dependencies_via_ssh(self, cluster_hostname, ssh_id_file, ssh_user): + script_file_name = f"{self.benchmark_id}-install-deps.sh" + script_file_text = """ +#!/bin/bash +package='screen' +if [ -x "$(command -v yum)" ]; then + if rpm -q $package; then + echo "$package has already been installed" + else + sudo yum -y install $package + fi +elif [ -x "$(command -v apt)" ]; then + if dpkg -s $package; then + echo "$package has already been installed" + else + sudo apt install $package + fi +else + echo "Failed to install packages: Package manager not found. You must manually install: $package">&2; exit 1; +fi + + + """.strip() + self.copy_script_via_ssh(cluster_hostname, ssh_id_file, ssh_user, script_file_name, script_file_text) + print(">>> Install dependencies script generated and uploaded\n") + + job_cmd = ( + f"ssh -i {ssh_id_file} {ssh_user}@{cluster_hostname} " + + f"bash {script_file_name}" + ) + print(job_cmd) + run_cmd(job_cmd, stream_output=True) + print(">>> Dependencies have been installed\n") + + def start_benchmark_via_ssh(self, cluster_hostname, ssh_id_file, jar_path, ssh_user): + # Generate and upload the script to run the benchmark + script_file_name = f"{self.benchmark_id}-cmd.sh" + if self.use_spark_shell: + script_file_text = self.spark_shell_script_content(jar_path) + else: + script_file_text = self.spark_submit_script_content(jar_path) + + self.copy_script_via_ssh(cluster_hostname, ssh_id_file, ssh_user, script_file_name, script_file_text) + print(">>> Benchmark script generated and uploaded\n") + + # Start the script + job_cmd = ( + f"ssh -i {ssh_id_file} {ssh_user}@{cluster_hostname} " + + f"screen -d -m bash {script_file_name}" + ) + print(job_cmd) + run_cmd(job_cmd, stream_output=True) + + # Print the screen where it is running + run_cmd(f"ssh -i {ssh_id_file} {ssh_user}@{cluster_hostname}" + + f""" "screen -ls ; sleep 2; echo Files for this benchmark: ; ls {self.benchmark_id}*" """, + stream_output=True, throw_on_error=False) + print(f">>> Benchmark id {self.benchmark_id} started in a screen. Stdout piped into {self.output_file}. 
" + f"Final report will be generated on completion in {self.json_report_file}.\n") + + @staticmethod + def copy_script_via_ssh(cluster_hostname, ssh_id_file, ssh_user, script_file_name, script_file_text): + try: + script_file = open(script_file_name, "w") + script_file.write(script_file_text) + script_file.close() + + scp_cmd = ( + f"scp -i {ssh_id_file} {script_file_name}" + + f" {ssh_user}@{cluster_hostname}:{script_file_name}" + ) + print(scp_cmd) + run_cmd(scp_cmd, stream_output=True) + run_cmd_over_ssh(f"chmod +x {script_file_name}", cluster_hostname, ssh_id_file, ssh_user, + throw_on_error=False) + finally: + if os.path.exists(script_file_name): + os.remove(script_file_name) + + @staticmethod + def output_file(benchmark_id): + return f"{benchmark_id}-out.txt" + + @staticmethod + def json_report_file(benchmark_id): + return f"{benchmark_id}-report.json" + + @staticmethod + def csv_report_file(benchmark_id): + return f"{benchmark_id}-report.csv" + + @staticmethod + def completed_file(benchmark_id): + return f"{benchmark_id}-completed.txt" + + @staticmethod + def wait_for_completion(cluster_hostname, ssh_id_file, benchmark_id, ssh_user, copy_report=True): + completed = False + succeeded = False + output_file = Benchmark.output_file(benchmark_id) + completed_file = Benchmark.completed_file(benchmark_id) + json_report_file = Benchmark.json_report_file(benchmark_id) + csv_report_file = Benchmark.csv_report_file(benchmark_id) + + print(f"\nWaiting for completion of benchmark id {benchmark_id}") + while not completed: + # Print the size of the output file to show progress + (_, out, _) = run_cmd_over_ssh(f"stat -c '%n: [%y] [%s bytes]' {output_file}", + cluster_hostname, ssh_id_file, ssh_user, + throw_on_error=False) + out = out.decode("utf-8").strip() + print(out) + if "No such file" in out: + print(">>> Benchmark failed to start") + return + + # Check for the existence of the completed file + (_, out, _) = run_cmd_over_ssh(f"ls {completed_file}", cluster_hostname, ssh_id_file, ssh_user, + throw_on_error=False) + if completed_file in out.decode("utf-8"): + completed = True + else: + time.sleep(60) + + # Check the last few lines of output files to identify success + (_, out, _) = run_cmd_over_ssh(f"tail {output_file}", cluster_hostname, ssh_id_file, ssh_user, + throw_on_error=False) + if "SUCCESS" in out.decode("utf-8"): + succeeded = True + print(">>> Benchmark completed with success\n") + else: + print(">>> Benchmark completed with failure\n") + + # Download reports + if copy_report: + Benchmark.download_file(output_file, cluster_hostname, ssh_id_file, ssh_user) + if succeeded: + report_files = [json_report_file, csv_report_file] + for report_file in report_files: + Benchmark.download_file(report_file, cluster_hostname, ssh_id_file, ssh_user) + print(">>> Downloaded reports to local directory") + + + @staticmethod + def download_file(file, cluster_hostname, ssh_id_file, ssh_user): + run_cmd(f"scp -C -i {ssh_id_file} " + + f"{ssh_user}@{cluster_hostname}:{file} {file}", + stream_output=True) + + def upload_delta_jars_to_cluster_and_get_version(self, cluster_hostname, ssh_id_file, ssh_user): + if not self.local_delta_dir: + raise Exception("Path to delta repo not specified") + delta_repo_dir = os.path.abspath(self.local_delta_dir) + + with WorkingDirectory(delta_repo_dir): + # Compile Delta JARs by publishing to local maven cache + print(f"Compiling Delta to local dir {delta_repo_dir}") + local_maven_delta_dir = os.path.expanduser("~/.ivy2/local/io.delta/") + if 
os.path.exists(local_maven_delta_dir): + run_cmd(f"rm -rf {local_maven_delta_dir}", stream_output=True) + print(f"Cleared local maven cache at {local_maven_delta_dir}") + run_cmd("build/sbt publishLocal", stream_output=False, throw_on_error=True) + + # Get the new version + (_, out, _) = run_cmd("""build/sbt "show version" """) + version = out.decode("utf-8").strip().rsplit("\n", 1)[-1].rsplit(" ", 1)[-1].strip() + if not version: + raise Exception(f"Could not find the version from the sbt output:\n--\n{out}\n-") + + # Upload JARs to cluster's local maven cache + remote_maven_dir = ".ivy2/local/" # must have "/" at the end + run_cmd_over_ssh( + f"rm -rf {remote_maven_dir}/* .ivy2/cache/io.delta .ivy2/jars/io.delta*", + cluster_hostname, ssh_id_file, ssh_user, stream_output=True, throw_on_error=False) + run_cmd_over_ssh(f"mkdir -p {remote_maven_dir}", cluster_hostname, + ssh_id_file, ssh_user, stream_output=True) + scp_cmd = f"""scp -r -C -i {ssh_id_file} {local_maven_delta_dir.rstrip("/")} """ +\ + f"{ssh_user}@{cluster_hostname}:{remote_maven_dir}" + print(scp_cmd) + run_cmd(scp_cmd, stream_output=True) + print(f">>> Delta {version} JAR uploaded to cluster\n") + return version diff --git a/benchmarks/scripts/utils.py b/benchmarks/scripts/utils.py new file mode 100644 index 00000000000..8ea0e8a8ba0 --- /dev/null +++ b/benchmarks/scripts/utils.py @@ -0,0 +1,67 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import shlex +import subprocess + + +def run_cmd(cmd, throw_on_error=True, env=None, stream_output=False, **kwargs): + if isinstance(cmd, str): + cmd = shlex.split(cmd) + cmd_env = os.environ.copy() + if env: + cmd_env.update(env) + + if stream_output: + child = subprocess.Popen(cmd, env=cmd_env, **kwargs) + exit_code = child.wait() + if throw_on_error and exit_code != 0: + raise Exception("Non-zero exitcode: %s" % (exit_code)) + return exit_code + else: + child = subprocess.Popen( + cmd, + env=cmd_env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **kwargs) + (stdout, stderr) = child.communicate() + exit_code = child.wait() + if throw_on_error and exit_code != 0: + raise Exception( + "Non-zero exitcode: %s\n\nSTDOUT:\n%s\n\nSTDERR:%s" % + (exit_code, stdout, stderr)) + return exit_code, stdout, stderr + + +def run_cmd_over_ssh(cmd, host, ssh_id_file, user, **kwargs): + full_cmd = f"""ssh -i {ssh_id_file} {user}@{host} "{cmd}" """ + return run_cmd(full_cmd, **kwargs) + + +# pylint: disable=too-few-public-methods +class WorkingDirectory(object): + def __init__(self, working_directory): + self.working_directory = working_directory + self.old_workdir = os.getcwd() + + def __enter__(self): + os.chdir(self.working_directory) + + def __exit__(self, tpe, value, traceback): + os.chdir(self.old_workdir) + diff --git a/benchmarks/src/main/scala/benchmark/Benchmark.scala b/benchmarks/src/main/scala/benchmark/Benchmark.scala new file mode 100644 index 00000000000..9f2a3e4acab --- /dev/null +++ b/benchmarks/src/main/scala/benchmark/Benchmark.scala @@ -0,0 +1,281 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package benchmark + +import java.net.URI +import java.nio.file.{Files, Paths} +import java.nio.charset.StandardCharsets + +import scala.collection.mutable +import scala.language.postfixOps +import scala.sys.process._ +import scala.util.control.NonFatal + +import com.fasterxml.jackson.annotation.JsonInclude.Include +import com.fasterxml.jackson.annotation.JsonPropertyOrder +import com.fasterxml.jackson.databind.{DeserializationFeature, MapperFeature, ObjectMapper} +import com.fasterxml.jackson.module.scala.{DefaultScalaModule, ScalaObjectMapper} + +import org.apache.spark.SparkUtils +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.internal.SQLConf + +trait BenchmarkConf extends Product { + /** Cloud path where benchmark data is going to be written. */ + def benchmarkPath: Option[String] + + /** Get the database location given the database name and the benchmark path. */ + def dbLocation(dbName: String, suffix: String = ""): String = { + benchmarkPath.map(p => s"$p/databases/${dbName}_${suffix}").getOrElse { + throw new IllegalArgumentException("Benchmark path must be specified") + } + } + + /** Cloud path where benchmark reports will be uploaded. 
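+   * For example, if benchmarkPath is "s3://my-bucket/benchmarks" (an illustrative path), reports are uploaded under "s3://my-bucket/benchmarks/reports/", with JSON and CSV reports in the json/ and csv/ subdirectories (see jsonReportUploadPath and csvReportUploadPath below).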
*/ + def reportUploadPath: String = { + benchmarkPath.map(p => s"$p/reports/").getOrElse { + throw new IllegalArgumentException("Benchmark path must be specified") + } + } + def jsonReportUploadPath: String = s"$reportUploadPath/json/" + def csvReportUploadPath: String = s"$reportUploadPath/csv/" + + /** Get the benchmark conf details as a map. */ + def asMap: Map[String, String] = SparkUtils.caseClassToMap(this) +} + +@JsonPropertyOrder(alphabetic=true) +case class QueryResult( + name: String, + iteration: Option[Int], + durationMs: Option[Long], + errorMsg: Option[String]) + +@JsonPropertyOrder(alphabetic=true) +case class SparkEnvironmentInfo( + @JsonPropertyOrder(alphabetic=true) + sparkBuildInfo: Map[String, String], + @JsonPropertyOrder(alphabetic=true) + runtimeInfo: Map[String, String], + @JsonPropertyOrder(alphabetic=true) + sparkProps: Map[String, String], + @JsonPropertyOrder(alphabetic=true) + hadoopProps: Map[String, String], + @JsonPropertyOrder(alphabetic=true) + systemProps: Map[String, String], + @JsonPropertyOrder(alphabetic=true) + classpathEntries: Map[String, String]) + +@JsonPropertyOrder(alphabetic=true) +case class BenchmarkReport( + @JsonPropertyOrder(alphabetic=true) + benchmarkSpecs: Map[String, String], + queryResults: Array[QueryResult], + extraMetrics: Map[String, Double], + sparkEnvInfo: SparkEnvironmentInfo) + +/** + * Base class for any benchmark with the core functionality of measuring SQL query durations + * and printing the details as json in a report file. + */ +abstract class Benchmark(private val conf: BenchmarkConf) { + + /* Methods that implementations should override. */ + + protected def runInternal(): Unit + + /* Fields and methods that implementations should not have to override */ + + final protected lazy val spark = { + val s = SparkSession.builder() + .config("spark.ui.proxyBase", "") + .getOrCreate() + log("Spark started with configuration:\n" + + s.conf.getAll.toSeq.sortBy(_._1).map(x => x._1 + ": " + x._2).mkString("\t", "\n\t", "\n")) + s.sparkContext.setLogLevel("WARN") + sys.props.update("spark.ui.proxyBase", "") + s + } + + val extraConfs: Map[String, String] = Map( + SQLConf.BROADCAST_TIMEOUT.key -> "7200", + SQLConf.CROSS_JOINS_ENABLED.key -> "true" + ) + + private val queryResults = new mutable.ArrayBuffer[QueryResult] + private val extraMetrics = new mutable.HashMap[String, Double] + + protected def run(): Unit = { + try { + log("=" * 80) + log("=" * 80) + runInternal() + log("=" * 80) + } finally { + generateReport() + } + println(s"SUCCESS") + } + + protected def runQuery( + sqlCmd: String, + queryName: String = "", + iteration: Option[Int] = None, + printRows: Boolean = false, + ignoreError: Boolean = true): Seq[Row] = synchronized { + val iterationStr = iteration.map(i => s" - iteration $i").getOrElse("") + var banner = s"$queryName$iterationStr" + if (banner.trim.isEmpty) { + banner = sqlCmd.split("\n")(0).trim + (if (sqlCmd.split("\n").size > 1) "..." 
else "") + } + log("=" * 80) + log(s"START: $banner") + log("SQL: " + sqlCmd.replaceAll("\n\\s*", " ")) + spark.sparkContext.setJobGroup(banner, banner, interruptOnCancel = true) + try { + val before = System.nanoTime() + val df = spark.sql(sqlCmd) + val r = df.collect() + val after = System.nanoTime() + if (printRows) df.show(false) + val durationMs = (after - before) / (1000 * 1000) + queryResults += QueryResult(queryName, iteration, Some(durationMs), errorMsg = None) + log(s"END took $durationMs ms: $banner") + log("=" * 80) + r + } catch { + case NonFatal(e) => + log(s"ERROR: $banner\n${e.getMessage}") + queryResults += + QueryResult(queryName, iteration, durationMs = None, errorMsg = Some(e.getMessage)) + if (!ignoreError) throw e else Nil + } + } + + + protected def runFunc( + queryName: String = "", + iteration: Option[Int] = None, + ignoreError: Boolean = true)(f: => Unit): Unit = synchronized { + val iterationStr = iteration.map(i => s" - iteration $i").getOrElse("") + var banner = s"$queryName$iterationStr" + log("=" * 80) + log(s"START: $banner") + spark.sparkContext.setJobGroup(banner, banner, interruptOnCancel = true) + try { + val before = System.nanoTime() + f + val after = System.nanoTime() + val durationMs = (after - before) / (1000 * 1000) + queryResults += QueryResult(queryName, iteration, Some(durationMs), errorMsg = None) + log(s"END took $durationMs ms: $banner") + log("=" * 80) + } catch { + case NonFatal(e) => + log(s"ERROR: $banner\n${e.getMessage}") + queryResults += + QueryResult(queryName, iteration, durationMs = None, errorMsg = Some(e.getMessage)) + if (!ignoreError) throw e else spark.emptyDataFrame + } + } + + + protected def reportExtraMetric(name: String, value: Double): Unit = synchronized { + extraMetrics += (name -> value) + } + + protected def getQueryResults(): Array[QueryResult] = synchronized { queryResults.toArray } + + private def generateJSONReport(report: BenchmarkReport): Unit = synchronized { + import Benchmark._ + + val resultJson = toPrettyJson(report) + val resultFileName = + if (benchmarkId.trim.isEmpty) "report.json" else s"$benchmarkId-report.json" + val reportLocalPath = Paths.get(resultFileName).toAbsolutePath() + Files.write(reportLocalPath, resultJson.getBytes(StandardCharsets.UTF_8)) + println(s"RESULT:\n$resultJson") + uploadFile(reportLocalPath.toString, conf.jsonReportUploadPath) + } + + private def generateCSVReport(): Unit = synchronized { + val csvHeader = "name,iteration,durationMs" + val csvRows = queryResults.map { r => + s"${r.name},${r.iteration.getOrElse(1)},${r.durationMs.getOrElse(-1)}" + } + val csvText = (Seq(csvHeader) ++ csvRows).mkString("\n") + val resultFileName = + if (benchmarkId.trim.isEmpty) "report.csv" else s"$benchmarkId-report.csv" + val reportLocalPath = Paths.get(resultFileName).toAbsolutePath() + Files.write(reportLocalPath, csvText.getBytes(StandardCharsets.UTF_8)) + uploadFile(reportLocalPath.toString, conf.csvReportUploadPath) + } + + private def generateReport(): Unit = synchronized { + val report = BenchmarkReport( + benchmarkSpecs = conf.asMap + ("benchmarkId" -> benchmarkId), + queryResults = queryResults.toArray, + extraMetrics = extraMetrics.toMap, + sparkEnvInfo = SparkUtils.getEnvironmentInfo(spark.sparkContext) + ) + generateJSONReport(report) + generateCSVReport() + } + + private def uploadFile(localPath: String, targetPath: String): Unit = { + val targetUri = new URI(targetPath) + val sanitizedTargetPath = targetUri.normalize().toString + val scheme = new URI(targetPath).getScheme + try 
{ + if (scheme.equals("s3")) s"aws s3 cp $localPath $sanitizedTargetPath/" ! + else if (scheme.equals("gs")) s"gsutil cp $localPath $sanitizedTargetPath/" ! + else throw new IllegalArgumentException(String.format("Unsupported scheme %s.", scheme)) + + println(s"FILE UPLOAD: Uploaded $localPath to $sanitizedTargetPath") + } catch { + case NonFatal(e) => + log(s"FILE UPLOAD: Failed to upload $localPath to $sanitizedTargetPath: $e") + } + } + + protected def benchmarkId: String = + sys.env.getOrElse("BENCHMARK_ID", spark.conf.getOption("spark.benchmarkId").getOrElse("")) + + protected def log(str: => String): Unit = { + println(s"${java.time.LocalDateTime.now} $str") + } +} + +object Benchmark { + private lazy val mapper = { + val _mapper = new ObjectMapper with ScalaObjectMapper + _mapper.setSerializationInclusion(Include.NON_ABSENT) + _mapper.enable(MapperFeature.SORT_PROPERTIES_ALPHABETICALLY) + _mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + _mapper.registerModule(DefaultScalaModule) + _mapper + } + + def toJson[T: Manifest](obj: T): String = { + mapper.writeValueAsString(obj) + } + + def toPrettyJson[T: Manifest](obj: T): String = { + mapper.writerWithDefaultPrettyPrinter().writeValueAsString(obj) + } +} diff --git a/benchmarks/src/main/scala/benchmark/MergeBenchmark.scala b/benchmarks/src/main/scala/benchmark/MergeBenchmark.scala new file mode 100644 index 00000000000..19ccbe2966b --- /dev/null +++ b/benchmarks/src/main/scala/benchmark/MergeBenchmark.scala @@ -0,0 +1,145 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package benchmark + +import java.util.UUID + +import org.apache.spark.SparkUtils +import org.apache.spark.sql.Row + +trait MergeConf extends BenchmarkConf { + def scaleInGB: Int + def tableName: String = "web_returns" + def userDefinedDbName: Option[String] + def dbName: String = userDefinedDbName.getOrElse(s"merge_sf${scaleInGB}") + def dbLocation: String = dbLocation(dbName) +} + +case class MergeBenchmarkConf( + scaleInGB: Int = 0, + userDefinedDbName: Option[String] = None, + iterations: Int = 3, + benchmarkPath: Option[String] = None) extends MergeConf { +} + +object MergeBenchmarkConf { + import scopt.OParser + private val builder = OParser.builder[MergeBenchmarkConf] + private val argParser = { + import builder._ + OParser.sequence( + programName("Merge Benchmark"), + opt[String]("scale-in-gb") + .required() + .valueName("") + .action((x, c) => c.copy(scaleInGB = x.toInt)) + .text("Scale factor in GBs of the TPCDS benchmark"), + opt[String]("benchmark-path") + .required() + .valueName("") + .action((x, c) => c.copy(benchmarkPath = Some(x))) + .text("Cloud path to be used for creating table and generating reports"), + opt[String]("iterations") + .optional() + .valueName("") + .action((x, c) => c.copy(iterations = x.toInt)) + .text("Number of times to run the queries")) + } + + def parse(args: Array[String]): Option[MergeBenchmarkConf] = { + OParser.parse(argParser, args, MergeBenchmarkConf()) + } +} + +class MergeBenchmark(conf: MergeBenchmarkConf) extends Benchmark(conf) { + /** + * Runs every merge test case multiple times and records the duration. + */ + override def runInternal(): Unit = { + for ((k, v) <- extraConfs) spark.conf.set(k, v) + spark.sparkContext.setLogLevel("WARN") + log("All configs:\n\t" + spark.conf.getAll.toSeq.sortBy(_._1).mkString("\n\t")) + spark.sql(s"USE ${conf.dbName}") + + val targetRowCount = spark.read.table(s"`${conf.dbName}`.`target_${conf.tableName}`").count + + for (iteration <- 1 to conf.iterations) { + MergeTestCases.testCases.foreach { runMerge(_, targetRowCount, iteration = Some(iteration)) } + } + val results = getQueryResults().filter(_.name.startsWith("q")) + if (results.forall(x => x.errorMsg.isEmpty && x.durationMs.nonEmpty) ) { + val medianDurationSecPerQuery = results.groupBy(_.name).map { case (q, results) => + assert(results.length == conf.iterations) + val medianMs = SparkUtils.median(results.map(_.durationMs.get), alreadySorted = false) + (q, medianMs / 1000.0) + } + val sumOfMedians = medianDurationSecPerQuery.values.sum + reportExtraMetric("merge-result-seconds", sumOfMedians) + } + } + + /** + * Merge test runner performing the following steps: + * - Clone a fresh target table. + * - Run the merge test case. + * - Check invariants. + * - Drop the cloned table. + */ + protected def runMerge( + testCase: MergeTestCase, + targetRowCount: Long, + iteration: Option[Int] = None, + printRows: Boolean = false, + ignoreError: Boolean = true): Seq[Row] = synchronized { + withCloneTargetTable(testCase.name) { targetTable => + val result = super.runQuery( + testCase.sqlCmd(targetTable), + testCase.name, + iteration, + printRows, + ignoreError) + testCase.validate(result, targetRowCount) + result + } + } + + /** + * Clones the target table before each test case to use a fresh target table and drops the clone + * afterwards. 
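+   * Illustrative usage (hypothetical test case name): withCloneTargetTable("upsert") { cloned => runQuery(s"SELECT COUNT(*) FROM $cloned", "count-clone-upsert") }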
+ */ + protected def withCloneTargetTable[T](testCaseName: String)(f: String => T): T = { + val target = s"`${conf.dbName}`.`target_${conf.tableName}`" + val clonedTableName = s"`${conf.dbName}`.`${conf.tableName}_${generateShortUUID()}`" + runQuery(s"CREATE TABLE $clonedTableName SHALLOW CLONE $target", s"clone-target-$testCaseName") + try { + f(clonedTableName) + } finally { + runQuery(s"DROP TABLE IF EXISTS $clonedTableName", s"drop-target-clone-$testCaseName") + } + } + + protected def generateShortUUID(): String = + UUID.randomUUID.toString.replace("-", "_").take(8) +} + +object MergeBenchmark { + def main(args: Array[String]): Unit = { + MergeBenchmarkConf.parse(args).foreach { conf => + new MergeBenchmark(conf).run() + } + } +} diff --git a/benchmarks/src/main/scala/benchmark/MergeDataLoad.scala b/benchmarks/src/main/scala/benchmark/MergeDataLoad.scala new file mode 100644 index 00000000000..95f82551602 --- /dev/null +++ b/benchmarks/src/main/scala/benchmark/MergeDataLoad.scala @@ -0,0 +1,208 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package benchmark + +import java.util.Locale + +import org.apache.spark.sql.functions.{col, countDistinct, hash, isnull, max, rand} + + +case class MergeDataLoadConf( + scaleInGB: Int = 0, + userDefinedDbName: Option[String] = None, + loadFromPath: Option[String] = None, + benchmarkPath: Option[String] = None, + excludeNulls: Boolean = true) extends MergeConf { +} + +/** + * Represents a table configuration used as a source in merge test cases. Each [[MergeTestCase]] has + * one [[MergeSourceTable]] associated with it, the data loader will collect all source table + * configurations for all tests and create the required source tables. + * @param filesMatchedFraction Fraction of files from the base table that will get sampled to + * create the source table. + * @param rowsMatchedFraction Fraction of rows from the selected files that will get sampled to form + * the part of the source table that matches the merge condition. + * @param rowsNotMatchedFraction Fraction of rows from the selected files that will get sampled to + * form the part of the source table that doesn't match the merge + * condition. 
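+ * For example (illustrative values), MergeSourceTable(filesMatchedFraction = 0.05, rowsMatchedFraction = 0.5, rowsNotMatchedFraction = 0.1) samples 5% of the target table's files, keeps 50% of the rows in those files as source rows that match the merge condition, and adds another 10% with randomized keys as rows that do not match.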
+ */ +case class MergeSourceTable( + filesMatchedFraction: Double, + rowsMatchedFraction: Double, + rowsNotMatchedFraction: Double) { + def name: String = formatTableName(s"source_" + + s"_filesMatchedFraction_$filesMatchedFraction" + + s"_rowsMatchedFraction_$rowsMatchedFraction" + + s"_rowsNotMatchedFraction_$rowsNotMatchedFraction") + + protected def formatTableName(s: String): String = { + s.toLowerCase(Locale.ROOT).replaceAll("\\s+", "_").replaceAll("[-,.]", "_") + } +} + +object MergeDataLoadConf { + import scopt.OParser + private val builder = OParser.builder[MergeDataLoadConf] + private val argParser = { + import builder._ + OParser.sequence( + programName("Merge Data Load"), + opt[String]("scale-in-gb") + .required() + .valueName("") + .action((x, c) => c.copy(scaleInGB = x.toInt)) + .text("Scale factor of the Merge benchmark"), + opt[String]("benchmark-path") + .required() + .valueName("") + .action((x, c) => c.copy(benchmarkPath = Some(x))) + .text("Cloud storage path to be used for creating table and generating reports"), + opt[String]("db-name") + .optional() + .valueName("") + .action((x, c) => c.copy(userDefinedDbName = Some(x))) + .text("Name of the target database to create with TPC-DS tables in necessary format"), + opt[String]("load-from-path") + .optional() + .valueName("") + .action((x, c) => c.copy(loadFromPath = Some(x))) + .text("The location of the TPC-DS raw input data"), + opt[String]("exclude-nulls") + .optional() + .valueName("true/false") + .action((x, c) => c.copy(excludeNulls = x.toBoolean)) + .text("Whether to remove null primary keys when loading data, default = true")) + } + + def parse(args: Array[String]): Option[MergeDataLoadConf] = { + OParser.parse(argParser, args, MergeDataLoadConf()) + } +} + +class MergeDataLoad(conf: MergeDataLoadConf) extends Benchmark(conf) { + + protected def targetTableFullName = s"`${conf.dbName}`.`target_${conf.tableName}`" + + protected def dataLoadFromPath: String = conf.loadFromPath.getOrElse { + s"s3://devrel-delta-datasets/tpcds-2.13/tpcds_sf${conf.scaleInGB}_parquet/${conf.tableName}/" + } + + /** + * Creates the target table and all source table configurations used in merge test cases. + */ + def runInternal(): Unit = { + val dbName = conf.dbName + val dbLocation = conf.dbLocation(dbName, suffix = benchmarkId.replace("-", "_")) + val dbCatalog = "spark_catalog" + + require(Seq(1, 3000).contains(conf.scaleInGB), + s"Unsupported scale factor: ${conf.scaleInGB} GB; supported values are 1 and 3000") + + log(s"====== Creating database =======") + runQuery(s"DROP DATABASE IF EXISTS ${dbName} CASCADE", s"drop-database") + runQuery(s"CREATE DATABASE IF NOT EXISTS ${dbName}", s"create-database") + + log(s"====== Creating merge target table =======") + loadMergeTargetTable() + log(s"====== Creating merge source tables =======") + MergeTestCases.testCases.map(_.sourceTable).distinct.foreach(loadMergeSourceTable) + log(s"====== Created all tables in database ${dbName} at '${dbLocation}' =======") + + runQuery(s"USE $dbCatalog.$dbName;") + runQuery("SHOW TABLES", printRows = true) + } + + /** + * Creates the target Delta table and performs sanity checks. This table will be cloned before + * each merge test case and the clone serves as a single-use merge target table. 
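+   * The clone is created by MergeBenchmark.withCloneTargetTable using CREATE TABLE ... SHALLOW CLONE, so every test case starts from an identical target state.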
+ */ + protected def loadMergeTargetTable(): Unit = { + val dbLocation = conf.dbLocation(conf.dbName, suffix = benchmarkId.replace("-", "_")) + val location = s"${dbLocation}/${conf.tableName}/" + val format = "parquet" + + runQuery(s"DROP TABLE IF EXISTS $targetTableFullName", s"drop-table-$targetTableFullName") + + runQuery( + s"""CREATE TABLE $targetTableFullName + USING DELTA + LOCATION '$location' + SELECT * FROM `${format}`.`$dataLoadFromPath` + """, s"create-table-$targetTableFullName", ignoreError = true) + + val sourceRowCount = + spark.sql(s"SELECT * FROM `${format}`.`$dataLoadFromPath`").count() + val targetRowCount = spark.table(targetTableFullName).count() + val targetFileCount = + spark.table(targetTableFullName).select(countDistinct("_metadata.file_path")).head.getLong(0) + log(s"Target file count: $targetFileCount") + log(s"Target row count: $targetRowCount") + + assert(targetRowCount == sourceRowCount, + s"Row count mismatch: source table = $sourceRowCount, " + + s"target $targetTableFullName = $targetRowCount") + } + + /** + * Creates a table that will be used as a merge source table in the merge test cases. The table is + * created by sampling the merge target table created by [[loadMergeTargetTable]]. The merge test + * cases don't modify the source table and a single source table is reused across different test + * cases if the same source table configuration is used. + */ + protected def loadMergeSourceTable(sourceTableConf: MergeSourceTable): Unit = { + val fullTableName = s"`${conf.dbName}`.`${sourceTableConf.name}`" + val dbLocation = conf.dbLocation(conf.dbName, suffix = benchmarkId.replace("-", "_")) + + runQuery(s"DROP TABLE IF EXISTS $fullTableName", s"drop-table-${sourceTableConf.name}") + + val fullTableDF = spark.read.format("delta") + .load(s"${dbLocation}/${conf.tableName}/") + // Sample files based on their file path. + val sampledFilesDF = fullTableDF + .select("_metadata.file_path") + .distinct + .sample(sourceTableConf.filesMatchedFraction) + + // Read the data from the sampled files and sample two sets of rows for MATCHED clauses and + // NOT MATCHED clauses respectively. + val sampledDataDF = fullTableDF + .withColumn("file_path", col("_metadata.file_path")) + .join(sampledFilesDF, "file_path") + log(s"Matching files row count: ${sampledDataDF.count}") + + val numberOfNulls = sampledDataDF.filter(isnull(col("wr_order_number"))).count + log(s"wr_order_number contains $numberOfNulls null values") + val matchedData = sampledDataDF.sample(sourceTableConf.rowsMatchedFraction) + val notMatchedData = sampledDataDF.sample(sourceTableConf.rowsNotMatchedFraction) + .withColumn("wr_order_number", rand()) + .withColumn("wr_item_sk", rand()) + + val data = matchedData.union(notMatchedData) + + val dupes = data.groupBy("wr_order_number", "wr_item_sk").count.filter("count > 1") + log(s"Duplicates: ${dupes.collect().mkString("Array(", ",\n", ")")}") + data.write.format("delta").saveAsTable(fullTableName) + } +} + +object MergeDataLoad { + def main(args: Array[String]): Unit = { + MergeDataLoadConf.parse(args).foreach { conf => + new MergeDataLoad(conf).run() + } + } +} diff --git a/benchmarks/src/main/scala/benchmark/MergeTestCases.scala b/benchmarks/src/main/scala/benchmark/MergeTestCases.scala new file mode 100644 index 00000000000..110ce6c4317 --- /dev/null +++ b/benchmarks/src/main/scala/benchmark/MergeTestCases.scala @@ -0,0 +1,223 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package benchmark + +import org.apache.spark.sql.Row + +trait MergeTestCase { + /** + * Name of the test case used e.g. in the test results. + */ + def name: String + + /** + * The source table configuration to use for the test case. When a test case is defined, + * [[MergeDataLoad]] will collect all source table configurations and create the source tables + * required by all tests. + */ + def sourceTable: MergeSourceTable + + /** + * The merge command to execute as a SQL string. + */ + def sqlCmd(targetTable: String): String + + /** + * Each test case can define invariants to check after the merge command runs to ensure that the + * benchmark results are valid. + */ + def validate(mergeStats: Seq[Row], targetRowCount: Long): Unit +} + +/** + * Trait shared by all insert-only merge test cases. + */ +trait InsertOnlyTestCase extends MergeTestCase { + val filesMatchedFraction: Double + val rowsNotMatchedFraction: Double + + override def sourceTable: MergeSourceTable = MergeSourceTable( + filesMatchedFraction, + rowsMatchedFraction = 0, + rowsNotMatchedFraction) + + override def validate(mergeStats: Seq[Row], targetRowCount: Long): Unit = { + assert(mergeStats.length == 1) + assert(mergeStats.head.getAs[Long]("num_updated_rows") == 0) + assert(mergeStats.head.getAs[Long]("num_deleted_rows") == 0) + } +} + +/** + * A merge test case with a single WHEN NOT MATCHED THEN INSERT * clause. + */ +case class SingleInsertOnlyTestCase( + filesMatchedFraction: Double, + rowsNotMatchedFraction: Double) extends InsertOnlyTestCase { + + override val name: String = "single_insert_only" + + s"_filesMatchedFraction_$filesMatchedFraction" + + s"_rowsNotMatchedFraction_$rowsNotMatchedFraction" + + + override def sqlCmd(targetTable: String): String = { + s"""MERGE INTO $targetTable t + |USING ${sourceTable.name} s + |ON t.wr_order_number = s.wr_order_number AND t.wr_item_sk = s.wr_item_sk + |WHEN NOT MATCHED THEN INSERT *""".stripMargin + } +} + +/** + * A merge test case with two WHEN NOT MATCHED (AND condition) THEN INSERT * clauses. + */ +case class MultipleInsertOnlyTestCase( + filesMatchedFraction: Double, + rowsNotMatchedFraction: Double) extends InsertOnlyTestCase { + + override val name: String = "multiple_insert_only" + + s"_filesMatchedFraction_$filesMatchedFraction" + + s"_rowsNotMatchedFraction_$rowsNotMatchedFraction" + + override def sqlCmd(targetTable: String): String = { + s"""MERGE INTO $targetTable t + |USING ${sourceTable.name} s + |ON t.wr_order_number = s.wr_order_number AND t.wr_item_sk = s.wr_item_sk + |WHEN NOT MATCHED AND s.wr_item_sk % 2 = 0 THEN INSERT * + |WHEN NOT MATCHED THEN INSERT *""".stripMargin + } +} + +/** + * A merge test case with a single WHEN MATCHED THEN DELETE clause. 
+ */ +case class DeleteOnlyTestCase( + filesMatchedFraction: Double, + rowsMatchedFraction: Double) extends MergeTestCase { + + override val name: String = "delete_only" + + s"_filesMatchedFraction_$filesMatchedFraction" + + s"_rowsMatchedFraction_$rowsMatchedFraction" + + override def sourceTable: MergeSourceTable = MergeSourceTable( + filesMatchedFraction, + rowsMatchedFraction, + rowsNotMatchedFraction = 0) + + override def sqlCmd(targetTable: String): String = { + s"""MERGE INTO $targetTable t + |USING ${sourceTable.name} s + |ON t.wr_order_number = s.wr_order_number AND t.wr_item_sk = s.wr_item_sk + |WHEN MATCHED THEN DELETE""".stripMargin + } + + override def validate(mergeStats: Seq[Row], targetRowCount: Long): Unit = { + assert(mergeStats.length == 1) + assert(mergeStats.head.getAs[Long]("num_updated_rows") == 0) + assert(mergeStats.head.getAs[Long]("num_inserted_rows") == 0) + } +} + +/** + * A merge test case with a MATCHED UPDATE and a NOT MATCHED INSERT clause. + */ +case class UpsertTestCase( + filesMatchedFraction: Double, + rowsMatchedFraction: Double, + rowsNotMatchedFraction: Double) extends MergeTestCase { + + override val name: String = "upsert" + + s"_filesMatchedFraction_$filesMatchedFraction" + + s"_rowsMatchedFraction_$rowsMatchedFraction" + + s"_rowsNotMatchedFraction_$rowsNotMatchedFraction" + + override def sourceTable: MergeSourceTable = MergeSourceTable( + filesMatchedFraction, + rowsMatchedFraction, + rowsNotMatchedFraction) + + override def sqlCmd(targetTable: String): String = { + s"""MERGE INTO $targetTable t + |USING ${sourceTable.name} s + |ON t.wr_order_number = s.wr_order_number AND t.wr_item_sk = s.wr_item_sk + |WHEN MATCHED THEN UPDATE SET * + |WHEN NOT MATCHED THEN INSERT *""".stripMargin + } + + override def validate(mergeStats: Seq[Row], targetRowCount: Long): Unit = { + assert(mergeStats.length == 1) + assert(mergeStats.head.getAs[Long]("num_deleted_rows") == 0) + } +} + +object MergeTestCases { + def testCases: Seq[MergeTestCase] = + insertOnlyTestCases ++ + deleteOnlyTestCases ++ + upsertTestCases + + def insertOnlyTestCases: Seq[MergeTestCase] = + Seq(0.05, 0.5, 1.0).flatMap { rowsNotMatchedFraction => + Seq( + SingleInsertOnlyTestCase( + filesMatchedFraction = 0.05, + rowsNotMatchedFraction), + + MultipleInsertOnlyTestCase( + filesMatchedFraction = 0.05, + rowsNotMatchedFraction) + ) + } + + def deleteOnlyTestCases: Seq[MergeTestCase] = Seq( + DeleteOnlyTestCase( + filesMatchedFraction = 0.05, + rowsMatchedFraction = 0.05)) + + def upsertTestCases: Seq[MergeTestCase] = Seq( + Seq(0.0, 0.01, 0.1).map { rowsMatchedFraction => + UpsertTestCase( + filesMatchedFraction = 0.05, + rowsMatchedFraction, + rowsNotMatchedFraction = 0.1) + }, + + Seq(0.5, 0.99, 1.0).map { rowsMatchedFraction => + UpsertTestCase( + filesMatchedFraction = 0.05, + rowsMatchedFraction, + rowsNotMatchedFraction = 0.001) + }, + + Seq( + UpsertTestCase( + filesMatchedFraction = 0.05, + rowsMatchedFraction = 0.1, + rowsNotMatchedFraction = 0.0), + + UpsertTestCase( + filesMatchedFraction = 0.5, + rowsMatchedFraction = 0.01, + rowsNotMatchedFraction = 0.001), + + UpsertTestCase( + filesMatchedFraction = 1.0, + rowsMatchedFraction = 0.01, + rowsNotMatchedFraction = 0.001) + ) + ).flatten +} diff --git a/benchmarks/src/main/scala/benchmark/TPCDSBenchmark.scala b/benchmarks/src/main/scala/benchmark/TPCDSBenchmark.scala new file mode 100644 index 00000000000..0017974e2ae --- /dev/null +++ b/benchmarks/src/main/scala/benchmark/TPCDSBenchmark.scala @@ -0,0 +1,115 @@ +/* + * Copyright 
(2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package benchmark + +import benchmark.TPCDSBenchmarkQueries._ + +trait TPCDSConf extends BenchmarkConf { + protected def format: Option[String] + def scaleInGB: Int + def userDefinedDbName: Option[String] + + def formatName: String = format.getOrElse { + throw new IllegalArgumentException("format must be specified") + } + def dbName: String = userDefinedDbName.getOrElse(s"tpcds_sf${scaleInGB}_${formatName}") + def dbLocation: String = dbLocation(dbName) +} + +case class TPCDSBenchmarkConf( + protected val format: Option[String] = None, + scaleInGB: Int = 0, + userDefinedDbName: Option[String] = None, + iterations: Int = 3, + benchmarkPath: Option[String] = None) extends TPCDSConf + +object TPCDSBenchmarkConf { + import scopt.OParser + private val builder = OParser.builder[TPCDSBenchmarkConf] + private val argParser = { + import builder._ + OParser.sequence( + programName("TPC-DS Benchmark"), + opt[String]("format") + .required() + .action((x, c) => c.copy(format = Some(x))) + .text("Spark's short name for the file format to use"), + opt[String]("scale-in-gb") + .required() + .valueName("") + .action((x, c) => c.copy(scaleInGB = x.toInt)) + .text("Scale factor of the TPCDS benchmark"), + opt[String]("benchmark-path") + .required() + .valueName("") + .action((x, c) => c.copy(benchmarkPath = Some(x))) + .text("Cloud path to be used for creating table and generating reports"), + opt[String]("iterations") + .optional() + .valueName("") + .action((x, c) => c.copy(iterations = x.toInt)) + .text("Number of times to run the queries"), + ) + } + + def parse(args: Array[String]): Option[TPCDSBenchmarkConf] = { + OParser.parse(argParser, args, TPCDSBenchmarkConf()) + } +} + +class TPCDSBenchmark(conf: TPCDSBenchmarkConf) extends Benchmark(conf) { + val queries: Map[String, String] = { + if (conf.scaleInGB <= 3000) TPCDSQueries3TB + else if (conf.scaleInGB == 10) TPCDSQueries10TB + else throw new IllegalArgumentException( + s"Unsupported scale factor of ${conf.scaleInGB} GB") + } + + val dbName = conf.dbName + + def runInternal(): Unit = { + for ((k, v) <- extraConfs) spark.conf.set(k, v) + spark.sparkContext.setLogLevel("WARN") + log("All configs:\n\t" + spark.conf.getAll.toSeq.sortBy(_._1).mkString("\n\t")) + spark.sql(s"USE $dbName") + for (iteration <- 1 to conf.iterations) { + queries.toSeq.sortBy(_._1).foreach { case (name, sql) => + runQuery(sql, iteration = Some(iteration), queryName = name) + } + } + val results = getQueryResults().filter(_.name.startsWith("q")) + if (results.forall(x => x.errorMsg.isEmpty && x.durationMs.nonEmpty) ) { + val medianDurationSecPerQuery = results.groupBy(_.name).map { case (q, results) => + assert(results.size == conf.iterations) + val medianMs = results.map(_.durationMs.get).sorted + .drop(math.floor(conf.iterations / 2.0).toInt).head + (q, medianMs / 1000.0) + } + val sumOfMedians = medianDurationSecPerQuery.map(_._2).sum + 
reportExtraMetric("tpcds-result-seconds", sumOfMedians) + } + } +} + +object TPCDSBenchmark { + def main(args: Array[String]): Unit = { + TPCDSBenchmarkConf.parse(args).foreach { conf => + new TPCDSBenchmark(conf).run() + } + } +} + diff --git a/benchmarks/src/main/scala/benchmark/TPCDSBenchmarkQueries.scala b/benchmarks/src/main/scala/benchmark/TPCDSBenchmarkQueries.scala new file mode 100644 index 00000000000..f3e82c55945 --- /dev/null +++ b/benchmarks/src/main/scala/benchmark/TPCDSBenchmarkQueries.scala @@ -0,0 +1,38153 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package benchmark + +object TPCDSBenchmarkQueries { + val TPCDSQueries3TB = Map( + "q1" -> + """ +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'TN' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100""", + "q2" -> + """ +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + 
,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1""", + "q3" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 816 + and dt.d_moy=11 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100""", + "q4" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and 
t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100""", + "q5" -> + """ +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 
days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q6" -> + """ +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100""", + "q7" -> + """ +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100""", + "q8" -> + """ +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + '27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + '83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + '37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + '94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + '98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + '71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + 
'47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + '49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + '56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100""", + "q9" -> + """ +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 2972190 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 111711138 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 127958920 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 41162107 + then (select avg(ss_ext_discount_amt) + from store_sales + 
where ss_quantity between 61 and 80) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 25211875 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1""", + "q10" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Allen County','Jefferson County','Lamar County','Dakota County','Park County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100""", + "q11" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_login + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and 
t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1998 + and t_s_secyear.dyear = 1998+1 + and t_w_firstyear.dyear = 1998 + and t_w_secyear.dyear = 1998+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_login +limit 100""", + "q12" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Men', 'Books', 'Children') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1998-03-28' as date) + and (cast('1998-03-28' as date) + INTERVAL 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q13" -> + """ +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Unknown' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'W' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'S' + and cd_education_status = 'College' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WV', 'GA', 'TX') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('TN', 'KY', 'SC') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('OK', 'NE', 'CA') + and ss_net_profit between 50 and 250 + ))""", + "q14a" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1998 AND 1998 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1998 AND 1998 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + 
,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1998 AND 1998 + 2) + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100""", + "q14b" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1998 AND 1998 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1998 AND 1998 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1998 AND 1998 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as +(select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year 
between 1998 and 1998 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select this_year.channel ty_channel + ,this_year.i_brand_id ty_brand + ,this_year.i_class_id ty_class + ,this_year.i_category_id ty_category + ,this_year.sales ty_sales + ,this_year.number_sales ty_number_sales + ,last_year.channel ly_channel + ,last_year.i_brand_id ly_brand + ,last_year.i_class_id ly_class + ,last_year.i_category_id ly_category + ,last_year.sales ly_sales + ,last_year.number_sales ly_number_sales + from + (select 'store' channel, i_brand_id,i_class_id,i_category_id + ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1998 + 1 + and d_moy = 12 + and d_dom = 20) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, + (select 'store' channel, i_brand_id,i_class_id + ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1998 + and d_moy = 12 + and d_dom = 20) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year + where this_year.i_brand_id= last_year.i_brand_id + and this_year.i_class_id = last_year.i_class_id + and this_year.i_category_id = last_year.i_category_id + order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id + limit 100""", + "q15" -> + """ +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100""", + "q16" -> + """ +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-2-01' and + (cast('2001-2-01' as date) + INTERVAL 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'MS' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Jackson County','Daviess County','Walker County','Dauphin County', + 'Mobile County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100""", + 
"q17" -> + """ +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '1999Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100""", + "q18" -> + """ +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'F' and + cd1.cd_education_status = 'Primary' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (6,7,3,11,12,8) and + d_year = 1999 and + ca_state in ('IL','WV','KS' + ,'GA','LA','PA','TX') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100""", + "q19" -> + """ +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=26 + and d_moy=12 + and d_year=2000 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 """, + "q20" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk 
= i_item_sk + and i_category in ('Books', 'Home', 'Jewelry') + and cs_sold_date_sk = d_date_sk + and d_date between cast('1998-05-08' as date) + and (cast('1998-05-08' as date) + INTERVAL 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q21" -> + """ +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2000-05-22' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('2000-05-22' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('2000-05-22' as date) - INTERVAL 30 days) + and (cast ('2000-05-22' as date) + INTERVAL 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100""", + "q22" -> + """ +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1199 and 1199 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100""", + "q23a" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 5 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 5 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + limit 100""", + "q23b" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000 + 1,2000 + 2,2000 + 3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + 
max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * + from max_store_sales)) + select c_last_name,c_first_name,sales + from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales + from catalog_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 5 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name + union all + select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales + from web_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 5 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and ws_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name) + order by c_last_name,c_first_name,sales + limit 100""", + "q24a" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid_inc_tax) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'navy' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q24b" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid_inc_tax) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip + and s_market_id = 10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'beige' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q25" -> + 
""" +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2002 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2002 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2002 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q26" -> + """ +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'M' and + cd_education_status = '2 yr Degree' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2002 + group by i_item_id + order by i_item_id + limit 100""", + "q27" -> + """ +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'F' and + cd_marital_status = 'S' and + cd_education_status = 'Advanced Degree' and + d_year = 2000 and + s_state in ('WA','LA', 'LA', 'TX', 'AL', 'PA') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100""", + "q28" -> + """ +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 189 and 189+10 + or ss_coupon_amt between 4483 and 4483+1000 + or ss_wholesale_cost between 24 and 24+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 71 and 71+10 + or ss_coupon_amt between 14775 and 14775+1000 + or ss_wholesale_cost between 38 and 38+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 183 and 183+10 + or ss_coupon_amt between 13456 and 13456+1000 + or ss_wholesale_cost between 31 and 31+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 4905 and 4905+1000 + or ss_wholesale_cost between 27 and 27+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from 
store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 180 and 180+10 + or ss_coupon_amt between 17430 and 17430+1000 + or ss_wholesale_cost between 57 and 57+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 49 and 49+10 + or ss_coupon_amt between 2950 and 2950+1000 + or ss_wholesale_cost between 52 and 52+20)) B6 +limit 100""", + "q29" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,stddev_samp(ss_quantity) as store_sales_quantity + ,stddev_samp(sr_return_quantity) as store_returns_quantity + ,stddev_samp(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1998 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1998 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1998,1998+1,1998+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q30" -> + """ +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2000 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'GA' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100""", + "q31" -> + """ +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 1998 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year 
= 1998 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 1998 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 1998 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 1998 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =1998 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.ca_county""", + "q32" -> + """ +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 948 +and i_item_sk = cs_item_sk +and d_date between '1998-02-03' and + (cast('1998-02-03' as date) + INTERVAL 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1998-02-03' and + (cast('1998-02-03' as date) + INTERVAL 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100""", + "q33" -> + """ +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 2 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 2 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 2 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100""", + "q34" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = '5001-10000') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ 
household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Jefferson Davis Parish','Levy County','Coal County','Oglethorpe County', + 'Mobile County','Gage County','Richland County','Gogebic County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number""", + "q35" -> + """ +select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + stddev_samp(cd_dep_count), + stddev_samp(cd_dep_count), + min(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + stddev_samp(cd_dep_employed_count), + stddev_samp(cd_dep_employed_count), + min(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + stddev_samp(cd_dep_college_count), + stddev_samp(cd_dep_college_count), + min(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100""", + "q36" -> + """ +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1998 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('OH','WV','PA','TN', + 'MN','MO','NM','MI') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100""", + "q37" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 35 and 35 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-01-20' as date) and (cast('2001-01-20' as date) + interval 60 days) + and i_manufact_id in (928,715,942,861) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q38" -> + """ +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1222 and 1222 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, 
date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1222 and 1222 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1222 and 1222 + 11 +) hot_cust +limit 100""", + "q39a" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1998 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q39b" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1998 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q40" -> + """ +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1999-02-02' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('1999-02-02' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('1999-02-02' as date) - INTERVAL 30 days) + and (cast ('1999-02-02' as date) + INTERVAL 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100""", + "q41" -> + """ +select distinct(i_product_name) + from item i1 + where i_manufact_id between 732 and 732+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 
'Women' and + (i_color = 'beige' or i_color = 'spring') and + (i_units = 'Tsp' or i_units = 'Ton') and + (i_size = 'petite' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'white' or i_color = 'pale') and + (i_units = 'Box' or i_units = 'Dram') and + (i_size = 'large' or i_size = 'economy') + ) or + (i_category = 'Men' and + (i_color = 'midnight' or i_color = 'frosted') and + (i_units = 'Bunch' or i_units = 'Carton') and + (i_size = 'small' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'azure' or i_color = 'goldenrod') and + (i_units = 'Pallet' or i_units = 'Gross') and + (i_size = 'petite' or i_size = 'extra large') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'brown' or i_color = 'hot') and + (i_units = 'Tbl' or i_units = 'Cup') and + (i_size = 'petite' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'powder' or i_color = 'honeydew') and + (i_units = 'Bundle' or i_units = 'Unknown') and + (i_size = 'large' or i_size = 'economy') + ) or + (i_category = 'Men' and + (i_color = 'antique' or i_color = 'purple') and + (i_units = 'N/A' or i_units = 'Dozen') and + (i_size = 'small' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'lavender' or i_color = 'tomato') and + (i_units = 'Lb' or i_units = 'Oz') and + (i_size = 'petite' or i_size = 'extra large') + )))) > 0 + order by i_product_name + limit 100""", + "q42" -> + """ +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=2002 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 """, + "q43" -> + """ +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1999 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100""", + "q44" -> + """ +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 321 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 321 + and ss_addr_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from 
store_sales ss1 + where ss_store_sk = 321 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 321 + and ss_addr_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100""", + "q45" -> + """ +select ca_zip, ca_county, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1999 + group by ca_zip, ca_county + order by ca_zip, ca_county + limit 100""", + "q46" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 2) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Antioch','Mount Vernon','Jamestown','Wilson','Farmington') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100""", + "q47" -> + """ +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2001 or + ( d_year = 2001-1 and d_moy =12) or + ( d_year = 2001+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.s_company_name + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2001 and + avg_monthly_sales > 0 and + 
case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, avg_monthly_sales + limit 100""", + "q48" -> + """ +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1999 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'D' + and + cd_education_status = 'College' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'W' + and + cd_education_status = 'Secondary' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '2 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('NE', 'IA', 'NY') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('IN', 'TN', 'OH') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('KS', 'CA', 'CO') + and ss_net_profit between 50 and 25000 + ) + )""", + "q49" -> + """ +select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + 
or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + order by 1,4,5,2 + limit 100""", + "q50" -> + """ +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 1999 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100""", + "q51" -> + """ +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1176 and 1176+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1176 and 1176+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + 
,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100""", + "q52" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=2001 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 """, + "q53" -> + """ +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1218,1218+1,1218+2,1218+3,1218+4,1218+5,1218+6,1218+7,1218+8,1218+9,1218+10,1218+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100""", + "q54" -> + """ +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Music' + and i_class = 'country' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 7 + and d_year = 2001 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 2001 and d_moy = 7) + and (select distinct d_month_seq+3 + from date_dim where d_year = 2001 and d_moy = 7) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100""", + "q55" -> + """ +select i_brand_id brand_id, i_brand 
brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=87 + and d_moy=11 + and d_year=2001 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 """, + "q56" -> + """ +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('tan','lace','gainsboro')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('tan','lace','gainsboro')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('tan','lace','gainsboro')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100""", + "q57" -> + """ +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2001 or + ( d_year = 2001-1 and d_moy =12) or + ( d_year = 2001+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.cc_name + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2001 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, avg_monthly_sales + limit 100""", + "q58" -> + """ +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-03-26')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-03-26')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '2000-03-26')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100""", + "q59" -> + """ +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = 
wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1199 and 1199 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1199+ 12 and 1199 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100""", + "q60" -> + """ +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Men')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Men')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Men')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100""", + "q61" -> + """ +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 2001 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 2001 + and d_moy = 11) all_sales +order by promotions, total +limit 100""", + "q62" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 
60) and + (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1194 and 1194 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +limit 100""", + "q63" -> + """ +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1205,1205+1,1205+2,1205+3,1205+4,1205+5,1205+6,1205+7,1205+8,1205+9,1205+10,1205+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100""", + "q64" -> + """ +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + 
c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('peach','misty','drab','chocolate','almond','saddle') and + i_current_price between 75 and 75 + 10 and + i_current_price between 75 + 1 and 75 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2000 and + cs2.syear = 2000 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt + ,cs1.s1 + ,cs2.s1""", + "q65" -> + """ +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1208 and 1208+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1208 and 1208+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100""", + "q66" -> + """ +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + 
,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'HARMSTORF' || ',' || 'USPS' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 24285 and 24285+28800 + and sm_carrier in ('HARMSTORF','USPS') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'HARMSTORF' || ',' || 'USPS' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_list_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_list_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_list_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_list_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_list_price* cs_quantity else 0 end) as may_sales 
+ ,sum(case when d_moy = 6 + then cs_ext_list_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_list_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_list_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_list_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_list_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_list_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_list_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 24285 AND 24285+28800 + and sm_carrier in ('HARMSTORF','USPS') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100""", + "q67" -> + """ +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1196 and 1196+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100""", + "q68" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and 
store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 1 or + household_demographics.hd_vehicle_count= -1) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Bethel','Summit') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100""", + "q69" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('OK','GA','VA') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2004 and + d_moy between 4 and 4+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2004 and + d_moy between 4 and 4+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2004 and + d_moy between 4 and 4+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100""", + "q70" -> + """ +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1197 and 1197+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1197 and 1197+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100""", + "q71" -> + """ +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=1999 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=1999 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as 
sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=1999 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + """, + "q72" -> + """ +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + interval 5 days + and hd_buy_potential = '>10000' + and d1.d_year = 2002 + and cd_marital_status = 'D' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100""", + "q73" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '501-1000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Franklin Parish','Ziebach County','Luce County','Williamson County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc""", + "q74" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, 
t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 3,1,2 +limit 100""", + "q75" -> + """ +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2001 + AND prev_yr.d_year=2001-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + limit 100""", + "q76" -> + """ +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_cdemo_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_cdemo_sk IS 
NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_ship_hdemo_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_hdemo_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_ship_customer_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_ship_customer_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100""", + "q77" -> + """ +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q78" -> + """ +with ws as + 
(select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_sold_year, ss_item_sk, ss_customer_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2002 +order by + ss_sold_year, ss_item_sk, ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + ratio +limit 100""", + "q79" -> + """ +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 0 or household_demographics.hd_vehicle_count > 0) + and date_dim.d_dow = 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100""", + "q80" -> + """ +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('1999-08-12' as date) + and (cast('1999-08-12' as date) + INTERVAL 60 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = 
p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('1999-08-12' as date) + and (cast('1999-08-12' as date) + INTERVAL 60 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('1999-08-12' as date) + and (cast('1999-08-12' as date) + INTERVAL 60 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q81" -> + """ +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =2001 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'NC' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100""", + "q82" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 82 and 82+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-03-10' as date) and (cast('2002-03-10' as date) + INTERVAL 60 days) + and i_manufact_id in (941,920,105,693) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = 
i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q83" -> + """ +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1999-04-14','1999-09-28','1999-11-12'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1999-04-14','1999-09-28','1999-11-12'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1999-04-14','1999-09-28','1999-11-12'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100""", + "q84" -> + """ +select c_customer_id as customer_id + , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Antioch' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 55019 + and ib_upper_bound <= 55019 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100""", + "q85" -> + """ +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 2001 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'S' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '2 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'W' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + 
cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('OK', 'TX', 'MO') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('GA', 'KS', 'NC') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('VA', 'WI', 'WV') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100""", + "q86" -> + """ +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1180 and 1180+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100""", + "q87" -> + """ +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1204 and 1204+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1204 and 1204+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1204 and 1204+11) +) cool_cust""", + "q88" -> + """ +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = 
household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s8""", + "q89" -> + """ +select * +from( +select i_category, 
i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2001) and + ((i_category in ('Women','Music','Home') and + i_class in ('fragrances','pop','bedding') + ) + or (i_category in ('Books','Men','Children') and + i_class in ('home repair','sports-apparel','infants') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100""", + "q90" -> + """ +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 8 and 8+1 + and household_demographics.hd_dep_count = 4 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 19 and 19+1 + and household_demographics.hd_dep_count = 4 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100""", + "q91" -> + """ +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2002 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like '5001-10000%' +and ca_gmt_offset = -6 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc""", + "q92" -> + """ +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 561 +and i_item_sk = ws_item_sk +and d_date between '2001-03-13' and + (cast('2001-03-13' as date) + INTERVAL 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '2001-03-13' and + (cast('2001-03-13' as date) + INTERVAL 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100""", + "q93" -> + """ +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join 
store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'reason 64') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100""", + "q94" -> + """ +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-5-01' and + (cast('2001-5-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q95" -> + """ +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2000-3-01' and + (cast('2000-3-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TN' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q96" -> + """ +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 16 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 4 + and store.s_store_name = 'ese' +order by count(*) +limit 100""", + "q97" -> + """ +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1209 and 1209 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1209 and 1209 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100""", + "q98" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as 
revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Home', 'Shoes') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-04-12' as date) + and (cast('2001-04-12' as date) + interval 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio""", + "q99" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1203 and 1203 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100""", + "q1" -> + """ +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100""", + "q2" -> + """ +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales 
thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1""", + "q3" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 816 + and dt.d_moy=11 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100""", + "q4" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = 
t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100""", + "q5" -> + """ +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + 
wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q6" -> + """ +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100""", + "q7" -> + """ +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100""", + "q8" -> + """ +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + '27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + '83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + '37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + '94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + 
'98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + '71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + '47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + '49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + '56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100""", + "q9" -> + """ +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 98972190 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 160856845 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 12733327 + then (select avg(ss_ext_discount_amt) + from store_sales + 
where ss_quantity between 41 and 60) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 96251173 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 80049606 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1""", + "q10" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Fillmore County','McPherson County','Bonneville County','Boone County','Brown County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 3 and 3+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 3 ANd 3+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 3 and 3+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100""", + "q11" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + 
t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100""", + "q12" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Electronics', 'Books', 'Women') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1998-01-06' as date) + and (cast('1998-01-06' as date) + INTERVAL 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q13" -> + """ +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Secondary' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'W' + and cd_education_status = 'College' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('TX', 'OK', 'MI') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WA', 'NC', 'OH') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'FL', 'GA') + and ss_net_profit between 50 and 250 + ))""", + "q14a" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 2000 AND 2000 + 2 + intersect 
+ select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 2000 AND 2000 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 2000 AND 2000 + 2) + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100""", + "q14b" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 2000 AND 2000 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 2000 AND 2000 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 2000 
AND 2000 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as +(select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2) x) + select this_year.channel ty_channel + ,this_year.i_brand_id ty_brand + ,this_year.i_class_id ty_class + ,this_year.i_category_id ty_category + ,this_year.sales ty_sales + ,this_year.number_sales ty_number_sales + ,last_year.channel ly_channel + ,last_year.i_brand_id ly_brand + ,last_year.i_class_id ly_class + ,last_year.i_category_id ly_category + ,last_year.sales ly_sales + ,last_year.number_sales ly_number_sales + from + (select 'store' channel, i_brand_id,i_class_id,i_category_id + ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 2000 + 1 + and d_moy = 12 + and d_dom = 15) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, + (select 'store' channel, i_brand_id,i_class_id + ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 2000 + and d_moy = 12 + and d_dom = 15) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year + where this_year.i_brand_id= last_year.i_brand_id + and this_year.i_class_id = last_year.i_class_id + and this_year.i_category_id = last_year.i_category_id + order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id + limit 100""", + "q15" -> + """ +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + group by ca_zip + order by ca_zip + limit 100""", + "q16" -> + """ +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '1999-4-01' and + (cast('1999-4-01' as date) + INTERVAL 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'IL' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Richland County','Bronx County','Maverick County','Mesa County', + 'Raleigh County' 
+) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100""", + "q17" -> + """ +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100""", + "q18" -> + """ +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'Unknown' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (5,1,4,7,8,9) and + d_year = 2002 and + ca_state in ('AR','TX','NC' + ,'GA','MS','WV','AL') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100""", + "q19" -> + """ +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=16 + and d_moy=12 + and d_year=1998 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 
100 """, + "q20" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Shoes', 'Electronics', 'Children') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2001-03-14' as date) + and (cast('2001-03-14' as date) + INTERVAL 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q21" -> + """ +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1999-03-20' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1999-03-20' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1999-03-20' as date) - INTERVAL 30 days) + and (cast ('1999-03-20' as date) + INTERVAL 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100""", + "q22" -> + """ +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1186 and 1186 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100""", + "q23a" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 3 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 3 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + limit 100""", + "q23b" -> + """ +with frequent_ss_items as + (select 
substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000 + 1,2000 + 2,2000 + 3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * + from max_store_sales)) + select c_last_name,c_first_name,sales + from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales + from catalog_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 3 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name + union all + select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales + from web_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 3 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and ws_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name) + order by c_last_name,c_first_name,sales + limit 100""", + "q24a" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'snow' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q24b" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip + and s_market_id = 10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + 
,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'chiffon' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q25" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q26" -> + """ +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'S' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100""", + "q27" -> + """ +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'F' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2000 and + s_state in ('AL','IN', 'SC', 'NY', 'OH', 'FL') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100""", + "q28" -> + """ +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 73 and 73+10 + or ss_coupon_amt between 7826 and 7826+1000 + or ss_wholesale_cost between 70 and 70+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 152 and 152+10 + or ss_coupon_amt between 2196 and 2196+1000 + or ss_wholesale_cost between 56 and 56+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 53 and 53+10 + or ss_coupon_amt between 3430 and 3430+1000 + or ss_wholesale_cost between 13 and 13+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + 
where ss_quantity between 16 and 20 + and (ss_list_price between 182 and 182+10 + or ss_coupon_amt between 3262 and 3262+1000 + or ss_wholesale_cost between 20 and 20+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 85 and 85+10 + or ss_coupon_amt between 3310 and 3310+1000 + or ss_wholesale_cost between 37 and 37+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 180 and 180+10 + or ss_coupon_amt between 12592 and 12592+1000 + or ss_wholesale_cost between 22 and 22+20)) B6 +limit 100""", + "q29" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,stddev_samp(ss_quantity) as store_sales_quantity + ,stddev_samp(sr_return_quantity) as store_returns_quantity + ,stddev_samp(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1998 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1998 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1998,1998+1,1998+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q30" -> + """ +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2000 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'GA' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100""", + "q31" -> + """ +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + 
,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 1999 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 1999 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 1999 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 1999 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 1999 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =1999 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.d_year""", + "q32" -> + """ +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 66 +and i_item_sk = cs_item_sk +and d_date between '2002-03-29' and + (cast('2002-03-29' as date) + INTERVAL 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '2002-03-29' and + (cast('2002-03-29' as date) + INTERVAL 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100""", + "q33" -> + """ +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Home')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Home')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Home')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100""", + "q34" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and 
(household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Salem County','Terrell County','Arthur County','Oglethorpe County', + 'Lunenburg County','Perry County','Halifax County','Sumner County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number""", + "q35" -> + """ +select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + avg(cd_dep_count), + min(cd_dep_count), + stddev_samp(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + min(cd_dep_employed_count), + stddev_samp(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + min(cd_dep_college_count), + stddev_samp(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100""", + "q36" -> + """ +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('IN','AL','MI','MN', + 'TN','LA','FL','NM') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100""", + "q37" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 39 and 39 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-01-16' as date) and (cast('2001-01-16' as date) + interval 60 days) + and i_manufact_id in (765,886,889,728) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q38" -> + """ +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where 
store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1186 and 1186 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1186 and 1186 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1186 and 1186 + 11 +) hot_cust +limit 100""", + "q39a" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2000 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=2 + and inv2.d_moy=2+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q39b" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2000 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=2 + and inv2.d_moy=2+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q40" -> + """ +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2000-03-18' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('2000-03-18' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('2000-03-18' as date) - INTERVAL 30 days) + and (cast ('2000-03-18' as date) + INTERVAL 30 days) + group by + w_state,i_item_id + order by 
w_state,i_item_id +limit 100""", + "q41" -> + """ +select distinct(i_product_name) + from item i1 + where i_manufact_id between 970 and 970+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'frosted' or i_color = 'rose') and + (i_units = 'Lb' or i_units = 'Gross') and + (i_size = 'medium' or i_size = 'large') + ) or + (i_category = 'Women' and + (i_color = 'chocolate' or i_color = 'black') and + (i_units = 'Box' or i_units = 'Dram') and + (i_size = 'economy' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'slate' or i_color = 'magenta') and + (i_units = 'Carton' or i_units = 'Bundle') and + (i_size = 'N/A' or i_size = 'small') + ) or + (i_category = 'Men' and + (i_color = 'cornflower' or i_color = 'firebrick') and + (i_units = 'Pound' or i_units = 'Oz') and + (i_size = 'medium' or i_size = 'large') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'almond' or i_color = 'steel') and + (i_units = 'Tsp' or i_units = 'Case') and + (i_size = 'medium' or i_size = 'large') + ) or + (i_category = 'Women' and + (i_color = 'purple' or i_color = 'aquamarine') and + (i_units = 'Bunch' or i_units = 'Gram') and + (i_size = 'economy' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'lavender' or i_color = 'papaya') and + (i_units = 'Pallet' or i_units = 'Cup') and + (i_size = 'N/A' or i_size = 'small') + ) or + (i_category = 'Men' and + (i_color = 'maroon' or i_color = 'cyan') and + (i_units = 'Each' or i_units = 'N/A') and + (i_size = 'medium' or i_size = 'large') + )))) > 0 + order by i_product_name + limit 100""", + "q42" -> + """ +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 """, + "q43" -> + """ +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 2001 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100""", + "q44" -> + """ +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 366 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 366 + and 
ss_cdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 366 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 366 + and ss_cdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100""", + "q45" -> + """ +select ca_zip, ca_county, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 1998 + group by ca_zip, ca_county + order by ca_zip, ca_county + limit 100""", + "q46" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 0 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_city in ('Five Forks','Oakland','Fairview','Winchester','Farmington') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100""", + "q47" -> + """ +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 1999 or + ( d_year = 1999-1 and d_moy =12) or + ( d_year = 1999+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.s_store_name + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + 
v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 1999 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, sum_sales + limit 100""", + "q48" -> + """ +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1998 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = 'Unknown' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'W' + and + cd_education_status = 'College' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'D' + and + cd_education_status = 'Primary' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MI', 'GA', 'NH') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('TX', 'KY', 'SD') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('NY', 'OH', 'FL') + and ss_net_profit between 50 and 25000 + ) + )""", + "q49" -> + """ +select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 
+ and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + order by 1,4,5,2 + limit 100""", + "q50" -> + """ +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 1998 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100""", + "q51" -> + """ +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1214 and 1214+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1214 and 1214+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + 
select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100""", + "q52" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=2000 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 """, + "q53" -> + """ +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100""", + "q54" -> + """ +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Books' + and i_class = 'business' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 2 + and d_year = 2000 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 2000 and d_moy = 2) + and (select distinct d_month_seq+3 + from date_dim where d_year = 2000 and d_moy = 2) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select 
segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100""", + "q55" -> + """ +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=13 + and d_moy=11 + and d_year=1999 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 """, + "q56" -> + """ +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('chiffon','smoke','lace')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 5 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('chiffon','smoke','lace')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 5 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('chiffon','smoke','lace')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 5 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100""", + "q57" -> + """ +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 1999 or + ( d_year = 1999-1 and d_moy =12) or + ( d_year = 1999+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 1999 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, avg_monthly_sales + limit 100""", + "q58" -> + """ +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-21')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-21')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '1998-02-21')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100""", + "q59" -> + """ +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = 
wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1205 and 1205 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1205+ 12 and 1205 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100""", + "q60" -> + """ +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100""", + "q61" -> + """ +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Sports' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -6 + and d_year = 2001 + and d_moy = 12) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Sports' + and s_gmt_offset = -6 + and d_year = 2001 + and d_moy = 12) all_sales +order by promotions, total +limit 100""", + "q62" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (ws_ship_date_sk - 
ws_sold_date_sk > 60) and + (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1215 and 1215 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +limit 100""", + "q63" -> + """ +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1211,1211+1,1211+2,1211+3,1211+4,1211+5,1211+6,1211+7,1211+8,1211+9,1211+10,1211+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100""", + "q64" -> + """ +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and 
+ c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('azure','gainsboro','misty','blush','hot','lemon') and + i_current_price between 80 and 80 + 10 and + i_current_price between 80 + 1 and 80 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 1999 and + cs2.syear = 1999 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt + ,cs1.s1 + ,cs2.s1""", + "q65" -> + """ +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1186 and 1186+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1186 and 1186+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100""", + "q66" -> + """ +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + 
,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'MSC' || ',' || 'GERMA' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 9453 and 9453+28800 + and sm_carrier in ('MSC','GERMA') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'MSC' || ',' || 'GERMA' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_list_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_list_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_list_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_list_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then 
cs_ext_list_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_list_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_list_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_list_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_list_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_list_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_list_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_list_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 9453 AND 9453+28800 + and sm_carrier in ('MSC','GERMA') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100""", + "q67" -> + """ +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1185 and 1185+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100""", + "q68" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from 
store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 4 or + household_demographics.hd_vehicle_count= 0) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Pleasant Hill','Bethel') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100""", + "q69" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('MO','MN','AZ') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100""", + "q70" -> + """ +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1218 and 1218+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1218 and 1218+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100""", + "q71" -> + """ +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2000 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + 
and d_moy=12 + and d_year=2000 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2000 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + """, + "q72" -> + """ +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + interval 5 days + and hd_buy_potential = '1001-5000' + and d1.d_year = 2000 + and cd_marital_status = 'D' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100""", + "q73" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = '5001-10000') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Lea County','Furnas County','Pennington County','Bronx County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc""", + "q74" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,sum(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1998,1998+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,sum(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (1998,1998+1) + group by 
c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 3,1,2 +limit 100""", + "q75" -> + """ +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2001 + AND prev_yr.d_year=2001-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + limit 100""", + "q76" -> + """ +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_customer_sk' 
col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_customer_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_ship_addr_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_addr_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_ship_mode_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_ship_mode_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100""", + "q77" -> + """ +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = 
wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q78" -> + """ +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_customer_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2001 +order by + ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + ratio +limit 100""", + "q79" -> + """ +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 0 or household_demographics.hd_vehicle_count > 3) + and date_dim.d_dow = 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100""", + "q80" -> + """ +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('2002-08-06' as date) + and (cast('2002-08-06' as date) + INTERVAL 60 days) + and ss_store_sk = s_store_sk 
+ and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('2002-08-06' as date) + and (cast('2002-08-06' as date) + INTERVAL 60 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('2002-08-06' as date) + and (cast('2002-08-06' as date) + INTERVAL 60 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q81" -> + """ +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'TX' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100""", + "q82" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 49 and 49+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-01-28' as date) and (cast('2001-01-28' as date) + INTERVAL 60 days) + and i_manufact_id in 
(80,675,292,17) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q83" -> + """ +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-06-17','2000-08-22','2000-11-17'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-06-17','2000-08-22','2000-11-17'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-06-17','2000-08-22','2000-11-17'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100""", + "q84" -> + """ +select c_customer_id as customer_id + , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Hopewell' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 37855 + and ib_upper_bound <= 37855 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100""", + "q85" -> + """ +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 2001 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'S' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'College' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 
'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Secondary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('TX', 'VA', 'CA') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('AR', 'NE', 'MO') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('IA', 'MS', 'WA') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100""", + "q86" -> + """ +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1215 and 1215+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100""", + "q87" -> + """ +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1221 and 1221+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1221 and 1221+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1221 and 1221+11) +) cool_cust""", + "q88" -> + """ +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where 
ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s8""", + 
"q89" -> + """ +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Music','Books') and + i_class in ('glassware','classical','fiction') + ) + or (i_category in ('Jewelry','Sports','Women') and + i_class in ('semi-precious','baseball','dresses') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100""", + "q90" -> + """ +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 9 and 9+1 + and household_demographics.hd_dep_count = 3 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 16 and 16+1 + and household_demographics.hd_dep_count = 3 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100""", + "q91" -> + """ +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2000 +and d_moy = 12 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc""", + "q92" -> + """ +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 356 +and i_item_sk = ws_item_sk +and d_date between '2001-03-12' and + (cast('2001-03-12' as date) + INTERVAL 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '2001-03-12' and + (cast('2001-03-12' as date) + INTERVAL 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100""", + "q93" -> + """ +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else 
(ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'reason 66') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100""", + "q94" -> + """ +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-4-01' and + (cast('1999-4-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'NE' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q95" -> + """ +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2002-4-01' and + (cast('2002-4-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'AL' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q96" -> + """ +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 16 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 6 + and store.s_store_name = 'ese' +order by count(*) +limit 100""", + "q97" -> + """ +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1190 and 1190 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1190 and 1190 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100""", + "q98" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + 
,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Home', 'Sports', 'Men') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2002-01-05' as date) + and (cast('2002-01-05' as date) + interval 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio""", + "q99" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1178 and 1178 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100""", + "q1" -> + """ +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NY' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100""", + "q2" -> + """ +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 
+ ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1""", + "q3" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 816 + and dt.d_moy=11 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100""", + "q4" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and 
t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100""", + "q5" -> + """ +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as 
sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q6" -> + """ +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100""", + "q7" -> + """ +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100""", + "q8" -> + """ +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + '27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + '83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + '37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + 
'94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + '98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + '71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + '47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + '49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + '56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100""", + "q9" -> + """ +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 578972190 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 536856786 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity 
between 41 and 60) > 12733327 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 205136171 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 1192341092 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1""", + "q10" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Baltimore city','Stafford County','Greene County','Ballard County','Franklin County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 1 and 1+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 1 ANd 1+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 1 and 1+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100""", + "q11" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + 
,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag +limit 100""", + "q12" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Children', 'Shoes', 'Women') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1998-06-19' as date) + and (cast('1998-06-19' as date) + INTERVAL 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q13" -> + """ +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'College' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'S' + and cd_education_status = 'Primary' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('GA', 'IN', 'NY') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('ND', 'WV', 'TX') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('KS', 'NC', 'NM') + and ss_net_profit between 50 and 250 + ))""", + "q14a" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk 
= iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1998 AND 1998 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1998 AND 1998 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1998 AND 1998 + 2) + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100""", + "q14b" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1998 AND 1998 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1998 AND 1998 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + 
,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1998 AND 1998 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as +(select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select this_year.channel ty_channel + ,this_year.i_brand_id ty_brand + ,this_year.i_class_id ty_class + ,this_year.i_category_id ty_category + ,this_year.sales ty_sales + ,this_year.number_sales ty_number_sales + ,last_year.channel ly_channel + ,last_year.i_brand_id ly_brand + ,last_year.i_class_id ly_class + ,last_year.i_category_id ly_category + ,last_year.sales ly_sales + ,last_year.number_sales ly_number_sales + from + (select 'store' channel, i_brand_id,i_class_id,i_category_id + ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1998 + 1 + and d_moy = 12 + and d_dom = 17) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, + (select 'store' channel, i_brand_id,i_class_id + ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1998 + and d_moy = 12 + and d_dom = 17) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year + where this_year.i_brand_id= last_year.i_brand_id + and this_year.i_class_id = last_year.i_class_id + and this_year.i_category_id = last_year.i_category_id + order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id + limit 100""", + "q15" -> + """ +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2002 + group by ca_zip + order by ca_zip + limit 100""", + "q16" -> + """ +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-3-01' and + (cast('2001-3-01' as date) + INTERVAL 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'PA' +and cs1.cs_call_center_sk = 
cc_call_center_sk +and cc_county in ('Luce County','Franklin Parish','Sierra County','Williamson County', + 'Kittitas County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100""", + "q17" -> + """ +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2001Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2001Q1','2001Q2','2001Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2001Q1','2001Q2','2001Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100""", + "q18" -> + """ +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'Unknown' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (5,7,8,6,12,4) and + d_year = 2000 and + ca_state in ('MO','NY','ME' + ,'MI','IA','OH','MS') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100""", + "q19" -> + """ +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=55 + and d_moy=11 + and d_year=1998 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + 
,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 """, + "q20" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Shoes', 'Electronics', 'Home') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2000-05-15' as date) + and (cast('2000-05-15' as date) + INTERVAL 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q21" -> + """ +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2002-02-15' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('2002-02-15' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('2002-02-15' as date) - INTERVAL 30 days) + and (cast ('2002-02-15' as date) + INTERVAL 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100""", + "q22" -> + """ +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1202 and 1202 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100""", + "q23a" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 4 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 4 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk 
in (select c_customer_sk from best_ss_customer)) + limit 100""", + "q23b" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000 + 1,2000 + 2,2000 + 3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * + from max_store_sales)) + select c_last_name,c_first_name,sales + from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales + from catalog_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 4 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name + union all + select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales + from web_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 4 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and ws_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name) + order by c_last_name,c_first_name,sales + limit 100""", + "q24a" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid_inc_tax) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=5 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'cyan' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q24b" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid_inc_tax) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip + and s_market_id = 5 +group by 
c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'ivory' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q25" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,stddev_samp(ss_net_profit) as store_sales_profit + ,stddev_samp(sr_net_loss) as store_returns_loss + ,stddev_samp(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q26" -> + """ +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'M' and + cd_education_status = 'Unknown' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100""", + "q27" -> + """ +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'F' and + cd_marital_status = 'D' and + cd_education_status = '2 yr Degree' and + d_year = 1999 and + s_state in ('MI','WV', 'MI', 'NY', 'TN', 'MI') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100""", + "q28" -> + """ +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 151 and 151+10 + or ss_coupon_amt between 4349 and 4349+1000 + or ss_wholesale_cost between 75 and 75+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 45 and 45+10 + or ss_coupon_amt between 12490 and 12490+1000 + or ss_wholesale_cost between 37 and 37+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 54 and 54+10 + or ss_coupon_amt between 13038 and 13038+1000 + or ss_wholesale_cost 
between 17 and 17+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 178 and 178+10 + or ss_coupon_amt between 10744 and 10744+1000 + or ss_wholesale_cost between 51 and 51+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 49 and 49+10 + or ss_coupon_amt between 8494 and 8494+1000 + or ss_wholesale_cost between 56 and 56+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 0 and 0+10 + or ss_coupon_amt between 17854 and 17854+1000 + or ss_wholesale_cost between 31 and 31+20)) B6 +limit 100""", + "q29" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,max(ss_quantity) as store_sales_quantity + ,max(sr_return_quantity) as store_returns_quantity + ,max(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q30" -> + """ +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2000 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'MD' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100""", + "q31" -> + """ +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + 
ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 1999 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 1999 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 1999 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 1999 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 1999 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =1999 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by store_q2_q3_increase""", + "q32" -> + """ +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 7 +and i_item_sk = cs_item_sk +and d_date between '2000-01-21' and + (cast('2000-01-21' as date) + INTERVAL 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '2000-01-21' and + (cast('2000-01-21' as date) + INTERVAL 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100""", + "q33" -> + """ +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100""", + "q34" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '501-1000' or + household_demographics.hd_buy_potential = '5001-10000') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Levy County','Val Verde County','Porter County','Nowata County', + 'Lincoln County','Brazos County','Franklin Parish','Pipestone County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number""", + "q35" -> + """ +select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + sum(cd_dep_count), + sum(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + sum(cd_dep_employed_count), + sum(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + sum(cd_dep_college_count), + sum(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100""", + "q36" -> + """ +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('MO','AL','OH','WV', + 'AL','MN','TN','WA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100""", + "q37" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 57 and 57 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-04-19' as date) and (cast('2001-04-19' as date) + interval 60 days) + and i_manufact_id in (804,916,707,680) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q38" -> + """ +select count(*) from ( + 
select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 +) hot_cust +limit 100""", + "q39a" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2000 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=3 + and inv2.d_moy=3+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q39b" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2000 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=3 + and inv2.d_moy=3+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q40" -> + """ +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2000-04-09' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('2000-04-09' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('2000-04-09' as date) - INTERVAL 30 days) + and (cast 
('2000-04-09' as date) + INTERVAL 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100""", + "q41" -> + """ +select distinct(i_product_name) + from item i1 + where i_manufact_id between 917 and 917+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'antique' or i_color = 'pale') and + (i_units = 'Tbl' or i_units = 'Case') and + (i_size = 'small' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'snow' or i_color = 'lemon') and + (i_units = 'Box' or i_units = 'Ounce') and + (i_size = 'economy' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'green' or i_color = 'blue') and + (i_units = 'Gross' or i_units = 'Ton') and + (i_size = 'large' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'cream' or i_color = 'frosted') and + (i_units = 'Bundle' or i_units = 'Gram') and + (i_size = 'small' or i_size = 'extra large') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'orange' or i_color = 'spring') and + (i_units = 'Lb' or i_units = 'Carton') and + (i_size = 'small' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'lawn' or i_color = 'violet') and + (i_units = 'Oz' or i_units = 'Cup') and + (i_size = 'economy' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'navy' or i_color = 'linen') and + (i_units = 'Pound' or i_units = 'Unknown') and + (i_size = 'large' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'almond' or i_color = 'olive') and + (i_units = 'Pallet' or i_units = 'Bunch') and + (i_size = 'small' or i_size = 'extra large') + )))) > 0 + order by i_product_name + limit 100""", + "q42" -> + """ +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 """, + "q43" -> + """ +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 2000 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100""", + "q44" -> + """ +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 731 + group by ss_item_sk + having avg(ss_net_profit) > 
0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 731 + and ss_promo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 731 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 731 + and ss_promo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100""", + "q45" -> + """ +select ca_zip, ca_city, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2000 + group by ca_zip, ca_city + order by ca_zip, ca_city + limit 100""", + "q46" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 1 or + household_demographics.hd_vehicle_count= 2) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_city in ('Buena Vista','Friendship','Monroe','Oak Hill','Randolph') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100""", + "q47" -> + """ +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 1999 or + ( d_year = 1999-1 and d_moy =12) or + ( d_year = 1999+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + 
v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 1999 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, sum_sales + limit 100""", + "q48" -> + """ +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1999 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'S' + and + cd_education_status = 'Primary' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'D' + and + cd_education_status = 'College' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'U' + and + cd_education_status = '2 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('ND', 'NC', 'TX') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('VA', 'IA', 'AR') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MA', 'FL', 'TN') + and ss_net_profit between 50 and 25000 + ) + )""", + "q49" -> + """ +select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk 
= cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + order by 1,4,5,2 + limit 100""", + "q50" -> + """ +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2002 +and d2.d_moy = 10 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100""", + "q51" -> + """ +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1213 and 1213+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and 
d_month_seq between 1213 and 1213+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100""", + "q52" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=1998 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 """, + "q53" -> + """ +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1219,1219+1,1219+2,1219+3,1219+4,1219+5,1219+6,1219+7,1219+8,1219+9,1219+10,1219+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100""", + "q54" -> + """ +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Men' + and i_class = 'shirts' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 2 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 2) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 2) + group by c_customer_sk + ) + 
, segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100""", + "q55" -> + """ +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=96 + and d_moy=11 + and d_year=2000 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 """, + "q56" -> + """ +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('antique','white','smoke')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 6 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('antique','white','smoke')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 6 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('antique','white','smoke')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 6 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100""", + "q57" -> + """ +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.cc_name + ,v1.d_year + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, sum_sales + limit 100""", + "q58" -> + """ +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2001-01-27')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2001-01-27')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '2001-01-27')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100""", + "q59" -> + """ +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = 
wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1177 and 1177 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1177+ 12 and 1177 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100""", + "q60" -> + """ +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy = 8 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy = 8 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy = 8 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100""", + "q61" -> + """ +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Home' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -6 + and d_year = 1999 + and d_moy = 12) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Home' + and s_gmt_offset = -6 + and d_year = 1999 + and d_moy = 12) all_sales +order by promotions, total +limit 100""", + "q62" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and 
+ (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1191 and 1191 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +limit 100""", + "q63" -> + """ +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1193,1193+1,1193+2,1193+3,1193+4,1193+5,1193+6,1193+7,1193+8,1193+9,1193+10,1193+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100""", + "q64" -> + """ +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = 
d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('orange','aquamarine','olive','linen','smoke','coral') and + i_current_price between 74 and 74 + 10 and + i_current_price between 74 + 1 and 74 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2001 and + cs2.syear = 2001 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt + ,cs1.s1 + ,cs2.s1""", + "q65" -> + """ +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1195 and 1195+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1195 and 1195+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100""", + "q66" -> + """ +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + 
,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'LATVIAN' || ',' || 'ALLIANCE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_ext_list_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_ext_list_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_ext_list_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_ext_list_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_ext_list_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_ext_list_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_ext_list_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_ext_list_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_ext_list_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_ext_list_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_ext_list_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_ext_list_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 1998 + and t_time between 16224 and 16224+28800 + and sm_carrier in ('LATVIAN','ALLIANCE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'LATVIAN' || ',' || 'ALLIANCE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_sales_price* 
cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 1998 + and t_time between 16224 AND 16224+28800 + and sm_carrier in ('LATVIAN','ALLIANCE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100""", + "q67" -> + """ +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1203 and 1203+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100""", + "q68" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) 
list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 3 or + household_demographics.hd_vehicle_count= -1) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Jamestown','Pine Hill') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100""", + "q69" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('CA','MT','SD') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100""", + "q70" -> + """ +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1215 and 1215+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1215 and 1215+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100""", + "q71" -> + """ +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=11 + and d_year=1998 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from 
catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=11 + and d_year=1998 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=11 + and d_year=1998 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + """, + "q72" -> + """ +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + interval 5 days + and hd_buy_potential = '1001-5000' + and d1.d_year = 1998 + and cd_marital_status = 'S' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100""", + "q73" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_county in ('Van Buren County','Terrell County','Belknap County','Kootenai County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc""", + "q74" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and 
ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 2,3,1 +limit 100""", + "q75" -> + """ +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Music' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Music' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Music') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2001 + AND prev_yr.d_year=2001-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + limit 100""", + "q76" -> + """ +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, 
SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_promo_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_promo_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_web_site_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_web_site_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_bill_addr_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_bill_addr_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100""", + "q77" -> + """ +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - 
coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q78" -> + """ +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_sold_year, ss_item_sk, ss_customer_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2001 +order by + ss_sold_year, ss_item_sk, ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + ratio +limit 100""", + "q79" -> + """ +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 6 or household_demographics.hd_vehicle_count > -1) + and date_dim.d_dow = 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100""", + "q80" -> + """ +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = 
d_date_sk + and d_date between cast('2000-08-25' as date) + and (cast('2000-08-25' as date) + INTERVAL 60 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('2000-08-25' as date) + and (cast('2000-08-25' as date) + INTERVAL 60 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('2000-08-25' as date) + and (cast('2000-08-25' as date) + INTERVAL 60 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q81" -> + """ +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =2000 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'SC' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100""", + "q82" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 6 and 6+30 + and inv_item_sk = i_item_sk + and 
d_date_sk=inv_date_sk + and d_date between cast('2001-02-23' as date) and (cast('2001-02-23' as date) + INTERVAL 60 days) + and i_manufact_id in (669,623,578,379) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q83" -> + """ +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-01-15','2001-09-03','2001-11-17'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-01-15','2001-09-03','2001-11-17'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-01-15','2001-09-03','2001-11-17'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100""", + "q84" -> + """ +select c_customer_id as customer_id + , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Walnut Grove' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 53669 + and ib_upper_bound <= 53669 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100""", + "q85" -> + """ +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 2001 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'S' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Secondary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 
'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'W' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('AZ', 'SD', 'TN') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('TX', 'GA', 'IA') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'VT', 'AL') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100""", + "q86" -> + """ +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1195 and 1195+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100""", + "q87" -> + """ +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1194 and 1194+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1194 and 1194+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1194 and 1194+11) +) cool_cust""", + "q88" -> + """ +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and 
household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and 
household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s8""", + "q89" -> + """ +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Shoes','Electronics') and + i_class in ('flatware','mens','televisions') + ) + or (i_category in ('Women','Sports','Music') and + i_class in ('maternity','camping','rock') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100""", + "q90" -> + """ +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 8 and 8+1 + and household_demographics.hd_dep_count = 4 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 20 and 20+1 + and household_demographics.hd_dep_count = 4 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100""", + "q91" -> + """ +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2001 +and d_moy = 12 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -6 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc""", + "q92" -> + """ +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 7 +and i_item_sk = ws_item_sk +and d_date between '2000-01-16' and + (cast('2000-01-16' as date) + INTERVAL 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '2000-01-16' and + (cast('2000-01-16' as date) + INTERVAL 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100""", + "q93" -> + """ +select ss_customer_sk + ,sum(act_sales) sumsales + from (select 
ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'reason 24') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100""", + "q94" -> + """ +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-2-01' and + (cast('2001-2-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'VT' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q95" -> + """ +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-3-01' and + (cast('2001-3-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TN' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q96" -> + """ +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 20 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 6 + and store.s_store_name = 'ese' +order by count(*) +limit 100""", + "q97" -> + """ +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1206 and 1206 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1206 and 1206 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100""", + 
"q98" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Sports', 'Books', 'Electronics') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2002-06-29' as date) + and (cast('2002-06-29' as date) + interval 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio""", + "q99" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1199 and 1199 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100""", + "q1" -> + """ +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'MO' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100""", + "q2" -> + """ +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + 
,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1""", + "q3" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 816 + and dt.d_moy=11 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100""", + "q4" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + 
,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100""", + "q5" -> + """ +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) 
as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q6" -> + """ +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100""", + "q7" -> + """ +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100""", + "q8" -> + """ +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + '27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + '83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + 
'37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + '94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + '98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + '71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + '47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + '49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + '56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100""", + "q9" -> + """ +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 4502397049 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 4756228269 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_profit) + from 
store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 4101835064 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 4583261513 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 4208819283 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1""", + "q10" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Grady County','Marion County','Decatur County','Lyman County','Beaver County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 2 and 2+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 2 ANd 2+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 2 and 2+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100""", + "q11" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = 
ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag +limit 100""", + "q12" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Children', 'Jewelry', 'Music') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-05-11' as date) + and (cast('2001-05-11' as date) + INTERVAL 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q13" -> + """ +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = 'Primary' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'S' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'W' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('SC', 'WY', 'TX') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('NY', 'NE', 'GA') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('AL', 'AR', 'MI') + and ss_net_profit between 50 and 250 + ))""", + "q14a" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + 
,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100""", + "q14b" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 
1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as +(select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) + select this_year.channel ty_channel + ,this_year.i_brand_id ty_brand + ,this_year.i_class_id ty_class + ,this_year.i_category_id ty_category + ,this_year.sales ty_sales + ,this_year.number_sales ty_number_sales + ,last_year.channel ly_channel + ,last_year.i_brand_id ly_brand + ,last_year.i_class_id ly_class + ,last_year.i_category_id ly_category + ,last_year.sales ly_sales + ,last_year.number_sales ly_number_sales + from + (select 'store' channel, i_brand_id,i_class_id,i_category_id + ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1999 + 1 + and d_moy = 12 + and d_dom = 5) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, + (select 'store' channel, i_brand_id,i_class_id + ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1999 + and d_moy = 12 + and d_dom = 5) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year + where this_year.i_brand_id= last_year.i_brand_id + and this_year.i_class_id = last_year.i_class_id + and this_year.i_category_id = last_year.i_category_id + order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id + limit 100""", + "q15" -> + """ +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 1998 + group by ca_zip + order by ca_zip + limit 100""", + "q16" -> + """ +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2000-3-01' and + (cast('2000-3-01' as date) + INTERVAL 60 days) +and 
cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'IA' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Luce County','Wadena County','Jefferson Davis Parish','Daviess County', + 'Williamson County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100""", + "q17" -> + """ +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '1999Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100""", + "q18" -> + """ +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'F' and + cd1.cd_education_status = 'Unknown' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (4,8,12,10,11,9) and + d_year = 2001 and + ca_state in ('AR','IA','TX' + ,'KS','LA','NC','SD') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100""", + "q19" -> + """ +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=63 + and d_moy=11 + and d_year=2002 + and ss_customer_sk = c_customer_sk + and 
c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 """, + "q20" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Electronics', 'Children', 'Home') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2002-03-19' as date) + and (cast('2002-03-19' as date) + INTERVAL 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q21" -> + """ +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1999-04-12' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1999-04-12' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1999-04-12' as date) - INTERVAL 30 days) + and (cast ('1999-04-12' as date) + INTERVAL 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100""", + "q22" -> + """ +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1188 and 1188 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100""", + "q23a" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1998,1998+1,1998+2,1998+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1998,1998+1,1998+2,1998+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 1998 + and d_moy = 7 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 1998 + 
and d_moy = 7 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + limit 100""", + "q23b" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1998,1998 + 1,1998 + 2,1998 + 3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1998,1998+1,1998+2,1998+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * + from max_store_sales)) + select c_last_name,c_first_name,sales + from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales + from catalog_sales + ,customer + ,date_dim + where d_year = 1998 + and d_moy = 7 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name + union all + select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales + from web_sales + ,customer + ,date_dim + where d_year = 1998 + and d_moy = 7 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and ws_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name) + order by c_last_name,c_first_name,sales + limit 100""", + "q24a" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'goldenrod' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q24b" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and 
c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip + and s_market_id = 7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'magenta' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q25" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,min(ss_net_profit) as store_sales_profit + ,min(sr_net_loss) as store_returns_loss + ,min(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2002 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2002 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2002 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q26" -> + """ +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'M' and + cd_education_status = '4 yr Degree' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100""", + "q27" -> + """ +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'M' and + cd_education_status = 'Secondary' and + d_year = 1999 and + s_state in ('AL','FL', 'TX', 'NM', 'MI', 'GA') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100""", + "q28" -> + """ +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 74 and 74+10 + or ss_coupon_amt between 2949 and 2949+1000 + or ss_wholesale_cost between 49 and 49+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 136 and 136+10 + or ss_coupon_amt between 10027 and 10027+1000 + or ss_wholesale_cost between 53 and 53+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and 
(ss_list_price between 73 and 73+10 + or ss_coupon_amt between 1451 and 1451+1000 + or ss_wholesale_cost between 78 and 78+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 87 and 87+10 + or ss_coupon_amt between 17007 and 17007+1000 + or ss_wholesale_cost between 55 and 55+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 112 and 112+10 + or ss_coupon_amt between 17243 and 17243+1000 + or ss_wholesale_cost between 2 and 2+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 119 and 119+10 + or ss_coupon_amt between 4954 and 4954+1000 + or ss_wholesale_cost between 22 and 22+20)) B6 +limit 100""", + "q29" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,stddev_samp(ss_quantity) as store_sales_quantity + ,stddev_samp(sr_return_quantity) as store_returns_quantity + ,stddev_samp(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (2000,2000+1,2000+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q30" -> + """ +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2001 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'MI' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100""", + "q31" -> + """ +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from 
web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 2000 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 2000 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 2000 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 2000 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 2000 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =2000 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by store_q1_q2_increase""", + "q32" -> + """ +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 490 +and i_item_sk = cs_item_sk +and d_date between '1999-01-27' and + (cast('1999-01-27' as date) + INTERVAL 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1999-01-27' and + (cast('1999-01-27' as date) + INTERVAL 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100""", + "q33" -> + """ +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 1 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 1 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 1 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100""", + "q34" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from 
store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '1001-5000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Nez Perce County','Murray County','Surry County','Calhoun County', + 'Wilkinson County','Brown County','Wallace County','Carter County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number""", + "q35" -> + """ +select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + stddev_samp(cd_dep_count), + sum(cd_dep_count), + min(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + stddev_samp(cd_dep_employed_count), + sum(cd_dep_employed_count), + min(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + stddev_samp(cd_dep_college_count), + sum(cd_dep_college_count), + min(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100""", + "q36" -> + """ +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('MN','TX','TX','IN', + 'CA','LA','NM','TX') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100""", + "q37" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 16 and 16 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-06-05' as date) and (cast('2002-06-05' as date) + interval 60 days) + and i_manufact_id in 
(841,790,796,739) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q38" -> + """ +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1203 and 1203 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1203 and 1203 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1203 and 1203 + 11 +) hot_cust +limit 100""", + "q39a" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=3 + and inv2.d_moy=3+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q39b" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=3 + and inv2.d_moy=3+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q40" -> + """ +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1999-04-27' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('1999-04-27' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + 
i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('1999-04-27' as date) - INTERVAL 30 days) + and (cast ('1999-04-27' as date) + INTERVAL 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100""", + "q41" -> + """ +select distinct(i_product_name) + from item i1 + where i_manufact_id between 841 and 841+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'bisque' or i_color = 'khaki') and + (i_units = 'Carton' or i_units = 'Box') and + (i_size = 'large' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'antique' or i_color = 'sandy') and + (i_units = 'Pallet' or i_units = 'Cup') and + (i_size = 'petite' or i_size = 'small') + ) or + (i_category = 'Men' and + (i_color = 'forest' or i_color = 'brown') and + (i_units = 'Dram' or i_units = 'Ton') and + (i_size = 'economy' or i_size = 'medium') + ) or + (i_category = 'Men' and + (i_color = 'chartreuse' or i_color = 'light') and + (i_units = 'Pound' or i_units = 'Dozen') and + (i_size = 'large' or i_size = 'extra large') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'turquoise' or i_color = 'chocolate') and + (i_units = 'Bundle' or i_units = 'Unknown') and + (i_size = 'large' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'maroon' or i_color = 'pale') and + (i_units = 'Each' or i_units = 'Tbl') and + (i_size = 'petite' or i_size = 'small') + ) or + (i_category = 'Men' and + (i_color = 'almond' or i_color = 'floral') and + (i_units = 'Gross' or i_units = 'N/A') and + (i_size = 'economy' or i_size = 'medium') + ) or + (i_category = 'Men' and + (i_color = 'drab' or i_color = 'plum') and + (i_units = 'Bunch' or i_units = 'Case') and + (i_size = 'large' or i_size = 'extra large') + )))) > 0 + order by i_product_name + limit 100""", + "q42" -> + """ +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=2002 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 """, + "q43" -> + """ +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -5 and + d_year = 2002 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100""", + "q44" -> + """ +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing 
+from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 709 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 709 + and ss_addr_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 709 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 709 + and ss_addr_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100""", + "q45" -> + """ +select ca_zip, ca_state, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2002 + group by ca_zip, ca_state + order by ca_zip, ca_state + limit 100""", + "q46" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 0 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Johnson','Norwood','Cambridge','Klondike','Rock Hill') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100""", + "q47" -> + """ +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2001 or + ( d_year = 2001-1 and d_moy =12) or + ( d_year = 2001+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.s_store_name, v1.s_company_name + ,v1.d_year, v1.d_moy + 
,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2001 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, psum + limit 100""", + "q48" -> + """ +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2000 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'U' + and + cd_education_status = '2 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'S' + and + cd_education_status = 'Primary' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'W' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MT', 'OH', 'GA') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('WV', 'AZ', 'NM') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('NY', 'PA', 'KY') + and ss_net_profit between 50 and 25000 + ) + )""", + "q49" -> + """ +select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 11 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as 
return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 11 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 11 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + order by 1,4,5,2 + limit 100""", + "q50" -> + """ +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2000 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100""", + "q51" -> + """ +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1177 and 1177+11 + and ws_item_sk is not NULL +group by 
ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1177 and 1177+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100""", + "q52" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=2001 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 """, + "q53" -> + """ +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1188,1188+1,1188+2,1188+3,1188+4,1188+5,1188+6,1188+7,1188+8,1188+9,1188+10,1188+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100""", + "q54" -> + """ +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Men' + and i_class = 'pants' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 5 + and d_year = 2002 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and 
ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 2002 and d_moy = 5) + and (select distinct d_month_seq+3 + from date_dim where d_year = 2002 and d_moy = 5) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100""", + "q55" -> + """ +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=67 + and d_moy=11 + and d_year=2001 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 """, + "q56" -> + """ +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('blanched','spring','seashell')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('blanched','spring','seashell')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('blanched','spring','seashell')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100""", + "q57" -> + """ +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand + ,v1.d_year + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, sum_sales + limit 100""", + "q58" -> + """ +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-05-24')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-05-24')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '2000-05-24')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100""", + "q59" -> + """ +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = 
wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1197 and 1197 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1197+ 12 and 1197 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100""", + "q60" -> + """ +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100""", + "q61" -> + """ +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Jewelry' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 2002 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Jewelry' + and s_gmt_offset = -7 + and d_year = 2002 + and d_moy = 11) all_sales +order by promotions, total +limit 100""", + "q62" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 
60) and + (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1194 and 1194 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +limit 100""", + "q63" -> + """ +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1222,1222+1,1222+2,1222+3,1222+4,1222+5,1222+6,1222+7,1222+8,1222+9,1222+10,1222+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100""", + "q64" -> + """ +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + 
c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('ivory','purple','almond','bisque','lawn','azure') and + i_current_price between 60 and 60 + 10 and + i_current_price between 60 + 1 and 60 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2001 and + cs2.syear = 2001 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt + ,cs1.s1 + ,cs2.s1""", + "q65" -> + """ +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1185 and 1185+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1185 and 1185+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100""", + "q66" -> + """ +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + 
,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'FEDEX' || ',' || 'MSC' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_ext_list_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_ext_list_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_ext_list_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_ext_list_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_ext_list_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_ext_list_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_ext_list_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_ext_list_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_ext_list_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_ext_list_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_ext_list_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_ext_list_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_profit * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_profit * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_profit * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_profit * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_profit * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_profit * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_profit * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_profit * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_profit * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_profit * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_profit * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_profit * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 2662 and 2662+28800 + and sm_carrier in ('FEDEX','MSC') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'FEDEX' || ',' || 'MSC' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_list_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_list_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_list_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_list_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_list_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_list_price* 
cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_list_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_list_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_list_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_list_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_list_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_list_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_profit * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_profit * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_profit * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_profit * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_profit * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_profit * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_profit * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_profit * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_profit * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_profit * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_profit * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_profit * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 2662 AND 2662+28800 + and sm_carrier in ('FEDEX','MSC') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100""", + "q67" -> + """ +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1177 and 1177+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100""", + "q68" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and 
store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 5 or + household_demographics.hd_vehicle_count= 4) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Lodi','Richmond') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100""", + "q69" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('IL','FL','SD') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100""", + "q70" -> + """ +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1206 and 1206+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1206 and 1206+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100""", + "q71" -> + """ +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=11 + and d_year=1999 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=11 + and d_year=1999 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from 
store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=11 + and d_year=1999 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + """, + "q72" -> + """ +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + interval 5 days + and hd_buy_potential = '1001-5000' + and d1.d_year = 2000 + and cd_marital_status = 'S' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100""", + "q73" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '1001-5000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Humboldt County','Hickman County','Galax city','Abbeville County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc""", + "q74" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear 
+ ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 3,1,2 +limit 100""", + "q75" -> + """ +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Books' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Books' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Books') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2001 + AND prev_yr.d_year=2001-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + limit 100""", + "q76" -> + """ +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_promo_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_promo_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + 
UNION ALL + SELECT 'web' as channel, 'ws_ship_addr_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_addr_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_ship_customer_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_ship_customer_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100""", + "q77" -> + """ +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q78" -> + """ +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk 
ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_item_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2000 +order by + ss_item_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + ratio +limit 100""", + "q79" -> + """ +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 5 or household_demographics.hd_vehicle_count > -1) + and date_dim.d_dow = 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100""", + "q80" -> + """ +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('2001-08-19' as date) + and (cast('2001-08-19' as date) + INTERVAL 60 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + 
sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('2001-08-19' as date) + and (cast('2001-08-19' as date) + INTERVAL 60 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('2001-08-19' as date) + and (cast('2001-08-19' as date) + INTERVAL 60 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q81" -> + """ +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1999 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'MO' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100""", + "q82" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 68 and 68+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-05-08' as date) and (cast('2002-05-08' as date) + INTERVAL 60 days) + and i_manufact_id in (562,370,230,182) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q83" -> + """ +with 
sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-02-20','2000-10-08','2000-11-04'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-02-20','2000-10-08','2000-11-04'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-02-20','2000-10-08','2000-11-04'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100""", + "q84" -> + """ +select c_customer_id as customer_id + , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Buena Vista' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 49786 + and ib_upper_bound <= 49786 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100""", + "q85" -> + """ +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 2001 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '2 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 
150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('IA', 'ND', 'FL') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('OH', 'MS', 'VA') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('MN', 'LA', 'TX') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100""", + "q86" -> + """ +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1217 and 1217+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100""", + "q87" -> + """ +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1224 and 1224+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1224 and 1224+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1224 and 1224+11) +) cool_cust""", + "q88" -> + """ +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and 
((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s8""", + "q89" -> + """ +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + 
(partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2001) and + ((i_category in ('Children','Home','Women') and + i_class in ('toddlers','flatware','fragrances') + ) + or (i_category in ('Music','Electronics','Shoes') and + i_class in ('country','dvd/vcr players','mens') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100""", + "q90" -> + """ +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 7 and 7+1 + and household_demographics.hd_dep_count = 1 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 20 and 20+1 + and household_demographics.hd_dep_count = 1 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100""", + "q91" -> + """ +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 1998 +and d_moy = 12 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -6 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc""", + "q92" -> + """ +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 172 +and i_item_sk = ws_item_sk +and d_date between '1999-01-12' and + (cast('1999-01-12' as date) + INTERVAL 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '1999-01-12' and + (cast('1999-01-12' as date) + INTERVAL 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100""", + "q93" -> + """ +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = 
r_reason_sk + and r_reason_desc = 'reason 58') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100""", + "q94" -> + """ +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2002-3-01' and + (cast('2002-3-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'GA' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q95" -> + """ +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-3-01' and + (cast('2001-3-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'NE' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q96" -> + """ +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 16 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 0 + and store.s_store_name = 'ese' +order by count(*) +limit 100""", + "q97" -> + """ +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1219 and 1219 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1219 and 1219 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100""", + "q98" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Books', 
'Children', 'Sports') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-03-10' as date) + and (cast('2001-03-10' as date) + interval 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio""", + "q99" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1205 and 1205 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100""" + ) + + val TPCDSQueries10TB = Map( + "q1" -> + """ +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'TN' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100""", + "q2" -> + """ +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from 
wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1""", + "q3" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 816 + and dt.d_moy=11 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100""", + "q4" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + 
and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100""", + "q5" -> + """ +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = 
ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q6" -> + """ +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100""", + "q7" -> + """ +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100""", + "q8" -> + """ +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + '27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + '83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + '37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + '94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + '98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + 
'71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + '47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + '49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + '56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100""", + "q9" -> + """ +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 2972190 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 111711138 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 127958920 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_profit) + from store_sales 
+ where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 41162107 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 25211875 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1""", + "q10" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Allen County','Jefferson County','Lamar County','Dakota County','Park County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100""", + "q11" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + 
,t_s_secyear.customer_login + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1998 + and t_s_secyear.dyear = 1998+1 + and t_w_firstyear.dyear = 1998 + and t_w_secyear.dyear = 1998+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_login +limit 100""", + "q12" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Men', 'Books', 'Children') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1998-03-28' as date) + and (cast('1998-03-28' as date) + INTERVAL 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q13" -> + """ +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Unknown' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'W' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'S' + and cd_education_status = 'College' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WV', 'GA', 'TX') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('TN', 'KY', 'SC') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('OK', 'NE', 'CA') + and ss_net_profit between 50 and 250 + ))""", + "q14a" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1998 AND 1998 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + 
where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1998 AND 1998 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1998 AND 1998 + 2) + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100""", + "q14b" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1998 AND 1998 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1998 AND 1998 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1998 AND 1998 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + 
avg_sales as +(select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select this_year.channel ty_channel + ,this_year.i_brand_id ty_brand + ,this_year.i_class_id ty_class + ,this_year.i_category_id ty_category + ,this_year.sales ty_sales + ,this_year.number_sales ty_number_sales + ,last_year.channel ly_channel + ,last_year.i_brand_id ly_brand + ,last_year.i_class_id ly_class + ,last_year.i_category_id ly_category + ,last_year.sales ly_sales + ,last_year.number_sales ly_number_sales + from + (select 'store' channel, i_brand_id,i_class_id,i_category_id + ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1998 + 1 + and d_moy = 12 + and d_dom = 20) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, + (select 'store' channel, i_brand_id,i_class_id + ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1998 + and d_moy = 12 + and d_dom = 20) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year + where this_year.i_brand_id= last_year.i_brand_id + and this_year.i_class_id = last_year.i_class_id + and this_year.i_category_id = last_year.i_category_id + order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id + limit 100""", + "q15" -> + """ +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100""", + "q16" -> + """ +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-2-01' and + (cast('2001-2-01' as date) + INTERVAL 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'MS' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Jackson County','Daviess County','Walker County','Dauphin County', + 'Mobile County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and 
cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100""", + "q17" -> + """ +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '1999Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100""", + "q18" -> + """ +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'F' and + cd1.cd_education_status = 'Primary' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (6,7,3,11,12,8) and + d_year = 1999 and + ca_state in ('IL','WV','KS' + ,'GA','LA','PA','TX') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100""", + "q19" -> + """ +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=26 + and d_moy=12 + and d_year=2000 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 """, + "q20" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + 
,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Books', 'Home', 'Jewelry') + and cs_sold_date_sk = d_date_sk + and d_date between cast('1998-05-08' as date) + and (cast('1998-05-08' as date) + INTERVAL 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q21" -> + """ +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2000-05-22' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('2000-05-22' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('2000-05-22' as date) - INTERVAL 30 days) + and (cast ('2000-05-22' as date) + INTERVAL 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100""", + "q22" -> + """ +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1199 and 1199 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100""", + "q23a" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 5 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 5 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + limit 100""", + "q23b" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + 
where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000 + 1,2000 + 2,2000 + 3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * + from max_store_sales)) + select c_last_name,c_first_name,sales + from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales + from catalog_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 5 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name + union all + select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales + from web_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 5 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and ws_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name) + order by c_last_name,c_first_name,sales + limit 100""", + "q24a" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid_inc_tax) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'navy' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q24b" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid_inc_tax) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip + and s_market_id = 10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 
'beige' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q25" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2002 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2002 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2002 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q26" -> + """ +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'M' and + cd_education_status = '2 yr Degree' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2002 + group by i_item_id + order by i_item_id + limit 100""", + "q27" -> + """ +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'F' and + cd_marital_status = 'S' and + cd_education_status = 'Advanced Degree' and + d_year = 2000 and + s_state in ('WA','LA', 'LA', 'TX', 'AL', 'PA') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100""", + "q28" -> + """ +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 189 and 189+10 + or ss_coupon_amt between 4483 and 4483+1000 + or ss_wholesale_cost between 24 and 24+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 71 and 71+10 + or ss_coupon_amt between 14775 and 14775+1000 + or ss_wholesale_cost between 38 and 38+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 183 and 183+10 + or ss_coupon_amt between 13456 and 13456+1000 + or ss_wholesale_cost between 31 and 31+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 135 and 135+10 + or ss_coupon_amt 
between 4905 and 4905+1000 + or ss_wholesale_cost between 27 and 27+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 180 and 180+10 + or ss_coupon_amt between 17430 and 17430+1000 + or ss_wholesale_cost between 57 and 57+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 49 and 49+10 + or ss_coupon_amt between 2950 and 2950+1000 + or ss_wholesale_cost between 52 and 52+20)) B6 +limit 100""", + "q29" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,stddev_samp(ss_quantity) as store_sales_quantity + ,stddev_samp(sr_return_quantity) as store_returns_quantity + ,stddev_samp(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1998 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1998 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1998,1998+1,1998+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q30" -> + """ +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2000 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'GA' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100""", + "q31" -> + """ +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales 
store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 1998 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 1998 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 1998 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 1998 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 1998 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =1998 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.ca_county""", + "q32" -> + """ +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 948 +and i_item_sk = cs_item_sk +and d_date between '1998-02-03' and + (cast('1998-02-03' as date) + INTERVAL 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1998-02-03' and + (cast('1998-02-03' as date) + INTERVAL 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100""", + "q33" -> + """ +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 2 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 2 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 2 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100""", + "q34" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + 
household_demographics.hd_buy_potential = '5001-10000') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Jefferson Davis Parish','Levy County','Coal County','Oglethorpe County', + 'Mobile County','Gage County','Richland County','Gogebic County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number""", + "q35" -> + """ +select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + stddev_samp(cd_dep_count), + stddev_samp(cd_dep_count), + min(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + stddev_samp(cd_dep_employed_count), + stddev_samp(cd_dep_employed_count), + min(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + stddev_samp(cd_dep_college_count), + stddev_samp(cd_dep_college_count), + min(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100""", + "q36" -> + """ +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1998 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('OH','WV','PA','TN', + 'MN','MO','NM','MI') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100""", + "q37" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 35 and 35 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-01-20' as date) and (cast('2001-01-20' as date) + interval 60 days) + and i_manufact_id in (928,715,942,861) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q38" -> + """ +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk 
= date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1222 and 1222 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1222 and 1222 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1222 and 1222 + 11 +) hot_cust +limit 100""", + "q39a" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1998 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q39b" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1998 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q40" -> + """ +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1999-02-02' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('1999-02-02' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('1999-02-02' as date) - INTERVAL 30 days) + and (cast ('1999-02-02' as date) + INTERVAL 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100""", + "q41" 
-> + """ +select distinct(i_product_name) + from item i1 + where i_manufact_id between 732 and 732+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'beige' or i_color = 'spring') and + (i_units = 'Tsp' or i_units = 'Ton') and + (i_size = 'petite' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'white' or i_color = 'pale') and + (i_units = 'Box' or i_units = 'Dram') and + (i_size = 'large' or i_size = 'economy') + ) or + (i_category = 'Men' and + (i_color = 'midnight' or i_color = 'frosted') and + (i_units = 'Bunch' or i_units = 'Carton') and + (i_size = 'small' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'azure' or i_color = 'goldenrod') and + (i_units = 'Pallet' or i_units = 'Gross') and + (i_size = 'petite' or i_size = 'extra large') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'brown' or i_color = 'hot') and + (i_units = 'Tbl' or i_units = 'Cup') and + (i_size = 'petite' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'powder' or i_color = 'honeydew') and + (i_units = 'Bundle' or i_units = 'Unknown') and + (i_size = 'large' or i_size = 'economy') + ) or + (i_category = 'Men' and + (i_color = 'antique' or i_color = 'purple') and + (i_units = 'N/A' or i_units = 'Dozen') and + (i_size = 'small' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'lavender' or i_color = 'tomato') and + (i_units = 'Lb' or i_units = 'Oz') and + (i_size = 'petite' or i_size = 'extra large') + )))) > 0 + order by i_product_name + limit 100""", + "q42" -> + """ +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=2002 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 """, + "q43" -> + """ +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1999 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100""", + "q44" -> + """ +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 321 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 321 + and ss_addr_sk is null + group 
by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 321 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 321 + and ss_addr_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100""", + "q45" -> + """ +select ca_zip, ca_county, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1999 + group by ca_zip, ca_county + order by ca_zip, ca_county + limit 100""", + "q46" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 2) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Antioch','Mount Vernon','Jamestown','Wilson','Farmington') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100""", + "q47" -> + """ +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2001 or + ( d_year = 2001-1 and d_moy =12) or + ( d_year = 2001+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.s_company_name + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = 
v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2001 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, avg_monthly_sales + limit 100""", + "q48" -> + """ +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1999 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'D' + and + cd_education_status = 'College' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'W' + and + cd_education_status = 'Secondary' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '2 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('NE', 'IA', 'NY') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('IN', 'TN', 'OH') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('KS', 'CA', 'CO') + and ss_net_profit between 50 and 25000 + ) + )""", + "q49" -> + """ +select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and 
cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + order by 1,4,5,2 + limit 100""", + "q50" -> + """ +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 1999 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100""", + "q51" -> + """ +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1176 and 1176+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1176 and 1176+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * 
+from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100""", + "q52" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=2001 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 """, + "q53" -> + """ +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1218,1218+1,1218+2,1218+3,1218+4,1218+5,1218+6,1218+7,1218+8,1218+9,1218+10,1218+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100""", + "q54" -> + """ +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Music' + and i_class = 'country' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 7 + and d_year = 2001 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 2001 and d_moy = 7) + and (select distinct d_month_seq+3 + from date_dim where d_year = 2001 and d_moy = 7) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, 
count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100""", + "q55" -> + """ +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=87 + and d_moy=11 + and d_year=2001 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 """, + "q56" -> + """ +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('tan','lace','gainsboro')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('tan','lace','gainsboro')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('tan','lace','gainsboro')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100""", + "q57" -> + """ +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2001 or + ( d_year = 2001-1 and d_moy =12) or + ( d_year = 2001+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.cc_name + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2001 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, avg_monthly_sales + limit 100""", + "q58" -> + """ +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-03-26')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-03-26')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '2000-03-26')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100""", + "q59" -> + """ +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = 
wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1199 and 1199 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1199+ 12 and 1199 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100""", + "q60" -> + """ +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Men')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Men')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Men')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100""", + "q61" -> + """ +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 2001 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 2001 + and d_moy = 11) all_sales +order by promotions, total +limit 100""", + "q62" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 
60) and + (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1194 and 1194 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +limit 100""", + "q63" -> + """ +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1205,1205+1,1205+2,1205+3,1205+4,1205+5,1205+6,1205+7,1205+8,1205+9,1205+10,1205+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100""", + "q64" -> + """ +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + 
c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('peach','misty','drab','chocolate','almond','saddle') and + i_current_price between 75 and 75 + 10 and + i_current_price between 75 + 1 and 75 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2000 and + cs2.syear = 2000 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt + ,cs1.s1 + ,cs2.s1""", + "q65" -> + """ +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1208 and 1208+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1208 and 1208+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100""", + "q66" -> + """ +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + 
,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'HARMSTORF' || ',' || 'USPS' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 24285 and 24285+28800 + and sm_carrier in ('HARMSTORF','USPS') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'HARMSTORF' || ',' || 'USPS' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_list_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_list_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_list_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_list_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_list_price* cs_quantity else 0 end) as may_sales 
+ ,sum(case when d_moy = 6 + then cs_ext_list_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_list_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_list_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_list_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_list_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_list_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_list_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 24285 AND 24285+28800 + and sm_carrier in ('HARMSTORF','USPS') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100""", + "q67" -> + """ +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1196 and 1196+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100""", + "q68" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and 
store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 1 or + household_demographics.hd_vehicle_count= -1) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Bethel','Summit') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100""", + "q69" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('OK','GA','VA') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2004 and + d_moy between 4 and 4+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2004 and + d_moy between 4 and 4+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2004 and + d_moy between 4 and 4+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100""", + "q70" -> + """ +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1197 and 1197+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1197 and 1197+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100""", + "q71" -> + """ +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=1999 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=1999 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as 
sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=1999 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + """, + "q72" -> + """ +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + interval 5 days + and hd_buy_potential = '>10000' + and d1.d_year = 2002 + and cd_marital_status = 'D' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100""", + "q73" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '501-1000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Franklin Parish','Ziebach County','Luce County','Williamson County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc""", + "q74" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, 
t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 3,1,2 +limit 100""", + "q75" -> + """ +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2001 + AND prev_yr.d_year=2001-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + limit 100""", + "q76" -> + """ +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_cdemo_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_cdemo_sk IS 
NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_ship_hdemo_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_hdemo_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_ship_customer_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_ship_customer_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100""", + "q77" -> + """ +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-27' as date) + and (cast('2001-08-27' as date) + INTERVAL 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q78" -> + """ +with ws as + 
(select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_sold_year, ss_item_sk, ss_customer_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2002 +order by + ss_sold_year, ss_item_sk, ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + ratio +limit 100""", + "q79" -> + """ +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 0 or household_demographics.hd_vehicle_count > 0) + and date_dim.d_dow = 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100""", + "q80" -> + """ +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('1999-08-12' as date) + and (cast('1999-08-12' as date) + INTERVAL 60 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = 
p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('1999-08-12' as date) + and (cast('1999-08-12' as date) + INTERVAL 60 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('1999-08-12' as date) + and (cast('1999-08-12' as date) + INTERVAL 60 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q81" -> + """ +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =2001 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'NC' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100""", + "q82" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 82 and 82+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-03-10' as date) and (cast('2002-03-10' as date) + INTERVAL 60 days) + and i_manufact_id in (941,920,105,693) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = 
i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q83" -> + """ +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1999-04-14','1999-09-28','1999-11-12'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1999-04-14','1999-09-28','1999-11-12'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1999-04-14','1999-09-28','1999-11-12'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100""", + "q84" -> + """ +select c_customer_id as customer_id + , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Antioch' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 55019 + and ib_upper_bound <= 55019 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100""", + "q85" -> + """ +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 2001 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'S' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '2 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'W' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + 
cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('OK', 'TX', 'MO') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('GA', 'KS', 'NC') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('VA', 'WI', 'WV') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100""", + "q86" -> + """ +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1180 and 1180+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100""", + "q87" -> + """ +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1204 and 1204+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1204 and 1204+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1204 and 1204+11) +) cool_cust""", + "q88" -> + """ +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = 
household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s8""", + "q89" -> + """ +select * +from( +select i_category, 
i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2001) and + ((i_category in ('Women','Music','Home') and + i_class in ('fragrances','pop','bedding') + ) + or (i_category in ('Books','Men','Children') and + i_class in ('home repair','sports-apparel','infants') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100""", + "q90" -> + """ +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 8 and 8+1 + and household_demographics.hd_dep_count = 4 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 19 and 19+1 + and household_demographics.hd_dep_count = 4 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100""", + "q91" -> + """ +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2002 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like '5001-10000%' +and ca_gmt_offset = -6 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc""", + "q92" -> + """ +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 561 +and i_item_sk = ws_item_sk +and d_date between '2001-03-13' and + (cast('2001-03-13' as date) + INTERVAL 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '2001-03-13' and + (cast('2001-03-13' as date) + INTERVAL 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100""", + "q93" -> + """ +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join 
store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'reason 64') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100""", + "q94" -> + """ +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-5-01' and + (cast('2001-5-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q95" -> + """ +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2000-3-01' and + (cast('2000-3-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TN' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q96" -> + """ +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 16 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 4 + and store.s_store_name = 'ese' +order by count(*) +limit 100""", + "q97" -> + """ +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1209 and 1209 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1209 and 1209 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100""", + "q98" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as 
revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Home', 'Shoes') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-04-12' as date) + and (cast('2001-04-12' as date) + interval 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio""", + "q99" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1203 and 1203 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100""", + "q1" -> + """ +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100""", + "q2" -> + """ +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales 
thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1""", + "q3" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 816 + and dt.d_moy=11 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100""", + "q4" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = 
t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100""", + "q5" -> + """ +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + 
wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q6" -> + """ +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100""", + "q7" -> + """ +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100""", + "q8" -> + """ +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + '27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + '83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + '37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + '94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + 
'98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + '71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + '47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + '49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + '56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100""", + "q9" -> + """ +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 98972190 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 160856845 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 12733327 + then (select avg(ss_ext_discount_amt) + from store_sales + 
where ss_quantity between 41 and 60) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 96251173 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 80049606 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1""", + "q10" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Fillmore County','McPherson County','Bonneville County','Boone County','Brown County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 3 and 3+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 3 ANd 3+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 3 and 3+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100""", + "q11" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + 
t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100""", + "q12" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Electronics', 'Books', 'Women') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1998-01-06' as date) + and (cast('1998-01-06' as date) + INTERVAL 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q13" -> + """ +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Secondary' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'W' + and cd_education_status = 'College' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('TX', 'OK', 'MI') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WA', 'NC', 'OH') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'FL', 'GA') + and ss_net_profit between 50 and 250 + ))""", + "q14a" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 2000 AND 2000 + 2 + intersect 
+ select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 2000 AND 2000 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 2000 AND 2000 + 2) + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100""", + "q14b" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 2000 AND 2000 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 2000 AND 2000 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 2000 
AND 2000 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as +(select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 2000 and 2000 + 2) x) + select this_year.channel ty_channel + ,this_year.i_brand_id ty_brand + ,this_year.i_class_id ty_class + ,this_year.i_category_id ty_category + ,this_year.sales ty_sales + ,this_year.number_sales ty_number_sales + ,last_year.channel ly_channel + ,last_year.i_brand_id ly_brand + ,last_year.i_class_id ly_class + ,last_year.i_category_id ly_category + ,last_year.sales ly_sales + ,last_year.number_sales ly_number_sales + from + (select 'store' channel, i_brand_id,i_class_id,i_category_id + ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 2000 + 1 + and d_moy = 12 + and d_dom = 15) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, + (select 'store' channel, i_brand_id,i_class_id + ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 2000 + and d_moy = 12 + and d_dom = 15) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year + where this_year.i_brand_id= last_year.i_brand_id + and this_year.i_class_id = last_year.i_class_id + and this_year.i_category_id = last_year.i_category_id + order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id + limit 100""", + "q15" -> + """ +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + group by ca_zip + order by ca_zip + limit 100""", + "q16" -> + """ +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '1999-4-01' and + (cast('1999-4-01' as date) + INTERVAL 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'IL' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Richland County','Bronx County','Maverick County','Mesa County', + 'Raleigh County' 
+) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100""", + "q17" -> + """ +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100""", + "q18" -> + """ +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'Unknown' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (5,1,4,7,8,9) and + d_year = 2002 and + ca_state in ('AR','TX','NC' + ,'GA','MS','WV','AL') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100""", + "q19" -> + """ +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=16 + and d_moy=12 + and d_year=1998 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 
100 """, + "q20" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Shoes', 'Electronics', 'Children') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2001-03-14' as date) + and (cast('2001-03-14' as date) + INTERVAL 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q21" -> + """ +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1999-03-20' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1999-03-20' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1999-03-20' as date) - INTERVAL 30 days) + and (cast ('1999-03-20' as date) + INTERVAL 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100""", + "q22" -> + """ +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1186 and 1186 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100""", + "q23a" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 3 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 3 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + limit 100""", + "q23b" -> + """ +with frequent_ss_items as + (select 
substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000 + 1,2000 + 2,2000 + 3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * + from max_store_sales)) + select c_last_name,c_first_name,sales + from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales + from catalog_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 3 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name + union all + select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales + from web_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 3 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and ws_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name) + order by c_last_name,c_first_name,sales + limit 100""", + "q24a" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'snow' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q24b" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip + and s_market_id = 10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + 
,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'chiffon' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q25" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q26" -> + """ +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'S' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100""", + "q27" -> + """ +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'F' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2000 and + s_state in ('AL','IN', 'SC', 'NY', 'OH', 'FL') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100""", + "q28" -> + """ +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 73 and 73+10 + or ss_coupon_amt between 7826 and 7826+1000 + or ss_wholesale_cost between 70 and 70+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 152 and 152+10 + or ss_coupon_amt between 2196 and 2196+1000 + or ss_wholesale_cost between 56 and 56+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 53 and 53+10 + or ss_coupon_amt between 3430 and 3430+1000 + or ss_wholesale_cost between 13 and 13+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + 
where ss_quantity between 16 and 20 + and (ss_list_price between 182 and 182+10 + or ss_coupon_amt between 3262 and 3262+1000 + or ss_wholesale_cost between 20 and 20+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 85 and 85+10 + or ss_coupon_amt between 3310 and 3310+1000 + or ss_wholesale_cost between 37 and 37+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 180 and 180+10 + or ss_coupon_amt between 12592 and 12592+1000 + or ss_wholesale_cost between 22 and 22+20)) B6 +limit 100""", + "q29" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,stddev_samp(ss_quantity) as store_sales_quantity + ,stddev_samp(sr_return_quantity) as store_returns_quantity + ,stddev_samp(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1998 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1998 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1998,1998+1,1998+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q30" -> + """ +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2000 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'GA' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100""", + "q31" -> + """ +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + 
,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 1999 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 1999 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 1999 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 1999 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 1999 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =1999 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.d_year""", + "q32" -> + """ +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 66 +and i_item_sk = cs_item_sk +and d_date between '2002-03-29' and + (cast('2002-03-29' as date) + INTERVAL 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '2002-03-29' and + (cast('2002-03-29' as date) + INTERVAL 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100""", + "q33" -> + """ +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Home')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Home')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Home')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100""", + "q34" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and 
(household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Salem County','Terrell County','Arthur County','Oglethorpe County', + 'Lunenburg County','Perry County','Halifax County','Sumner County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number""", + "q35" -> + """ +select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + avg(cd_dep_count), + min(cd_dep_count), + stddev_samp(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + min(cd_dep_employed_count), + stddev_samp(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + min(cd_dep_college_count), + stddev_samp(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100""", + "q36" -> + """ +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('IN','AL','MI','MN', + 'TN','LA','FL','NM') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100""", + "q37" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 39 and 39 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-01-16' as date) and (cast('2001-01-16' as date) + interval 60 days) + and i_manufact_id in (765,886,889,728) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q38" -> + """ +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where 
store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1186 and 1186 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1186 and 1186 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1186 and 1186 + 11 +) hot_cust +limit 100""", + "q39a" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2000 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=2 + and inv2.d_moy=2+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q39b" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2000 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=2 + and inv2.d_moy=2+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q40" -> + """ +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2000-03-18' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('2000-03-18' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('2000-03-18' as date) - INTERVAL 30 days) + and (cast ('2000-03-18' as date) + INTERVAL 30 days) + group by + w_state,i_item_id + order by 
w_state,i_item_id +limit 100""", + "q41" -> + """ +select distinct(i_product_name) + from item i1 + where i_manufact_id between 970 and 970+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'frosted' or i_color = 'rose') and + (i_units = 'Lb' or i_units = 'Gross') and + (i_size = 'medium' or i_size = 'large') + ) or + (i_category = 'Women' and + (i_color = 'chocolate' or i_color = 'black') and + (i_units = 'Box' or i_units = 'Dram') and + (i_size = 'economy' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'slate' or i_color = 'magenta') and + (i_units = 'Carton' or i_units = 'Bundle') and + (i_size = 'N/A' or i_size = 'small') + ) or + (i_category = 'Men' and + (i_color = 'cornflower' or i_color = 'firebrick') and + (i_units = 'Pound' or i_units = 'Oz') and + (i_size = 'medium' or i_size = 'large') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'almond' or i_color = 'steel') and + (i_units = 'Tsp' or i_units = 'Case') and + (i_size = 'medium' or i_size = 'large') + ) or + (i_category = 'Women' and + (i_color = 'purple' or i_color = 'aquamarine') and + (i_units = 'Bunch' or i_units = 'Gram') and + (i_size = 'economy' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'lavender' or i_color = 'papaya') and + (i_units = 'Pallet' or i_units = 'Cup') and + (i_size = 'N/A' or i_size = 'small') + ) or + (i_category = 'Men' and + (i_color = 'maroon' or i_color = 'cyan') and + (i_units = 'Each' or i_units = 'N/A') and + (i_size = 'medium' or i_size = 'large') + )))) > 0 + order by i_product_name + limit 100""", + "q42" -> + """ +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 """, + "q43" -> + """ +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 2001 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100""", + "q44" -> + """ +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 366 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 366 + and 
ss_cdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 366 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 366 + and ss_cdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100""", + "q45" -> + """ +select ca_zip, ca_county, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 1998 + group by ca_zip, ca_county + order by ca_zip, ca_county + limit 100""", + "q46" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 0 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_city in ('Five Forks','Oakland','Fairview','Winchester','Farmington') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100""", + "q47" -> + """ +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 1999 or + ( d_year = 1999-1 and d_moy =12) or + ( d_year = 1999+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.s_store_name + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + 
v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 1999 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, sum_sales + limit 100""", + "q48" -> + """ +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1998 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = 'Unknown' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'W' + and + cd_education_status = 'College' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'D' + and + cd_education_status = 'Primary' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MI', 'GA', 'NH') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('TX', 'KY', 'SD') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('NY', 'OH', 'FL') + and ss_net_profit between 50 and 25000 + ) + )""", + "q49" -> + """ +select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 
+ and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + order by 1,4,5,2 + limit 100""", + "q50" -> + """ +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 1998 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100""", + "q51" -> + """ +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1214 and 1214+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1214 and 1214+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + 
select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100""", + "q52" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=2000 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 """, + "q53" -> + """ +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100""", + "q54" -> + """ +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Books' + and i_class = 'business' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 2 + and d_year = 2000 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 2000 and d_moy = 2) + and (select distinct d_month_seq+3 + from date_dim where d_year = 2000 and d_moy = 2) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select 
segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100""", + "q55" -> + """ +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=13 + and d_moy=11 + and d_year=1999 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 """, + "q56" -> + """ +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('chiffon','smoke','lace')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 5 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('chiffon','smoke','lace')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 5 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('chiffon','smoke','lace')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 5 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100""", + "q57" -> + """ +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 1999 or + ( d_year = 1999-1 and d_moy =12) or + ( d_year = 1999+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 1999 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, avg_monthly_sales + limit 100""", + "q58" -> + """ +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-21')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-21')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '1998-02-21')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100""", + "q59" -> + """ +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = 
wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1205 and 1205 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1205+ 12 and 1205 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100""", + "q60" -> + """ +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100""", + "q61" -> + """ +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Sports' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -6 + and d_year = 2001 + and d_moy = 12) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Sports' + and s_gmt_offset = -6 + and d_year = 2001 + and d_moy = 12) all_sales +order by promotions, total +limit 100""", + "q62" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (ws_ship_date_sk - 
ws_sold_date_sk > 60) and + (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1215 and 1215 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +limit 100""", + "q63" -> + """ +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1211,1211+1,1211+2,1211+3,1211+4,1211+5,1211+6,1211+7,1211+8,1211+9,1211+10,1211+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100""", + "q64" -> + """ +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and 
+ c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('azure','gainsboro','misty','blush','hot','lemon') and + i_current_price between 80 and 80 + 10 and + i_current_price between 80 + 1 and 80 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 1999 and + cs2.syear = 1999 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt + ,cs1.s1 + ,cs2.s1""", + "q65" -> + """ +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1186 and 1186+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1186 and 1186+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100""", + "q66" -> + """ +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + 
,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'MSC' || ',' || 'GERMA' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_ship_tax * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 9453 and 9453+28800 + and sm_carrier in ('MSC','GERMA') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'MSC' || ',' || 'GERMA' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_list_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_list_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_list_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_list_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then 
cs_ext_list_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_list_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_list_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_list_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_list_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_list_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_list_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_list_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_ship * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 9453 AND 9453+28800 + and sm_carrier in ('MSC','GERMA') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100""", + "q67" -> + """ +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1185 and 1185+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100""", + "q68" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from 
store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 4 or + household_demographics.hd_vehicle_count= 0) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Pleasant Hill','Bethel') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100""", + "q69" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('MO','MN','AZ') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100""", + "q70" -> + """ +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1218 and 1218+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1218 and 1218+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100""", + "q71" -> + """ +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2000 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + 
and d_moy=12 + and d_year=2000 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2000 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + """, + "q72" -> + """ +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + interval 5 days + and hd_buy_potential = '1001-5000' + and d1.d_year = 2000 + and cd_marital_status = 'D' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100""", + "q73" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = '5001-10000') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Lea County','Furnas County','Pennington County','Bronx County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc""", + "q74" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,sum(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1998,1998+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,sum(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (1998,1998+1) + group by 
c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 3,1,2 +limit 100""", + "q75" -> + """ +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2001 + AND prev_yr.d_year=2001-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + limit 100""", + "q76" -> + """ +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_customer_sk' 
col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_customer_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_ship_addr_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_addr_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_ship_mode_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_ship_mode_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100""", + "q77" -> + """ +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-16' as date) + and (cast('2000-08-16' as date) + INTERVAL 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = 
wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q78" -> + """ +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_customer_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2001 +order by + ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + ratio +limit 100""", + "q79" -> + """ +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 0 or household_demographics.hd_vehicle_count > 3) + and date_dim.d_dow = 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100""", + "q80" -> + """ +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('2002-08-06' as date) + and (cast('2002-08-06' as date) + INTERVAL 60 days) + and ss_store_sk = s_store_sk 
+ and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('2002-08-06' as date) + and (cast('2002-08-06' as date) + INTERVAL 60 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('2002-08-06' as date) + and (cast('2002-08-06' as date) + INTERVAL 60 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q81" -> + """ +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'TX' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100""", + "q82" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 49 and 49+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-01-28' as date) and (cast('2001-01-28' as date) + INTERVAL 60 days) + and i_manufact_id in 
(80,675,292,17) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q83" -> + """ +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-06-17','2000-08-22','2000-11-17'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-06-17','2000-08-22','2000-11-17'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-06-17','2000-08-22','2000-11-17'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100""", + "q84" -> + """ +select c_customer_id as customer_id + , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Hopewell' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 37855 + and ib_upper_bound <= 37855 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100""", + "q85" -> + """ +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 2001 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'S' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'College' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 
'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Secondary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('TX', 'VA', 'CA') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('AR', 'NE', 'MO') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('IA', 'MS', 'WA') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100""", + "q86" -> + """ +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1215 and 1215+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100""", + "q87" -> + """ +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1221 and 1221+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1221 and 1221+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1221 and 1221+11) +) cool_cust""", + "q88" -> + """ +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where 
ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s8""", + 
"q89" -> + """ +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Music','Books') and + i_class in ('glassware','classical','fiction') + ) + or (i_category in ('Jewelry','Sports','Women') and + i_class in ('semi-precious','baseball','dresses') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100""", + "q90" -> + """ +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 9 and 9+1 + and household_demographics.hd_dep_count = 3 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 16 and 16+1 + and household_demographics.hd_dep_count = 3 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100""", + "q91" -> + """ +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2000 +and d_moy = 12 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc""", + "q92" -> + """ +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 356 +and i_item_sk = ws_item_sk +and d_date between '2001-03-12' and + (cast('2001-03-12' as date) + INTERVAL 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '2001-03-12' and + (cast('2001-03-12' as date) + INTERVAL 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100""", + "q93" -> + """ +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else 
(ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'reason 66') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100""", + "q94" -> + """ +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-4-01' and + (cast('1999-4-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'NE' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q95" -> + """ +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2002-4-01' and + (cast('2002-4-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'AL' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q96" -> + """ +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 16 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 6 + and store.s_store_name = 'ese' +order by count(*) +limit 100""", + "q97" -> + """ +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1190 and 1190 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1190 and 1190 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100""", + "q98" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + 
,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Home', 'Sports', 'Men') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2002-01-05' as date) + and (cast('2002-01-05' as date) + interval 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio""", + "q99" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1178 and 1178 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100""", + "q1" -> + """ +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NY' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100""", + "q2" -> + """ +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 
+ ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1""", + "q3" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 816 + and dt.d_moy=11 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100""", + "q4" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and 
t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100""", + "q5" -> + """ +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as 
sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q6" -> + """ +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100""", + "q7" -> + """ +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100""", + "q8" -> + """ +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + '27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + '83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + '37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + 
'94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + '98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + '71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + '47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + '49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + '56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100""", + "q9" -> + """ +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 578972190 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 536856786 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity 
between 41 and 60) > 12733327 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 205136171 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 1192341092 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1""", + "q10" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Baltimore city','Stafford County','Greene County','Ballard County','Franklin County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 1 and 1+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 1 ANd 1+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2000 and + d_moy between 1 and 1+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100""", + "q11" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + 
,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag +limit 100""", + "q12" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Children', 'Shoes', 'Women') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1998-06-19' as date) + and (cast('1998-06-19' as date) + INTERVAL 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q13" -> + """ +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'College' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'S' + and cd_education_status = 'Primary' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('GA', 'IN', 'NY') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('ND', 'WV', 'TX') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('KS', 'NC', 'NM') + and ss_net_profit between 50 and 250 + ))""", + "q14a" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk 
= iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1998 AND 1998 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1998 AND 1998 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1998 AND 1998 + 2) + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100""", + "q14b" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1998 AND 1998 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1998 AND 1998 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + 
,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1998 AND 1998 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as +(select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select this_year.channel ty_channel + ,this_year.i_brand_id ty_brand + ,this_year.i_class_id ty_class + ,this_year.i_category_id ty_category + ,this_year.sales ty_sales + ,this_year.number_sales ty_number_sales + ,last_year.channel ly_channel + ,last_year.i_brand_id ly_brand + ,last_year.i_class_id ly_class + ,last_year.i_category_id ly_category + ,last_year.sales ly_sales + ,last_year.number_sales ly_number_sales + from + (select 'store' channel, i_brand_id,i_class_id,i_category_id + ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1998 + 1 + and d_moy = 12 + and d_dom = 17) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, + (select 'store' channel, i_brand_id,i_class_id + ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1998 + and d_moy = 12 + and d_dom = 17) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year + where this_year.i_brand_id= last_year.i_brand_id + and this_year.i_class_id = last_year.i_class_id + and this_year.i_category_id = last_year.i_category_id + order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id + limit 100""", + "q15" -> + """ +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2002 + group by ca_zip + order by ca_zip + limit 100""", + "q16" -> + """ +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-3-01' and + (cast('2001-3-01' as date) + INTERVAL 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'PA' +and cs1.cs_call_center_sk = 
cc_call_center_sk +and cc_county in ('Luce County','Franklin Parish','Sierra County','Williamson County', + 'Kittitas County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100""", + "q17" -> + """ +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2001Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2001Q1','2001Q2','2001Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2001Q1','2001Q2','2001Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100""", + "q18" -> + """ +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'Unknown' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (5,7,8,6,12,4) and + d_year = 2000 and + ca_state in ('MO','NY','ME' + ,'MI','IA','OH','MS') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100""", + "q19" -> + """ +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=55 + and d_moy=11 + and d_year=1998 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + 
,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 """, + "q20" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Shoes', 'Electronics', 'Home') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2000-05-15' as date) + and (cast('2000-05-15' as date) + INTERVAL 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q21" -> + """ +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2002-02-15' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('2002-02-15' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('2002-02-15' as date) - INTERVAL 30 days) + and (cast ('2002-02-15' as date) + INTERVAL 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100""", + "q22" -> + """ +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1202 and 1202 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100""", + "q23a" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 4 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 4 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk 
in (select c_customer_sk from best_ss_customer)) + limit 100""", + "q23b" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000 + 1,2000 + 2,2000 + 3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * + from max_store_sales)) + select c_last_name,c_first_name,sales + from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales + from catalog_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 4 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name + union all + select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales + from web_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 4 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and ws_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name) + order by c_last_name,c_first_name,sales + limit 100""", + "q24a" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid_inc_tax) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=5 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'cyan' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q24b" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid_inc_tax) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip + and s_market_id = 5 +group by 
c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'ivory' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q25" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,stddev_samp(ss_net_profit) as store_sales_profit + ,stddev_samp(sr_net_loss) as store_returns_loss + ,stddev_samp(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q26" -> + """ +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'M' and + cd_education_status = 'Unknown' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100""", + "q27" -> + """ +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'F' and + cd_marital_status = 'D' and + cd_education_status = '2 yr Degree' and + d_year = 1999 and + s_state in ('MI','WV', 'MI', 'NY', 'TN', 'MI') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100""", + "q28" -> + """ +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 151 and 151+10 + or ss_coupon_amt between 4349 and 4349+1000 + or ss_wholesale_cost between 75 and 75+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 45 and 45+10 + or ss_coupon_amt between 12490 and 12490+1000 + or ss_wholesale_cost between 37 and 37+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 54 and 54+10 + or ss_coupon_amt between 13038 and 13038+1000 + or ss_wholesale_cost 
between 17 and 17+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 178 and 178+10 + or ss_coupon_amt between 10744 and 10744+1000 + or ss_wholesale_cost between 51 and 51+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 49 and 49+10 + or ss_coupon_amt between 8494 and 8494+1000 + or ss_wholesale_cost between 56 and 56+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 0 and 0+10 + or ss_coupon_amt between 17854 and 17854+1000 + or ss_wholesale_cost between 31 and 31+20)) B6 +limit 100""", + "q29" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,max(ss_quantity) as store_sales_quantity + ,max(sr_return_quantity) as store_returns_quantity + ,max(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q30" -> + """ +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2000 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'MD' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100""", + "q31" -> + """ +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + 
ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 1999 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 1999 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 1999 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 1999 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 1999 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =1999 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by store_q2_q3_increase""", + "q32" -> + """ +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 7 +and i_item_sk = cs_item_sk +and d_date between '2000-01-21' and + (cast('2000-01-21' as date) + INTERVAL 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '2000-01-21' and + (cast('2000-01-21' as date) + INTERVAL 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100""", + "q33" -> + """ +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100""", + "q34" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '501-1000' or + household_demographics.hd_buy_potential = '5001-10000') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Levy County','Val Verde County','Porter County','Nowata County', + 'Lincoln County','Brazos County','Franklin Parish','Pipestone County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number""", + "q35" -> + """ +select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + sum(cd_dep_count), + sum(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + sum(cd_dep_employed_count), + sum(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + sum(cd_dep_college_count), + sum(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2001 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100""", + "q36" -> + """ +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('MO','AL','OH','WV', + 'AL','MN','TN','WA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100""", + "q37" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 57 and 57 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-04-19' as date) and (cast('2001-04-19' as date) + interval 60 days) + and i_manufact_id in (804,916,707,680) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q38" -> + """ +select count(*) from ( + 
select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 +) hot_cust +limit 100""", + "q39a" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2000 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=3 + and inv2.d_moy=3+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q39b" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2000 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=3 + and inv2.d_moy=3+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q40" -> + """ +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2000-04-09' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('2000-04-09' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('2000-04-09' as date) - INTERVAL 30 days) + and (cast 
('2000-04-09' as date) + INTERVAL 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100""", + "q41" -> + """ +select distinct(i_product_name) + from item i1 + where i_manufact_id between 917 and 917+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'antique' or i_color = 'pale') and + (i_units = 'Tbl' or i_units = 'Case') and + (i_size = 'small' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'snow' or i_color = 'lemon') and + (i_units = 'Box' or i_units = 'Ounce') and + (i_size = 'economy' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'green' or i_color = 'blue') and + (i_units = 'Gross' or i_units = 'Ton') and + (i_size = 'large' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'cream' or i_color = 'frosted') and + (i_units = 'Bundle' or i_units = 'Gram') and + (i_size = 'small' or i_size = 'extra large') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'orange' or i_color = 'spring') and + (i_units = 'Lb' or i_units = 'Carton') and + (i_size = 'small' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'lawn' or i_color = 'violet') and + (i_units = 'Oz' or i_units = 'Cup') and + (i_size = 'economy' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'navy' or i_color = 'linen') and + (i_units = 'Pound' or i_units = 'Unknown') and + (i_size = 'large' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'almond' or i_color = 'olive') and + (i_units = 'Pallet' or i_units = 'Bunch') and + (i_size = 'small' or i_size = 'extra large') + )))) > 0 + order by i_product_name + limit 100""", + "q42" -> + """ +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 """, + "q43" -> + """ +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 2000 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100""", + "q44" -> + """ +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 731 + group by ss_item_sk + having avg(ss_net_profit) > 
0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 731 + and ss_promo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 731 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 731 + and ss_promo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100""", + "q45" -> + """ +select ca_zip, ca_city, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2000 + group by ca_zip, ca_city + order by ca_zip, ca_city + limit 100""", + "q46" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 1 or + household_demographics.hd_vehicle_count= 2) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_city in ('Buena Vista','Friendship','Monroe','Oak Hill','Randolph') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100""", + "q47" -> + """ +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 1999 or + ( d_year = 1999-1 and d_moy =12) or + ( d_year = 1999+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + 
v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 1999 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, sum_sales + limit 100""", + "q48" -> + """ +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1999 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'S' + and + cd_education_status = 'Primary' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'D' + and + cd_education_status = 'College' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'U' + and + cd_education_status = '2 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('ND', 'NC', 'TX') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('VA', 'IA', 'AR') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MA', 'FL', 'TN') + and ss_net_profit between 50 and 25000 + ) + )""", + "q49" -> + """ +select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk 
= cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 11 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + order by 1,4,5,2 + limit 100""", + "q50" -> + """ +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2002 +and d2.d_moy = 10 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100""", + "q51" -> + """ +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1213 and 1213+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and 
d_month_seq between 1213 and 1213+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100""", + "q52" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=1998 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 """, + "q53" -> + """ +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1219,1219+1,1219+2,1219+3,1219+4,1219+5,1219+6,1219+7,1219+8,1219+9,1219+10,1219+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100""", + "q54" -> + """ +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Men' + and i_class = 'shirts' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 2 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 2) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 2) + group by c_customer_sk + ) + 
, segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100""", + "q55" -> + """ +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=96 + and d_moy=11 + and d_year=2000 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 """, + "q56" -> + """ +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('antique','white','smoke')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 6 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('antique','white','smoke')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 6 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('antique','white','smoke')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 6 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100""", + "q57" -> + """ +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.cc_name + ,v1.d_year + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, sum_sales + limit 100""", + "q58" -> + """ +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2001-01-27')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2001-01-27')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '2001-01-27')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100""", + "q59" -> + """ +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = 
wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1177 and 1177 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1177+ 12 and 1177 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100""", + "q60" -> + """ +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy = 8 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy = 8 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy = 8 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100""", + "q61" -> + """ +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Home' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -6 + and d_year = 1999 + and d_moy = 12) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Home' + and s_gmt_offset = -6 + and d_year = 1999 + and d_moy = 12) all_sales +order by promotions, total +limit 100""", + "q62" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and 
+ (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1191 and 1191 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +limit 100""", + "q63" -> + """ +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1193,1193+1,1193+2,1193+3,1193+4,1193+5,1193+6,1193+7,1193+8,1193+9,1193+10,1193+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100""", + "q64" -> + """ +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = 
d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('orange','aquamarine','olive','linen','smoke','coral') and + i_current_price between 74 and 74 + 10 and + i_current_price between 74 + 1 and 74 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2001 and + cs2.syear = 2001 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt + ,cs1.s1 + ,cs2.s1""", + "q65" -> + """ +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1195 and 1195+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1195 and 1195+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100""", + "q66" -> + """ +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + 
,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'LATVIAN' || ',' || 'ALLIANCE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_ext_list_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_ext_list_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_ext_list_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_ext_list_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_ext_list_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_ext_list_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_ext_list_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_ext_list_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_ext_list_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_ext_list_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_ext_list_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_ext_list_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 1998 + and t_time between 16224 and 16224+28800 + and sm_carrier in ('LATVIAN','ALLIANCE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'LATVIAN' || ',' || 'ALLIANCE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_sales_price* 
cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 1998 + and t_time between 16224 AND 16224+28800 + and sm_carrier in ('LATVIAN','ALLIANCE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100""", + "q67" -> + """ +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1203 and 1203+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100""", + "q68" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) 
list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 3 or + household_demographics.hd_vehicle_count= -1) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Jamestown','Pine Hill') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100""", + "q69" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('CA','MT','SD') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2003 and + d_moy between 2 and 2+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100""", + "q70" -> + """ +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1215 and 1215+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1215 and 1215+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100""", + "q71" -> + """ +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=11 + and d_year=1998 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from 
catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=11 + and d_year=1998 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=11 + and d_year=1998 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + """, + "q72" -> + """ +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + interval 5 days + and hd_buy_potential = '1001-5000' + and d1.d_year = 1998 + and cd_marital_status = 'S' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100""", + "q73" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_county in ('Van Buren County','Terrell County','Belknap County','Kootenai County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc""", + "q74" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and 
ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 2,3,1 +limit 100""", + "q75" -> + """ +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Music' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Music' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Music') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2001 + AND prev_yr.d_year=2001-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + limit 100""", + "q76" -> + """ +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, 
SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_promo_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_promo_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_web_site_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_web_site_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_bill_addr_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_bill_addr_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100""", + "q77" -> + """ +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-22' as date) + and (cast('2001-08-22' as date) + INTERVAL 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - 
coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q78" -> + """ +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_sold_year, ss_item_sk, ss_customer_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2001 +order by + ss_sold_year, ss_item_sk, ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + ratio +limit 100""", + "q79" -> + """ +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 6 or household_demographics.hd_vehicle_count > -1) + and date_dim.d_dow = 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100""", + "q80" -> + """ +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = 
d_date_sk + and d_date between cast('2000-08-25' as date) + and (cast('2000-08-25' as date) + INTERVAL 60 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('2000-08-25' as date) + and (cast('2000-08-25' as date) + INTERVAL 60 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('2000-08-25' as date) + and (cast('2000-08-25' as date) + INTERVAL 60 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q81" -> + """ +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =2000 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'SC' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100""", + "q82" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 6 and 6+30 + and inv_item_sk = i_item_sk + and 
d_date_sk=inv_date_sk + and d_date between cast('2001-02-23' as date) and (cast('2001-02-23' as date) + INTERVAL 60 days) + and i_manufact_id in (669,623,578,379) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q83" -> + """ +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-01-15','2001-09-03','2001-11-17'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-01-15','2001-09-03','2001-11-17'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-01-15','2001-09-03','2001-11-17'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100""", + "q84" -> + """ +select c_customer_id as customer_id + , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Walnut Grove' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 53669 + and ib_upper_bound <= 53669 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100""", + "q85" -> + """ +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 2001 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'S' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Secondary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 
'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'W' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('AZ', 'SD', 'TN') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('TX', 'GA', 'IA') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'VT', 'AL') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100""", + "q86" -> + """ +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1195 and 1195+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100""", + "q87" -> + """ +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1194 and 1194+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1194 and 1194+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1194 and 1194+11) +) cool_cust""", + "q88" -> + """ +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and 
household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 4 and 
household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s8""", + "q89" -> + """ +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Shoes','Electronics') and + i_class in ('flatware','mens','televisions') + ) + or (i_category in ('Women','Sports','Music') and + i_class in ('maternity','camping','rock') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100""", + "q90" -> + """ +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 8 and 8+1 + and household_demographics.hd_dep_count = 4 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 20 and 20+1 + and household_demographics.hd_dep_count = 4 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100""", + "q91" -> + """ +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2001 +and d_moy = 12 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -6 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc""", + "q92" -> + """ +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 7 +and i_item_sk = ws_item_sk +and d_date between '2000-01-16' and + (cast('2000-01-16' as date) + INTERVAL 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '2000-01-16' and + (cast('2000-01-16' as date) + INTERVAL 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100""", + "q93" -> + """ +select ss_customer_sk + ,sum(act_sales) sumsales + from (select 
ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'reason 24') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100""", + "q94" -> + """ +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-2-01' and + (cast('2001-2-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'VT' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q95" -> + """ +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-3-01' and + (cast('2001-3-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TN' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q96" -> + """ +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 20 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 6 + and store.s_store_name = 'ese' +order by count(*) +limit 100""", + "q97" -> + """ +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1206 and 1206 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1206 and 1206 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100""", + 
"q98" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Sports', 'Books', 'Electronics') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2002-06-29' as date) + and (cast('2002-06-29' as date) + interval 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio""", + "q99" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1199 and 1199 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100""", + "q1" -> + """ +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'MO' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100""", + "q2" -> + """ +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + 
,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1""", + "q3" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 816 + and dt.d_moy=11 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100""", + "q4" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + 
,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100""", + "q5" -> + """ +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) 
as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + INTERVAL 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q6" -> + """ +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100""", + "q7" -> + """ +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100""", + "q8" -> + """ +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + '27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + '83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + 
'37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + '94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + '98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + '71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + '47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + '49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + '56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100""", + "q9" -> + """ +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 4502397049 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 4756228269 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_profit) + from 
store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 4101835064 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 4583261513 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 4208819283 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1""", + "q10" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Grady County','Marion County','Decatur County','Lyman County','Beaver County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 2 and 2+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 2 ANd 2+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 2 and 2+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100""", + "q11" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = 
ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag +limit 100""", + "q12" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Children', 'Jewelry', 'Music') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-05-11' as date) + and (cast('2001-05-11' as date) + INTERVAL 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q13" -> + """ +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = 'Primary' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'S' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'W' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('SC', 'WY', 'TX') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('NY', 'NE', 'GA') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('AL', 'AR', 'MI') + and ss_net_profit between 50 and 250 + ))""", + "q14a" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + 
,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100""", + "q14b" -> + """ +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 
1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as +(select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) + select this_year.channel ty_channel + ,this_year.i_brand_id ty_brand + ,this_year.i_class_id ty_class + ,this_year.i_category_id ty_category + ,this_year.sales ty_sales + ,this_year.number_sales ty_number_sales + ,last_year.channel ly_channel + ,last_year.i_brand_id ly_brand + ,last_year.i_class_id ly_class + ,last_year.i_category_id ly_category + ,last_year.sales ly_sales + ,last_year.number_sales ly_number_sales + from + (select 'store' channel, i_brand_id,i_class_id,i_category_id + ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1999 + 1 + and d_moy = 12 + and d_dom = 5) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, + (select 'store' channel, i_brand_id,i_class_id + ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1999 + and d_moy = 12 + and d_dom = 5) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year + where this_year.i_brand_id= last_year.i_brand_id + and this_year.i_class_id = last_year.i_class_id + and this_year.i_category_id = last_year.i_category_id + order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id + limit 100""", + "q15" -> + """ +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 1998 + group by ca_zip + order by ca_zip + limit 100""", + "q16" -> + """ +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2000-3-01' and + (cast('2000-3-01' as date) + INTERVAL 60 days) +and 
cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'IA' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Luce County','Wadena County','Jefferson Davis Parish','Daviess County', + 'Williamson County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100""", + "q17" -> + """ +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '1999Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100""", + "q18" -> + """ +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'F' and + cd1.cd_education_status = 'Unknown' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (4,8,12,10,11,9) and + d_year = 2001 and + ca_state in ('AR','IA','TX' + ,'KS','LA','NC','SD') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100""", + "q19" -> + """ +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=63 + and d_moy=11 + and d_year=2002 + and ss_customer_sk = c_customer_sk + and 
c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 """, + "q20" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Electronics', 'Children', 'Home') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2002-03-19' as date) + and (cast('2002-03-19' as date) + INTERVAL 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""", + "q21" -> + """ +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1999-04-12' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1999-04-12' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1999-04-12' as date) - INTERVAL 30 days) + and (cast ('1999-04-12' as date) + INTERVAL 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100""", + "q22" -> + """ +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1188 and 1188 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100""", + "q23a" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1998,1998+1,1998+2,1998+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1998,1998+1,1998+2,1998+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 1998 + and d_moy = 7 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 1998 + 
and d_moy = 7 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + limit 100""", + "q23b" -> + """ +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1998,1998 + 1,1998 + 2,1998 + 3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1998,1998+1,1998+2,1998+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * + from max_store_sales)) + select c_last_name,c_first_name,sales + from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales + from catalog_sales + ,customer + ,date_dim + where d_year = 1998 + and d_moy = 7 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name + union all + select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales + from web_sales + ,customer + ,date_dim + where d_year = 1998 + and d_moy = 7 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and ws_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name) + order by c_last_name,c_first_name,sales + limit 100""", + "q24a" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'goldenrod' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q24b" -> + """ +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and 
c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip + and s_market_id = 7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'magenta' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name""", + "q25" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,min(ss_net_profit) as store_sales_profit + ,min(sr_net_loss) as store_returns_loss + ,min(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2002 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2002 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2002 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q26" -> + """ +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'M' and + cd_education_status = '4 yr Degree' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100""", + "q27" -> + """ +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'M' and + cd_education_status = 'Secondary' and + d_year = 1999 and + s_state in ('AL','FL', 'TX', 'NM', 'MI', 'GA') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100""", + "q28" -> + """ +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 74 and 74+10 + or ss_coupon_amt between 2949 and 2949+1000 + or ss_wholesale_cost between 49 and 49+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 136 and 136+10 + or ss_coupon_amt between 10027 and 10027+1000 + or ss_wholesale_cost between 53 and 53+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and 
(ss_list_price between 73 and 73+10 + or ss_coupon_amt between 1451 and 1451+1000 + or ss_wholesale_cost between 78 and 78+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 87 and 87+10 + or ss_coupon_amt between 17007 and 17007+1000 + or ss_wholesale_cost between 55 and 55+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 112 and 112+10 + or ss_coupon_amt between 17243 and 17243+1000 + or ss_wholesale_cost between 2 and 2+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 119 and 119+10 + or ss_coupon_amt between 4954 and 4954+1000 + or ss_wholesale_cost between 22 and 22+20)) B6 +limit 100""", + "q29" -> + """ +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,stddev_samp(ss_quantity) as store_sales_quantity + ,stddev_samp(sr_return_quantity) as store_returns_quantity + ,stddev_samp(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (2000,2000+1,2000+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""", + "q30" -> + """ +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2001 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'MI' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100""", + "q31" -> + """ +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from 
web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 2000 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 2000 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 2000 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 2000 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 2000 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =2000 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by store_q1_q2_increase""", + "q32" -> + """ +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 490 +and i_item_sk = cs_item_sk +and d_date between '1999-01-27' and + (cast('1999-01-27' as date) + INTERVAL 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1999-01-27' and + (cast('1999-01-27' as date) + INTERVAL 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100""", + "q33" -> + """ +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 1 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 1 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 1 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100""", + "q34" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from 
store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '1001-5000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Nez Perce County','Murray County','Surry County','Calhoun County', + 'Wilkinson County','Brown County','Wallace County','Carter County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number""", + "q35" -> + """ +select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + stddev_samp(cd_dep_count), + sum(cd_dep_count), + min(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + stddev_samp(cd_dep_employed_count), + sum(cd_dep_employed_count), + min(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + stddev_samp(cd_dep_college_count), + sum(cd_dep_college_count), + min(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100""", + "q36" -> + """ +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('MN','TX','TX','IN', + 'CA','LA','NM','TX') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100""", + "q37" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 16 and 16 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-06-05' as date) and (cast('2002-06-05' as date) + interval 60 days) + and i_manufact_id in 
(841,790,796,739) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q38" -> + """ +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1203 and 1203 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1203 and 1203 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1203 and 1203 + 11 +) hot_cust +limit 100""", + "q39a" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=3 + and inv2.d_moy=3+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q39b" -> + """ +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=3 + and inv2.d_moy=3+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""", + "q40" -> + """ +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1999-04-27' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('1999-04-27' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + 
i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('1999-04-27' as date) - INTERVAL 30 days) + and (cast ('1999-04-27' as date) + INTERVAL 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100""", + "q41" -> + """ +select distinct(i_product_name) + from item i1 + where i_manufact_id between 841 and 841+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'bisque' or i_color = 'khaki') and + (i_units = 'Carton' or i_units = 'Box') and + (i_size = 'large' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'antique' or i_color = 'sandy') and + (i_units = 'Pallet' or i_units = 'Cup') and + (i_size = 'petite' or i_size = 'small') + ) or + (i_category = 'Men' and + (i_color = 'forest' or i_color = 'brown') and + (i_units = 'Dram' or i_units = 'Ton') and + (i_size = 'economy' or i_size = 'medium') + ) or + (i_category = 'Men' and + (i_color = 'chartreuse' or i_color = 'light') and + (i_units = 'Pound' or i_units = 'Dozen') and + (i_size = 'large' or i_size = 'extra large') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'turquoise' or i_color = 'chocolate') and + (i_units = 'Bundle' or i_units = 'Unknown') and + (i_size = 'large' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'maroon' or i_color = 'pale') and + (i_units = 'Each' or i_units = 'Tbl') and + (i_size = 'petite' or i_size = 'small') + ) or + (i_category = 'Men' and + (i_color = 'almond' or i_color = 'floral') and + (i_units = 'Gross' or i_units = 'N/A') and + (i_size = 'economy' or i_size = 'medium') + ) or + (i_category = 'Men' and + (i_color = 'drab' or i_color = 'plum') and + (i_units = 'Bunch' or i_units = 'Case') and + (i_size = 'large' or i_size = 'extra large') + )))) > 0 + order by i_product_name + limit 100""", + "q42" -> + """ +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=2002 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 """, + "q43" -> + """ +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -5 and + d_year = 2002 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100""", + "q44" -> + """ +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing 
+from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 709 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 709 + and ss_addr_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 709 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 709 + and ss_addr_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100""", + "q45" -> + """ +select ca_zip, ca_state, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2002 + group by ca_zip, ca_state + order by ca_zip, ca_state + limit 100""", + "q46" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 0 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Johnson','Norwood','Cambridge','Klondike','Rock Hill') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100""", + "q47" -> + """ +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2001 or + ( d_year = 2001-1 and d_moy =12) or + ( d_year = 2001+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.s_store_name, v1.s_company_name + ,v1.d_year, v1.d_moy + 
,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2001 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, psum + limit 100""", + "q48" -> + """ +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2000 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'U' + and + cd_education_status = '2 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'S' + and + cd_education_status = 'Primary' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'W' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MT', 'OH', 'GA') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('WV', 'AZ', 'NM') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('NY', 'PA', 'KY') + and ss_net_profit between 50 and 25000 + ) + )""", + "q49" -> + """ +select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 11 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as 
return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 11 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 11 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + order by 1,4,5,2 + limit 100""", + "q50" -> + """ +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2000 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100""", + "q51" -> + """ +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1177 and 1177+11 + and ws_item_sk is not NULL +group by 
ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1177 and 1177+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100""", + "q52" -> + """ +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=2001 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 """, + "q53" -> + """ +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1188,1188+1,1188+2,1188+3,1188+4,1188+5,1188+6,1188+7,1188+8,1188+9,1188+10,1188+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100""", + "q54" -> + """ +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Men' + and i_class = 'pants' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 5 + and d_year = 2002 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and 
ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 2002 and d_moy = 5) + and (select distinct d_month_seq+3 + from date_dim where d_year = 2002 and d_moy = 5) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100""", + "q55" -> + """ +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=67 + and d_moy=11 + and d_year=2001 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 """, + "q56" -> + """ +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('blanched','spring','seashell')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('blanched','spring','seashell')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('blanched','spring','seashell')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 6 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -7 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100""", + "q57" -> + """ +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand + ,v1.d_year + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, sum_sales + limit 100""", + "q58" -> + """ +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-05-24')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-05-24')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '2000-05-24')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100""", + "q59" -> + """ +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = 
wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1197 and 1197 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1197+ 12 and 1197 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100""", + "q60" -> + """ +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 10 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100""", + "q61" -> + """ +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Jewelry' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 2002 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Jewelry' + and s_gmt_offset = -7 + and d_year = 2002 + and d_moy = 11) all_sales +order by promotions, total +limit 100""", + "q62" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 
60) and + (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1194 and 1194 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +limit 100""", + "q63" -> + """ +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1222,1222+1,1222+2,1222+3,1222+4,1222+5,1222+6,1222+7,1222+8,1222+9,1222+10,1222+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100""", + "q64" -> + """ +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + 
c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('ivory','purple','almond','bisque','lawn','azure') and + i_current_price between 60 and 60 + 10 and + i_current_price between 60 + 1 and 60 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2001 and + cs2.syear = 2001 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt + ,cs1.s1 + ,cs2.s1""", + "q65" -> + """ +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1185 and 1185+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1185 and 1185+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100""", + "q66" -> + """ +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + 
,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'FEDEX' || ',' || 'MSC' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_ext_list_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_ext_list_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_ext_list_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_ext_list_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_ext_list_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_ext_list_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_ext_list_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_ext_list_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_ext_list_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_ext_list_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_ext_list_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_ext_list_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_profit * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_profit * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_profit * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_profit * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_profit * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_profit * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_profit * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_profit * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_profit * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_profit * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_profit * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_profit * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 2662 and 2662+28800 + and sm_carrier in ('FEDEX','MSC') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'FEDEX' || ',' || 'MSC' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_list_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_list_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_list_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_list_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_list_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_list_price* 
cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_list_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_list_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_list_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_list_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_list_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_list_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_profit * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_profit * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_profit * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_profit * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_profit * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_profit * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_profit * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_profit * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_profit * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_profit * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_profit * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_profit * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 2662 AND 2662+28800 + and sm_carrier in ('FEDEX','MSC') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100""", + "q67" -> + """ +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1177 and 1177+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100""", + "q68" -> + """ +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and 
store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 5 or + household_demographics.hd_vehicle_count= 4) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Lodi','Richmond') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100""", + "q69" -> + """ +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('IL','FL','SD') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100""", + "q70" -> + """ +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1206 and 1206+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1206 and 1206+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100""", + "q71" -> + """ +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=11 + and d_year=1999 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=11 + and d_year=1999 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from 
store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=11 + and d_year=1999 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + """, + "q72" -> + """ +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + interval 5 days + and hd_buy_potential = '1001-5000' + and d1.d_year = 2000 + and cd_marital_status = 'S' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100""", + "q73" -> + """ +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '1001-5000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Humboldt County','Hickman County','Galax city','Abbeville County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc""", + "q74" -> + """ +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear 
+ ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 3,1,2 +limit 100""", + "q75" -> + """ +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Books' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Books' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Books') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2001 + AND prev_yr.d_year=2001-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + limit 100""", + "q76" -> + """ +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_promo_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_promo_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + 
UNION ALL + SELECT 'web' as channel, 'ws_ship_addr_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_addr_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_ship_customer_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_ship_customer_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100""", + "q77" -> + """ +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-16' as date) + and (cast('2001-08-16' as date) + INTERVAL 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q78" -> + """ +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk 
ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_item_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2000 +order by + ss_item_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + ratio +limit 100""", + "q79" -> + """ +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 5 or household_demographics.hd_vehicle_count > -1) + and date_dim.d_dow = 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100""", + "q80" -> + """ +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('2001-08-19' as date) + and (cast('2001-08-19' as date) + INTERVAL 60 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + 
sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('2001-08-19' as date) + and (cast('2001-08-19' as date) + INTERVAL 60 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('2001-08-19' as date) + and (cast('2001-08-19' as date) + INTERVAL 60 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""", + "q81" -> + """ +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1999 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'MO' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100""", + "q82" -> + """ +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 68 and 68+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-05-08' as date) and (cast('2002-05-08' as date) + INTERVAL 60 days) + and i_manufact_id in (562,370,230,182) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""", + "q83" -> + """ +with 
sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-02-20','2000-10-08','2000-11-04'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-02-20','2000-10-08','2000-11-04'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-02-20','2000-10-08','2000-11-04'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100""", + "q84" -> + """ +select c_customer_id as customer_id + , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Buena Vista' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 49786 + and ib_upper_bound <= 49786 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100""", + "q85" -> + """ +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 2001 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '2 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 
150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('IA', 'ND', 'FL') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('OH', 'MS', 'VA') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('MN', 'LA', 'TX') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100""", + "q86" -> + """ +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1217 and 1217+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100""", + "q87" -> + """ +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1224 and 1224+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1224 and 1224+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1224 and 1224+11) +) cool_cust""", + "q88" -> + """ +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and 
((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s8""", + "q89" -> + """ +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + 
(partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2001) and + ((i_category in ('Children','Home','Women') and + i_class in ('toddlers','flatware','fragrances') + ) + or (i_category in ('Music','Electronics','Shoes') and + i_class in ('country','dvd/vcr players','mens') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100""", + "q90" -> + """ +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 7 and 7+1 + and household_demographics.hd_dep_count = 1 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 20 and 20+1 + and household_demographics.hd_dep_count = 1 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100""", + "q91" -> + """ +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 1998 +and d_moy = 12 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -6 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc""", + "q92" -> + """ +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 172 +and i_item_sk = ws_item_sk +and d_date between '1999-01-12' and + (cast('1999-01-12' as date) + INTERVAL 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '1999-01-12' and + (cast('1999-01-12' as date) + INTERVAL 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100""", + "q93" -> + """ +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = 
r_reason_sk + and r_reason_desc = 'reason 58') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100""", + "q94" -> + """ +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2002-3-01' and + (cast('2002-3-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'GA' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q95" -> + """ +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-3-01' and + (cast('2001-3-01' as date) + INTERVAL 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'NE' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100""", + "q96" -> + """ +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 16 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 0 + and store.s_store_name = 'ese' +order by count(*) +limit 100""", + "q97" -> + """ +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1219 and 1219 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1219 and 1219 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100""", + "q98" -> + """ +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Books', 
'Children', 'Sports') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-03-10' as date) + and (cast('2001-03-10' as date) + interval 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio""", + "q99" -> + """ +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1205 and 1205 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100""" + ) +} \ No newline at end of file diff --git a/benchmarks/src/main/scala/benchmark/TPCDSDataLoad.scala b/benchmarks/src/main/scala/benchmark/TPCDSDataLoad.scala new file mode 100644 index 00000000000..8415d24f9d2 --- /dev/null +++ b/benchmarks/src/main/scala/benchmark/TPCDSDataLoad.scala @@ -0,0 +1,687 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package benchmark
+
+case class TPCDSDataLoadConf(
+    protected val format: Option[String] = None,
+    scaleInGB: Int = 0,
+    userDefinedDbName: Option[String] = None,
+    sourcePath: Option[String] = None,
+    benchmarkPath: Option[String] = None,
+    excludeNulls: Boolean = true) extends TPCDSConf
+
+object TPCDSDataLoadConf {
+  import scopt.OParser
+  private val builder = OParser.builder[TPCDSDataLoadConf]
+  private val argParser = {
+    import builder._
+    OParser.sequence(
+      programName("TPC-DS Data Load"),
+      opt[String]("format")
+        .required()
+        .action((x, c) => c.copy(format = Some(x)))
+        .text("file format to use"),
+      opt[String]("scale-in-gb")
+        .required()
+        .valueName("")
+        .action((x, c) => c.copy(scaleInGB = x.toInt))
+        .text("Scale factor of the TPC-DS benchmark"),
+      opt[String]("benchmark-path")
+        .required()
+        .valueName("")
+        .action((x, c) => c.copy(benchmarkPath = Some(x)))
+        .text("Cloud storage path to be used for creating tables and generating reports"),
+      opt[String]("db-name")
+        .optional()
+        .valueName("")
+        .action((x, c) => c.copy(userDefinedDbName = Some(x)))
+        .text("Name of the target database to create with TPC-DS tables in the necessary format"),
+      opt[String]("source-path")
+        .optional()
+        .valueName("")
+        .action((x, c) => c.copy(sourcePath = Some(x)))
+        .text("The location of the TPC-DS raw input data"),
+      opt[String]("exclude-nulls")
+        .optional()
+        .valueName("true/false")
+        .action((x, c) => c.copy(excludeNulls = x.toBoolean))
+        .text("Whether to remove null primary keys when loading data, default = true"),
+    )
+  }
+
+  def parse(args: Array[String]): Option[TPCDSDataLoadConf] = {
+    OParser.parse(argParser, args, TPCDSDataLoadConf())
+  }
+}
+
+class TPCDSDataLoad(conf: TPCDSDataLoadConf) extends Benchmark(conf) {
+  import TPCDSDataLoad._
+
+  def runInternal(): Unit = {
+    val dbName = conf.dbName
+    val dbLocation = conf.dbLocation(dbName, suffix=benchmarkId.replace("-", "_"))
+    val dbCatalog = "spark_catalog"
+
+    val partitionTables = true
+    val primaryKeys = true
+
+    val sourceFormat = "parquet"
+    require(conf.scaleInGB > 0)
+    require(Seq(1, 3000).contains(conf.scaleInGB),
+      s"Unsupported scale factor ${conf.scaleInGB} GB; only 1 and 3000 are supported")
+    val sourceLocation = conf.sourcePath.getOrElse {
+      s"s3://devrel-delta-datasets/tpcds-2.13/tpcds_sf${conf.scaleInGB}_parquet/"
+    }
+
+    runQuery(s"DROP DATABASE IF EXISTS ${dbName} CASCADE", s"drop-database")
+    runQuery(s"CREATE DATABASE IF NOT EXISTS ${dbName}", s"create-database")
+
+    // Iterate through all the source tables
+    tableNamesTpcds.foreach { tableName =>
+      val sourceTableLocation = s"${sourceLocation}/${tableName}/"
+      val targetLocation = s"${dbLocation}/${tableName}/"
+      val fullTableName = s"`$dbName`.`$tableName`"
+      log(s"Generating $tableName at $dbLocation/$tableName")
+      val partitionedBy =
+        if (!partitionTables || tablePartitionKeys(tableName)(0).isEmpty) ""
+        else "PARTITIONED BY " + tablePartitionKeys(tableName).mkString("(", ", ", ")")
+
+      // Exclude rows with a null partition key when loading a partitioned table
+      val excludeNulls =
+        if (!partitionTables || tablePartitionKeys(tableName)(0).isEmpty) ""
+        else "WHERE " + tablePartitionKeys(tableName)(0) + " IS NOT NULL"
+
+      var tableOptions = ""
+      runQuery(s"DROP TABLE IF EXISTS $fullTableName", s"drop-table-$tableName")
+
+      runQuery(s"""CREATE TABLE $fullTableName
+                   USING ${conf.formatName}
+                   $partitionedBy $tableOptions
+                   LOCATION '$targetLocation'
+                   SELECT * FROM `${sourceFormat}`.`$sourceTableLocation` $excludeNulls
+                """, s"create-table-$tableName", ignoreError = true)
+
+      val sourceCount =
+        spark.sql(s"SELECT * FROM 
`${sourceFormat}`.`$sourceTableLocation` ${excludeNulls}").count() + val targetCount = spark.table(fullTableName).count() + assert(targetCount == sourceCount, + s"Row count mismatch: source table = $sourceCount, target $fullTableName = $targetCount") + } + log(s"====== Created all tables in database ${dbName} at '${dbLocation}' =======") + + runQuery(s"USE ${dbCatalog}.${dbName};") + runQuery("SHOW TABLES", printRows = true) + + } +} +object TPCDSDataLoad { + def main(args: Array[String]): Unit = { + TPCDSDataLoadConf.parse(args).foreach { conf => + new TPCDSDataLoad(conf).run() + } + } + + val tableNamesTpcds = Seq( + // with partitions + "inventory", "catalog_returns", "catalog_sales", "store_returns", "web_returns", "web_sales", "store_sales", + // no partitions + "call_center", "catalog_page", "customer_address", "customer_demographics", "customer", "date_dim", + "household_demographics", "income_band", "item", "promotion", "reason", "ship_mode", "store", "time_dim", + "warehouse", "web_page", "web_site" + ).sorted + + + val tableColumnSchemas = Map( + "dbgen_version" -> """ + dv_version varchar(16) , + dv_create_date date , + dv_create_time time , + dv_cmdline_args varchar(200) +""", + "call_center" -> """ + cc_call_center_sk integer not null, + cc_call_center_id char(16) not null, + cc_rec_start_date date , + cc_rec_end_date date , + cc_closed_date_sk integer , + cc_open_date_sk integer , + cc_name varchar(50) , + cc_class varchar(50) , + cc_employees integer , + cc_sq_ft integer , + cc_hours char(20) , + cc_manager varchar(40) , + cc_mkt_id integer , + cc_mkt_class char(50) , + cc_mkt_desc varchar(100) , + cc_market_manager varchar(40) , + cc_division integer , + cc_division_name varchar(50) , + cc_company integer , + cc_company_name char(50) , + cc_street_number char(10) , + cc_street_name varchar(60) , + cc_street_type char(15) , + cc_suite_number char(10) , + cc_city varchar(60) , + cc_county varchar(30) , + cc_state char(2) , + cc_zip char(10) , + cc_country varchar(20) , + cc_gmt_offset decimal(5,2) , + cc_tax_percentage decimal(5,2) +""", + "catalog_page" -> """ + cp_catalog_page_sk integer not null, + cp_catalog_page_id char(16) not null, + cp_start_date_sk integer , + cp_end_date_sk integer , + cp_department varchar(50) , + cp_catalog_number integer , + cp_catalog_page_number integer , + cp_description varchar(100) , + cp_type varchar(100) +""", + "catalog_returns" -> """ + cr_returned_date_sk integer , + cr_returned_time_sk integer , + cr_item_sk integer not null, + cr_refunded_customer_sk integer , + cr_refunded_cdemo_sk integer , + cr_refunded_hdemo_sk integer , + cr_refunded_addr_sk integer , + cr_returning_customer_sk integer , + cr_returning_cdemo_sk integer , + cr_returning_hdemo_sk integer , + cr_returning_addr_sk integer , + cr_call_center_sk integer , + cr_catalog_page_sk integer , + cr_ship_mode_sk integer , + cr_warehouse_sk integer , + cr_reason_sk integer , + cr_order_number bigint not null, + cr_return_quantity integer , + cr_return_amount decimal(7,2) , + cr_return_tax decimal(7,2) , + cr_return_amt_inc_tax decimal(7,2) , + cr_fee decimal(7,2) , + cr_return_ship_cost decimal(7,2) , + cr_refunded_cash decimal(7,2) , + cr_reversed_charge decimal(7,2) , + cr_store_credit decimal(7,2) , + cr_net_loss decimal(7,2) +""", + "catalog_sales" -> """ + cs_sold_date_sk integer , + cs_sold_time_sk integer , + cs_ship_date_sk integer , + cs_bill_customer_sk integer , + cs_bill_cdemo_sk integer , + cs_bill_hdemo_sk integer , + cs_bill_addr_sk integer , + cs_ship_customer_sk 
integer , + cs_ship_cdemo_sk integer , + cs_ship_hdemo_sk integer , + cs_ship_addr_sk integer , + cs_call_center_sk integer , + cs_catalog_page_sk integer , + cs_ship_mode_sk integer , + cs_warehouse_sk integer , + cs_item_sk integer not null, + cs_promo_sk integer , + cs_order_number bigint not null, + cs_quantity integer , + cs_wholesale_cost decimal(7,2) , + cs_list_price decimal(7,2) , + cs_sales_price decimal(7,2) , + cs_ext_discount_amt decimal(7,2) , + cs_ext_sales_price decimal(7,2) , + cs_ext_wholesale_cost decimal(7,2) , + cs_ext_list_price decimal(7,2) , + cs_ext_tax decimal(7,2) , + cs_coupon_amt decimal(7,2) , + cs_ext_ship_cost decimal(7,2) , + cs_net_paid decimal(7,2) , + cs_net_paid_inc_tax decimal(7,2) , + cs_net_paid_inc_ship decimal(7,2) , + cs_net_paid_inc_ship_tax decimal(7,2) , + cs_net_profit decimal(7,2) +""", + "customer" -> """ + c_customer_sk integer not null, + c_customer_id char(16) not null, + c_current_cdemo_sk integer , + c_current_hdemo_sk integer , + c_current_addr_sk integer , + c_first_shipto_date_sk integer , + c_first_sales_date_sk integer , + c_salutation char(10) , + c_first_name char(20) , + c_last_name char(30) , + c_preferred_cust_flag char(1) , + c_birth_day integer , + c_birth_month integer , + c_birth_year integer , + c_birth_country varchar(20) , + c_login char(13) , + c_email_address char(50) , + c_last_review_date_sk integer +""", + "customer_address" -> """ + ca_address_sk integer not null, + ca_address_id char(16) not null, + ca_street_number char(10) , + ca_street_name varchar(60) , + ca_street_type char(15) , + ca_suite_number char(10) , + ca_city varchar(60) , + ca_county varchar(30) , + ca_state char(2) , + ca_zip char(10) , + ca_country varchar(20) , + ca_gmt_offset decimal(5,2) , + ca_location_type char(20) +""", + "customer_demographics" -> """ + cd_demo_sk integer not null, + cd_gender char(1) , + cd_marital_status char(1) , + cd_education_status char(20) , + cd_purchase_estimate integer , + cd_credit_rating char(10) , + cd_dep_count integer , + cd_dep_employed_count integer , + cd_dep_college_count integer +""", + "date_dim" -> """ + d_date_sk integer not null, + d_date_id char(16) not null, + d_date date , + d_month_seq integer , + d_week_seq integer , + d_quarter_seq integer , + d_year integer , + d_dow integer , + d_moy integer , + d_dom integer , + d_qoy integer , + d_fy_year integer , + d_fy_quarter_seq integer , + d_fy_week_seq integer , + d_day_name char(9) , + d_quarter_name char(6) , + d_holiday char(1) , + d_weekend char(1) , + d_following_holiday char(1) , + d_first_dom integer , + d_last_dom integer , + d_same_day_ly integer , + d_same_day_lq integer , + d_current_day char(1) , + d_current_week char(1) , + d_current_month char(1) , + d_current_quarter char(1) , + d_current_year char(1) +""", + "household_demographics" -> """ + hd_demo_sk integer not null, + hd_income_band_sk integer , + hd_buy_potential char(15) , + hd_dep_count integer , + hd_vehicle_count integer +""", + + "income_band" -> """ + ib_income_band_sk integer not null, + ib_lower_bound integer , + ib_upper_bound integer +""", + "inventory" -> """ + inv_date_sk integer not null, + inv_item_sk integer not null, + inv_warehouse_sk integer not null, + inv_quantity_on_hand integer +""", + "item" -> """ + i_item_sk integer not null, + i_item_id char(16) not null, + i_rec_start_date date , + i_rec_end_date date , + i_item_desc varchar(200) , + i_current_price decimal(7,2) , + i_wholesale_cost decimal(7,2) , + i_brand_id integer , + i_brand char(50) , + 
i_class_id integer , + i_class char(50) , + i_category_id integer , + i_category char(50) , + i_manufact_id integer , + i_manufact char(50) , + i_size char(20) , + i_formulation char(20) , + i_color char(20) , + i_units char(10) , + i_container char(10) , + i_manager_id integer , + i_product_name char(50) +""", + "promotion" -> """ + p_promo_sk integer not null, + p_promo_id char(16) not null, + p_start_date_sk integer , + p_end_date_sk integer , + p_item_sk integer , + p_cost decimal(15,2) , + p_response_target integer , + p_promo_name char(50) , + p_channel_dmail char(1) , + p_channel_email char(1) , + p_channel_catalog char(1) , + p_channel_tv char(1) , + p_channel_radio char(1) , + p_channel_press char(1) , + p_channel_event char(1) , + p_channel_demo char(1) , + p_channel_details varchar(100) , + p_purpose char(15) , + p_discount_active char(1) +""", + "reason" -> """ + r_reason_sk integer not null, + r_reason_id char(16) not null, + r_reason_desc char(100) +""", + "ship_mode" -> """ + sm_ship_mode_sk integer not null, + sm_ship_mode_id char(16) not null, + sm_type char(30) , + sm_code char(10) , + sm_carrier char(20) , + sm_contract char(20) +""", + "store" -> """ + s_store_sk integer not null, + s_store_id char(16) not null, + s_rec_start_date date , + s_rec_end_date date , + s_closed_date_sk integer , + s_store_name varchar(50) , + s_number_employees integer , + s_floor_space integer , + s_hours char(20) , + s_manager varchar(40) , + s_market_id integer , + s_geography_class varchar(100) , + s_market_desc varchar(100) , + s_market_manager varchar(40) , + s_division_id integer , + s_division_name varchar(50) , + s_company_id integer , + s_company_name varchar(50) , + s_street_number varchar(10) , + s_street_name varchar(60) , + s_street_type char(15) , + s_suite_number char(10) , + s_city varchar(60) , + s_county varchar(30) , + s_state char(2) , + s_zip char(10) , + s_country varchar(20) , + s_gmt_offset decimal(5,2) , + s_tax_precentage decimal(5,2) +""", + "store_returns" -> """ + sr_returned_date_sk integer , + sr_return_time_sk integer , + sr_item_sk integer not null, + sr_customer_sk integer , + sr_cdemo_sk integer , + sr_hdemo_sk integer , + sr_addr_sk integer , + sr_store_sk integer , + sr_reason_sk integer , + sr_ticket_number bigint not null, + sr_return_quantity integer , + sr_return_amt decimal(7,2) , + sr_return_tax decimal(7,2) , + sr_return_amt_inc_tax decimal(7,2) , + sr_fee decimal(7,2) , + sr_return_ship_cost decimal(7,2) , + sr_refunded_cash decimal(7,2) , + sr_reversed_charge decimal(7,2) , + sr_store_credit decimal(7,2) , + sr_net_loss decimal(7,2) +""", + + "store_sales" -> """ + ss_sold_date_sk integer , + ss_sold_time_sk integer , + ss_item_sk integer not null, + ss_customer_sk integer , + ss_cdemo_sk integer , + ss_hdemo_sk integer , + ss_addr_sk integer , + ss_store_sk integer , + ss_promo_sk integer , + ss_ticket_number bigint not null, + ss_quantity integer , + ss_wholesale_cost decimal(7,2) , + ss_list_price decimal(7,2) , + ss_sales_price decimal(7,2) , + ss_ext_discount_amt decimal(7,2) , + ss_ext_sales_price decimal(7,2) , + ss_ext_wholesale_cost decimal(7,2) , + ss_ext_list_price decimal(7,2) , + ss_ext_tax decimal(7,2) , + ss_coupon_amt decimal(7,2) , + ss_net_paid decimal(7,2) , + ss_net_paid_inc_tax decimal(7,2) , + ss_net_profit decimal(7,2) +""", + "time_dim" -> """ + t_time_sk integer not null, + t_time_id char(16) not null, + t_time integer , + t_hour integer , + t_minute integer , + t_second integer , + t_am_pm char(2) , + t_shift char(20) , 
+ t_sub_shift char(20) , + t_meal_time char(20) +""", + "warehouse" -> """ + w_warehouse_sk integer not null, + w_warehouse_id char(16) not null, + w_warehouse_name varchar(20) , + w_warehouse_sq_ft integer , + w_street_number char(10) , + w_street_name varchar(60) , + w_street_type char(15) , + w_suite_number char(10) , + w_city varchar(60) , + w_county varchar(30) , + w_state char(2) , + w_zip char(10) , + w_country varchar(20) , + w_gmt_offset decimal(5,2) +""", + "web_page" -> """ + wp_web_page_sk integer not null, + wp_web_page_id char(16) not null, + wp_rec_start_date date , + wp_rec_end_date date , + wp_creation_date_sk integer , + wp_access_date_sk integer , + wp_autogen_flag char(1) , + wp_customer_sk integer , + wp_url varchar(100) , + wp_type char(50) , + wp_char_count integer , + wp_link_count integer , + wp_image_count integer , + wp_max_ad_count integer +""", + "web_returns" -> """ + wr_returned_date_sk integer , + wr_returned_time_sk integer , + wr_item_sk integer not null, + wr_refunded_customer_sk integer , + wr_refunded_cdemo_sk integer , + wr_refunded_hdemo_sk integer , + wr_refunded_addr_sk integer , + wr_returning_customer_sk integer , + wr_returning_cdemo_sk integer , + wr_returning_hdemo_sk integer , + wr_returning_addr_sk integer , + wr_web_page_sk integer , + wr_reason_sk integer , + wr_order_number bigint not null, + wr_return_quantity integer , + wr_return_amt decimal(7,2) , + wr_return_tax decimal(7,2) , + wr_return_amt_inc_tax decimal(7,2) , + wr_fee decimal(7,2) , + wr_return_ship_cost decimal(7,2) , + wr_refunded_cash decimal(7,2) , + wr_reversed_charge decimal(7,2) , + wr_account_credit decimal(7,2) , + wr_net_loss decimal(7,2) +""", + "web_sales" -> """ + ws_sold_date_sk integer , + ws_sold_time_sk integer , + ws_ship_date_sk integer , + ws_item_sk integer not null, + ws_bill_customer_sk integer , + ws_bill_cdemo_sk integer , + ws_bill_hdemo_sk integer , + ws_bill_addr_sk integer , + ws_ship_customer_sk integer , + ws_ship_cdemo_sk integer , + ws_ship_hdemo_sk integer , + ws_ship_addr_sk integer , + ws_web_page_sk integer , + ws_web_site_sk integer , + ws_ship_mode_sk integer , + ws_warehouse_sk integer , + ws_promo_sk integer , + ws_order_number bigint not null, + ws_quantity integer , + ws_wholesale_cost decimal(7,2) , + ws_list_price decimal(7,2) , + ws_sales_price decimal(7,2) , + ws_ext_discount_amt decimal(7,2) , + ws_ext_sales_price decimal(7,2) , + ws_ext_wholesale_cost decimal(7,2) , + ws_ext_list_price decimal(7,2) , + ws_ext_tax decimal(7,2) , + ws_coupon_amt decimal(7,2) , + ws_ext_ship_cost decimal(7,2) , + ws_net_paid decimal(7,2) , + ws_net_paid_inc_tax decimal(7,2) , + ws_net_paid_inc_ship decimal(7,2) , + ws_net_paid_inc_ship_tax decimal(7,2) , + ws_net_profit decimal(7,2) +""", + "web_site" -> """ + web_site_sk integer not null, + web_site_id char(16) not null, + web_rec_start_date date , + web_rec_end_date date , + web_name varchar(50) , + web_open_date_sk integer , + web_close_date_sk integer , + web_class varchar(50) , + web_manager varchar(40) , + web_mkt_id integer , + web_mkt_class varchar(50) , + web_mkt_desc varchar(100) , + web_market_manager varchar(40) , + web_company_id integer , + web_company_name char(50) , + web_street_number char(10) , + web_street_name varchar(60) , + web_street_type char(15) , + web_suite_number char(10) , + web_city varchar(60) , + web_county varchar(30) , + web_state char(2) , + web_zip char(10) , + web_country varchar(20) , + web_gmt_offset decimal(5,2) , + web_tax_percentage decimal(5,2) +""" + ) + + 
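+  // Illustrative sketch (not part of the load path above, which uses CTAS): the
+  // schema strings in tableColumnSchemas and the keys in tablePartitionKeys below
+  // could be combined to create an empty table with an explicit schema before
+  // loading, assuming a SparkSession in scope named `spark`:
+  //
+  //   val tableName = "store_sales"
+  //   val columns = tableColumnSchemas(tableName).trim
+  //   val partitionCols = tablePartitionKeys(tableName).filter(_.nonEmpty)
+  //   val partitionedBy =
+  //     if (partitionCols.isEmpty) ""
+  //     else partitionCols.mkString("PARTITIONED BY (", ", ", ")")
+  //   spark.sql(s"CREATE TABLE `$tableName` ($columns) USING delta $partitionedBy")
+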
val tablePrimaryKeys = Map( + "dbgen_version" -> Seq(""), + "call_center" -> Seq("cc_call_center_sk"), + "catalog_page" -> Seq("cp_catalog_page_sk"), + "catalog_returns" -> Seq("cr_item_sk", "cr_order_number"), + "catalog_sales" -> Seq("cs_item_sk", "cs_order_number"), + "customer" -> Seq("c_customer_sk"), + "customer_address" -> Seq("ca_address_sk"), + "customer_demographics" -> Seq("cd_demo_sk"), + "date_dim" -> Seq("d_date_sk"), + "household_demographics" -> Seq("hd_demo_sk"), + "income_band" -> Seq("ib_income_band_sk"), + "inventory" -> Seq("inv_date_sk", "inv_item_sk", "inv_warehouse_sk"), + "item" -> Seq("i_item_sk"), + "promotion" -> Seq("p_promo_sk"), + "reason" -> Seq("r_reason_sk"), + "ship_mode" -> Seq("sm_ship_mode_sk"), + "store" -> Seq("s_store_sk"), + "store_returns" -> Seq("sr_item_sk", "sr_ticket_number"), + "store_sales" -> Seq("ss_item_sk", "ss_ticket_number"), + "time_dim" -> Seq("t_time_sk"), + "warehouse" -> Seq("w_warehouse_sk"), + "web_page" -> Seq("wp_web_page_sk"), + "web_returns" -> Seq("wr_item_sk", "wr_order_number"), + "web_sales" -> Seq("ws_item_sk", "ws_order_number"), + "web_site" -> Seq("web_site_sk") + ) + + + val tablePartitionKeys = Map( + "dbgen_version" -> Seq(""), + "call_center" -> Seq(""), + "catalog_page" -> Seq(""), + "catalog_returns" -> Seq("cr_returned_date_sk"), + "catalog_sales" -> Seq("cs_sold_date_sk"), + "customer" -> Seq(""), + "customer_address" -> Seq(""), + "customer_demographics" -> Seq(""), + "date_dim" -> Seq(""), + "household_demographics" -> Seq(""), + "income_band" -> Seq(""), + "inventory" -> Seq("inv_date_sk"), + "item" -> Seq(""), + "promotion" -> Seq(""), + "reason" -> Seq(""), + "ship_mode" -> Seq(""), + "store" -> Seq(""), + "store_returns" -> Seq("sr_returned_date_sk"), + "store_sales" -> Seq("ss_sold_date_sk"), + "time_dim" -> Seq(""), + "warehouse" -> Seq(""), + "web_page" -> Seq(""), + "web_returns" -> Seq("wr_returned_date_sk"), + "web_sales" -> Seq("ws_sold_date_sk"), + "web_site" -> Seq("") + ) +} diff --git a/benchmarks/src/main/scala/benchmark/TestBenchmark.scala b/benchmarks/src/main/scala/benchmark/TestBenchmark.scala new file mode 100644 index 00000000000..d5fc47177d5 --- /dev/null +++ b/benchmarks/src/main/scala/benchmark/TestBenchmark.scala @@ -0,0 +1,81 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package benchmark
+
+case class TestBenchmarkConf(
+    dbName: Option[String] = None,
+    benchmarkPath: Option[String] = None) extends BenchmarkConf
+
+object TestBenchmarkConf {
+  import scopt.OParser
+  private val builder = OParser.builder[TestBenchmarkConf]
+  private val argParser = {
+    import builder._
+    OParser.sequence(
+      programName("Test Benchmark"),
+      opt[String]("test-param")
+        .required()
+        .action((x, c) => c) // ignore
+        .text("Placeholder test parameter; its value is ignored"),
+      opt[String]("benchmark-path")
+        .optional()
+        .action((x, c) => c.copy(benchmarkPath = Some(x)))
+        .text("Cloud path to be used for creating tables and generating reports"),
+      opt[String]("db-name")
+        .optional()
+        .action((x, c) => c.copy(dbName = Some(x)))
+        .text("Name of the test database to create")
+    )
+  }
+
+  def parse(args: Array[String]): Option[TestBenchmarkConf] = {
+    OParser.parse(argParser, args, TestBenchmarkConf())
+  }
+}
+
+class TestBenchmark(conf: TestBenchmarkConf) extends Benchmark(conf) {
+  def runInternal(): Unit = {
+    // Test Spark SQL
+    runQuery("SELECT 1 AS X", "sql-test")
+    if (conf.benchmarkPath.isEmpty) {
+      log("Skipping the delta read / write test as benchmark path has not been provided")
+      return
+    }
+
+    val dbName = conf.dbName.getOrElse(benchmarkId.replaceAll("-", "_"))
+    val dbLocation = conf.dbLocation(dbName)
+
+    // Run database management tests
+    runQuery("SHOW DATABASES", "db-list-test")
+    runQuery(s"""CREATE DATABASE IF NOT EXISTS $dbName LOCATION "$dbLocation" """, "db-create-test")
+    runQuery(s"USE $dbName", "db-use-test")
+
+    // Run table tests
+    val tableName = "test"
+    runQuery(s"DROP TABLE IF EXISTS $tableName", "table-drop-test")
+    runQuery(s"CREATE TABLE $tableName USING delta SELECT 1 AS x", "table-create-test")
+    runQuery(s"SELECT * FROM $tableName", "table-query-test")
+  }
+}
+
+object TestBenchmark {
+  def main(args: Array[String]): Unit = {
+    println("All command line args = " + args.toSeq)
+    TestBenchmarkConf.parse(args).foreach { conf =>
+      new TestBenchmark(conf).run()
+    }
+  }
+}
diff --git a/benchmarks/src/main/scala/org/apache/spark/SparkUtils.scala b/benchmarks/src/main/scala/org/apache/spark/SparkUtils.scala
new file mode 100644
index 00000000000..db36c17fa95
--- /dev/null
+++ b/benchmarks/src/main/scala/org/apache/spark/SparkUtils.scala
@@ -0,0 +1,57 @@
+/*
+ * Copyright (2021) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark + +import benchmark.SparkEnvironmentInfo + +import org.apache.spark.util.Utils + +object SparkUtils { + def getEnvironmentInfo(sc: SparkContext): SparkEnvironmentInfo = { + val info = sc.statusStore.environmentInfo() + val sparkBuildInfo = Map( + "sparkBuildBranch" -> SPARK_BRANCH, + "sparkBuildVersion" -> SPARK_VERSION, + "sparkBuildDate" -> SPARK_BUILD_DATE, + "sparkBuildUser" -> SPARK_BUILD_USER, + "sparkBuildRevision" -> SPARK_REVISION + ) + + SparkEnvironmentInfo( + sparkBuildInfo = sparkBuildInfo, + runtimeInfo = caseClassToMap(info.runtime), + sparkProps = Utils.redact(sc.conf, info.sparkProperties).toMap, + hadoopProps = Utils.redact(sc.conf, info.hadoopProperties).toMap + .filterKeys(k => !k.startsWith("mapred") && !k.startsWith("yarn")), + systemProps = Utils.redact(sc.conf, info.systemProperties).toMap, + classpathEntries = info.classpathEntries.toMap + ) + } + + def caseClassToMap(obj: Object): Map[String, String] = { + obj.getClass.getDeclaredFields.flatMap { f => + f.setAccessible(true) + val valueOption = f.get(obj) match { + case o: Option[_] => o.map(_.toString) + case s => Some(s.toString) + } + valueOption.map(value => f.getName -> value) + }.toMap + } + + def median(sizes: Array[Long], alreadySorted: Boolean): Long = Utils.median(sizes, alreadySorted) +} diff --git a/build.sbt b/build.sbt new file mode 100644 index 00000000000..252f717fdcf --- /dev/null +++ b/build.sbt @@ -0,0 +1,1314 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// scalastyle:off line.size.limit + +import java.nio.file.Files +import Mima._ +import Unidoc._ + +// Scala versions +val scala212 = "2.12.17" +val scala213 = "2.13.8" +val all_scala_versions = Seq(scala212, scala213) + +// Due to how publishArtifact is determined for javaOnlyReleaseSettings, incl. storage +// It was necessary to change default_scala_version to scala213 in build.sbt +// to build the project with Scala 2.13 only +// As a setting, it's possible to set it on command line easily +// sbt 'set default_scala_version := 2.13.8' [commands] +// FIXME Why not use scalaVersion? 
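For readers less familiar with sbt, the override mentioned in the comment above uses sbt's standard `set` command against a setting key. A minimal, purely illustrative sketch of that pattern (the key name below is hypothetical and not part of this build):

// Hypothetical key, shown only to illustrate declaring a Global default and overriding it.
val exampleScalaVersion = settingKey[String]("Illustrative default Scala version key")
Global / exampleScalaVersion := "2.12.17"
// Override from the shell without editing the build:
//   build/sbt 'set Global / exampleScalaVersion := "2.13.8"' compile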
+val default_scala_version = settingKey[String]("Default Scala version") +Global / default_scala_version := scala212 + +// Dependent library versions +val sparkVersion = "3.5.0" +val flinkVersion = "1.16.1" +val hadoopVersion = "3.3.4" +val scalaTestVersion = "3.2.15" +val scalaTestVersionForConnectors = "3.0.8" +val parquet4sVersion = "1.9.4" + +// Versions for Hive 3 +val hadoopVersionForHive3 = "3.1.0" +val hiveVersion = "3.1.2" +val tezVersion = "0.9.2" + +// Versions for Hive 2 +val hadoopVersionForHive2 = "2.7.2" +val hive2Version = "2.3.3" +val tezVersionForHive2 = "0.8.4" + +scalaVersion := default_scala_version.value + +// crossScalaVersions must be set to Nil on the root project +crossScalaVersions := Nil + +// For Java 11 use the following on command line +// sbt 'set targetJvm := "11"' [commands] +val targetJvm = settingKey[String]("Target JVM version") +Global / targetJvm := "1.8" + +lazy val commonSettings = Seq( + organization := "io.delta", + scalaVersion := default_scala_version.value, + crossScalaVersions := all_scala_versions, + fork := true, + scalacOptions ++= Seq(s"-target:jvm-${targetJvm.value}", "-Ywarn-unused:imports"), + javacOptions ++= Seq("-source", targetJvm.value), + // -target cannot be passed as a parameter to javadoc. See https://github.com/sbt/sbt/issues/355 + Compile / compile / javacOptions ++= Seq("-target", targetJvm.value), + + // Make sure any tests in any project that uses Spark is configured for running well locally + Test / javaOptions ++= Seq( + "-Dspark.ui.enabled=false", + "-Dspark.ui.showConsoleProgress=false", + "-Dspark.databricks.delta.snapshotPartitions=2", + "-Dspark.sql.shuffle.partitions=5", + "-Ddelta.log.cacheSize=3", + "-Dspark.sql.sources.parallelPartitionDiscovery.parallelism=5", + "-Xmx1024m" + ), + + testOptions += Tests.Argument("-oF"), + + // Unidoc settings: by default dont document any source file + unidocSourceFilePatterns := Nil, +) + +lazy val spark = (project in file("spark")) + .dependsOn(storage) + .enablePlugins(Antlr4Plugin) + .settings ( + name := "delta-spark", + commonSettings, + scalaStyleSettings, + sparkMimaSettings, + releaseSettings, + libraryDependencies ++= Seq( + // Adding test classifier seems to break transitive resolution of the core dependencies + "org.apache.spark" %% "spark-hive" % sparkVersion % "provided", + "org.apache.spark" %% "spark-sql" % sparkVersion % "provided", + "org.apache.spark" %% "spark-core" % sparkVersion % "provided", + "org.apache.spark" %% "spark-catalyst" % sparkVersion % "provided", + + // Test deps + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + "org.scalatestplus" %% "scalacheck-1-15" % "3.2.9.0" % "test", + "junit" % "junit" % "4.12" % "test", + "com.novocode" % "junit-interface" % "0.11" % "test", + "org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-hive" % sparkVersion % "test" classifier "tests", + ), + // For adding staged Spark RC versions, Ex: + // resolvers += "Apche Spark 3.5.0 (RC1) Staging" at "https://repository.apache.org/content/repositories/orgapachespark-1444/", + Compile / packageBin / mappings := (Compile / packageBin / mappings).value ++ + listPythonFiles(baseDirectory.value.getParentFile / "python"), + + Antlr4 / antlr4Version:= "4.9.3", + Antlr4 / antlr4PackageName := Some("io.delta.sql.parser"), + Antlr4 / 
antlr4GenListener := true, + Antlr4 / antlr4GenVisitor := true, + + Test / testOptions += Tests.Argument("-oDF"), + Test / testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"), + + // Don't execute in parallel since we can't have multiple Sparks in the same JVM + Test / parallelExecution := false, + + scalacOptions ++= Seq( + "-P:genjavadoc:strictVisibility=true" // hide package private types and methods in javadoc + ), + + javaOptions += "-Xmx1024m", + + // Configurations to speed up tests and reduce memory footprint + Test / javaOptions ++= Seq( + "-Dspark.ui.enabled=false", + "-Dspark.ui.showConsoleProgress=false", + "-Dspark.databricks.delta.snapshotPartitions=2", + "-Dspark.sql.shuffle.partitions=5", + "-Ddelta.log.cacheSize=3", + "-Dspark.sql.sources.parallelPartitionDiscovery.parallelism=5", + "-Xmx1024m" + ), + + // Required for testing table features see https://github.com/delta-io/delta/issues/1602 + Test / envVars += ("DELTA_TESTING", "1"), + + // Hack to avoid errors related to missing repo-root/target/scala-2.12/classes/ + createTargetClassesDir := { + val dir = baseDirectory.value.getParentFile / "target" / "scala-2.12" / "classes" + Files.createDirectories(dir.toPath) + }, + Compile / compile := ((Compile / compile) dependsOn createTargetClassesDir).value, + // Generate the package object to provide the version information in runtime. + Compile / sourceGenerators += Def.task { + val file = (Compile / sourceManaged).value / "io" / "delta" / "package.scala" + IO.write(file, + s"""package io + | + |package object delta { + | val VERSION = "${version.value}" + |} + |""".stripMargin) + Seq(file) + }, + TestParallelization.settings, + + // Unidoc settings + unidocSourceFilePatterns := Seq(SourceFilePattern("io/delta/tables/", "io/delta/exceptions/")), + ) + .configureUnidoc(generateScalaDoc = true) + +lazy val contribs = (project in file("contribs")) + .dependsOn(spark % "compile->compile;test->test;provided->provided") + .settings ( + name := "delta-contribs", + commonSettings, + scalaStyleSettings, + releaseSettings, + Compile / packageBin / mappings := (Compile / packageBin / mappings).value ++ + listPythonFiles(baseDirectory.value.getParentFile / "python"), + + Test / testOptions += Tests.Argument("-oDF"), + Test / testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"), + + // Don't execute in parallel since we can't have multiple Sparks in the same JVM + Test / parallelExecution := false, + + javaOptions += "-Xmx1024m", + + // Configurations to speed up tests and reduce memory footprint + Test / javaOptions ++= Seq( + "-Dspark.ui.enabled=false", + "-Dspark.ui.showConsoleProgress=false", + "-Dspark.databricks.delta.snapshotPartitions=2", + "-Dspark.sql.shuffle.partitions=5", + "-Ddelta.log.cacheSize=3", + "-Dspark.sql.sources.parallelPartitionDiscovery.parallelism=5", + "-Xmx1024m" + ), + + // Hack to avoid errors related to missing repo-root/target/scala-2.12/classes/ + createTargetClassesDir := { + val dir = baseDirectory.value.getParentFile / "target" / "scala-2.12" / "classes" + Files.createDirectories(dir.toPath) + }, + Compile / compile := ((Compile / compile) dependsOn createTargetClassesDir).value + ).configureUnidoc() + +lazy val sharing = (project in file("sharing")) + .dependsOn(spark % "compile->compile;test->test;provided->provided") + .settings( + name := "delta-sharing-spark", + commonSettings, + scalaStyleSettings, + releaseSettings, + Test / javaOptions ++= Seq("-ea"), + libraryDependencies ++= Seq( + "org.apache.spark" %% "spark-sql" % 
sparkVersion % "provided", + + "io.delta" %% "delta-sharing-client" % "1.0.4", + + // Test deps + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + "org.scalatestplus" %% "scalacheck-1-15" % "3.2.9.0" % "test", + "junit" % "junit" % "4.12" % "test", + "com.novocode" % "junit-interface" % "0.11" % "test", + "org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-hive" % sparkVersion % "test" classifier "tests", + ) + ).configureUnidoc() + +lazy val kernelApi = (project in file("kernel/kernel-api")) + .settings( + name := "delta-kernel-api", + commonSettings, + scalaStyleSettings, + javaOnlyReleaseSettings, + Test / javaOptions ++= Seq("-ea"), + libraryDependencies ++= Seq( + "org.roaringbitmap" % "RoaringBitmap" % "0.9.25", + "org.slf4j" % "slf4j-api" % "1.7.36", + + "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.5" % "test", + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + "junit" % "junit" % "4.13" % "test", + "com.novocode" % "junit-interface" % "0.11" % "test", + "org.slf4j" % "slf4j-log4j12" % "1.7.36" % "test" + ), + javaCheckstyleSettings("kernel/dev/checkstyle.xml"), + // Unidoc settings + unidocSourceFilePatterns := Seq(SourceFilePattern("io/delta/kernel/")), + ).configureUnidoc(docTitle = "Delta Kernel") + +lazy val kernelDefaults = (project in file("kernel/kernel-defaults")) + .dependsOn(kernelApi) + .dependsOn(spark % "test->test") + .dependsOn(goldenTables % "test") + .settings( + name := "delta-kernel-defaults", + commonSettings, + scalaStyleSettings, + javaOnlyReleaseSettings, + Test / javaOptions ++= Seq("-ea"), + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client-runtime" % hadoopVersion, + "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.5", + "org.apache.parquet" % "parquet-hadoop" % "1.12.3", + + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + "junit" % "junit" % "4.13" % "test", + "commons-io" % "commons-io" % "2.8.0" % "test", + "com.novocode" % "junit-interface" % "0.11" % "test", + "org.slf4j" % "slf4j-log4j12" % "1.7.36" % "test", + + "org.apache.spark" %% "spark-hive" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests", + ), + javaCheckstyleSettings("kernel/dev/checkstyle.xml"), + // Unidoc settings + unidocSourceFilePatterns += SourceFilePattern("io/delta/kernel/"), + ).configureUnidoc(docTitle = "Delta Kernel Defaults") + +// TODO javastyle tests +// TODO unidoc +// TODO(scott): figure out a better way to include tests in this project +lazy val storage = (project in file("storage")) + .settings ( + name := "delta-storage", + commonSettings, + javaOnlyReleaseSettings, + libraryDependencies ++= Seq( + // User can provide any 2.x or 3.x version. We don't use any new fancy APIs. Watch out for + // versions with known vulnerabilities. + "org.apache.hadoop" % "hadoop-common" % hadoopVersion % "provided", + + // Note that the org.apache.hadoop.fs.s3a.Listing::createFileStatusListingIterator 3.3.1 API + // is not compatible with 3.3.2. 
+ "org.apache.hadoop" % "hadoop-aws" % hadoopVersion % "provided", + + // Test Deps + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + ), + + // Unidoc settings + unidocSourceFilePatterns += SourceFilePattern("/LogStore.java", "/CloseableIterator.java"), + ).configureUnidoc() + +lazy val storageS3DynamoDB = (project in file("storage-s3-dynamodb")) + .dependsOn(storage % "compile->compile;test->test;provided->provided") + .dependsOn(spark % "test->test") + .settings ( + name := "delta-storage-s3-dynamodb", + commonSettings, + javaOnlyReleaseSettings, + + // uncomment only when testing FailingS3DynamoDBLogStore. this will include test sources in + // a separate test jar. + // Test / publishArtifact := true, + + libraryDependencies ++= Seq( + "com.amazonaws" % "aws-java-sdk" % "1.12.262" % "provided", + + // Test Deps + "org.apache.hadoop" % "hadoop-aws" % hadoopVersion % "test", // RemoteFileChangedException + ) + ).configureUnidoc() + +val icebergSparkRuntimeArtifactName = { + val (expMaj, expMin, _) = getMajorMinorPatch(sparkVersion) + s"iceberg-spark-runtime-$expMaj.$expMin" +} + +lazy val testDeltaIcebergJar = (project in file("testDeltaIcebergJar")) + // delta-iceberg depends on delta-spark! So, we need to include it during our test. + .dependsOn(spark % "test") + .settings( + name := "test-delta-iceberg-jar", + commonSettings, + skipReleaseSettings, + exportJars := true, + Compile / unmanagedJars += (iceberg / assembly).value, + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion, + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + "org.apache.spark" %% "spark-core" % sparkVersion % "test" + ) + ) + +val deltaIcebergSparkIncludePrefixes = Seq( + // We want everything from this package + "org/apache/spark/sql/delta/icebergShaded", + + // We only want the files in this project from this package. e.g. we want to exclude + // org/apache/spark/sql/delta/commands/convert/ConvertTargetFile.class (from delta-spark project). + "org/apache/spark/sql/delta/commands/convert/IcebergFileManifest", + "org/apache/spark/sql/delta/commands/convert/IcebergSchemaUtils", + "org/apache/spark/sql/delta/commands/convert/IcebergTable" +) + +// Build using: build/sbt clean icebergShaded/compile iceberg/compile +// It will fail the first time, just re-run it. +// scalastyle:off println +lazy val iceberg = (project in file("iceberg")) + .dependsOn(spark % "compile->compile;test->test;provided->provided") + .settings ( + name := "delta-iceberg", + commonSettings, + scalaStyleSettings, + releaseSettings, + libraryDependencies ++= Seq( + // Fix Iceberg's legacy java.lang.NoClassDefFoundError: scala/jdk/CollectionConverters$ error + // due to legacy scala. + "org.scala-lang.modules" %% "scala-collection-compat" % "2.1.1", + "org.apache.iceberg" %% icebergSparkRuntimeArtifactName % "1.4.0" % "provided", + "com.github.ben-manes.caffeine" % "caffeine" % "2.9.3" + ), + Compile / unmanagedJars += (icebergShaded / assembly).value, + // Generate the assembly JAR as the package JAR + Compile / packageBin := assembly.value, + assembly / assemblyJarName := s"${name.value}_${scalaBinaryVersion.value}-${version.value}.jar", + assembly / logLevel := Level.Info, + assembly / test := {}, + assembly / assemblyExcludedJars := { + // Note: the input here is only `libraryDependencies` jars, not `.dependsOn(_)` jars. 
+ val allowedJars = Seq( + s"iceberg-shaded_${scalaBinaryVersion.value}-${version.value}.jar", + s"scala-library-${scala212}.jar", + s"scala-library-${scala213}.jar", + s"scala-collection-compat_${scalaBinaryVersion.value}-2.1.1.jar", + "caffeine-2.9.3.jar", + // Note: We are excluding + // - antlr4-runtime-4.9.3.jar + // - checker-qual-3.19.0.jar + // - error_prone_annotations-2.10.0.jar + ) + val cp = (assembly / fullClasspath).value + + // Return `true` when we want the jar `f` to be excluded from the assembly jar + cp.filter { f => + val doExclude = !allowedJars.contains(f.data.getName) + println(s"Excluding jar: ${f.data.getName} ? $doExclude") + doExclude + } + }, + assembly / assemblyMergeStrategy := { + // Project iceberg `dependsOn` spark and accidentally brings in it, along with its + // compile-time dependencies (like delta-storage). We want these excluded from the + // delta-iceberg jar. + case PathList("io", "delta", xs @ _*) => + // - delta-storage will bring in classes: io/delta/storage + // - delta-spark will bring in classes: io/delta/exceptions/, io/delta/implicits, + // io/delta/package, io/delta/sql, io/delta/tables, + println(s"Discarding class: io/delta/${xs.mkString("/")}") + MergeStrategy.discard + case PathList("com", "databricks", xs @ _*) => + // delta-spark will bring in com/databricks/spark/util + println(s"Discarding class: com/databricks/${xs.mkString("/")}") + MergeStrategy.discard + case PathList("org", "apache", "spark", xs @ _*) + if !deltaIcebergSparkIncludePrefixes.exists { prefix => + s"org/apache/spark/${xs.mkString("/")}".startsWith(prefix) } => + println(s"Discarding class: org/apache/spark/${xs.mkString("/")}") + MergeStrategy.discard + case PathList("scoverage", xs @ _*) => + println(s"Discarding class: scoverage/${xs.mkString("/")}") + MergeStrategy.discard + case x => + println(s"Including class: $x") + (assembly / assemblyMergeStrategy).value(x) + }, + assemblyPackageScala / assembleArtifact := false + ) +// scalastyle:on println + +lazy val generateIcebergJarsTask = TaskKey[Unit]("generateIcebergJars", "Generate Iceberg JARs") + +lazy val icebergShaded = (project in file("icebergShaded")) + .dependsOn(spark % "provided") + .settings ( + name := "iceberg-shaded", + commonSettings, + skipReleaseSettings, + + // Compile, patch and generated Iceberg JARs + generateIcebergJarsTask := { + import sys.process._ + val scriptPath = baseDirectory.value / "generate_iceberg_jars.py" + // Download iceberg code in `iceberg_src` dir and generate the JARs in `lib` dir + Seq("python3", scriptPath.getPath)! + }, + Compile / unmanagedJars := (Compile / unmanagedJars).dependsOn(generateIcebergJarsTask).value, + cleanFiles += baseDirectory.value / "iceberg_src", + cleanFiles += baseDirectory.value / "lib", + + // Generated shaded Iceberg JARs + Compile / packageBin := assembly.value, + assembly / assemblyJarName := s"${name.value}_${scalaBinaryVersion.value}-${version.value}.jar", + assembly / logLevel := Level.Info, + assembly / test := {}, + assembly / assemblyShadeRules := Seq( + ShadeRule.rename("org.apache.iceberg.**" -> "shadedForDelta.@0").inAll, + ), + assemblyPackageScala / assembleArtifact := false, + // Make the 'compile' invoke the 'assembly' task to generate the uber jar. + ) + +lazy val hive = (project in file("connectors/hive")) + .dependsOn(standaloneCosmetic) + .settings ( + name := "delta-hive", + commonSettings, + releaseSettings, + + // Minimal dependencies to compile the codes. 
This project doesn't run any tests so we don't + // need any runtime dependencies. + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersionForHive3 % "provided", + "org.apache.hive" % "hive-exec" % hiveVersion % "provided" classifier "core", + "org.apache.hive" % "hive-metastore" % hiveVersion % "provided" + ), + ) + +lazy val hiveAssembly = (project in file("connectors/hive-assembly")) + .dependsOn(hive) + .settings( + name := "delta-hive-assembly", + Compile / unmanagedJars += (hive / Compile / packageBin / packageBin).value, + commonSettings, + skipReleaseSettings, + + assembly / logLevel := Level.Info, + assembly / assemblyJarName := s"${name.value}_${scalaBinaryVersion.value}-${version.value}.jar", + assembly / test := {}, + assembly / assemblyMergeStrategy := { + // Discard `module-info.class` to fix the `different file contents found` error. + // TODO Upgrade SBT to 1.5 which will do this automatically + case "module-info.class" => MergeStrategy.discard + // Discard unused `parquet.thrift` so that we don't conflict the file used by the user + case "parquet.thrift" => MergeStrategy.discard + // Discard the jackson service configs that we don't need. These files are not shaded so + // adding them may conflict with other jackson version used by the user. + case PathList("META-INF", "services", xs @ _*) => MergeStrategy.discard + case x => MergeStrategy.first + }, + // Make the 'compile' invoke the 'assembly' task to generate the uber jar. + Compile / packageBin := assembly.value + ) + +lazy val hiveTest = (project in file("connectors/hive-test")) + .dependsOn(goldenTables % "test") + .settings ( + name := "hive-test", + // Make the project use the assembly jar to ensure we are testing the assembly jar that users + // will use in real environment. 
+ Compile / unmanagedJars += (hiveAssembly / Compile / packageBin / packageBin).value, + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersionForHive3 % "provided", + "org.apache.hive" % "hive-exec" % hiveVersion % "provided" classifier "core" excludeAll( + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.apache.hive" % "hive-metastore" % hiveVersion % "provided" excludeAll( + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.apache.hive", "hive-exec") + ), + "org.apache.hive" % "hive-cli" % hiveVersion % "test" excludeAll( + ExclusionRule("ch.qos.logback", "logback-classic"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule("org.apache.hive", "hive-exec"), + ExclusionRule("com.google.guava", "guava"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.scalatest" %% "scalatest" % scalaTestVersionForConnectors % "test" + ) + ) + +lazy val hiveMR = (project in file("connectors/hive-mr")) + .dependsOn(hiveTest % "test->test") + .settings ( + name := "hive-mr", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersionForHive3 % "provided", + "org.apache.hive" % "hive-exec" % hiveVersion % "provided" excludeAll( + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm") + ), + "org.apache.hadoop" % "hadoop-common" % hadoopVersionForHive3 % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-mapreduce-client-hs" % hadoopVersionForHive3 % "test", + "org.apache.hadoop" % "hadoop-mapreduce-client-jobclient" % hadoopVersionForHive3 % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-yarn-server-tests" % hadoopVersionForHive3 % "test" classifier "tests", + "org.apache.hive" % "hive-cli" % hiveVersion % "test" excludeAll( + ExclusionRule("ch.qos.logback", "logback-classic"), + ExclusionRule("com.google.guava", "guava"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm") + ), + "org.scalatest" %% "scalatest" % scalaTestVersionForConnectors % "test" + ) + ) + +lazy val hiveTez = (project in file("connectors/hive-tez")) + .dependsOn(hiveTest % "test->test") + .settings ( + name := "hive-tez", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersionForHive3 % "provided" excludeAll ( + ExclusionRule(organization = "com.google.protobuf") + ), + "com.google.protobuf" % "protobuf-java" % "2.5.0", + "org.apache.hive" % "hive-exec" % hiveVersion % "provided" classifier "core" excludeAll( + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.jodd" % "jodd-core" % "3.5.2", + "org.apache.hive" % "hive-metastore" % hiveVersion % "provided" excludeAll( + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.apache.hive", "hive-exec") + ), + "org.apache.hadoop" % "hadoop-common" % hadoopVersionForHive3 % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-mapreduce-client-hs" % hadoopVersionForHive3 % "test", + "org.apache.hadoop" % "hadoop-mapreduce-client-jobclient" 
% hadoopVersionForHive3 % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-yarn-server-tests" % hadoopVersionForHive3 % "test" classifier "tests", + "org.apache.hive" % "hive-cli" % hiveVersion % "test" excludeAll( + ExclusionRule("ch.qos.logback", "logback-classic"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule("org.apache.hive", "hive-exec"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersionForHive3 % "test", + "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersionForHive3 % "test", + "org.apache.tez" % "tez-mapreduce" % tezVersion % "test", + "org.apache.tez" % "tez-dag" % tezVersion % "test", + "org.apache.tez" % "tez-tests" % tezVersion % "test" classifier "tests", + "com.esotericsoftware" % "kryo-shaded" % "4.0.2" % "test", + "org.scalatest" %% "scalatest" % scalaTestVersionForConnectors % "test" + ) + ) + + +lazy val hive2MR = (project in file("connectors/hive2-mr")) + .dependsOn(goldenTables % "test") + .settings ( + name := "hive2-mr", + commonSettings, + skipReleaseSettings, + Compile / unmanagedJars ++= Seq( + (hiveAssembly / Compile / packageBin / packageBin).value, // delta-hive assembly + (hiveTest / Test / packageBin / packageBin).value + ), + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersionForHive2 % "provided", + "org.apache.hive" % "hive-exec" % hive2Version % "provided" excludeAll( + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm") + ), + "org.apache.hadoop" % "hadoop-common" % hadoopVersionForHive2 % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-mapreduce-client-hs" % hadoopVersionForHive2 % "test", + "org.apache.hadoop" % "hadoop-mapreduce-client-jobclient" % hadoopVersionForHive2 % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-yarn-server-tests" % hadoopVersionForHive2 % "test" classifier "tests", + "org.apache.hive" % "hive-cli" % hive2Version % "test" excludeAll( + ExclusionRule("ch.qos.logback", "logback-classic"), + ExclusionRule("com.google.guava", "guava"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm") + ), + "org.scalatest" %% "scalatest" % scalaTestVersionForConnectors % "test" + ) + ) + +lazy val hive2Tez = (project in file("connectors/hive2-tez")) + .dependsOn(goldenTables % "test") + .settings ( + name := "hive2-tez", + commonSettings, + skipReleaseSettings, + Compile / unmanagedJars ++= Seq( + (hiveAssembly / Compile / packageBin / packageBin).value, + (hiveTest / Test / packageBin / packageBin).value + ), + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersionForHive2 % "provided" excludeAll ( + ExclusionRule(organization = "com.google.protobuf") + ), + "com.google.protobuf" % "protobuf-java" % "2.5.0", + "org.apache.hive" % "hive-exec" % hive2Version % "provided" classifier "core" excludeAll( + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.jodd" % "jodd-core" % "3.5.2", + "org.apache.hive" % "hive-metastore" % hive2Version % "provided" excludeAll( + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.apache.hive", "hive-exec") + ), + "org.apache.hadoop" % "hadoop-common" % hadoopVersionForHive2 % "test" 
classifier "tests", + "org.apache.hadoop" % "hadoop-mapreduce-client-hs" % hadoopVersionForHive2 % "test", + "org.apache.hadoop" % "hadoop-mapreduce-client-jobclient" % hadoopVersionForHive2 % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-yarn-server-tests" % hadoopVersionForHive2 % "test" classifier "tests", + "org.apache.hive" % "hive-cli" % hive2Version % "test" excludeAll( + ExclusionRule("ch.qos.logback", "logback-classic"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule("org.apache.hive", "hive-exec"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersionForHive2 % "test", + "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersionForHive2 % "test", + "org.apache.tez" % "tez-mapreduce" % tezVersionForHive2 % "test", + "org.apache.tez" % "tez-dag" % tezVersionForHive2 % "test", + "org.apache.tez" % "tez-tests" % tezVersionForHive2 % "test" classifier "tests", + "com.esotericsoftware" % "kryo-shaded" % "4.0.2" % "test", + "org.scalatest" %% "scalatest" % scalaTestVersionForConnectors % "test" + ) + ) + +/** + * We want to publish the `standalone` project's shaded JAR (created from the + * build/sbt standalone/assembly command). + * + * However, build/sbt standalone/publish and build/sbt standalone/publishLocal will use the + * non-shaded JAR from the build/sbt standalone/package command. + * + * So, we create an impostor, cosmetic project used only for publishing. + * + * build/sbt standaloneCosmetic/package + * - creates connectors/standalone/target/scala-2.12/delta-standalone-original-shaded_2.12-0.2.1-SNAPSHOT.jar + * (this is the shaded JAR we want) + * + * build/sbt standaloneCosmetic/publishM2 + * - packages the shaded JAR (above) and then produces: + * -- .m2/repository/io/delta/delta-standalone_2.12/0.2.1-SNAPSHOT/delta-standalone_2.12-0.2.1-SNAPSHOT.pom + * -- .m2/repository/io/delta/delta-standalone_2.12/0.2.1-SNAPSHOT/delta-standalone_2.12-0.2.1-SNAPSHOT.jar + * -- .m2/repository/io/delta/delta-standalone_2.12/0.2.1-SNAPSHOT/delta-standalone_2.12-0.2.1-SNAPSHOT-sources.jar + * -- .m2/repository/io/delta/delta-standalone_2.12/0.2.1-SNAPSHOT/delta-standalone_2.12-0.2.1-SNAPSHOT-javadoc.jar + */ +lazy val standaloneCosmetic = project + .dependsOn(storage) // this doesn't impact the output artifact (jar), only the pom.xml dependencies + .settings( + name := "delta-standalone", + commonSettings, + releaseSettings, + exportJars := true, + Compile / packageBin := (standaloneParquet / assembly).value, + Compile / packageSrc := (standalone / Compile / packageSrc).value, + libraryDependencies ++= scalaCollectionPar(scalaVersion.value) ++ Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided", + "org.apache.parquet" % "parquet-hadoop" % "1.12.3" % "provided", + // parquet4s-core dependencies that are not shaded are added with compile scope. 
+ "com.chuusai" %% "shapeless" % "2.3.4", + "org.scala-lang.modules" %% "scala-collection-compat" % "2.4.3" + ) + ) + +lazy val testStandaloneCosmetic = (project in file("connectors/testStandaloneCosmetic")) + .dependsOn(standaloneCosmetic) + .dependsOn(goldenTables % "test") + .settings( + name := "test-standalone-cosmetic", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion, + "org.scalatest" %% "scalatest" % scalaTestVersionForConnectors % "test", + ) + ) + +/** + * A test project to verify `ParquetSchemaConverter` APIs are working after the user provides + * `parquet-hadoop`. We use a separate project because we want to test whether Delta Standlone APIs + * except `ParquetSchemaConverter` are working without `parquet-hadoop` in testStandaloneCosmetic`. + */ +lazy val testParquetUtilsWithStandaloneCosmetic = project.dependsOn(standaloneCosmetic) + .settings( + name := "test-parquet-utils-with-standalone-cosmetic", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion, + "org.apache.parquet" % "parquet-hadoop" % "1.12.3" % "provided", + "org.scalatest" %% "scalatest" % scalaTestVersionForConnectors % "test", + ) + ) + +def scalaCollectionPar(version: String) = version match { + case v if v.startsWith("2.13.") => + Seq("org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.4") + case _ => Seq() +} + +/** + * The public API ParquetSchemaConverter exposes Parquet classes in its methods so we cannot apply + * shading rules on it. However, sbt-assembly doesn't allow excluding a single file. Hence, we + * create a separate project to skip the shading. + */ +lazy val standaloneParquet = (project in file("connectors/standalone-parquet")) + .dependsOn(standaloneWithoutParquetUtils) + .settings( + name := "delta-standalone-parquet", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.parquet" % "parquet-hadoop" % "1.12.3" % "provided", + "org.scalatest" %% "scalatest" % scalaTestVersionForConnectors % "test" + ), + assemblyPackageScala / assembleArtifact := false + ) + +/** A dummy project to allow `standaloneParquet` depending on the shaded standalone jar. */ +lazy val standaloneWithoutParquetUtils = project + .settings( + name := "delta-standalone-without-parquet-utils", + commonSettings, + skipReleaseSettings, + exportJars := true, + Compile / packageBin := (standalone / assembly).value + ) + +// TODO scalastyle settings +lazy val standalone = (project in file("connectors/standalone")) + .dependsOn(storage % "compile->compile;provided->provided") + .dependsOn(goldenTables % "test") + .settings( + name := "delta-standalone-original", + commonSettings, + skipReleaseSettings, + standaloneMimaSettings, + // When updating any dependency here, we should also review `pomPostProcess` in project + // `standaloneCosmetic` and update it accordingly. 
+ libraryDependencies ++= scalaCollectionPar(scalaVersion.value) ++ Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided", + "com.github.mjakubowski84" %% "parquet4s-core" % parquet4sVersion excludeAll ( + ExclusionRule("org.slf4j", "slf4j-api") + ), + "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.12.3", + "org.json4s" %% "json4s-jackson" % "3.7.0-M11" excludeAll ( + ExclusionRule("com.fasterxml.jackson.core"), + ExclusionRule("com.fasterxml.jackson.module") + ), + "org.scalatest" %% "scalatest" % scalaTestVersionForConnectors % "test", + ), + Compile / sourceGenerators += Def.task { + val file = (Compile / sourceManaged).value / "io" / "delta" / "standalone" / "package.scala" + IO.write(file, + s"""package io.delta + | + |package object standalone { + | val VERSION = "${version.value}" + | val NAME = "Delta Standalone" + |} + |""".stripMargin) + Seq(file) + }, + + /** + * Standalone packaged (unshaded) jar. + * + * Build with `build/sbt standalone/package` command. + * e.g. connectors/standalone/target/scala-2.12/delta-standalone-original-unshaded_2.12-0.2.1-SNAPSHOT.jar + */ + artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) => + artifact.name + "-unshaded" + "_" + sv.binary + "-" + module.revision + "." + artifact.extension + }, + + /** + * Standalone assembly (shaded) jar. This is what we want to release. + * + * Build with `build/sbt standalone/assembly` command. + * e.g. connectors/standalone/target/scala-2.12/delta-standalone-original-shaded_2.12-0.2.1-SNAPSHOT.jar + */ + assembly / logLevel := Level.Info, + assembly / test := {}, + assembly / assemblyJarName := s"${name.value}-shaded_${scalaBinaryVersion.value}-${version.value}.jar", + // We exclude jars first, and then we shade what is remaining. Note: the input here is only + // `libraryDependencies` jars, not `.dependsOn(_)` jars. + assembly / assemblyExcludedJars := { + val cp = (assembly / fullClasspath).value + val allowedPrefixes = Set("META_INF", "io", "json4s", "jackson", "paranamer", + "parquet4s", "parquet-", "audience-annotations", "commons-pool") + cp.filter { f => + !allowedPrefixes.exists(prefix => f.data.getName.startsWith(prefix)) + } + }, + assembly / assemblyShadeRules := Seq( + ShadeRule.rename("com.fasterxml.jackson.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("com.thoughtworks.paranamer.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("org.json4s.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("com.github.mjakubowski84.parquet4s.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("org.apache.commons.pool.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("org.apache.parquet.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("shaded.parquet.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("org.apache.yetus.audience.**" -> "shadedelta.@0").inAll + ), + assembly / assemblyMergeStrategy := { + // Discard `module-info.class` to fix the `different file contents found` error. + // TODO Upgrade SBT to 1.5 which will do this automatically + case "module-info.class" => MergeStrategy.discard + // Discard unused `parquet.thrift` so that we don't conflict the file used by the user + case "parquet.thrift" => MergeStrategy.discard + // Discard the jackson service configs that we don't need. These files are not shaded so + // adding them may conflict with other jackson version used by the user. 
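The META-INF/services files discarded here are java.util.ServiceLoader provider-configuration files; Jackson, for instance, uses them to auto-register modules, so shipping the unshaded entries could interfere with the consumer's own Jackson setup. A small illustrative snippet of the discovery mechanism these files drive (assumes jackson-databind on the classpath):

// ServiceLoader scans META-INF/services provider files to discover implementations; stale,
// unshaded entries left in an uber jar would be picked up by the consumer's lookups too.
val loader = java.util.ServiceLoader.load(classOf[com.fasterxml.jackson.databind.Module])
loader.forEach(m => println(s"Discovered Jackson module: ${m.getModuleName}"))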
+ case PathList("META-INF", "services", xs @ _*) => MergeStrategy.discard + // This project `.dependsOn` delta-storage, and its classes will be included by default + // in this assembly jar. Manually discard them since it is already a compile-time dependency. + case PathList("io", "delta", "storage", xs @ _*) => MergeStrategy.discard + case x => + val oldStrategy = (assembly / assemblyMergeStrategy).value + oldStrategy(x) + }, + assembly / artifact := { + val art = (assembly / artifact).value + art.withClassifier(Some("assembly")) + }, + addArtifact(assembly / artifact, assembly), + + // Unidoc setting + unidocSourceFilePatterns += SourceFilePattern("io/delta/standalone/"), + javaCheckstyleSettings("connectors/dev/checkstyle.xml") + ).configureUnidoc() + + +/* +TODO (TD): Tests are failing for some reason +lazy val compatibility = (project in file("connectors/oss-compatibility-tests")) + // depend on standalone test codes as well + .dependsOn(standalone % "compile->compile;test->test") + .dependsOn(spark % "test -> compile") + .settings( + name := "compatibility", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + // Test Dependencies + "io.netty" % "netty-buffer" % "4.1.63.Final" % "test", + "org.scalatest" %% "scalatest" % "3.1.0" % "test", + "commons-io" % "commons-io" % "2.8.0" % "test", + "org.apache.spark" %% "spark-sql" % sparkVersion % "test", + "org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests", + ) + ) + */ + +lazy val goldenTables = (project in file("connectors/golden-tables")) + .dependsOn(spark % "test") // depends on delta-spark + .settings( + name := "golden-tables", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + // Test Dependencies + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + "commons-io" % "commons-io" % "2.8.0" % "test", + "org.apache.spark" %% "spark-sql" % sparkVersion % "test", + "org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests" + ) + ) + +def sqlDeltaImportScalaVersion(scalaBinaryVersion: String): String = { + scalaBinaryVersion match { + // sqlDeltaImport doesn't support 2.11. We return 2.12 so that we can resolve the dependencies + // but we will not publish sqlDeltaImport with Scala 2.11. 
+ case "2.11" => "2.12" + case _ => scalaBinaryVersion + } +} + +lazy val sqlDeltaImport = (project in file("connectors/sql-delta-import")) + .dependsOn(spark) + .settings ( + name := "sql-delta-import", + commonSettings, + skipReleaseSettings, + publishArtifact := scalaBinaryVersion.value != "2.11", + Test / publishArtifact := false, + libraryDependencies ++= Seq( + "io.netty" % "netty-buffer" % "4.1.63.Final" % "test", + "org.apache.spark" % ("spark-sql_" + sqlDeltaImportScalaVersion(scalaBinaryVersion.value)) % sparkVersion % "provided", + "org.rogach" %% "scallop" % "3.5.1", + "org.scalatest" %% "scalatest" % scalaTestVersionForConnectors % "test", + "com.h2database" % "h2" % "1.4.200" % "test", + "org.apache.spark" % ("spark-catalyst_" + sqlDeltaImportScalaVersion(scalaBinaryVersion.value)) % sparkVersion % "test", + "org.apache.spark" % ("spark-core_" + sqlDeltaImportScalaVersion(scalaBinaryVersion.value)) % sparkVersion % "test", + "org.apache.spark" % ("spark-sql_" + sqlDeltaImportScalaVersion(scalaBinaryVersion.value)) % sparkVersion % "test" + ) + ) + +def flinkScalaVersion(scalaBinaryVersion: String): String = { + scalaBinaryVersion match { + // Flink doesn't support 2.13. We return 2.12 so that we can resolve the dependencies but we + // will not publish Flink connector with Scala 2.13. + case "2.13" => "2.12" + case _ => scalaBinaryVersion + } +} + +lazy val flink = (project in file("connectors/flink")) + .dependsOn(standaloneCosmetic % "provided") + .dependsOn(kernelApi) + .dependsOn(kernelDefaults) + .settings ( + name := "delta-flink", + commonSettings, + releaseSettings, + flinkMimaSettings, + publishArtifact := scalaBinaryVersion.value == "2.12", // only publish once + autoScalaLibrary := false, // exclude scala-library from dependencies + Test / publishArtifact := false, + pomExtra := + https://github.com/delta-io/delta + + git@github.com:delta-io/delta.git + scm:git:git@github.com:delta-io/delta.git + + + + pkubit-g + Paweł Kubit + https://github.com/pkubit-g + + + kristoffSC + Krzysztof Chmielewski + https://github.com/kristoffSC + + , + crossPaths := false, + libraryDependencies ++= Seq( + "org.apache.flink" % "flink-parquet" % flinkVersion % "provided", + "org.apache.flink" % "flink-table-common" % flinkVersion % "provided", + "org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided", + "org.apache.flink" % "flink-connector-files" % flinkVersion % "provided", + "org.apache.flink" % "flink-table-runtime" % flinkVersion % "provided", + "org.apache.flink" % "flink-scala_2.12" % flinkVersion % "provided", + "org.apache.flink" % "flink-connector-hive_2.12" % flinkVersion % "provided", + "org.apache.flink" % "flink-table-planner_2.12" % flinkVersion % "provided", + + "org.apache.flink" % "flink-connector-files" % flinkVersion % "test" classifier "tests", + "org.apache.flink" % "flink-runtime-web" % flinkVersion % "test", + "org.apache.flink" % "flink-sql-gateway-api" % flinkVersion % "test", + "org.apache.flink" % "flink-connector-test-utils" % flinkVersion % "test", + "org.apache.flink" % "flink-clients" % flinkVersion % "test", + "org.apache.flink" % "flink-test-utils" % flinkVersion % "test", + "org.apache.hadoop" % "hadoop-common" % hadoopVersion % "test" classifier "tests", + "org.mockito" % "mockito-inline" % "4.11.0" % "test", + "net.aichler" % "jupiter-interface" % JupiterKeys.jupiterVersion.value % Test, + "org.junit.vintage" % "junit-vintage-engine" % "5.8.2" % "test", + "org.mockito" % "mockito-junit-jupiter" % "4.11.0" % "test", + "org.junit.jupiter" % 
"junit-jupiter-params" % "5.8.2" % "test", + "io.github.artsok" % "rerunner-jupiter" % "2.1.6" % "test", + + // Exclusions due to conflicts with Flink's libraries from table planer, hive, calcite etc. + "org.apache.hive" % "hive-metastore" % "3.1.2" % "test" excludeAll( + ExclusionRule("org.apache.avro", "avro"), + ExclusionRule("org.slf4j", "slf4j-log4j12"), + ExclusionRule("org.pentaho"), + ExclusionRule("org.apache.hbase"), + ExclusionRule("org.apache.hbase"), + ExclusionRule("co.cask.tephra"), + ExclusionRule("com.google.code.findbugs", "jsr305"), + ExclusionRule("org.eclipse.jetty.aggregate", "module: 'jetty-all"), + ExclusionRule("org.eclipse.jetty.orbit", "javax.servlet"), + ExclusionRule("org.apache.parquet", "parquet-hadoop-bundle"), + ExclusionRule("com.tdunning", "json"), + ExclusionRule("javax.transaction", "transaction-api"), + ExclusionRule("'com.zaxxer", "HikariCP"), + ), + // Exclusions due to conflicts with Flink's libraries from table planer, hive, calcite etc. + "org.apache.hive" % "hive-exec" % "3.1.2" % "test" classifier "core" excludeAll( + ExclusionRule("'org.apache.avro", "avro"), + ExclusionRule("org.slf4j", "slf4j-log4j12"), + ExclusionRule("org.pentaho"), + ExclusionRule("com.google.code.findbugs", "jsr305"), + ExclusionRule("org.apache.calcite.avatica"), + ExclusionRule("org.apache.calcite"), + ExclusionRule("org.apache.hive", "hive-llap-tez"), + ExclusionRule("org.apache.logging.log4j"), + ExclusionRule("com.google.protobuf", "protobuf-java"), + ), + ), + // generating source java class with version number to be passed during commit to the DeltaLog as engine info + // (part of transaction's metadata) + Compile / sourceGenerators += Def.task { + val file = (Compile / sourceManaged).value / "io" / "delta" / "flink" / "internal" / "Meta.java" + IO.write(file, + s"""package io.delta.flink.internal; + | + |final public class Meta { + | public static final String FLINK_VERSION = "${flinkVersion}"; + | public static final String CONNECTOR_VERSION = "${version.value}"; + |} + |""".stripMargin) + Seq(file) + }, + + // Unidoc settings + unidocSourceFilePatterns += SourceFilePattern("io/delta/flink/"), + // TODO: this is the config that was used before archiving connectors but it has + // standalone-specific import orders + javaCheckstyleSettings("connectors/dev/checkstyle.xml") + ).configureUnidoc() + +/** + * Get list of python files and return the mapping between source files and target paths + * in the generated package JAR. + */ +def listPythonFiles(pythonBase: File): Seq[(File, String)] = { + val pythonExcludeDirs = pythonBase / "lib" :: pythonBase / "doc" :: pythonBase / "bin" :: Nil + import scala.collection.JavaConverters._ + val pythonFiles = Files.walk(pythonBase.toPath).iterator().asScala + .map { path => path.toFile() } + .filter { file => file.getName.endsWith(".py") && ! file.getName.contains("test") } + .filter { file => ! 
pythonExcludeDirs.exists { base => IO.relativize(base, file).nonEmpty} } + .toSeq + + pythonFiles pair Path.relativeTo(pythonBase) +} + +ThisBuild / parallelExecution := false + +val createTargetClassesDir = taskKey[Unit]("create target classes dir") + +/* + ****************** + * Project groups * + ****************** + */ + +// Don't use these groups for any other projects +lazy val sparkGroup = project + .aggregate(spark, contribs, storage, storageS3DynamoDB, iceberg, testDeltaIcebergJar, sharing) + .settings( + // crossScalaVersions must be set to Nil on the aggregating project + crossScalaVersions := Nil, + publishArtifact := false, + publish / skip := false, + ) + +lazy val kernelGroup = project + .aggregate(kernelApi, kernelDefaults) + .settings( + // crossScalaVersions must be set to Nil on the aggregating project + crossScalaVersions := Nil, + publishArtifact := false, + publish / skip := false, + unidocSourceFilePatterns := { + (kernelApi / unidocSourceFilePatterns).value.scopeToProject(kernelApi) ++ + (kernelDefaults / unidocSourceFilePatterns).value.scopeToProject(kernelDefaults) + } + ).configureUnidoc(docTitle = "Delta Kernel") + +/* + *********************** + * ScalaStyle settings * + *********************** + */ +ThisBuild / scalastyleConfig := baseDirectory.value / "scalastyle-config.xml" + +lazy val compileScalastyle = taskKey[Unit]("compileScalastyle") +lazy val testScalastyle = taskKey[Unit]("testScalastyle") + +lazy val scalaStyleSettings = Seq( + compileScalastyle := (Compile / scalastyle).toTask("").value, + + Compile / compile := ((Compile / compile) dependsOn compileScalastyle).value, + + testScalastyle := (Test / scalastyle).toTask("").value, + + Test / test := ((Test / test) dependsOn testScalastyle).value +) + +/* + **************************** + * Java checkstyle settings * + **************************** + */ + +def javaCheckstyleSettings(checkstyleFile: String): Def.SettingsDefinition = { + // Can be run explicitly via: build/sbt $module/checkstyle + // Will automatically be run during compilation (e.g. build/sbt compile) + // and during tests (e.g. build/sbt test) + Seq( + checkstyleConfigLocation := CheckstyleConfigLocation.File(checkstyleFile), + checkstyleSeverityLevel := Some(CheckstyleSeverityLevel.Error), + (Compile / checkstyle) := (Compile / checkstyle).triggeredBy(Compile / compile).value, + (Test / checkstyle) := (Test / checkstyle).triggeredBy(Test / compile).value + ) +} + +/* + ******************** + * Release settings * + ******************** + */ +import ReleaseTransformations._ + +lazy val skipReleaseSettings = Seq( + publishArtifact := false, + publish / skip := true +) + + +// Release settings for artifact that contains only Java source code +lazy val javaOnlyReleaseSettings = releaseSettings ++ Seq( + // drop off Scala suffix from artifact names + crossPaths := false, + + // we publish jars for each scalaVersion in crossScalaVersions. however, we only need to publish + // one java jar. 
thus, only do so when the current scala version == default scala version + publishArtifact := { + val (expMaj, expMin, _) = getMajorMinorPatch(default_scala_version.value) + s"$expMaj.$expMin" == scalaBinaryVersion.value + }, + + // exclude scala-library from dependencies in generated pom.xml + autoScalaLibrary := false, +) + +lazy val releaseSettings = Seq( + publishMavenStyle := true, + publishArtifact := true, + Test / publishArtifact := false, + releasePublishArtifactsAction := PgpKeys.publishSigned.value, + releaseCrossBuild := true, + pgpPassphrase := sys.env.get("PGP_PASSPHRASE").map(_.toArray), + + // TODO: This isn't working yet ... + sonatypeProfileName := "io.delta", // sonatype account domain name prefix / group ID + credentials += Credentials( + "Sonatype Nexus Repository Manager", + "oss.sonatype.org", + sys.env.getOrElse("SONATYPE_USERNAME", ""), + sys.env.getOrElse("SONATYPE_PASSWORD", "") + ), + publishTo := { + val nexus = "https://oss.sonatype.org/" + if (isSnapshot.value) { + Some("snapshots" at nexus + "content/repositories/snapshots") + } else { + Some("releases" at nexus + "service/local/staging/deploy/maven2") + } + }, + licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0")), + pomExtra := + https://delta.io/ + + git@github.com:delta-io/delta.git + scm:git:git@github.com:delta-io/delta.git + + + + marmbrus + Michael Armbrust + https://github.com/marmbrus + + + brkyvz + Burak Yavuz + https://github.com/brkyvz + + + jose-torres + Jose Torres + https://github.com/jose-torres + + + liwensun + Liwen Sun + https://github.com/liwensun + + + mukulmurthy + Mukul Murthy + https://github.com/mukulmurthy + + + tdas + Tathagata Das + https://github.com/tdas + + + zsxwing + Shixiong Zhu + https://github.com/zsxwing + + + scottsand-db + Scott Sandre + https://github.com/scottsand-db + + + windpiger + Jun Song + https://github.com/windpiger + + +) + +// Looks like some of release settings should be set for the root project as well. +publishArtifact := false // Don't release the root project +publish / skip := true +publishTo := Some("snapshots" at "https://oss.sonatype.org/content/repositories/snapshots") +releaseCrossBuild := false // Don't use sbt-release's cross facility +releaseProcess := Seq[ReleaseStep]( + checkSnapshotDependencies, + inquireVersions, + runTest, + setReleaseVersion, + commitReleaseVersion, + tagRelease, + releaseStepCommandAndRemaining("+publishSigned"), + // Do NOT use `sonatypeBundleRelease` - it will actually release to Maven! We want to do that + // manually. + // + // Do NOT use `sonatypePromote` - it will promote the closed staging repository (i.e. sync to + // Maven central) + // + // See https://github.com/xerial/sbt-sonatype#publishing-your-artifact. + // + // - sonatypePrepare: Drop the existing staging repositories (if exist) and create a new staging + // repository using sonatypeSessionName as a unique key + // - sonatypeBundleUpload: Upload your local staging folder contents to a remote Sonatype + // repository + // - sonatypeClose: closes your staging repository at Sonatype. 
This step verifies Maven central + // sync requirement, GPG-signature, javadoc and source code presence, pom.xml + // settings, etc + // TODO: this isn't working yet + // releaseStepCommand("sonatypePrepare; sonatypeBundleUpload; sonatypeClose"), + setNextVersion, + commitNextVersion +) diff --git a/build/sbt b/build/sbt new file mode 100755 index 00000000000..044a2929bde --- /dev/null +++ b/build/sbt @@ -0,0 +1,183 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This file contains code from the Apache Spark project (original license above). +# It contains modifications, which are licensed as follows: +# + +# +# Copyright (2021) The Delta Lake Project Authors. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so +# that we can run Hive to generate the golden answer. This is not required for normal development +# or testing. +if [ -n "$HIVE_HOME" ]; then + for i in "$HIVE_HOME"/lib/* + do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i" + done + export HADOOP_CLASSPATH +fi + +realpath () { +( + TARGET_FILE="$1" + + cd "$(dirname "$TARGET_FILE")" + TARGET_FILE="$(basename "$TARGET_FILE")" + + COUNT=0 + while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ] + do + TARGET_FILE="$(readlink "$TARGET_FILE")" + cd $(dirname "$TARGET_FILE") + TARGET_FILE="$(basename $TARGET_FILE)" + COUNT=$(($COUNT + 1)) + done + + echo "$(pwd -P)/"$TARGET_FILE"" +) +} + +if [[ "$JENKINS_URL" != "" ]]; then + # Make Jenkins use Google Mirror first as Maven Central may ban us + SBT_REPOSITORIES_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories" + export SBT_OPTS="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_CONFIG" +fi + +. 
"$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash + + +declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" +declare -r sbt_opts_file=".sbtopts" +declare -r etc_sbt_opts_file="/etc/sbt/sbtopts" + +usage() { + cat < path to global settings/plugins directory (default: ~/.sbt) + -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) + -ivy path to local Ivy repository (default: ~/.ivy2) + -mem set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem)) + -no-share use all local caches; no sharing + -no-global uses global caches, but does not use global ~/.sbt directory. + -jvm-debug Turn on JVM debugging, open at the given port. + -batch Disable interactive mode + + # sbt version (default: from project/build.properties if present, else latest release) + -sbt-version use the specified version of sbt + -sbt-jar use the specified jar as the sbt launcher + -sbt-rc use an RC version of sbt + -sbt-snapshot use a snapshot version of sbt + + # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) + -java-home alternate JAVA_HOME + + # jvm options and output control + JAVA_OPTS environment variable, if unset uses "$java_opts" + SBT_OPTS environment variable, if unset uses "$default_sbt_opts" + .sbtopts if this file exists in the current directory, it is + prepended to the runner args + /etc/sbt/sbtopts if this file exists, it is prepended to the runner args + -Dkey=val pass -Dkey=val directly to the java runtime + -J-X pass option -X directly to the java runtime + (-J is stripped) + -S-X add -X to sbt's scalacOptions (-S is stripped) + -PmavenProfiles Enable a maven profile for the build. + +In the case of duplicated or conflicting options, the order above +shows precedence: JAVA_OPTS lowest, command line options highest. +EOM +} + +process_my_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; + -no-share) addJava "$noshare_opts" && shift ;; + -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;; + -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; + -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;; + -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; + -batch) exec /dev/null) + if [[ ! $? ]]; then + saved_stty="" + fi +} + +saveSttySettings +trap onExit INT + +run "$@" + +exit_status=$? 
+onExit diff --git a/build/sbt-config/repositories b/build/sbt-config/repositories new file mode 100644 index 00000000000..b12fece5fd5 --- /dev/null +++ b/build/sbt-config/repositories @@ -0,0 +1,12 @@ +[repositories] + local + local-preloaded-ivy: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/}, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext] + local-preloaded: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/} + gcs-maven-central-mirror: https://maven-central.storage-download.googleapis.com/repos/central/data/ + maven-central + typesafe-ivy-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly + sbt-ivy-snapshots: https://repo.scala-sbt.org/scalasbt/ivy-snapshots/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly + sbt-plugin-releases: https://repo.scala-sbt.org/scalasbt/sbt-plugin-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext] + bintray-typesafe-sbt-plugin-releases: https://dl.bintray.com/typesafe/sbt-plugins/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext] + repos-spark-packages: https://repos.spark-packages.org + typesafe-releases: https://repo.typesafe.com/typesafe/releases/ diff --git a/build/sbt-launch-lib.bash b/build/sbt-launch-lib.bash new file mode 100755 index 00000000000..0d58bb7269c --- /dev/null +++ b/build/sbt-launch-lib.bash @@ -0,0 +1,197 @@ +#!/usr/bin/env bash +# + +# A library to simplify using the SBT launcher from other packages. +# Note: This should be used by tools like giter8/conscript etc. + +# TODO - Should we merge the main SBT script with this library? + +if test -z "$HOME"; then + declare -r script_dir="$(dirname "$script_path")" +else + declare -r script_dir="$HOME/.sbt" +fi + +declare -a residual_args +declare -a java_args +declare -a scalac_args +declare -a sbt_commands +declare -a maven_profiles + +if test -x "$JAVA_HOME/bin/java"; then + echo -e "Using $JAVA_HOME as default JAVA_HOME." + echo "Note, this will be overridden by -java-home if it is set." + declare java_cmd="$JAVA_HOME/bin/java" +else + declare java_cmd=java +fi + +echoerr () { + echo 1>&2 "$@" +} +vlog () { + [[ $verbose || $debug ]] && echoerr "$@" +} +dlog () { + [[ $debug ]] && echoerr "$@" +} + +acquire_sbt_jar () { + SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties` + + # Download sbt from mirror URL if the environment variable is provided + if [[ "${SBT_VERSION}" == "0.13.18" ]] && [[ -n "${SBT_MIRROR_JAR_URL}" ]]; then + URL1="${SBT_MIRROR_JAR_URL}" + elif [[ "${SBT_VERSION}" == "1.5.5" ]] && [[ -n "${SBT_1_5_5_MIRROR_JAR_URL}" ]]; then + URL1="${SBT_1_5_5_MIRROR_JAR_URL}" + else + URL1=${DEFAULT_ARTIFACT_REPOSITORY:-https://repo1.maven.org/maven2/}org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar + fi + + JAR=build/sbt-launch-${SBT_VERSION}.jar + sbt_jar=$JAR + + if [[ ! -f "$sbt_jar" ]]; then + # Download sbt launch jar if it hasn't been downloaded yet + if [ ! 
-f "${JAR}" ]; then + # Download + printf 'Attempting to fetch sbt from %s\n' "${URL1}" + JAR_DL="${JAR}.part" + if [ $(command -v curl) ]; then + curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\ + mv "${JAR_DL}" "${JAR}" + elif [ $(command -v wget) ]; then + wget --quiet ${URL1} -O "${JAR_DL}" &&\ + mv "${JAR_DL}" "${JAR}" + else + printf "You do not have curl or wget installed, please install sbt manually from https://www.scala-sbt.org/\n" + exit -1 + fi + fi + if [ ! -f "${JAR}" ]; then + # We failed to download + printf "Our attempt to download sbt locally to ${JAR} failed. Please install sbt manually from https://www.scala-sbt.org/\n" + exit -1 + fi + printf "Launching sbt from ${JAR}\n" + fi +} + +execRunner () { + # print the arguments one to a line, quoting any containing spaces + [[ $verbose || $debug ]] && echo "# Executing command line:" && { + for arg; do + if printf "%s\n" "$arg" | grep -q ' '; then + printf "\"%s\"\n" "$arg" + else + printf "%s\n" "$arg" + fi + done + echo "" + } + + "$@" +} + +addJava () { + dlog "[addJava] arg = '$1'" + java_args=( "${java_args[@]}" "$1" ) +} + +enableProfile () { + dlog "[enableProfile] arg = '$1'" + maven_profiles=( "${maven_profiles[@]}" "$1" ) + export SBT_MAVEN_PROFILES="${maven_profiles[@]}" +} + +addSbt () { + dlog "[addSbt] arg = '$1'" + sbt_commands=( "${sbt_commands[@]}" "$1" ) +} +addResidual () { + dlog "[residual] arg = '$1'" + residual_args=( "${residual_args[@]}" "$1" ) +} +addDebugger () { + addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1" +} + +# a ham-fisted attempt to move some memory settings in concert +# so they need not be dicked around with individually. +get_mem_opts () { + local mem=${1:-1000} + local perm=$(( $mem / 4 )) + (( $perm > 256 )) || perm=256 + (( $perm < 4096 )) || perm=4096 + local codecache=$(( $perm / 2 )) + + echo "-Xms${mem}m -Xmx${mem}m -XX:ReservedCodeCacheSize=${codecache}m" +} + +require_arg () { + local type="$1" + local opt="$2" + local arg="$3" + if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then + echo "$opt requires <$type> argument" 1>&2 + exit 1 + fi +} + +is_function_defined() { + declare -f "$1" > /dev/null +} + +process_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -h|-help) usage; exit 1 ;; + -v|-verbose) verbose=1 && shift ;; + -d|-debug) debug=1 && shift ;; + + -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;; + -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;; + -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;; + -batch) exec + if (fs.exists(path)) { + throw new FileAlreadyExistsException(path.toString) + } else { + throw new IllegalStateException(s"Failed due to concurrent write", e) + } + } + } + } + + private def isPreconditionFailure(x: Throwable): Boolean = { + Throwables.getCausalChain(x) + .stream() + .filter(p => p != null) + .filter(p => p.getMessage != null) + .filter(p => p.getMessage.contains(preconditionFailedExceptionMessage)) + .findFirst + .isPresent; + } + + override def invalidateCache(): Unit = {} + + override def isPartialWriteVisible(path: Path): Boolean = false + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): Boolean = false +} diff --git a/contribs/src/main/scala/io/delta/storage/OracleCloudLogStore.scala b/contribs/src/main/scala/io/delta/storage/OracleCloudLogStore.scala new file mode 100644 index 00000000000..739887d16ee --- /dev/null +++ b/contribs/src/main/scala/io/delta/storage/OracleCloudLogStore.scala @@ -0,0 
+1,64 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkConf +import org.apache.spark.annotation.Unstable + +/** + * :: Unstable :: + * + * LogStore implementation for OCI (Oracle Cloud Infrastructure). + * + * We assume the following from OCI (Oracle Cloud Infrastructure)'s BmcFilesystem implementations: + * - Rename without overwrite is atomic. + * - List-after-write is consistent. + * + * Regarding file creation, this implementation: + * - Uses atomic rename when overwrite is false; if the destination file exists or the rename + * fails, throws an exception. + * - Uses create-with-overwrite when overwrite is true. This does not make the file atomically + * visible and therefore the caller must handle partial files. + * + * @note This class is not meant for direct access but for configuration based on storage system. + * See https://docs.delta.io/latest/delta-storage.html for details. + */ +@Unstable +class OracleCloudLogStore(sparkConf: SparkConf, initHadoopConf: Configuration) + extends org.apache.spark.sql.delta.storage.HadoopFileSystemLogStore(sparkConf, initHadoopConf) { + + override def write(path: Path, actions: Iterator[String], overwrite: Boolean = false): Unit = { + write(path, actions, overwrite, getHadoopConfiguration) + } + + override def write( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + writeWithRename(path, actions, overwrite, hadoopConf) + } + + override def invalidateCache(): Unit = {} + + override def isPartialWriteVisible(path: Path): Boolean = true + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): Boolean = true +} diff --git a/contribs/src/test/scala/io/delta/storage/IBMCOSLogStoreSuite.scala b/contribs/src/test/scala/io/delta/storage/IBMCOSLogStoreSuite.scala new file mode 100644 index 00000000000..3b760528b42 --- /dev/null +++ b/contribs/src/test/scala/io/delta/storage/IBMCOSLogStoreSuite.scala @@ -0,0 +1,36 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.storage + +import org.apache.spark.sql.delta.{FakeFileSystem, LogStoreSuiteBase} + +class IBMCOSLogStoreSuite extends LogStoreSuiteBase { + + protected override def sparkConf = { + super.sparkConf.set(logStoreClassConfKey, logStoreClassName) + .set("spark.hadoop.fs.cos.atomic.write", "true") + } + + override val logStoreClassName: String = classOf[IBMCOSLogStore].getName + + testHadoopConf( + expectedErrMsg = ".*No FileSystem for scheme.*fake.*", + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") + + protected def shouldUseRenameToWriteCheckpoint: Boolean = false +} diff --git a/contribs/src/test/scala/io/delta/storage/OracleCloudLogStoreSuite.scala b/contribs/src/test/scala/io/delta/storage/OracleCloudLogStoreSuite.scala new file mode 100644 index 00000000000..cf881940a7d --- /dev/null +++ b/contribs/src/test/scala/io/delta/storage/OracleCloudLogStoreSuite.scala @@ -0,0 +1,31 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage + +import org.apache.spark.sql.delta.{FakeFileSystem, LogStoreSuiteBase} + +class OracleCloudLogStoreSuite extends LogStoreSuiteBase { + + override val logStoreClassName: String = classOf[OracleCloudLogStore].getName + + testHadoopConf( + expectedErrMsg = "No FileSystem for scheme \"fake\"", + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") + + protected def shouldUseRenameToWriteCheckpoint: Boolean = true +} diff --git a/dev/checkstyle-suppressions.xml b/dev/checkstyle-suppressions.xml new file mode 100644 index 00000000000..66b4f1b34c9 --- /dev/null +++ b/dev/checkstyle-suppressions.xml @@ -0,0 +1,29 @@ + + + + + + + + diff --git a/dev/lint-python b/dev/lint-python new file mode 100755 index 00000000000..f6bd9471327 --- /dev/null +++ b/dev/lint-python @@ -0,0 +1,219 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
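+# Overview: this script byte-compiles the Python sources under python/, then runs the
+# pycodestyle check (and the pydocstyle check, when pydocstyle is installed) against them
+# using the settings in dev/tox.ini; the flake8 check is defined below but its invocation is
+# currently commented out at the bottom of the script. Usage: ./dev/lint-python (the script
+# locates the repository root from its own path, so it can be run from any directory).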
+# +# define test binaries + versions +PYDOCSTYLE_BUILD="pydocstyle" +MINIMUM_PYDOCSTYLE="3.0.0" + +FLAKE8_BUILD="flake8" +MINIMUM_FLAKE8="3.5.0" + +PYCODESTYLE_BUILD="pycodestyle" +MINIMUM_PYCODESTYLE="2.4.0" + + +function compile_python_test { + local COMPILE_STATUS= + local COMPILE_REPORT= + + if [[ ! "$1" ]]; then + echo "No python files found! Something is very wrong -- exiting." + exit 1; + fi + + # compileall: https://docs.python.org/2/library/compileall.html + echo "starting python compilation test..." + COMPILE_REPORT=$( (python3 -B -mcompileall -q -l $1) 2>&1) + COMPILE_STATUS=$? + + if [ $COMPILE_STATUS -ne 0 ]; then + echo "Python compilation failed with the following errors:" + echo "$COMPILE_REPORT" + echo "$COMPILE_STATUS" + exit "$COMPILE_STATUS" + else + echo "python compilation succeeded." + echo + fi +} + +function pycodestyle_test { + local PYCODESTYLE_STATUS= + local PYCODESTYLE_REPORT= + local RUN_LOCAL_PYCODESTYLE= + local VERSION= + local EXPECTED_PYCODESTYLE= + local PYCODESTYLE_SCRIPT_PATH="$DELTA_ROOT_DIR/dev/pycodestyle-$MINIMUM_PYCODESTYLE.py" + local PYCODESTYLE_SCRIPT_REMOTE_PATH="https://raw.githubusercontent.com/PyCQA/pycodestyle/$MINIMUM_PYCODESTYLE/pycodestyle.py" + + if [[ ! "$1" ]]; then + echo "No python files found! Something is very wrong -- exiting." + exit 1; + fi + + # check for locally installed pycodestyle & version + RUN_LOCAL_PYCODESTYLE="False" + if hash "$PYCODESTYLE_BUILD" 2> /dev/null; then + VERSION=$( $PYCODESTYLE_BUILD --version 2> /dev/null) + EXPECTED_PYCODESTYLE=$( (python3 -c 'from distutils.version import LooseVersion; + print(LooseVersion("""'${VERSION[0]}'""") >= LooseVersion("""'$MINIMUM_PYCODESTYLE'"""))')\ + 2> /dev/null) + + if [ "$EXPECTED_PYCODESTYLE" == "True" ]; then + RUN_LOCAL_PYCODESTYLE="True" + fi + fi + + # download the right version or run locally + if [ $RUN_LOCAL_PYCODESTYLE == "False" ]; then + # Get pycodestyle at runtime so that we don't rely on it being installed on the build server. + # See: https://github.com/apache/spark/pull/1744#issuecomment-50982162 + # Updated to the latest official version of pep8. pep8 is formally renamed to pycodestyle. + echo "downloading pycodestyle from $PYCODESTYLE_SCRIPT_REMOTE_PATH..." + if [ ! -e "$PYCODESTYLE_SCRIPT_PATH" ]; then + curl --silent -o "$PYCODESTYLE_SCRIPT_PATH" "$PYCODESTYLE_SCRIPT_REMOTE_PATH" + local curl_status="$?" + + if [ "$curl_status" -ne 0 ]; then + echo "Failed to download pycodestyle.py from $PYCODESTYLE_SCRIPT_REMOTE_PATH" + exit "$curl_status" + fi + fi + + echo "starting pycodestyle test..." + PYCODESTYLE_REPORT=$( (python3 "$PYCODESTYLE_SCRIPT_PATH" --config=dev/tox.ini $1) 2>&1) + PYCODESTYLE_STATUS=$? + else + # we have the right version installed, so run locally + echo "starting pycodestyle test..." + PYCODESTYLE_REPORT=$( ($PYCODESTYLE_BUILD --config=dev/tox.ini $1) 2>&1) + PYCODESTYLE_STATUS=$? + fi + + if [ $PYCODESTYLE_STATUS -ne 0 ]; then + echo "pycodestyle checks failed:" + echo "$PYCODESTYLE_REPORT" + exit "$PYCODESTYLE_STATUS" + else + echo "pycodestyle checks passed." + echo + fi +} + +function flake8_test { + local FLAKE8_VERSION= + local VERSION= + local EXPECTED_FLAKE8= + local FLAKE8_REPORT= + local FLAKE8_STATUS= + + if ! hash "$FLAKE8_BUILD" 2> /dev/null; then + echo "The flake8 command was not found." + echo "flake8 checks failed." 
+ exit 1 + fi + + FLAKE8_VERSION="$($FLAKE8_BUILD --version 2> /dev/null)" + VERSION=($FLAKE8_VERSION) + EXPECTED_FLAKE8=$( (python3 -c 'from distutils.version import LooseVersion; + print(LooseVersion("""'${VERSION[0]}'""") >= LooseVersion("""'$MINIMUM_FLAKE8'"""))') \ + 2> /dev/null) + + if [[ "$EXPECTED_FLAKE8" == "False" ]]; then + echo "\ +The minimum flake8 version needs to be $MINIMUM_FLAKE8. Your current version is $FLAKE8_VERSION + +flake8 checks failed." + exit 1 + fi + + echo "starting $FLAKE8_BUILD test..." + FLAKE8_REPORT=$( ($FLAKE8_BUILD $1 --count --select=E901,E999,F821,F822,F823 \ + --max-line-length=100 --show-source --statistics) 2>&1) + FLAKE8_STATUS=$? + + if [ "$FLAKE8_STATUS" -ne 0 ]; then + echo "flake8 checks failed:" + echo "$FLAKE8_REPORT" + echo "$FLAKE8_STATUS" + exit "$FLAKE8_STATUS" + else + echo "flake8 checks passed." + echo + fi +} + +function pydocstyle_test { + local PYDOCSTYLE_REPORT= + local PYDOCSTYLE_STATUS= + local PYDOCSTYLE_VERSION= + local EXPECTED_PYDOCSTYLE= + + # Exclude auto-generated configuration file. + local DOC_PATHS_TO_CHECK="$( cd "${DELTA_ROOT_DIR}" && find . -name "*.py" | grep -vF 'functions.py' )" + + # Check python document style, skip check if pydocstyle is not installed. + if ! hash "$PYDOCSTYLE_BUILD" 2> /dev/null; then + echo "The pydocstyle command was not found. Skipping pydocstyle checks for now." + echo + return + fi + + PYDOCSTYLE_VERSION="$($PYDOCSTYLEBUILD --version 2> /dev/null)" + EXPECTED_PYDOCSTYLE=$(python3 -c 'from distutils.version import LooseVersion; \ + print(LooseVersion("""'$PYDOCSTYLE_VERSION'""") >= LooseVersion("""'$MINIMUM_PYDOCSTYLE'"""))' \ + 2> /dev/null) + + if [[ "$EXPECTED_PYDOCSTYLE" == "False" ]]; then + echo "\ +The minimum version of pydocstyle needs to be $MINIMUM_PYDOCSTYLE. +Your current version is $PYDOCSTYLE_VERSION. +Skipping pydocstyle checks for now." + echo + return + fi + + echo "starting $PYDOCSTYLE_BUILD test..." + PYDOCSTYLE_REPORT=$( ($PYDOCSTYLE_BUILD --config=dev/tox.ini $DOC_PATHS_TO_CHECK) 2>&1) + PYDOCSTYLE_STATUS=$? + + if [ "$PYDOCSTYLE_STATUS" -ne 0 ]; then + echo "pydocstyle checks failed:" + echo "$PYDOCSTYLE_REPORT" + exit "$PYDOCSTYLE_STATUS" + else + echo "pydocstyle checks passed." + echo + fi +} + +SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" +DELTA_ROOT_DIR="$(dirname "${SCRIPT_DIR}")" + +pushd "$DELTA_ROOT_DIR" &> /dev/null + +PYTHON_SOURCE="$(find "${DELTA_ROOT_DIR}/python" -name "*.py")" + +compile_python_test "$PYTHON_SOURCE" +pycodestyle_test "$PYTHON_SOURCE" +#flake8_test "$PYTHON_SOURCE" +pydocstyle_test + +echo +echo "all lint-python tests passed!" + +popd &> /dev/null diff --git a/dev/tox.ini b/dev/tox.ini new file mode 100644 index 00000000000..b21a1c8c15a --- /dev/null +++ b/dev/tox.ini @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +[pycodestyle] +ignore=E226,E241,E305,E402,E722,E731,E741,W503,W504 +max-line-length=100 +exclude=cloudpickle.py,heapq3.py,shared.py,python/docs/conf.py,work/*/*.py,python/.eggs/*,dist/* +[pydocstyle] +ignore=D100,D101,D102,D103,D104,D105,D106,D107,D200,D201,D202,D203,D204,D205,D206,D207,D208,D209,D210,D211,D212,D213,D214,D215,D300,D301,D302,D400,D401,D402,D403,D404,D405,D406,D407,D408,D409,D410,D411,D412,D413,D414,D415,D417 diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 00000000000..10471c63bf8 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,9 @@ +## Delta Lake examples +In this folder there are examples taken from the delta.io quickstart guide and docs. They are available in both Scala and Python and can be run if the prerequisites are satisfied. + +### Prerequisites +* See [Set up Apache Spark with Delta Lake](https://docs.delta.io/latest/quick-start.html#set-up-apache-spark-with-delta-lake). + +### Instructions +* To run an example in Python run `spark-submit --packages io.delta:delta-spark_2.12:{Delta Lake version} PATH/TO/EXAMPLE` +* To run the Scala examples, `cd examples/scala` and run `./build/sbt "runMain example.{Example class name}"` e.g. `./build/sbt "runMain example.Quickstart"` diff --git a/examples/cheat_sheet/delta_lake_cheat_sheet.pdf b/examples/cheat_sheet/delta_lake_cheat_sheet.pdf new file mode 100644 index 00000000000..59a00f551ab Binary files /dev/null and b/examples/cheat_sheet/delta_lake_cheat_sheet.pdf differ diff --git a/examples/python/change_data_feed.py b/examples/python/change_data_feed.py new file mode 100644 index 00000000000..361da9c6fee --- /dev/null +++ b/examples/python/change_data_feed.py @@ -0,0 +1,143 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.sql import SparkSession +from pyspark.sql.functions import col, expr +from delta.tables import DeltaTable +import shutil + + +path = "/tmp/delta-change-data-feed/student" +otherPath = "/tmp/delta-change-data-feed/student_source" + +# Enable SQL commands and Update/Delete/Merge for the current spark session. 
+# we need to set the following configs +spark = SparkSession.builder \ + .appName("Change Data Feed") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .getOrCreate() + + +def cleanup(): + shutil.rmtree(path, ignore_errors=True) + shutil.rmtree(otherPath, ignore_errors=True) + spark.sql("DROP TABLE IF EXISTS student") + spark.sql("DROP TABLE IF EXISTS student_source") + + +def read_cdc_by_table_name(starting_version): + return spark.read.format("delta") \ + .option("readChangeFeed", "true") \ + .option("startingVersion", str(starting_version)) \ + .table("student") \ + .orderBy("_change_type", "id") + + +def stream_cdc_by_table_name(starting_version): + return spark.readStream.format("delta") \ + .option("readChangeFeed", "true") \ + .option("startingVersion", str(starting_version)) \ + .table("student") \ + .writeStream \ + .format("console") \ + .option("numRows", 1000) \ + .start() + + +cleanup() + +try: + # =============== Create student table =============== + spark.sql('''CREATE TABLE student (id INT, name STRING, age INT) + USING DELTA + PARTITIONED BY (age) + TBLPROPERTIES (delta.enableChangeDataFeed = true) + LOCATION '{0}' + '''.format(path)) + + spark.range(0, 10) \ + .selectExpr( + "CAST(id as INT) as id", + "CAST(id as STRING) as name", + "CAST(id % 4 + 18 as INT) as age") \ + .write.format("delta").mode("append").save(path) # v1 + + # =============== Show table data + changes =============== + + print("(v1) Initial Table") + spark.read.format("delta").load(path).orderBy("id").show() + + print("(v1) CDC changes") + read_cdc_by_table_name(1).show() + + table = DeltaTable.forPath(spark, path) + + # =============== Perform UPDATE =============== + + print("(v2) Updated id -> id + 1") + table.update(set={"id": expr("id + 1")}) # v2 + read_cdc_by_table_name(2).show() + + # =============== Perform DELETE =============== + + print("(v3) Deleted where id >= 7") + table.delete(condition=expr("id >= 7")) # v3 + read_cdc_by_table_name(3).show() + + # =============== Perform partition DELETE =============== + + print("(v4) Deleted where age = 18") + table.delete(condition=expr("age = 18")) # v4, partition delete + read_cdc_by_table_name(4).show() + + # =============== Create source table for MERGE =============== + + spark.sql('''CREATE TABLE student_source (id INT, name STRING, age INT) + USING DELTA + LOCATION '{0}' + '''.format(otherPath)) + spark.range(0, 3) \ + .selectExpr( + "CAST(id as INT) as id", + "CAST(id as STRING) as name", + "CAST(id % 4 + 18 as INT) as age") \ + .write.format("delta").mode("append").saveAsTable("student_source") + source = spark.sql("SELECT * FROM student_source") + + # =============== Perform MERGE =============== + + table.alias("target") \ + .merge( + source.alias("source"), + "target.id = source.id")\ + .whenMatchedUpdate(set={"id": "source.id", "age": "source.age + 10"}) \ + .whenNotMatchedInsertAll() \ + .execute() # v5 + print("(v5) Merged with a source table") + read_cdc_by_table_name(5).show() + + # =============== Stream changes =============== + + print("Streaming by table name") + cdfStream = stream_cdc_by_table_name(0) + cdfStream.awaitTermination(10) + cdfStream.stop() + +finally: + cleanup() + spark.stop() diff --git a/examples/python/image_storage.py b/examples/python/image_storage.py new file mode 100644 index 00000000000..175904a3d7b --- /dev/null +++ b/examples/python/image_storage.py 
@@ -0,0 +1,90 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This example shows +# 1 - How to load the TensorFlow Flowers Images into a dataframe +# 2 - Manipulate the dataframe +# 3 - Write the dataframe to a Delta Lake table +# 4 - Read the new Delta Lake table + +import pyspark.sql.functions as fn +from pyspark.sql import SparkSession +from delta import configure_spark_with_delta_pip +import shutil +from urllib import request +import os + +# To run this example directly, set up the spark session using the following 2 commands +# You will need to run using Python3 +# You will also need to install the python packages pyspark and delta-spark, we advise using pip +builder = ( + SparkSession.builder + .appName('image_storage') + .config('spark.sql.extensions', 'io.delta.sql.DeltaSparkSessionExtension') + .config('spark.sql.catalog.spark_catalog', 'org.apache.spark.sql.delta.catalog.DeltaCatalog') +) + +# This is only for testing staged release artifacts. Ignore this completely. +if os.getenv('EXTRA_MAVEN_REPO'): + builder = builder.config("spark.jars.repositories", os.getenv('EXTRA_MAVEN_REPO')) + +spark = configure_spark_with_delta_pip(builder).getOrCreate() + +# Flowers dataset from the TensorFlow team - https://www.tensorflow.org/datasets/catalog/tf_flowers +imageGzipUrl = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz" +imageGzipPath = "/tmp/flower_photos.tgz" +imagePath = "/tmp/image-folder" +deltaPath = "/tmp/delta-table" + +# Clear previous run's zipper file, image folder and delta tables +if os.path.exists(imageGzipPath): + os.remove(imageGzipPath) +shutil.rmtree(imagePath, ignore_errors=True) +shutil.rmtree(deltaPath, ignore_errors=True) + +request.urlretrieve(imageGzipUrl, imageGzipPath) +shutil.unpack_archive(imageGzipPath, imagePath) + +# read the images from the flowers dataset +images = spark.read.format("binaryFile").\ + option("recursiveFileLookup", "true").\ + option("pathGlobFilter", "*.jpg").\ + load(imagePath) + +# Knowing the file path, extract the flower type and filename using substring_index +# Remember, Spark dataframes are immutable, here we are just reusing the images dataframe +images = images.withColumn("flowerType_filename", fn.substring_index(images.path, "/", -2)) +images = images.withColumn("flowerType", fn.substring_index(images.flowerType_filename, "/", 1)) +images = images.withColumn("filename", fn.substring_index(images.flowerType_filename, "/", -1)) +images = images.drop("flowerType_filename") +images.show() + +# Select the columns we want to write out to +df = images.select("path", "content", "flowerType", "filename").repartition(4) +df.show() + +# Write out the delta table to the given path, this will overwrite any table that is currently there +df.write.format("delta").mode("overwrite").save(deltaPath) + +# Reads the delta table that was just written +dfDelta = spark.read.format("delta").load(deltaPath) +dfDelta.show() + +# Cleanup +if 
os.path.exists(imageGzipPath): + os.remove(imageGzipPath) +shutil.rmtree(imagePath) +shutil.rmtree(deltaPath) diff --git a/examples/python/missing_delta_storage_jar.py b/examples/python/missing_delta_storage_jar.py new file mode 100644 index 00000000000..c59701c26a1 --- /dev/null +++ b/examples/python/missing_delta_storage_jar.py @@ -0,0 +1,44 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.sql import SparkSession +import shutil + +path = "/tmp/delta-table/missing_logstore_jar" + +try: + # Clear any previous runs + shutil.rmtree(path, ignore_errors=True) + + spark = SparkSession.builder \ + .appName("missing logstore jar") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .getOrCreate() + + spark.range(0, 5).write.format("delta").save(path) + +except Exception as e: + assert "Please ensure that the delta-storage dependency is included." in str(e) + print("SUCCESS - error was thrown, as expected") + +else: + assert False, "The write to the delta table should have thrown without the delta-storage JAR." + +finally: + # cleanup + shutil.rmtree(path, ignore_errors=True) diff --git a/examples/python/quickstart.py b/examples/python/quickstart.py new file mode 100644 index 00000000000..f2688aeac03 --- /dev/null +++ b/examples/python/quickstart.py @@ -0,0 +1,87 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.sql import SparkSession +from pyspark.sql.functions import col, expr +from delta.tables import DeltaTable +import shutil + +# Clear any previous runs +shutil.rmtree("/tmp/delta-table", ignore_errors=True) + +# Enable SQL commands and Update/Delete/Merge for the current spark session. 
+# we need to set the following configs +spark = SparkSession.builder \ + .appName("quickstart") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .getOrCreate() + +# Create a table +print("############# Creating a table ###############") +data = spark.range(0, 5) +data.write.format("delta").save("/tmp/delta-table") + +# Read the table +print("############ Reading the table ###############") +df = spark.read.format("delta").load("/tmp/delta-table") +df.show() + +# Upsert (merge) new data +print("########### Upsert new data #############") +newData = spark.range(0, 20) + +deltaTable = DeltaTable.forPath(spark, "/tmp/delta-table") + +deltaTable.alias("oldData")\ + .merge( + newData.alias("newData"), + "oldData.id = newData.id")\ + .whenMatchedUpdate(set={"id": col("newData.id")})\ + .whenNotMatchedInsert(values={"id": col("newData.id")})\ + .execute() + +deltaTable.toDF().show() + +# Update table data +print("########## Overwrite the table ###########") +data = spark.range(5, 10) +data.write.format("delta").mode("overwrite").save("/tmp/delta-table") +deltaTable.toDF().show() + +deltaTable = DeltaTable.forPath(spark, "/tmp/delta-table") + +# Update every even value by adding 100 to it +print("########### Update to the table(add 100 to every even value) ##############") +deltaTable.update( + condition=expr("id % 2 == 0"), + set={"id": expr("id + 100")}) + +deltaTable.toDF().show() + +# Delete every even value +print("######### Delete every even value ##############") +deltaTable.delete(condition=expr("id % 2 == 0")) +deltaTable.toDF().show() + +# Read old version of data using time travel +print("######## Read old data using time travel ############") +df = spark.read.format("delta").option("versionAsOf", 0).load("/tmp/delta-table") +df.show() + +# cleanup +shutil.rmtree("/tmp/delta-table") diff --git a/examples/python/quickstart_sql.py b/examples/python/quickstart_sql.py new file mode 100644 index 00000000000..675abb575de --- /dev/null +++ b/examples/python/quickstart_sql.py @@ -0,0 +1,83 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.sql import SparkSession + +tableName = "tbltestpython" + +# Enable SQL/DML commands and Metastore tables for the current spark session. 
+# We need to set the following configs + +spark = SparkSession.builder \ + .appName("quickstart_sql") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .getOrCreate() + +# Clear any previous runs +spark.sql("DROP TABLE IF EXISTS " + tableName) +spark.sql("DROP TABLE IF EXISTS newData") + +try: + # Create a table + print("############# Creating a table ###############") + spark.sql("CREATE TABLE %s(id LONG) USING delta" % tableName) + spark.sql("INSERT INTO %s VALUES 0, 1, 2, 3, 4" % tableName) + + # Read the table + print("############ Reading the table ###############") + spark.sql("SELECT * FROM %s" % tableName).show() + + # Upsert (merge) new data + print("########### Upsert new data #############") + spark.sql("CREATE TABLE newData(id LONG) USING parquet") + spark.sql("INSERT INTO newData VALUES 3, 4, 5, 6") + + spark.sql('''MERGE INTO {0} USING newData + ON {0}.id = newData.id + WHEN MATCHED THEN + UPDATE SET {0}.id = newData.id + WHEN NOT MATCHED THEN INSERT * + '''.format(tableName)) + + spark.sql("SELECT * FROM %s" % tableName).show() + + # Update table data + print("########## Overwrite the table ###########") + spark.sql("INSERT OVERWRITE %s select * FROM (VALUES 5, 6, 7, 8, 9) x (id)" % tableName) + spark.sql("SELECT * FROM %s" % tableName).show() + + # Update every even value by adding 100 to it + print("########### Update to the table(add 100 to every even value) ##############") + spark.sql("UPDATE {0} SET id = (id + 100) WHERE (id % 2 == 0)".format(tableName)) + spark.sql("SELECT * FROM %s" % tableName).show() + + # Delete every even value + print("######### Delete every even value ##############") + spark.sql("DELETE FROM {0} WHERE (id % 2 == 0)".format(tableName)) + spark.sql("SELECT * FROM %s" % tableName).show() + + # Read old version of data using time travel + print("######## Read old data using time travel ############") + df = spark.read.format("delta").option("versionAsOf", 0).table(tableName) + df.show() + +finally: + # cleanup + spark.sql("DROP TABLE " + tableName) + spark.sql("DROP TABLE IF EXISTS newData") + spark.stop() diff --git a/examples/python/quickstart_sql_on_paths.py b/examples/python/quickstart_sql_on_paths.py new file mode 100644 index 00000000000..18f1fe061b7 --- /dev/null +++ b/examples/python/quickstart_sql_on_paths.py @@ -0,0 +1,64 @@ + +from pyspark.sql import SparkSession +import shutil + +table_dir = "/tmp/delta-table" +# Clear any previous runs +shutil.rmtree(table_dir, ignore_errors=True) + +# Enable SQL/DML commands and Metastore tables for the current spark session. 
+# We need to set the following configs + +spark = SparkSession.builder \ + .appName("quickstart_sql_on_paths") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .getOrCreate() + +# Clear any previous runs +spark.sql("DROP TABLE IF EXISTS newData") + +try: + # Create a table + print("############# Creating a table ###############") + spark.sql("CREATE TABLE delta.`%s`(id LONG) USING delta" % table_dir) + spark.sql("INSERT INTO delta.`%s` VALUES 0, 1, 2, 3, 4" % table_dir) + + # Read the table + print("############ Reading the table ###############") + spark.sql("SELECT * FROM delta.`%s`" % table_dir).show() + + # Upsert (merge) new data + print("########### Upsert new data #############") + spark.sql("CREATE TABLE newData(id LONG) USING parquet") + spark.sql("INSERT INTO newData VALUES 3, 4, 5, 6") + + spark.sql('''MERGE INTO delta.`{0}` AS data USING newData + ON data.id = newData.id + WHEN MATCHED THEN + UPDATE SET data.id = newData.id + WHEN NOT MATCHED THEN INSERT * + '''.format(table_dir)) + + spark.sql("SELECT * FROM delta.`%s`" % table_dir).show() + + # Update table data + print("########## Overwrite the table ###########") + spark.sql("INSERT OVERWRITE delta.`%s` select * FROM (VALUES 5, 6, 7, 8, 9) x (id)" % table_dir) + spark.sql("SELECT * FROM delta.`%s`" % table_dir).show() + + # Update every even value by adding 100 to it + print("########### Update to the table(add 100 to every even value) ##############") + spark.sql("UPDATE delta.`{0}` SET id = (id + 100) WHERE (id % 2 == 0)".format(table_dir)) + spark.sql("SELECT * FROM delta.`%s`" % table_dir).show() + + # Delete every even value + print("######### Delete every even value ##############") + spark.sql("DELETE FROM delta.`{0}` WHERE (id % 2 == 0)".format(table_dir)) + spark.sql("SELECT * FROM delta.`%s`" % table_dir).show() + +finally: + # cleanup + spark.sql("DROP TABLE IF EXISTS newData") + spark.stop() diff --git a/examples/python/streaming.py b/examples/python/streaming.py new file mode 100644 index 00000000000..636d2fb02e0 --- /dev/null +++ b/examples/python/streaming.py @@ -0,0 +1,120 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.sql import SparkSession +from pyspark.sql.functions import col +from delta.tables import DeltaTable +import shutil +import random + + +# Enable SQL commands and Update/Delete/Merge for the current spark session. 
+# we need to set the following configs +spark = SparkSession.builder \ + .appName("streaming") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .getOrCreate() + +shutil.rmtree("/tmp/delta-streaming/", ignore_errors=True) + +# Create a table(key, value) of some data +data = spark.range(8) +data = data.withColumn("value", data.id + random.randint(0, 5000)) +data.write.format("delta").save("/tmp/delta-streaming/delta-table") + +# Stream writes to the table +print("####### Streaming write ######") +streamingDf = spark.readStream.format("rate").load() +stream = streamingDf.selectExpr("value as id").writeStream\ + .format("delta")\ + .option("checkpointLocation", "/tmp/delta-streaming/checkpoint")\ + .start("/tmp/delta-streaming/delta-table2") +stream.awaitTermination(10) +stream.stop() + +# Stream reads from a table +print("##### Reading from stream ######") +stream2 = spark.readStream.format("delta").load("/tmp/delta-streaming/delta-table2")\ + .writeStream\ + .format("console")\ + .start() +stream2.awaitTermination(10) +stream2.stop() + +# Streaming aggregates in Update mode +print("####### Streaming upgrades in update mode ########") + + +# Function to upsert microBatchOutputDF into Delta Lake table using merge +def upsertToDelta(microBatchOutputDF, batchId): + t = deltaTable.alias("t").merge(microBatchOutputDF.alias("s"), "s.id = t.id")\ + .whenMatchedUpdateAll()\ + .whenNotMatchedInsertAll()\ + .execute() + + +streamingAggregatesDF = spark.readStream.format("rate").load()\ + .withColumn("id", col("value") % 10)\ + .drop("timestamp") +# Write the output of a streaming aggregation query into Delta Lake table +deltaTable = DeltaTable.forPath(spark, "/tmp/delta-streaming/delta-table") +print("############# Original Delta Table ###############") +deltaTable.toDF().show() +stream3 = streamingAggregatesDF.writeStream\ + .format("delta") \ + .foreachBatch(upsertToDelta) \ + .outputMode("update") \ + .start() +stream3.awaitTermination(10) +stream3.stop() +print("########### DeltaTable after streaming upsert #########") +deltaTable.toDF().show() + +# Streaming append and concurrent repartition using data change = false +# tbl1 is the sink and tbl2 is the source +print("############ Streaming appends with concurrent table repartition ##########") +tbl1 = "/tmp/delta-streaming/delta-table4" +tbl2 = "/tmp/delta-streaming/delta-table5" +numRows = 10 +spark.range(numRows).write.mode("overwrite").format("delta").save(tbl1) +spark.read.format("delta").load(tbl1).show() +spark.range(numRows, numRows * 10).write.mode("overwrite").format("delta").save(tbl2) + + +# Start reading tbl2 as a stream and do a streaming write to tbl1 +# Prior to Delta 0.5.0 this would throw StreamingQueryException: Detected a data update in the +# source table. This is currently not supported. 
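+# The repartition below rewrites tbl2's files with dataChange = false, marking the commit as
+# containing no logical data changes, so the streaming read of tbl2 started here keeps running
+# instead of failing on a detected data update in its source table.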
+stream4 = spark.readStream.format("delta").load(tbl2).writeStream.format("delta")\ + .option("checkpointLocation", "/tmp/delta-streaming/checkpoint/tbl1") \ + .outputMode("append") \ + .start(tbl1) + +# repartition table while streaming job is running +spark.read.format("delta").load(tbl2).repartition(10).write\ + .format("delta")\ + .mode("overwrite")\ + .option("dataChange", "false")\ + .save(tbl2) + +stream4.awaitTermination(10) +stream4.stop() +print("######### After streaming write #########") +spark.read.format("delta").load(tbl1).show() + +# cleanup +shutil.rmtree("/tmp/delta-streaming/", ignore_errors=True) diff --git a/examples/python/table_exists.py b/examples/python/table_exists.py new file mode 100644 index 00000000000..8102d9d7404 --- /dev/null +++ b/examples/python/table_exists.py @@ -0,0 +1,57 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.sql import SparkSession +from pyspark.sql.utils import AnalysisException +import shutil + +def exists(spark, filepath): + """Checks if a delta table exists at `filepath`""" + try: + spark.read.load(path=filepath, format="delta") + except AnalysisException as exception: + if "is not a Delta table" in exception.desc or "Path does not exist" in exception.desc: + return False + raise exception + return True + +spark = SparkSession.builder \ + .appName("table_exists") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .getOrCreate() + +filepath = "/tmp/table_exists" + +# Clear any previous runs +shutil.rmtree(filepath, ignore_errors=True) + +# Verify table doesn't exist yet +print(f"Verifying table does not exist at {filepath}") +assert not exists(spark, filepath) + +# Create a delta table at filepath +print(f"Creating delta table at {filepath}") +data = spark.range(0, 5) +data.write.format("delta").save(filepath) + +# Verify table now exists +print(f"Verifying table exists at {filepath}") +assert exists(spark, filepath) + +# Clean up +shutil.rmtree(filepath) diff --git a/examples/python/using_with_pip.py b/examples/python/using_with_pip.py new file mode 100644 index 00000000000..16983190c4d --- /dev/null +++ b/examples/python/using_with_pip.py @@ -0,0 +1,62 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import shutil + +# flake8: noqa +import os +from pyspark.sql import SparkSession +from delta import * + +builder = SparkSession.builder \ + .appName("with-pip") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + +# This is only for testing staged release artifacts. Ignore this completely. +if os.getenv('EXTRA_MAVEN_REPO'): + builder = builder.config("spark.jars.repositories", os.getenv('EXTRA_MAVEN_REPO')) + +# This configuration tells Spark to download the Delta Lake JAR that is needed to operate +# in Spark. Use this only when the Pypi package Delta Lake is locally installed with pip. +# This configuration is not needed if the this python program is executed with +# spark-submit or pyspark shell with the --package arguments. +spark = configure_spark_with_delta_pip(builder).getOrCreate() + + +# Clear previous run's delta-tables +shutil.rmtree("/tmp/delta-table", ignore_errors=True) + +print("########### Create a Parquet table ##############") +data = spark.range(0, 5) +data.write.format("parquet").save("/tmp/delta-table") + +print("########### Convert to Delta ###########") +DeltaTable.convertToDelta(spark, "parquet.`/tmp/delta-table`") + +print("########### Read table with DataFrames ###########") +df = spark.read.format("delta").load("/tmp/delta-table") +df.show() + +print("########### Read table with DeltaTable ###########") +deltaTable = DeltaTable.forPath(spark, "/tmp/delta-table") +deltaTable.toDF().show() + +spark.stop() + +# cleanup +shutil.rmtree("/tmp/delta-table") diff --git a/examples/python/utilities.py b/examples/python/utilities.py new file mode 100644 index 00000000000..13ed45aba51 --- /dev/null +++ b/examples/python/utilities.py @@ -0,0 +1,68 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
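+# This example converts an existing Parquet table to Delta in place and then walks through the
+# table utilities: vacuum(), history(), detail(), manifest generation, and the equivalent SQL
+# VACUUM and DESCRIBE HISTORY commands.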
+# + +from pyspark.sql import SparkSession +from delta.tables import DeltaTable +import shutil + +spark = SparkSession.builder \ + .appName("utilities") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .config("spark.sql.sources.parallelPartitionDiscovery.parallelism", "4") \ + .getOrCreate() + +# Clear previous run's delta-tables +shutil.rmtree("/tmp/delta-table", ignore_errors=True) + +# Create a table +print("########### Create a Parquet table ##############") +data = spark.range(0, 5) +data.write.format("parquet").save("/tmp/delta-table") + +# Convert to delta +print("########### Convert to Delta ###########") +DeltaTable.convertToDelta(spark, "parquet.`/tmp/delta-table`") + +# Read the table +df = spark.read.format("delta").load("/tmp/delta-table") +df.show() + +deltaTable = DeltaTable.forPath(spark, "/tmp/delta-table") +print("######## Vacuum the table ########") +deltaTable.vacuum() + +print("######## Describe history for the table ######") +deltaTable.history().show() + +print("######## Describe details for the table ######") +deltaTable.detail().show() + +# Generate manifest +print("######## Generating manifest ######") +deltaTable.generate("SYMLINK_FORMAT_MANIFEST") + +# SQL Vacuum +print("####### SQL Vacuum #######") +spark.sql("VACUUM '%s' RETAIN 169 HOURS" % "/tmp/delta-table").collect() + +# SQL describe history +print("####### SQL Describe History ########") +print(spark.sql("DESCRIBE HISTORY delta.`%s`" % ("/tmp/delta-table")).collect()) + +# cleanup +shutil.rmtree("/tmp/delta-table") diff --git a/examples/scala/.scalafmt.conf b/examples/scala/.scalafmt.conf new file mode 100644 index 00000000000..0956a691c8c --- /dev/null +++ b/examples/scala/.scalafmt.conf @@ -0,0 +1,2 @@ +version = "3.4.0" +runner.dialect = scala213 \ No newline at end of file diff --git a/examples/scala/README.md b/examples/scala/README.md new file mode 100644 index 00000000000..4068d8f67c2 --- /dev/null +++ b/examples/scala/README.md @@ -0,0 +1,27 @@ +# delta scala examples + +This directory contains a set of spark & delta examples. + +Execute `./build/sbt run` and choose which main class to run. + +``` +Multiple main classes detected. Select one to run: + [1] example.Quickstart + [2] example.QuickstartSQL + [3] example.QuickstartSQLOnPaths + [4] example.Streaming + [5] example.Utilities +``` + +You can specify delta lake version and scala version with environment variables `DELTA_VERSION`, `SCALA_VERSION` or editing `build.sbt`. + +If you are faced with `java.lang.IllegalAccessError: class org.apache.spark.storage.StorageUtils$ (in unnamed module @0x******) cannot access class sun.nio.ch.DirectBuffer (in module java.base) because module java.base does not export sun.nio.ch to unnamed module` when you use Java 9 or later, add jvm option in `build.sbt`. + +```diff +lazy val root = (project in file(".")) + .settings( + run / fork := true, ++ run / javaOptions ++= Seq( ++ "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED" ++ ), +``` diff --git a/examples/scala/build.sbt b/examples/scala/build.sbt new file mode 100644 index 00000000000..f8300311f0b --- /dev/null +++ b/examples/scala/build.sbt @@ -0,0 +1,136 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import scala.util.{Success, Try} +name := "example" +organization := "com.example" +organizationName := "example" + +val scala212 = "2.12.17" +val scala213 = "2.13.8" +val deltaVersion = "3.0.0" +val icebergVersion = "1.4.1" + +def getMajorMinor(version: String): (Int, Int) = { + val majorMinor = Try { + val splitVersion = version.split('.') + (splitVersion(0).toInt, splitVersion(1).toInt) + } + majorMinor match { + case Success(_) => (majorMinor.get._1, majorMinor.get._2) + case _ => + throw new RuntimeException(s"Unsupported delta version: $version. " + + s"Please check https://docs.delta.io/latest/releases.html") + } +} +val lookupSparkVersion: PartialFunction[(Int, Int), String] = { + // versions 3.0.0 and above + case (major, minor) if major >= 3 => "3.5.0" + // versions 2.4.x + case (major, minor) if major == 2 && minor == 4 => "3.4.0" + // versions 2.3.x + case (major, minor) if major == 2 && minor == 3 => "3.3.2" + // versions 2.2.x + case (major, minor) if major == 2 && minor == 2 => "3.3.1" + // versions 2.1.x + case (major, minor) if major == 2 && minor == 1 => "3.3.0" + // versions 1.0.0 to 2.0.x + case (major, minor) if major == 1 || (major == 2 && minor == 0) => "3.2.1" + // versions 0.7.x to 0.8.x + case (major, minor) if major == 0 && (minor == 7 || minor == 8) => "3.0.2" + // versions below 0.7 + case (major, minor) if major == 0 && minor < 7 => "2.4.4" +} + +val getScalaVersion = settingKey[String]( + s"get scala version from environment variable SCALA_VERSION. If it doesn't exist, use $scala213" +) +val getDeltaVersion = settingKey[String]( + s"get delta version from environment variable DELTA_VERSION. If it doesn't exist, use $deltaVersion" +) +val getDeltaArtifactName = settingKey[String]( + s"get delta artifact name based on the delta version. either `delta-core` or `delta-spark`." +) +val getIcebergSparkRuntimeArtifactName = settingKey[String]( + s"get iceberg-spark-runtime name based on the delta version." +) +getScalaVersion := { + sys.env.get("SCALA_VERSION") match { + case Some("2.12") => + scala212 + case Some("2.13") => + scala213 + case Some(v) => + println( + s"[warn] Invalid SCALA_VERSION. Expected 2.12 or 2.13 but got $v. Fallback to $scala213." 
+ ) + scala213 + case None => + scala213 + } +} + +scalaVersion := getScalaVersion.value +version := "0.1.0" + +getDeltaVersion := { + sys.env.get("DELTA_VERSION") match { + case Some(v) => + println(s"Using Delta version $v") + v + case None => + deltaVersion + } +} + +getDeltaArtifactName := { + val deltaVersion = getDeltaVersion.value + if (deltaVersion.charAt(0).asDigit >= 3) "delta-spark" else "delta-core" +} + +getIcebergSparkRuntimeArtifactName := { + val (expMaj, expMin) = getMajorMinor(lookupSparkVersion.apply( + getMajorMinor(getDeltaVersion.value))) + s"iceberg-spark-runtime-$expMaj.$expMin" +} + +lazy val extraMavenRepo = sys.env.get("EXTRA_MAVEN_REPO").toSeq.map { repo => + resolvers += "Delta" at repo +} + +lazy val root = (project in file(".")) + .settings( + run / fork := true, + name := "hello-world", + crossScalaVersions := Seq(scala212, scala213), + libraryDependencies ++= Seq( + "io.delta" %% getDeltaArtifactName.value % getDeltaVersion.value, + "io.delta" %% "delta-iceberg" % getDeltaVersion.value, + "org.apache.spark" %% "spark-sql" % lookupSparkVersion.apply( + getMajorMinor(getDeltaVersion.value) + ), + "org.apache.spark" %% "spark-hive" % lookupSparkVersion.apply( + getMajorMinor(getDeltaVersion.value) + ), + "org.apache.iceberg" %% getIcebergSparkRuntimeArtifactName.value % icebergVersion, + "org.apache.iceberg" % "iceberg-hive-metastore" % icebergVersion + ), + extraMavenRepo, + resolvers += Resolver.mavenLocal, + scalacOptions ++= Seq( + "-deprecation", + "-feature" + ) + ) diff --git a/examples/scala/build/sbt b/examples/scala/build/sbt new file mode 100755 index 00000000000..044a2929bde --- /dev/null +++ b/examples/scala/build/sbt @@ -0,0 +1,183 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This file contains code from the Apache Spark project (original license above). +# It contains modifications, which are licensed as follows: +# + +# +# Copyright (2021) The Delta Lake Project Authors. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so +# that we can run Hive to generate the golden answer. This is not required for normal development +# or testing. 
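+# Example (only needed when generating golden answers for Hive tests): export HIVE_HOME=/path/to/hive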
+if [ -n "$HIVE_HOME" ]; then + for i in "$HIVE_HOME"/lib/* + do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i" + done + export HADOOP_CLASSPATH +fi + +realpath () { +( + TARGET_FILE="$1" + + cd "$(dirname "$TARGET_FILE")" + TARGET_FILE="$(basename "$TARGET_FILE")" + + COUNT=0 + while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ] + do + TARGET_FILE="$(readlink "$TARGET_FILE")" + cd $(dirname "$TARGET_FILE") + TARGET_FILE="$(basename $TARGET_FILE)" + COUNT=$(($COUNT + 1)) + done + + echo "$(pwd -P)/"$TARGET_FILE"" +) +} + +if [[ "$JENKINS_URL" != "" ]]; then + # Make Jenkins use Google Mirror first as Maven Central may ban us + SBT_REPOSITORIES_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories" + export SBT_OPTS="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_CONFIG" +fi + +. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash + + +declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" +declare -r sbt_opts_file=".sbtopts" +declare -r etc_sbt_opts_file="/etc/sbt/sbtopts" + +usage() { + cat < path to global settings/plugins directory (default: ~/.sbt) + -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) + -ivy path to local Ivy repository (default: ~/.ivy2) + -mem set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem)) + -no-share use all local caches; no sharing + -no-global uses global caches, but does not use global ~/.sbt directory. + -jvm-debug Turn on JVM debugging, open at the given port. + -batch Disable interactive mode + + # sbt version (default: from project/build.properties if present, else latest release) + -sbt-version use the specified version of sbt + -sbt-jar use the specified jar as the sbt launcher + -sbt-rc use an RC version of sbt + -sbt-snapshot use a snapshot version of sbt + + # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) + -java-home alternate JAVA_HOME + + # jvm options and output control + JAVA_OPTS environment variable, if unset uses "$java_opts" + SBT_OPTS environment variable, if unset uses "$default_sbt_opts" + .sbtopts if this file exists in the current directory, it is + prepended to the runner args + /etc/sbt/sbtopts if this file exists, it is prepended to the runner args + -Dkey=val pass -Dkey=val directly to the java runtime + -J-X pass option -X directly to the java runtime + (-J is stripped) + -S-X add -X to sbt's scalacOptions (-S is stripped) + -PmavenProfiles Enable a maven profile for the build. + +In the case of duplicated or conflicting options, the order above +shows precedence: JAVA_OPTS lowest, command line options highest. +EOM +} + +process_my_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; + -no-share) addJava "$noshare_opts" && shift ;; + -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;; + -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; + -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;; + -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; + -batch) exec /dev/null) + if [[ ! $? ]]; then + saved_stty="" + fi +} + +saveSttySettings +trap onExit INT + +run "$@" + +exit_status=$? 
+onExit diff --git a/examples/scala/build/sbt-config/repositories b/examples/scala/build/sbt-config/repositories new file mode 100644 index 00000000000..14aa46c66de --- /dev/null +++ b/examples/scala/build/sbt-config/repositories @@ -0,0 +1,11 @@ +[repositories] + local + local-preloaded-ivy: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/}, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext] + local-preloaded: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/} + gcs-maven-central-mirror: https://maven-central.storage-download.googleapis.com/repos/central/data/ + maven-central + typesafe-ivy-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly + sbt-ivy-snapshots: https://repo.scala-sbt.org/scalasbt/ivy-snapshots/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly + sbt-plugin-releases: https://repo.scala-sbt.org/scalasbt/sbt-plugin-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext] + repos-spark-packages: https://repos.spark-packages.org + typesafe-releases: https://repo.typesafe.com/typesafe/releases/ diff --git a/examples/scala/build/sbt-launch-lib.bash b/examples/scala/build/sbt-launch-lib.bash new file mode 100755 index 00000000000..8f77812a0e1 --- /dev/null +++ b/examples/scala/build/sbt-launch-lib.bash @@ -0,0 +1,189 @@ +#!/usr/bin/env bash +# + +# A library to simplify using the SBT launcher from other packages. +# Note: This should be used by tools like giter8/conscript etc. + +# TODO - Should we merge the main SBT script with this library? + +if test -z "$HOME"; then + declare -r script_dir="$(dirname "$script_path")" +else + declare -r script_dir="$HOME/.sbt" +fi + +declare -a residual_args +declare -a java_args +declare -a scalac_args +declare -a sbt_commands +declare -a maven_profiles + +if test -x "$JAVA_HOME/bin/java"; then + echo -e "Using $JAVA_HOME as default JAVA_HOME." + echo "Note, this will be overridden by -java-home if it is set." + declare java_cmd="$JAVA_HOME/bin/java" +else + declare java_cmd=java +fi + +echoerr () { + echo 1>&2 "$@" +} +vlog () { + [[ $verbose || $debug ]] && echoerr "$@" +} +dlog () { + [[ $debug ]] && echoerr "$@" +} + +acquire_sbt_jar () { + SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties` + URL1=${DEFAULT_ARTIFACT_REPOSITORY:-https://repo1.maven.org/maven2/}org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar + JAR=build/sbt-launch-${SBT_VERSION}.jar + + sbt_jar=$JAR + + if [[ ! -f "$sbt_jar" ]]; then + # Download sbt launch jar if it hasn't been downloaded yet + if [ ! -f "${JAR}" ]; then + # Download + printf "Attempting to fetch sbt\n" + JAR_DL="${JAR}.part" + if [ $(command -v curl) ]; then + curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\ + mv "${JAR_DL}" "${JAR}" + elif [ $(command -v wget) ]; then + wget --quiet ${URL1} -O "${JAR_DL}" &&\ + mv "${JAR_DL}" "${JAR}" + else + printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" + exit -1 + fi + fi + if [ ! -f "${JAR}" ]; then + # We failed to download + printf "Our attempt to download sbt locally to ${JAR} failed. 
Please install sbt manually from http://www.scala-sbt.org/\n" + exit -1 + fi + printf "Launching sbt from ${JAR}\n" + fi +} + +execRunner () { + # print the arguments one to a line, quoting any containing spaces + [[ $verbose || $debug ]] && echo "# Executing command line:" && { + for arg; do + if printf "%s\n" "$arg" | grep -q ' '; then + printf "\"%s\"\n" "$arg" + else + printf "%s\n" "$arg" + fi + done + echo "" + } + + "$@" +} + +addJava () { + dlog "[addJava] arg = '$1'" + java_args=( "${java_args[@]}" "$1" ) +} + +enableProfile () { + dlog "[enableProfile] arg = '$1'" + maven_profiles=( "${maven_profiles[@]}" "$1" ) + export SBT_MAVEN_PROFILES="${maven_profiles[@]}" +} + +addSbt () { + dlog "[addSbt] arg = '$1'" + sbt_commands=( "${sbt_commands[@]}" "$1" ) +} +addResidual () { + dlog "[residual] arg = '$1'" + residual_args=( "${residual_args[@]}" "$1" ) +} +addDebugger () { + addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1" +} + +# a ham-fisted attempt to move some memory settings in concert +# so they need not be dicked around with individually. +get_mem_opts () { + local mem=${1:-1000} + local perm=$(( $mem / 4 )) + (( $perm > 256 )) || perm=256 + (( $perm < 4096 )) || perm=4096 + local codecache=$(( $perm / 2 )) + + echo "-Xms${mem}m -Xmx${mem}m -XX:ReservedCodeCacheSize=${codecache}m" +} + +require_arg () { + local type="$1" + local opt="$2" + local arg="$3" + if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then + echo "$opt requires <$type> argument" 1>&2 + exit 1 + fi +} + +is_function_defined() { + declare -f "$1" > /dev/null +} + +process_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -h|-help) usage; exit 1 ;; + -v|-verbose) verbose=1 && shift ;; + -d|-debug) debug=1 && shift ;; + + -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;; + -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;; + -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;; + -batch) exec + val file = new File(p) + if (file.exists()) FileUtils.deleteDirectory(file) + } + spark.sql(s"DROP TABLE IF EXISTS student") + spark.sql(s"DROP TABLE IF EXISTS student_source") + } + + // Note: one could also read by path using `.load(path)` + def readCDCByTableName(startingVersion: Int): DataFrame = { + spark.read.format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion.toString) + .table("student") + .orderBy("_change_type", "id") + } + + // Note: one could also stream by path using `.load(path)` + def streamCDCByTableName(startingVersion: Int): StreamingQuery = { + spark.readStream.format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion.toString) + .table("student") + .writeStream + .format("console") + .option("numRows", 1000) + .start() + } + + cleanup() + + try { + // =============== Create student table =============== + + spark.sql( + s""" + |CREATE TABLE student (id INT, name STRING, age INT) + |USING DELTA + |PARTITIONED BY (age) + |TBLPROPERTIES (delta.enableChangeDataFeed = true) + |LOCATION '$path'""".stripMargin) // v0 + + spark.range(0, 10) + .selectExpr( + "CAST(id as INT) as id", + "CAST(id as STRING) as name", + "CAST(id % 4 + 18 as INT) as age") + .write.format("delta").mode("append").save(path) // v1 + + // =============== Show table data + changes =============== + + println("(v1) Initial Table") + spark.read.format("delta").load(path).orderBy("id").show() + + println("(v1) CDC changes") + readCDCByTableName(1).show() + + val table 
= io.delta.tables.DeltaTable.forPath(path) + + // =============== Perform UPDATE =============== + + println("(v2) Updated id -> id + 1") + table.update(Map("id" -> expr("id + 1"))) // v2 + readCDCByTableName(2).show() + + // =============== Perform DELETE =============== + + println("(v3) Deleted where id >= 7") + table.delete(expr("id >= 7")) // v3 + readCDCByTableName(3).show() + + // =============== Perform partition DELETE =============== + + println("(v4) Deleted where age = 18") + table.delete(expr("age = 18")) // v4, partition delete + readCDCByTableName(4).show() + + // =============== Create source table for MERGE =============== + + spark.sql( + s""" + |CREATE TABLE student_source (id INT, name STRING, age INT) + |USING DELTA + |LOCATION '$otherPath'""".stripMargin) + spark.range(0, 3).selectExpr( + "CAST(id as INT) as id", + "CAST(id as STRING) as name", + "CAST(id % 4 + 18 as INT) as age") + .write.format("delta").mode("append").saveAsTable("student_source") + val source = spark.sql("SELECT * FROM student_source") + + // =============== Perform MERGE =============== + + table + .as("target") + .merge(source.as("source"), "target.id = source.id") + .whenMatched() + .updateExpr( + Map("id" -> "source.id", "age" -> "source.age + 10")) + .whenNotMatched() + .insertAll() + .execute() // v5 + println("(v5) Merged with a source table") + readCDCByTableName(5).show() + + // =============== Stream changes =============== + + println("Streaming by table name") + val cdfStream = streamCDCByTableName(0) + cdfStream.awaitTermination(5000) + cdfStream.stop() + } finally { + cleanup() + spark.stop() + } + } +} diff --git a/examples/scala/src/main/scala/example/Clustering.scala b/examples/scala/src/main/scala/example/Clustering.scala new file mode 100644 index 00000000000..12ffafee19f --- /dev/null +++ b/examples/scala/src/main/scala/example/Clustering.scala @@ -0,0 +1,61 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package example + +import org.apache.spark.sql.SparkSession + +object Clustering { + + def main(args: Array[String]): Unit = { + val tableName = "deltatable" + + val deltaSpark = SparkSession + .builder() + .appName("Clustering-Delta") + .master("local[*]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .getOrCreate() + + // Clear up old session + deltaSpark.sql(s"DROP TABLE IF EXISTS $tableName") + + // Enable preview config for clustering + deltaSpark.conf.set( + "spark.databricks.delta.clusteredTable.enableClusteringTablePreview", "true") + + try { + // Create a table + println("Creating a table") + deltaSpark.sql( + s"""CREATE TABLE $tableName (col1 INT, col2 STRING) using DELTA + |CLUSTER BY (col1, col2)""".stripMargin) + + // Insert new data + println("Insert new data") + deltaSpark.sql(s"INSERT INTO $tableName VALUES (123, '123')") + + // Optimize the table + println("Optimize the table") + deltaSpark.sql(s"OPTIMIZE $tableName") + } finally { + // Cleanup + deltaSpark.sql(s"DROP TABLE IF EXISTS $tableName") + deltaSpark.stop() + } + } +} + diff --git a/examples/scala/src/main/scala/example/Quickstart.scala b/examples/scala/src/main/scala/example/Quickstart.scala new file mode 100644 index 00000000000..58c86805bfb --- /dev/null +++ b/examples/scala/src/main/scala/example/Quickstart.scala @@ -0,0 +1,98 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package example + +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.{SparkSession, SQLContext} +import io.delta.tables._ + +import org.apache.spark.sql.functions._ +import org.apache.commons.io.FileUtils +import java.io.File + +object Quickstart { + def main(args: Array[String]): Unit = { + + val spark = SparkSession + .builder() + .appName("Quickstart") + .master("local[*]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog" + ) + .getOrCreate() + + val file = new File("/tmp/delta-table") + if (file.exists()) FileUtils.deleteDirectory(file) + + // Create a table + println("Creating a table") + val path = file.getCanonicalPath + var data = spark.range(0, 5) + data.write.format("delta").save(path) + + // Read table + println("Reading the table") + val df = spark.read.format("delta").load(path) + df.show() + + // Upsert (merge) new data + println("Upsert new data") + val newData = spark.range(0, 20).toDF() + val deltaTable = DeltaTable.forPath(path) + + deltaTable + .as("oldData") + .merge(newData.as("newData"), "oldData.id = newData.id") + .whenMatched() + .update(Map("id" -> col("newData.id"))) + .whenNotMatched() + .insert(Map("id" -> col("newData.id"))) + .execute() + + deltaTable.toDF.show() + + // Update table data + println("Overwrite the table") + data = spark.range(5, 10) + data.write.format("delta").mode("overwrite").save(path) + deltaTable.toDF.show() + + // Update every even value by adding 100 to it + println("Update to the table (add 100 to every even value)") + deltaTable.update( + condition = expr("id % 2 == 0"), + set = Map("id" -> expr("id + 100")) + ) + deltaTable.toDF.show() + + // Delete every even value + deltaTable.delete(condition = expr("id % 2 == 0")) + deltaTable.toDF.show() + + // Read old version of the data using time travel + print("Read old data using time travel") + val df2 = spark.read.format("delta").option("versionAsOf", 0).load(path) + df2.show() + + // Cleanup + FileUtils.deleteDirectory(file) + spark.stop() + } +} diff --git a/examples/scala/src/main/scala/example/QuickstartSQL.scala b/examples/scala/src/main/scala/example/QuickstartSQL.scala new file mode 100644 index 00000000000..b0d536b6153 --- /dev/null +++ b/examples/scala/src/main/scala/example/QuickstartSQL.scala @@ -0,0 +1,91 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package example + +import org.apache.spark.sql.SparkSession +import io.delta.tables._ + +import org.apache.spark.sql.functions._ +import org.apache.commons.io.FileUtils +import java.io.File + +object QuickstartSQL { + def main(args: Array[String]): Unit = { + // Create Spark Conf + val spark = SparkSession + .builder() + .appName("QuickstartSQL") + .master("local[*]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .getOrCreate() + + val tableName = "tblname" + + // Clear up old session + spark.sql(s"DROP TABLE IF EXISTS $tableName") + spark.sql(s"DROP TABLE IF EXISTS newData") + + try { + // Create a table + println("Creating a table") + spark.sql(s"CREATE TABLE $tableName(id LONG) USING delta") + spark.sql(s"INSERT INTO $tableName VALUES 0, 1, 2, 3, 4") + + // Read table + println("Reading the table") + spark.sql(s"SELECT * FROM $tableName").show() + + // Upsert (merge) new data + println("Upsert new data") + spark.sql("CREATE TABLE newData(id LONG) USING parquet") + spark.sql("INSERT INTO newData VALUES 3, 4, 5, 6") + + spark.sql(s"""MERGE INTO $tableName USING newData + ON ${tableName}.id = newData.id + WHEN MATCHED THEN + UPDATE SET ${tableName}.id = newData.id + WHEN NOT MATCHED THEN INSERT * + """) + + spark.sql(s"SELECT * FROM $tableName").show() + + // Update table data + println("Overwrite the table") + spark.sql(s"INSERT OVERWRITE $tableName VALUES 5, 6, 7, 8, 9") + spark.sql(s"SELECT * FROM $tableName").show() + + // Update every even value by adding 100 to it + println("Update to the table (add 100 to every even value)") + spark.sql(s"UPDATE $tableName SET id = (id + 100) WHERE (id % 2 == 0)") + spark.sql(s"SELECT * FROM $tableName").show() + + // Delete every even value + spark.sql(s"DELETE FROM $tableName WHERE (id % 2 == 0)") + spark.sql(s"SELECT * FROM $tableName").show() + + // Read old version of the data using time travel + print("Read old data using time travel") + spark.sql(s"SELECT * FROM $tableName VERSION AS OF 0").show() + } finally { + // Cleanup + spark.sql(s"DROP TABLE IF EXISTS $tableName") + spark.sql(s"DROP TABLE IF EXISTS newData") + spark.stop() + } + } +} diff --git a/examples/scala/src/main/scala/example/QuickstartSQLOnPaths.scala b/examples/scala/src/main/scala/example/QuickstartSQLOnPaths.scala new file mode 100644 index 00000000000..76977642a78 --- /dev/null +++ b/examples/scala/src/main/scala/example/QuickstartSQLOnPaths.scala @@ -0,0 +1,86 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package example + +import org.apache.spark.sql.SparkSession +import io.delta.tables._ + +import org.apache.spark.sql.functions._ +import org.apache.commons.io.FileUtils +import java.io.File + +object QuickstartSQLOnPaths { + def main(args: Array[String]): Unit = { + // Create Spark Conf + val spark = SparkSession + .builder() + .appName("QuickstartSQLOnPaths") + .master("local[*]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .getOrCreate() + + val tablePath = new File("/tmp/delta-table") + if (tablePath.exists()) FileUtils.deleteDirectory(tablePath) + + // Clear up old session + spark.sql(s"DROP TABLE IF EXISTS newData") + + try { + // Create a table + println("Creating a table") + spark.sql(s"CREATE TABLE delta.`$tablePath`(id LONG) USING delta") + spark.sql(s"INSERT INTO delta.`$tablePath` VALUES 0, 1, 2, 3, 4") + + // Read table + println("Reading the table") + spark.sql(s"SELECT * FROM delta.`$tablePath`").show() + + // Upsert (merge) new data + println("Upsert new data") + spark.sql("CREATE TABLE newData(id LONG) USING parquet") + spark.sql("INSERT INTO newData VALUES 3, 4, 5, 6") + + spark.sql(s"""MERGE INTO delta.`$tablePath` data USING newData + ON data.id = newData.id + WHEN MATCHED THEN + UPDATE SET data.id = newData.id + WHEN NOT MATCHED THEN INSERT * + """) + + spark.sql(s"SELECT * FROM delta.`$tablePath`").show() + + // Update table data + println("Overwrite the table") + spark.sql(s"INSERT OVERWRITE delta.`$tablePath` VALUES 5, 6, 7, 8, 9") + spark.sql(s"SELECT * FROM delta.`$tablePath`").show() + + // Update every even value by adding 100 to it + println("Update to the table (add 100 to every even value)") + spark.sql(s"UPDATE delta.`$tablePath` SET id = (id + 100) WHERE (id % 2 == 0)") + spark.sql(s"SELECT * FROM delta.`$tablePath`").show() + + // Delete every even value + spark.sql(s"DELETE FROM delta.`$tablePath` WHERE (id % 2 == 0)") + spark.sql(s"SELECT * FROM delta.`$tablePath`").show() + } finally { + // Cleanup + spark.sql(s"DROP TABLE IF EXISTS newData") + spark.stop() + } + } +} diff --git a/examples/scala/src/main/scala/example/Streaming.scala b/examples/scala/src/main/scala/example/Streaming.scala new file mode 100644 index 00000000000..fe6f8acd0a3 --- /dev/null +++ b/examples/scala/src/main/scala/example/Streaming.scala @@ -0,0 +1,180 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package example + +import java.io.File + +import io.delta.tables.DeltaTable +import org.apache.commons.io.FileUtils + +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.functions.col + +object Streaming { + + def main(args: Array[String]): Unit = { + // Create a Spark Session + val spark = SparkSession + .builder() + .appName("Streaming") + .master("local[*]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog" + ) + .getOrCreate() + + import spark.implicits._ + + val exampleDir = new File("/tmp/delta-streaming/") + if (exampleDir.exists()) FileUtils.deleteDirectory(exampleDir) + + println( + "=== Section 1: write and read delta table using batch queries, and initialize table for later sections" + ) + // Create a table + val data = spark.range(0, 5) + val path = new File("/tmp/delta-streaming/delta-table").getAbsolutePath + data.write.format("delta").save(path) + + // Read table + val df = spark.read.format("delta").load(path) + df.show() + + println("=== Section 2: write and read delta using structured streaming") + val streamingDf = spark.readStream.format("rate").load() + val tablePath2 = new File( + "/tmp/delta-streaming/delta-table2" + ).getCanonicalPath + val checkpointPath = new File( + "/tmp/delta-streaming/checkpoint" + ).getCanonicalPath + val stream = streamingDf + .select($"value" as "id") + .writeStream + .format("delta") + .option("checkpointLocation", checkpointPath) + .start(tablePath2) + + stream.awaitTermination(10000) + stream.stop() + + val stream2 = spark.readStream + .format("delta") + .load(tablePath2) + .writeStream + .format("console") + .start() + + stream2.awaitTermination(10000) + stream2.stop() + + println("=== Section 3: Streaming upserts using MERGE") + // Function to upsert microBatchOutputDF into Delta Lake table using merge + def upsertToDelta(microBatchOutputDF: DataFrame, batchId: Long): Unit = { + val deltaTable = DeltaTable.forPath(path) + deltaTable + .as("t") + .merge( + microBatchOutputDF.select($"value" as "id").as("s"), + "s.id = t.id" + ) + .whenMatched() + .updateAll() + .whenNotMatched() + .insertAll() + .execute() + } + + val streamingAggregatesDf = spark.readStream + .format("rate") + .load() + .withColumn("key", col("value") % 10) + .drop("timestamp") + + // Write the output of a streaming aggregation query into Delta Lake table + println("Original Delta Table") + val deltaTable = DeltaTable.forPath(path) + deltaTable.toDF.show() + + val stream3 = streamingAggregatesDf.writeStream + .format("delta") + .foreachBatch(upsertToDelta _) + .outputMode("update") + .start() + + stream3.awaitTermination(20000) + stream3.stop() + + println("Delta Table after streaming upsert") + deltaTable.toDF.show() + + // Streaming append and concurrent repartition using data change = false + // tbl1 is the sink and tbl2 is the source + println( + "############ Streaming appends with concurrent table repartition ##########" + ) + val tbl1 = "/tmp/delta-streaming/delta-table4" + val tbl2 = "/tmp/delta-streaming/delta-table5" + val numRows = 10 + spark.range(numRows).write.mode("overwrite").format("delta").save(tbl1) + spark.read.format("delta").load(tbl1).show() + spark + .range(numRows, numRows * 10) + .write + .mode("overwrite") + .format("delta") + .save(tbl2) + + // Start reading tbl2 as a stream and do a streaming write to tbl1 + // Prior to Delta 0.5.0 this would throw StreamingQueryException: 
Detected a data update in the source table. This is currently not supported. + val stream4 = spark.readStream + .format("delta") + .load(tbl2) + .writeStream + .format("delta") + .option( + "checkpointLocation", + new File("/tmp/delta-streaming/checkpoint/tbl1").getCanonicalPath + ) + .outputMode("append") + .start(tbl1) + + Thread.sleep(10 * 1000) + // repartition table while streaming job is running + spark.read + .format("delta") + .load(tbl2) + .repartition(10) + .write + .format("delta") + .mode("overwrite") + .option("dataChange", "false") + .save(tbl2) + + stream4.awaitTermination(5 * 1000) + stream4.stop() + println("######### After streaming write #########") + spark.read.format("delta").load(tbl1).show() + + println("=== In the end, clean all paths") + // Cleanup + if (exampleDir.exists()) FileUtils.deleteDirectory(exampleDir) + spark.stop() + } +} diff --git a/examples/scala/src/main/scala/example/UniForm.scala b/examples/scala/src/main/scala/example/UniForm.scala new file mode 100644 index 00000000000..3b7d0b01b8b --- /dev/null +++ b/examples/scala/src/main/scala/example/UniForm.scala @@ -0,0 +1,107 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example + +import java.io.{File, IOException} +import java.net.ServerSocket + +import org.apache.commons.io.FileUtils + +import org.apache.spark.sql.SparkSession + +/** + * This example relies on an external Hive metastore (HMS) instance to run. + * + * A standalone HMS can be created using the following docker command. + * ************************************************************ + * docker run -d -p 9083:9083 --env SERVICE_NAME=metastore \ + * --name metastore-standalone apache/hive:4.0.0-beta-1 + * ************************************************************ + * The URL of this standalone HMS is thrift://localhost:9083 + * + * By default this hms will use `/opt/hive/data/warehouse` as warehouse path. + * Please make sure this path exists or change it prior to running the example. + */ +object UniForm { + + def main(args: Array[String]): Unit = { + // Update this according to the metastore config + val port = 9083 + val warehousePath = "/opt/hive/data/warehouse/" + + if (!hmsReady(port)) { + print("HMS not available. 
Exit.") + return + } + + val testTableName = "deltatable" + FileUtils.deleteDirectory(new File(s"${warehousePath}${testTableName}")) + + val deltaSpark = SparkSession + .builder() + .appName("UniForm-Delta") + .master("local[*]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .config("hive.metastore.uris", s"thrift://localhost:$port") + .config("spark.sql.catalogImplementation", "hive") + .getOrCreate() + + + deltaSpark.sql(s"DROP TABLE IF EXISTS ${testTableName}") + deltaSpark.sql( + s"""CREATE TABLE `${testTableName}` (col1 INT) using DELTA + |TBLPROPERTIES ( + | 'delta.columnMapping.mode' = 'name', + | 'delta.universalFormat.enabledFormats' = 'iceberg' + |)""".stripMargin) + deltaSpark.sql(s"INSERT INTO `$testTableName` VALUES (123)") + + // Wait for the conversion to be done + Thread.sleep(10000) + + val icebergSpark = SparkSession.builder() + .master("local[*]") + .appName("UniForm-Iceberg") + .config("spark.sql.extensions", + "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") + .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog") + .config("hive.metastore.uris", s"thrift://localhost:$port") + .config("spark.sql.catalogImplementation", "hive") + .getOrCreate() + + icebergSpark.sql(s"SELECT * FROM ${testTableName}").show() + } + + def hmsReady(port: Int): Boolean = { + var ss: ServerSocket = null + try { + ss = new ServerSocket(port) + ss.setReuseAddress(true) + return false + } catch { + case e: IOException => + } finally { + if (ss != null) { + try ss.close() + catch { + case e: IOException => + } + } + } + true + } +} diff --git a/examples/scala/src/main/scala/example/Utilities.scala b/examples/scala/src/main/scala/example/Utilities.scala new file mode 100644 index 00000000000..5c8e6608dc5 --- /dev/null +++ b/examples/scala/src/main/scala/example/Utilities.scala @@ -0,0 +1,84 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package example + +import java.io.File + +import io.delta.tables.DeltaTable +import org.apache.commons.io.FileUtils + +import org.apache.spark.sql.SparkSession + +object Utilities { + def main(args: Array[String]): Unit = { + // Create a Spark Session with SQL enabled + val spark = SparkSession + .builder() + .appName("Utilities") + .master("local[*]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + // control the parallelism for vacuum + .config("spark.sql.sources.parallelPartitionDiscovery.parallelism", "4") + .getOrCreate() + + // Create a table + println("Create a parquet table") + val data = spark.range(0, 5) + val file = new File("/tmp/parquet-table") + val path = file.getAbsolutePath + data.write.format("parquet").save(path) + + // Convert to delta + println("Convert to Delta") + DeltaTable.convertToDelta(spark, s"parquet.`$path`") + + // Read table as delta + var df = spark.read.format("delta").load(path) + + // Read old version of data using time travel + df = spark.read.format("delta").option("versionAsOf", 0).load(path) + df.show() + + val deltaTable = DeltaTable.forPath(path) + + // Utility commands + println("Vacuum the table") + deltaTable.vacuum() + + println("Describe History for the table") + deltaTable.history().show() + + println("Describe Details for the table") + deltaTable.detail().show() + + // Generate manifest + println("Generate Manifest files") + deltaTable.generate("SYMLINK_FORMAT_MANIFEST") + + // SQL utility commands + println("SQL Vacuum") + spark.sql(s"VACUUM '$path' RETAIN 169 HOURS") + + println("SQL Describe History") + println(spark.sql(s"DESCRIBE HISTORY '$path'").collect()) + + // Cleanup + FileUtils.deleteDirectory(new File(path)) + spark.stop() + } +} diff --git a/iceberg/integration_tests/iceberg_converter.py b/iceberg/integration_tests/iceberg_converter.py new file mode 100644 index 00000000000..fc51d59bee3 --- /dev/null +++ b/iceberg/integration_tests/iceberg_converter.py @@ -0,0 +1,67 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from pyspark.sql import SparkSession +from pyspark.sql.functions import col +from delta.tables import DeltaTable +import shutil +import random + +testRoot = "/tmp/delta-iceberg-converter/" +warehousePath = testRoot + "iceberg_tables" +shutil.rmtree(testRoot, ignore_errors=True) + +# we need to set the following configs +spark = SparkSession.builder \ + .appName("delta-iceberg-converter") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") \ + .config("spark.sql.catalog.local.type", "hadoop") \ + .config("spark.sql.catalog.local.warehouse", warehousePath) \ + .getOrCreate() + +table = "local.db.table" +tablePath = "file://" + warehousePath + "/db/table" + +try: + print("Creating Iceberg table with partitions...") + spark.sql( + "CREATE TABLE {} (id BIGINT, data STRING) USING ICEBERG PARTITIONED BY (data)".format(table)) + spark.sql("INSERT INTO {} VALUES (1, 'a'), (2, 'b')".format(table)) + spark.sql("INSERT INTO {} VALUES (3, 'c')".format(table)) + + print("Converting Iceberg table to Delta table...") + spark.sql("CONVERT TO DELTA iceberg.`{}`".format(tablePath)) + + print("Reading from converted Delta table...") + spark.read.format("delta").load(tablePath).show() + + print("Modifying the converted table...") + spark.sql("INSERT INTO delta.`{}` VALUES (4, 'd')".format(tablePath)) + + print("Reading the final Delta table...") + spark.read.format("delta").load(tablePath).show() + + print("Create an external catalog table using Delta...") + spark.sql("CREATE TABLE converted_delta_table USING delta LOCATION '{}'".format(tablePath)) + + print("Read from the catalog table...") + spark.read.table("converted_delta_table").show() +finally: + # cleanup + shutil.rmtree(testRoot, ignore_errors=True) diff --git a/iceberg/src/main/scala/org/apache/iceberg/transforms/IcebergPartitionUtil.scala b/iceberg/src/main/scala/org/apache/iceberg/transforms/IcebergPartitionUtil.scala new file mode 100644 index 00000000000..7192154d650 --- /dev/null +++ b/iceberg/src/main/scala/org/apache/iceberg/transforms/IcebergPartitionUtil.scala @@ -0,0 +1,193 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.iceberg.transforms + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.DeltaColumnMapping +import org.apache.spark.sql.delta.sources.DeltaSourceUtils.GENERATION_EXPRESSION_METADATA_KEY +import org.apache.spark.sql.delta.util.{DateFormatter, TimestampFormatter} +import org.apache.iceberg.{PartitionField, PartitionSpec, Schema, StructLike} +import org.apache.iceberg.spark.SparkSchemaUtil +import org.apache.iceberg.types.Type.TypeID +import org.apache.iceberg.types.Types + +import org.apache.spark.sql.types.{DateType, IntegerType, MetadataBuilder, StringType, StructField} + +/** + * Utils to translate Iceberg's partition expressions to Delta generated column expressions. + */ +object IcebergPartitionUtil { + + // scalastyle:off line.size.limit + /** + * Convert the partition values stored in Iceberg metadata to string values, which we will + * directly use in the partitionValues field of AddFiles. Here is how we generate the string + * value from the Iceberg stored partition value for each of the transforms: + * + * Identity + * - Iceberg source code: https://github.com/apache/iceberg/blob/4c98a0f6408d4ccd0d47b076b2f7743d836d28ec/api/src/main/java/org/apache/iceberg/transforms/Identity.java + * - Source column type: any + * - Stored partition value type: same as source type + * - String value generation: for timestamp and date, use our Spark formatter; other types use toString + * + * Timestamps (year, month, day, hour) + * - Iceberg source code: https://github.com/apache/iceberg/blob/4c98a0f6408d4ccd0d47b076b2f7743d836d28ec/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java + * - Source column type: timestamp + * - Stored partition value type: integer + * - String value generation: use Iceberg's Timestamps.toHumanString (which uses yyyy-MM-dd-HH format) + * + * Dates (year, month, day) + * - Iceberg source code: https://github.com/apache/iceberg/blob/4c98a0f6408d4ccd0d47b076b2f7743d836d28ec/api/src/main/java/org/apache/iceberg/transforms/Dates.java + * - Source column type: date + * - Stored partition value type: integer + * - String value generation: use Iceberg's Dates.toHumanString (which uses yyyy-MM-dd format) + * + * Truncate + * - Iceberg source code: https://github.com/apache/iceberg/blob/4c98a0f6408d4ccd0d47b076b2f7743d836d28ec/api/src/main/java/org/apache/iceberg/transforms/Truncate.java + * - Source column type: string, long and int + * - Stored partition value type: string, long and int + * - String value generation: directly use toString + */ + // scalastyle:on line.size.limit + def partitionValueToString( + partField: PartitionField, + partValue: Object, + schema: Schema, + dateFormatter: DateFormatter, + timestampFormatter: TimestampFormatter): String = { + if (partValue == null) return null + partField.transform() match { + case _: Identity[_] => + // Identity transform + // We use our own date and timestamp formatter for date and timestamp types, while simply + // use toString for other input types. 
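+        // Illustrative examples (assumed values, not taken from any table): an
+        // identity-partitioned INT value 42 is rendered as "42" via toString; a DATE stored
+        // as epoch days is rendered through dateFormatter (epoch day 0 -> "1970-01-01");
+        // a TIMESTAMP stored as a long epoch value is rendered through timestampFormatter.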
+ val sourceField = schema.findField(partField.sourceId()) + val sourceType = sourceField.`type`() + if (sourceType.typeId() == TypeID.DATE) { + // convert epoch days to Spark date formatted string + dateFormatter.format(partValue.asInstanceOf[Int]) + } else if (sourceType.typeId == TypeID.TIMESTAMP) { + // convert timestamps to Spark timestamp formatted string + timestampFormatter.format(partValue.asInstanceOf[Long]) + } else { + // all other types can directly toString + partValue.toString + } + case ts: Timestamps => + // Matches all transforms on Timestamp input type: YEAR, MONTH, DAY, HOUR + // We directly use Iceberg's toHumanString(), which takes a timestamp type source column and + // generates the partition value in the string format as follows: + // - YEAR: yyyy + // - MONTH: yyyy-MM + // - DAY: yyyy-MM-dd + // - HOUR: yyyy-MM-dd-HH + ts.toHumanString(Types.TimestampType.withoutZone(), partValue.asInstanceOf[Int]) + case dt: Dates => + // Matches all transform on Date input type: YEAR, MONTH, DAY + // We directly use Iceberg's toHumanString(), which takes a date type source column and + // generates the partition value in the string format as follows: + // - YEAR: yyyy + // - MONTH: yyyy-MM + // - DAY: yyyy-MM-dd + dt.toHumanString(Types.DateType.get(), partValue.asInstanceOf[Int]) + case _: Truncate[_] => + // Truncate transform + // While Iceberg Truncate transform supports multiple input types, our converter + // only supports string and block all other input types. So simply toString suffices. + partValue.toString + case other => + throw new UnsupportedOperationException( + s"unsupported partition transform expression when converting to Delta: $other") + } + } + + def getPartitionFields(partSpec: PartitionSpec, schema: Schema): Seq[StructField] = { + // Skip removed partition fields due to partition evolution. 
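+    // Illustrative mapping (column names are hypothetical; see the cases below): a DAY
+    // transform on a timestamp column `ts` becomes a Delta generated column with expression
+    // "cast(ts as date)" (DateType), and a truncate[4] transform on a string column `name`
+    // becomes "substring(name, 0, 4)" (StringType).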
+ partSpec.fields.asScala.toSeq.collect { + case partField if !partField.transform().isInstanceOf[VoidTransform[_]] => + val sourceColumnName = schema.findColumnName(partField.sourceId()) + val sourceField = schema.findField(partField.sourceId()) + val sourceType = sourceField.`type`() + + val metadataBuilder = new MetadataBuilder() + + // TODO: Support truncate[Decimal] in partition + val (transformExpr, targetType) = partField.transform() match { + // binary partition values are problematic in Delta, so we block converting if the iceberg + // table has a binary type partition column + case _: Identity[_] if sourceType.typeId() != TypeID.BINARY => + // copy id only for identity transform because source id will be the converted column id + // ids for other columns will be assigned later automatically during schema evolution + metadataBuilder + .putLong(DeltaColumnMapping.COLUMN_MAPPING_METADATA_ID_KEY, sourceField.fieldId()) + ("", SparkSchemaUtil.convert(sourceType)) + + case Timestamps.YEAR | Dates.YEAR => + (s"year($sourceColumnName)", IntegerType) + + case Timestamps.DAY | Dates.DAY => + (s"cast($sourceColumnName as date)", DateType) + + case t: Truncate[_] if sourceType.typeId() == TypeID.STRING => + (s"substring($sourceColumnName, 0, ${t.width()})", StringType) + + case t: Truncate[_] + if sourceType.typeId() == TypeID.LONG || sourceType.typeId() == TypeID.INTEGER => + (icebergNumericTruncateExpression(sourceColumnName, t.width().toLong), + SparkSchemaUtil.convert(sourceType)) + + case Timestamps.MONTH | Dates.MONTH => + (s"date_format($sourceColumnName, 'yyyy-MM')", StringType) + + case Timestamps.HOUR => + (s"date_format($sourceColumnName, 'yyyy-MM-dd-HH')", StringType) + + case other => + throw new UnsupportedOperationException( + s"Unsupported partition transform expression when converting to Delta: " + + s"transform: $other, source data type: ${sourceType.typeId()}") + } + + if (transformExpr != "") { + metadataBuilder.putString(GENERATION_EXPRESSION_METADATA_KEY, transformExpr) + } + + Option(sourceField.doc()).foreach { comment => + metadataBuilder.putString("comment", comment) + } + + val metadata = metadataBuilder.build() + + StructField(partField.name(), + targetType, + nullable = sourceField.isOptional(), + metadata = metadata) + } + } + + /** + * Returns the iceberg transform function of truncate[Integer] and truncate[Long] as an + * expression string, please check the iceberg documents for more details: + * + * https://iceberg.apache.org/spec/#truncate-transform-details + * + * TODO: make this partition expression optimizable. + */ + private def icebergNumericTruncateExpression(colName: String, width: Long): String = + s"$colName - (($colName % $width) + $width) % $width" +} diff --git a/iceberg/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchProcedureException.scala b/iceberg/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchProcedureException.scala new file mode 100644 index 00000000000..2bd6e707b0f --- /dev/null +++ b/iceberg/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchProcedureException.scala @@ -0,0 +1,23 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.catalog.Identifier + +class NoSuchProcedureException(ident: Identifier) + extends AnalysisException("Procedure " + ident + " not found") diff --git a/iceberg/src/main/scala/org/apache/spark/sql/delta/IcebergFileManifest.scala b/iceberg/src/main/scala/org/apache/spark/sql/delta/IcebergFileManifest.scala new file mode 100644 index 00000000000..fcdff99867f --- /dev/null +++ b/iceberg/src/main/scala/org/apache/spark/sql/delta/IcebergFileManifest.scala @@ -0,0 +1,174 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.convert + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.{DeltaColumnMapping, SerializableFileStatus} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.{DateFormatter, TimestampFormatter} +import org.apache.hadoop.fs.Path +import org.apache.iceberg.{PartitionData, RowLevelOperationMode, Table, TableProperties} +import org.apache.iceberg.transforms.IcebergPartitionUtil + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.SerializableConfiguration + +class IcebergFileManifest( + spark: SparkSession, + table: Table, + partitionSchema: StructType) extends ConvertTargetFileManifest with Logging { + + // scalastyle:off sparkimplicits + import spark.implicits._ + // scalastyle:on sparkimplicits + + final val VOID_TRANSFORM = "void" + + private var fileSparkResults: Option[Dataset[ConvertTargetFile]] = None + + private var _numFiles: Option[Long] = None + + val basePath = table.location() + + override def numFiles: Long = { + if (_numFiles.isEmpty) getFileSparkResults() + _numFiles.get + } + + def allFiles: Dataset[ConvertTargetFile] = { + if (fileSparkResults.isEmpty) getFileSparkResults() + fileSparkResults.get + } + + private def getFileSparkResults(): Unit = { + // scalastyle:off deltahadoopconfiguration + val hadoopConf = spark.sessionState.newHadoopConf() + // scalastyle:on deltahadoopconfiguration + val serializableConfiguration = new SerializableConfiguration(hadoopConf) + val conf = spark.sparkContext.broadcast(serializableConfiguration) + val format = table + .properties() + .getOrDefault( + TableProperties.DEFAULT_FILE_FORMAT, 
TableProperties.DEFAULT_FILE_FORMAT_DEFAULT) + + if (format.toLowerCase() != "parquet") { + throw new UnsupportedOperationException( + s"Cannot convert Iceberg tables with file format $format. Only parquet is supported.") + } + + val schemaBatchSize = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_IMPORT_BATCH_SIZE_SCHEMA_INFERENCE) + + val partFields = table.spec().fields().asScala + val icebergSchema = table.schema() + // we must use field id to look up the partition value; consider scenario with iceberg + // behavior chance since 1.4.0: + // 1) create table with partition schema (a[col_name]: 1[field_id]), add file1; + // The partition data for file1 is (a:1:some_part_value) + // 2) add new partition col b and the partition schema becomes (a: 1, b: 2), add file2; + // the partition data for file2 is (a:1:some_part_value, b:2:some_part_value) + // 3) remove partition col a, then add file3; + // for iceberg < 1.4.0: the partFields is (a:1(void), b:2); the partition data for + // file3 is (a:1(void):null, b:2:some_part_value); + // for iceberg 1.4.0: the partFields is (b:2); When it reads file1 (a:1:some_part_value), + // it must use the field_id instead of index to look up the partition + // value, as the partField and partitionData from file1 have different + // ordering and thus same index indicates different column. + val physicalNameToField = partFields.collect { + case field if field.transform().toString != VOID_TRANSFORM => + DeltaColumnMapping.getPhysicalName(partitionSchema(field.name)) -> field + }.toMap + + val dateFormatter = DateFormatter() + val timestampFormatter = TimestampFormatter(ConvertUtils.timestampPartitionPattern, + java.util.TimeZone.getDefault) + + // This flag is strongly not recommended to turn on, but we still provide a flag for regression + // purpose. + val unsafeConvertMorTable = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_CONVERT_ICEBERG_UNSAFE_MOR_TABLE_ENABLE) + val properties = CaseInsensitiveMap(table.properties().asScala.toMap) + val isMergeOnReadTable = Seq( + TableProperties.DELETE_MODE, + TableProperties.UPDATE_MODE, + TableProperties.MERGE_MODE + ).exists { propKey => + properties.get(propKey) + .exists(RowLevelOperationMode.fromName(_) == RowLevelOperationMode.MERGE_ON_READ) + } + + var numFiles = 0L + val res = table.newScan().planFiles().iterator().asScala.grouped(schemaBatchSize).map { batch => + logInfo(s"Getting file statuses for a batch of ${batch.size} of files; " + + s"finished $numFiles files so far") + numFiles += batch.length + val filePathWithPartValues = batch.map { fileScanTask => + val filePath = fileScanTask.file().path().toString + // If an Iceberg table has merge on read enabled AND it has deletion file associated with + // the data file, we could not convert directly. + val hasMergeOnReadDeletionFiles = isMergeOnReadTable && fileScanTask.deletes().size() > 0 + if (hasMergeOnReadDeletionFiles && !unsafeConvertMorTable) { + throw new UnsupportedOperationException( + s"Cannot convert Iceberg merge-on-read table with delete files. 
" + + s"Please trigger an Iceberg compaction and retry the command.") + } + val partitionValues = if (spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_CONVERT_ICEBERG_USE_NATIVE_PARTITION_VALUES)) { + + val icebergPartition = fileScanTask.file().partition() + val icebergPartitionData = icebergPartition.asInstanceOf[PartitionData] + val fieldIdToIdx = icebergPartitionData.getPartitionType.fields().asScala.zipWithIndex + .map(kv => kv._1.fieldId() -> kv._2).toMap + val physicalNameToPartValueMap = physicalNameToField + .map { case (physicalName, field) => + val fieldIndex = fieldIdToIdx.get(field.fieldId()) + val partValueAsString = fieldIndex.map {idx => + val partValue = icebergPartitionData.get(idx) + IcebergPartitionUtil.partitionValueToString( + field, partValue, icebergSchema, dateFormatter, timestampFormatter) + }.getOrElse(null) + physicalName -> partValueAsString + } + Some(physicalNameToPartValueMap) + } else None + (filePath, partitionValues) + } + val numParallelism = Math.min(Math.max(filePathWithPartValues.size, 1), + spark.sparkContext.defaultParallelism) + + val rdd = spark.sparkContext.parallelize(filePathWithPartValues, numParallelism) + .mapPartitions { iterator => + iterator.map { case (filePath, partValues) => + val path = new Path(filePath) + val fs = path.getFileSystem(conf.value.value) + val fileStatus = fs.getFileStatus(path) + ConvertTargetFile(SerializableFileStatus.fromStatus(fileStatus), partValues) + } + } + spark.createDataset(rdd) + }.reduceOption(_.union(_)).getOrElse(spark.emptyDataset[ConvertTargetFile]) + + fileSparkResults = Some(res.cache()) + _numFiles = Some(numFiles) + } + + override def close(): Unit = fileSparkResults.map(_.unpersist()) +} diff --git a/iceberg/src/main/scala/org/apache/spark/sql/delta/IcebergSchemaUtils.scala b/iceberg/src/main/scala/org/apache/spark/sql/delta/IcebergSchemaUtils.scala new file mode 100644 index 00000000000..3c2f0cf0d81 --- /dev/null +++ b/iceberg/src/main/scala/org/apache/spark/sql/delta/IcebergSchemaUtils.scala @@ -0,0 +1,54 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.convert + +import org.apache.spark.sql.delta.DeltaColumnMapping +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.iceberg.Schema +import org.apache.iceberg.spark.SparkSchemaUtil + +import org.apache.spark.sql.types.{MetadataBuilder, StructType} + +object IcebergSchemaUtils { + + /** + * Given an iceberg schema, convert it to a Spark schema. 
This conversion will keep the Iceberg + * column IDs (used to read Parquet files) in the field metadata + * + * @param icebergSchema + * @return StructType for the converted schema + */ + def convertIcebergSchemaToSpark(icebergSchema: Schema): StructType = { + // Convert from Iceberg schema to Spark schema but without the column IDs + val baseConvertedSchema = SparkSchemaUtil.convert(icebergSchema) + + // For each field, find the column ID (fieldId) and add to the StructField metadata + SchemaMergingUtils.transformColumns(baseConvertedSchema) { (path, field, _) => + // This should be safe to access fields + // scalastyle:off + // https://github.com/apache/iceberg/blob/d98224a82b104888281d4e901ccf948f9642590b/api/src/main/java/org/apache/iceberg/types/IndexByName.java#L171 + // scalastyle:on + val fieldPath = (path :+ field.name).mkString(".") + val id = icebergSchema.findField(fieldPath).fieldId() + field.copy( + metadata = new MetadataBuilder() + .withMetadata(field.metadata) + .putLong(DeltaColumnMapping.COLUMN_MAPPING_METADATA_ID_KEY, id) + .build()) + } + } +} diff --git a/iceberg/src/main/scala/org/apache/spark/sql/delta/IcebergTable.scala b/iceberg/src/main/scala/org/apache/spark/sql/delta/IcebergTable.scala new file mode 100644 index 00000000000..c442f0947e7 --- /dev/null +++ b/iceberg/src/main/scala/org/apache/spark/sql/delta/IcebergTable.scala @@ -0,0 +1,158 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.convert + +import java.util.Locale + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.{DeltaColumnMapping, DeltaColumnMappingMode, DeltaConfigs, IdMapping, SerializableFileStatus} +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.iceberg.{Table, TableProperties} +import org.apache.iceberg.hadoop.HadoopTables +import org.apache.iceberg.transforms.IcebergPartitionUtil + +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.types.StructType + +/** + * A target Iceberg table for conversion to a Delta table. + * + * @param icebergTable the Iceberg table underneath. + * @param existingSchema schema used for incremental update, none for initial conversion. 
+ */ +class IcebergTable( + spark: SparkSession, + icebergTable: Table, + existingSchema: Option[StructType]) extends ConvertTargetTable { + + def this(spark: SparkSession, basePath: String, existingSchema: Option[StructType]) = + // scalastyle:off deltahadoopconfiguration + this(spark, new HadoopTables(spark.sessionState.newHadoopConf).load(basePath), existingSchema) + // scalastyle:on deltahadoopconfiguration + + private val partitionEvolutionEnabled = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_CONVERT_ICEBERG_PARTITION_EVOLUTION_ENABLED) + + private val fieldPathToPhysicalName = + existingSchema.map { + SchemaMergingUtils.explode(_).collect { + case (path, field) if DeltaColumnMapping.hasPhysicalName(field) => + path.map(_.toLowerCase(Locale.ROOT)) -> DeltaColumnMapping.getPhysicalName(field) + }.toMap + }.getOrElse(Map.empty[Seq[String], String]) + + private val convertedSchema = { + // Reuse physical names of existing columns. + val mergedSchema = DeltaColumnMapping.setPhysicalNames( + IcebergSchemaUtils.convertIcebergSchemaToSpark(icebergTable.schema()), + fieldPathToPhysicalName) + + // Assign physical names to new columns. + DeltaColumnMapping.assignPhysicalNames(mergedSchema) + } + + override val requiredColumnMappingMode: DeltaColumnMappingMode = IdMapping + + override val properties: Map[String, String] = { + icebergTable.properties().asScala.toMap + (DeltaConfigs.COLUMN_MAPPING_MODE.key -> "id") + } + + override val partitionSchema: StructType = { + // Reuse physical names of existing columns. + val mergedPartitionSchema = DeltaColumnMapping.setPhysicalNames( + StructType( + IcebergPartitionUtil.getPartitionFields(icebergTable.spec(), icebergTable.schema())), + fieldPathToPhysicalName) + + // Assign physical names to new partition columns. + DeltaColumnMapping.assignPhysicalNames(mergedPartitionSchema) + } + + val tableSchema: StructType = PartitioningUtils.mergeDataAndPartitionSchema( + convertedSchema, + partitionSchema, + spark.sessionState.conf.caseSensitiveAnalysis)._1 + + checkConvertible() + + val fileManifest = new IcebergFileManifest(spark, icebergTable, partitionSchema) + + lazy val numFiles: Long = fileManifest.numFiles + + override val format: String = "iceberg" + + def checkConvertible(): Unit = { + /** + * Having multiple partition specs implies that the Iceberg table has experienced + * partition evolution. (https://iceberg.apache.org/evolution/#partition-evolution) + * We don't support the conversion of such tables right now. + * + * Note that this simple check won't consider the underlying data, so there might be cases + * s.t. the data itself is partitioned using a single spec despite multiple specs created + * in the past. we do not account for that atm due to the complexity of data introspection + */ + + if (!partitionEvolutionEnabled && icebergTable.specs().size() > 1) { + throw new UnsupportedOperationException(IcebergTable.ERR_MULTIPLE_PARTITION_SPECS) + } + + /** + * Existing Iceberg Table that has data imported from table without field ids will need + * to add a custom property to enable the mapping for Iceberg. + * Therefore, we can simply check for the existence of this property to see if there was + * a custom mapping within Iceberg. 
+ * + * Ref: https://www.mail-archive.com/dev@iceberg.apache.org/msg01638.html + */ + if (icebergTable.properties().containsKey(TableProperties.DEFAULT_NAME_MAPPING)) { + throw new UnsupportedOperationException(IcebergTable.ERR_CUSTOM_NAME_MAPPING) + } + + /** + * Delta does not support case sensitive columns while Iceberg does. We should check for + * this here to throw a better message tailored to converting to Delta than the default + * AnalysisException + */ + try { + SchemaMergingUtils.checkColumnNameDuplication(tableSchema, "during convert to Delta") + } catch { + case e: AnalysisException if e.getMessage.contains("during convert to Delta") => + throw new UnsupportedOperationException( + IcebergTable.caseSensitiveConversionExceptionMsg(e.getMessage)) + } + } +} + +object IcebergTable { + /** Error message constants */ + val ERR_MULTIPLE_PARTITION_SPECS = + s"""This Iceberg table has undergone partition evolution. Iceberg tables that had partition + | columns removed can be converted without data loss by setting the SQL configuration + | '${DeltaSQLConf.DELTA_CONVERT_ICEBERG_PARTITION_EVOLUTION_ENABLED.key}' to true. Tables that + | had data columns converted to partition columns will not be able to read the pre-partition + | column values.""".stripMargin + val ERR_CUSTOM_NAME_MAPPING = "Cannot convert Iceberg tables with column name mapping" + + def caseSensitiveConversionExceptionMsg(conflictingColumns: String): String = + s"""Cannot convert table to Delta as the table contains column names that only differ by case. + |$conflictingColumns. Delta does not support case sensitive column names. + |Please rename these columns before converting to Delta. + """.stripMargin +} diff --git a/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergConversionTransaction.scala b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergConversionTransaction.scala new file mode 100644 index 00000000000..29cbec8d7d3 --- /dev/null +++ b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergConversionTransaction.scala @@ -0,0 +1,419 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.icebergShaded + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.{DeltaFileProviderUtils, Snapshot} +import org.apache.spark.sql.delta.actions.{AddFile, Metadata, RemoveFile} +import org.apache.spark.sql.delta.icebergShaded.IcebergSchemaUtils._ +import org.apache.spark.sql.delta.icebergShaded.IcebergTransactionUtils._ +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.commons.lang3.exception.ExceptionUtils +import org.apache.hadoop.conf.Configuration +import shadedForDelta.org.apache.iceberg.{AppendFiles, DeleteFiles, OverwriteFiles, PendingUpdate, RewriteFiles, Transaction => IcebergTransaction} +import shadedForDelta.org.apache.iceberg.hadoop.HadoopTables +import shadedForDelta.org.apache.iceberg.mapping.MappingUtil +import shadedForDelta.org.apache.iceberg.mapping.NameMappingParser + +import org.apache.spark.sql.catalyst.catalog.CatalogTable + +sealed trait IcebergTableOp +case object CREATE_TABLE extends IcebergTableOp +case object WRITE_TABLE extends IcebergTableOp +case object REPLACE_TABLE extends IcebergTableOp + +/** + * Used to prepare (convert) and then commit a set of Delta actions into the Iceberg table located + * at the same path as [[postCommitSnapshot]] + * + * + * @param conf Configuration for Iceberg Hadoop interactions. + * @param postCommitSnapshot Latest Delta snapshot associated with this Iceberg commit. + * @param tableOp How to instantiate the underlying Iceberg table. Defaults to WRITE_TABLE. + */ +class IcebergConversionTransaction( + protected val catalogTable: CatalogTable, + protected val conf: Configuration, + protected val postCommitSnapshot: Snapshot, + protected val tableOp: IcebergTableOp = WRITE_TABLE, + protected val lastConvertedDeltaVersion: Option[Long] = None) extends DeltaLogging { + + /////////////////////////// + // Nested Helper Classes // + /////////////////////////// + + protected abstract class TransactionHelper(impl: PendingUpdate[_]) { + private var committed = false + + def opType: String + + def commit(): Unit = { + assert(!committed, "Already committed.") + impl.commit() + committed = true + } + + private[icebergShaded]def hasCommitted: Boolean = committed + } + + /** + * API for appending new files in a table. + * + * e.g. INSERT + */ + class AppendOnlyHelper(appender: AppendFiles) extends TransactionHelper(appender) { + + override def opType: String = "append" + + def add(add: AddFile): Unit = { + appender.appendFile( + convertDeltaAddFileToIcebergDataFile( + add, + tablePath, + partitionSpec, + logicalToPhysicalPartitionNames, + postCommitSnapshot.statsSchema, + statsParser, + postCommitSnapshot.deltaLog + ) + ) + } + } + + /** + * API for deleting files from a table. + * + * e.g. DELETE + */ + class RemoveOnlyHelper(deleter: DeleteFiles) extends TransactionHelper(deleter) { + + override def opType: String = "delete" + + def remove(remove: RemoveFile): Unit = { + // We can just use the canonical RemoveFile.path instead of converting RemoveFile to DataFile. + // Note that in other helper APIs, converting a FileAction to a DataFile will also take care + // of canonicalizing the path. + deleter.deleteFile(canonicalizeFilePath(remove, tablePath)) + } + } + + /** + * API for overwriting files in a table. Replaces all the deleted files with the set of additions. + * + * e.g. 
UPDATE, MERGE + */ + class OverwriteHelper(overwriter: OverwriteFiles) extends TransactionHelper(overwriter) { + + override def opType: String = "overwrite" + + def add(add: AddFile): Unit = { + overwriter.addFile( + convertDeltaAddFileToIcebergDataFile( + add, + tablePath, + partitionSpec, + logicalToPhysicalPartitionNames, + postCommitSnapshot.statsSchema, + statsParser, + postCommitSnapshot.deltaLog + ) + ) + } + + def remove(remove: RemoveFile): Unit = { + overwriter.deleteFile( + convertDeltaRemoveFileToIcebergDataFile( + remove, tablePath, partitionSpec, logicalToPhysicalPartitionNames) + ) + } + } + + /** + * API for rewriting existing files in the table (i.e. replaces one set of data files with another + * set that contains the same data). + * + * e.g. OPTIMIZE + */ + class RewriteHelper(rewriter: RewriteFiles) extends TransactionHelper(rewriter) { + + override def opType: String = "rewrite" + + def rewrite(removes: Seq[RemoveFile], adds: Seq[AddFile]): Unit = { + val dataFilesToDelete = removes.map { f => + assert(!f.dataChange, "Rewrite operation should not add data") + convertDeltaRemoveFileToIcebergDataFile( + f, tablePath, partitionSpec, logicalToPhysicalPartitionNames) + }.toSet.asJava + + val dataFilesToAdd = adds.map { f => + assert(!f.dataChange, "Rewrite operation should not add data") + convertDeltaAddFileToIcebergDataFile( + f, + tablePath, + partitionSpec, + logicalToPhysicalPartitionNames, + postCommitSnapshot.statsSchema, + statsParser, + postCommitSnapshot.deltaLog + ) + }.toSet.asJava + + rewriter.rewriteFiles(dataFilesToDelete, dataFilesToAdd, 0) + } + } + + ////////////////////// + // Member variables // + ////////////////////// + + protected val tablePath = postCommitSnapshot.deltaLog.dataPath + protected val icebergSchema = + convertDeltaSchemaToIcebergSchema(postCommitSnapshot.metadata.schema) + protected val partitionSpec = + createPartitionSpec(icebergSchema, postCommitSnapshot.metadata.partitionColumns) + private val logicalToPhysicalPartitionNames = + getPartitionPhysicalNameMapping(postCommitSnapshot.metadata.partitionSchema) + + /** Parses the stats JSON string to convert Delta stats to Iceberg stats. */ + private val statsParser = + DeltaFileProviderUtils.createJsonStatsParser(postCommitSnapshot.statsSchema) + + /** Visible for testing. */ + private[icebergShaded]val txn = createIcebergTxn() + + /** Tracks if this transaction has already committed. You can only commit once. */ + private var committed = false + + /** Tracks the file updates (add, remove, overwrite, rewrite) made to this table. */ + private val fileUpdates = new ArrayBuffer[TransactionHelper]() + + /** Tracks if this transaction updates only the differences between a prev and new metadata. 
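+   * Set by [[updateTableMetadata]]; for WRITE_TABLE, [[commit]] requires either file updates or
+   * a metadata update to have been registered.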
*/ + private var isMetadataUpdate = false + + ///////////////// + // Public APIs // + ///////////////// + + def getAppendOnlyHelper(): AppendOnlyHelper = { + val ret = new AppendOnlyHelper(txn.newAppend()) + fileUpdates += ret + ret + } + + def getRemoveOnlyHelper(): RemoveOnlyHelper = { + val ret = new RemoveOnlyHelper(txn.newDelete()) + fileUpdates += ret + ret + } + + def getOverwriteHelper(): OverwriteHelper = { + val ret = new OverwriteHelper(txn.newOverwrite()) + fileUpdates += ret + ret + } + + def getRewriteHelper(): RewriteHelper = { + val ret = new RewriteHelper(txn.newRewrite()) + fileUpdates += ret + ret + } + + /** + * Handles the following update scenarios + * - partition update -> throws + * - schema update -> sets the full new schema + * - properties update -> applies only the new properties + */ + def updateTableMetadata(newMetadata: Metadata, prevMetadata: Metadata): Unit = { + assert(!isMetadataUpdate, "updateTableMetadata already called") + isMetadataUpdate = true + + // Throws if partition evolution detected + if (newMetadata.partitionColumns != prevMetadata.partitionColumns) { + throw new IllegalStateException("Delta does not support partition evolution") + } + + if (newMetadata.schema != prevMetadata.schema) { + val differenceStr = SchemaUtils.reportDifferences(prevMetadata.schema, newMetadata.schema) + logInfo(s"Detected Delta schema update for table with name=${newMetadata.name}, " + + s"id=${newMetadata.id}:\n$differenceStr") + + txn.setSchema(icebergSchema).commit() + + recordDeltaEvent( + postCommitSnapshot.deltaLog, + "delta.iceberg.conversion.schemaChange", + data = Map( + "version" -> postCommitSnapshot.version, + "deltaSchemaDiff" -> differenceStr, + "icebergSchema" -> icebergSchema.toString.replace('\n', ';') + ) + ) + } + + val (propertyDeletes, propertyAdditions) = + detectPropertiesChange(newMetadata.configuration, prevMetadata.configuration) + + if (propertyDeletes.nonEmpty || propertyAdditions.nonEmpty) { + val updater = txn.updateProperties() + propertyDeletes.foreach(updater.remove) + propertyAdditions.foreach(kv => updater.set(kv._1, kv._2)) + updater.commit() + + recordDeltaEvent( + postCommitSnapshot.deltaLog, + "delta.iceberg.conversion.propertyChange", + data = Map("version" -> postCommitSnapshot.version) ++ + (if (propertyDeletes.nonEmpty) Map("deletes" -> propertyDeletes.toSeq) else Map.empty) ++ + (if (propertyAdditions.nonEmpty) Map("adds" -> propertyAdditions) else Map.empty) + ) + } + } + + def commit(): Unit = { + assert(!committed, "Cannot commit. Transaction already committed.") + + // At least one file or metadata updates is required when writing to an existing table. If + // creating or replacing a table, we can create an empty table with just the table metadata + // (schema, properties, etc.) + if (tableOp == WRITE_TABLE) { + assert(fileUpdates.nonEmpty || isMetadataUpdate, "Cannot commit WRITE. Transaction is empty.") + } + assert(fileUpdates.forall(_.hasCommitted), "Cannot commit. You have uncommitted changes.") + + val nameMapping = NameMappingParser.toJson(MappingUtil.create(icebergSchema)) + + // hard code dummy delta version as -1 for CREATE_TABLE and REPLACE_TABLE, which will be later + // set to correct version in setSchemaTxn. -1 is chosen because it is less than the smallest + // possible legitimate Delta version which is 0. 
+ val deltaVersion = if (tableOp == CREATE_TABLE || tableOp == REPLACE_TABLE) -1 + else postCommitSnapshot.version + + txn.updateProperties() + .set(IcebergConverter.DELTA_VERSION_PROPERTY, deltaVersion.toString) + .set(IcebergConverter.DELTA_TIMESTAMP_PROPERTY, postCommitSnapshot.timestamp.toString) + .set(IcebergConverter.ICEBERG_NAME_MAPPING_PROPERTY, nameMapping) + .commit() + + try { + txn.commitTransaction() + if (tableOp == CREATE_TABLE || tableOp == REPLACE_TABLE) { + // Iceberg CREATE_TABLE and REPLACE_TABLE reassigns the field id in schema, which + // is overwritten by setting Delta schema with Delta generated field id to ensure + // consistency between field id in Iceberg schema after conversion and field id in + // parquet files written by Delta. + val setSchemaTxn = createIcebergTxn(Some(WRITE_TABLE)) + setSchemaTxn.setSchema(icebergSchema).commit() + setSchemaTxn.updateProperties() + .set(IcebergConverter.DELTA_VERSION_PROPERTY, postCommitSnapshot.version.toString) + .commit() + setSchemaTxn.commitTransaction() + } + recordIcebergCommit() + } catch { + case NonFatal(e) => + recordIcebergCommit(Some(e)) + throw e + } + + committed = true + } + + /////////////////////// + // Protected Methods // + /////////////////////// + + protected def createIcebergTxn(tableOpOpt: Option[IcebergTableOp] = None): + IcebergTransaction = { + val hiveCatalog = IcebergTransactionUtils.createHiveCatalog(conf) + val icebergTableId = IcebergTransactionUtils + .convertSparkTableIdentifierToIcebergHive(catalogTable.identifier) + + val tableExists = hiveCatalog.tableExists(icebergTableId) + + def tableBuilder = { + val properties = getIcebergPropertiesFromDeltaProperties( + postCommitSnapshot.metadata.configuration + ) + + hiveCatalog + .buildTable(icebergTableId, icebergSchema) + .withPartitionSpec(partitionSpec) + .withProperties(properties.asJava) + } + + tableOpOpt.getOrElse(tableOp) match { + case WRITE_TABLE => + if (tableExists) { + recordFrameProfile("IcebergConversionTransaction", "loadTable") { + hiveCatalog.loadTable(icebergTableId).newTransaction() + } + } else { + throw new IllegalStateException(s"Cannot write to table $tablePath. Table doesn't exist.") + } + case CREATE_TABLE => + if (tableExists) { + throw new IllegalStateException(s"Cannot create table $tablePath. Table already exists.") + } else { + recordFrameProfile("IcebergConversionTransaction", "createTable") { + tableBuilder.createTransaction() + } + } + case REPLACE_TABLE => + if (tableExists) { + recordFrameProfile("IcebergConversionTransaction", "replaceTable") { + tableBuilder.replaceTransaction() + } + } else { + throw new IllegalStateException(s"Cannot replace table $tablePath. 
Table doesn't exist.") + } + } + } + + //////////////////// + // Helper Methods // + //////////////////// + + private def recordIcebergCommit(errorOpt: Option[Throwable] = None): Unit = { + val icebergTxnTypes = + if (fileUpdates.nonEmpty) Map("icebergTxnTypes" -> fileUpdates.map(_.opType)) else Map.empty + + val errorData = errorOpt.map { e => + Map( + "exception" -> ExceptionUtils.getMessage(e), + "stackTrace" -> ExceptionUtils.getStackTrace(e) + ) + }.getOrElse(Map.empty) + + + recordDeltaEvent( + postCommitSnapshot.deltaLog, + s"delta.iceberg.conversion.commit.${if (errorOpt.isEmpty) "success" else "error"}", + data = Map( + "version" -> postCommitSnapshot.version, + "timestamp" -> postCommitSnapshot.timestamp, + "tableOp" -> tableOp.getClass.getSimpleName.stripSuffix("$"), + "prevConvertedDeltaVersion" -> lastConvertedDeltaVersion + ) ++ icebergTxnTypes ++ errorData + ) + } + +} diff --git a/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergConverter.scala b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergConverter.scala new file mode 100644 index 00000000000..f5ab98a2f32 --- /dev/null +++ b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergConverter.scala @@ -0,0 +1,417 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.icebergShaded + +import java.util.concurrent.atomic.AtomicReference +import javax.annotation.concurrent.GuardedBy + +import scala.collection.JavaConverters._ +import scala.util.control.Breaks._ +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.{DeltaFileNotFoundException, DeltaFileProviderUtils, OptimisticTransactionImpl, Snapshot, UniversalFormat, UniversalFormatConverter} +import org.apache.spark.sql.delta.actions.{Action, AddFile, CommitInfo, RemoveFile} +import org.apache.spark.sql.delta.hooks.IcebergConverterHook +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.commons.lang3.exception.ExceptionUtils +import org.apache.hadoop.fs.Path +import shadedForDelta.org.apache.iceberg.hive.{HiveCatalog, HiveTableOperations} + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.catalog.CatalogTable + +object IcebergConverter { + + + /** + * Property to be set in translated Iceberg metadata files. + * Indicates the delta commit version # that it corresponds to. + */ + val DELTA_VERSION_PROPERTY = "delta-version" + + /** + * Property to be set in translated Iceberg metadata files. + * Indicates the timestamp (milliseconds) of the delta commit that it corresponds to. + */ + val DELTA_TIMESTAMP_PROPERTY = "delta-timestamp" + + val ICEBERG_NAME_MAPPING_PROPERTY = "schema.name-mapping.default" +} + +/** + * This class manages the transformation of delta snapshots into their Iceberg equivalent. 
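+ *
+ * A minimal usage sketch (the snapshot, txn and catalogTable values are illustrative):
+ * {{{
+ *   val converter = new IcebergConverter(spark)
+ *   converter.convertSnapshot(snapshot, catalogTable)       // blocking conversion
+ *   converter.enqueueSnapshotForConversion(snapshot, txn)   // async conversion from a txn
+ * }}}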
+ */ +class IcebergConverter(spark: SparkSession) + extends UniversalFormatConverter(spark) + with DeltaLogging { + + // Save an atomic reference of the snapshot being converted, and the txn that triggered + // resulted in the specified snapshot + protected val currentConversion = + new AtomicReference[(Snapshot, OptimisticTransactionImpl)]() + protected val standbyConversion = + new AtomicReference[(Snapshot, OptimisticTransactionImpl)]() + + // Whether our async converter thread is active. We may already have an alive thread that is + // about to shutdown, but in such cases this value should return false. + @GuardedBy("asyncThreadLock") + private var asyncConverterThreadActive: Boolean = false + private val asyncThreadLock = new Object + + /** + * Enqueue the specified snapshot to be converted to Iceberg. This will start an async + * job to run the conversion, unless there already is an async conversion running for + * this table. In that case, it will queue up the provided snapshot to be run after + * the existing job completes. + * Note that if there is another snapshot already queued, the previous snapshot will get + * removed from the wait queue. Only one snapshot is queued at any point of time. + * + */ + override def enqueueSnapshotForConversion( + snapshotToConvert: Snapshot, + txn: OptimisticTransactionImpl): Unit = { + if (!UniversalFormat.icebergEnabled(snapshotToConvert.metadata)) { + return + } + val log = snapshotToConvert.deltaLog + // Replace any previously queued snapshot + val previouslyQueued = standbyConversion.getAndSet((snapshotToConvert, txn)) + asyncThreadLock.synchronized { + if (!asyncConverterThreadActive) { + val threadName = IcebergConverterHook.ASYNC_ICEBERG_CONVERTER_THREAD_NAME + + s" [id=${snapshotToConvert.metadata.id}]" + val asyncConverterThread: Thread = new Thread(threadName) { + setDaemon(true) + + override def run(): Unit = + try { + var snapshotAndTxn = getNextSnapshot + while (snapshotAndTxn != null) { + val snapshotVal = snapshotAndTxn._1 + val prevTxn = snapshotAndTxn._2 + try { + logInfo(s"Converting Delta table [path=${log.logPath}, " + + s"tableId=${log.tableId}, version=${snapshotVal.version}] into Iceberg") + convertSnapshot(snapshotVal, prevTxn) + } catch { + case NonFatal(e) => + logWarning(s"Error when writing Iceberg metadata asynchronously", e) + recordDeltaEvent( + log, + "delta.iceberg.conversion.async.error", + data = Map( + "exception" -> ExceptionUtils.getMessage(e), + "stackTrace" -> ExceptionUtils.getStackTrace(e) + ) + ) + } + currentConversion.set(null) + // Pick next snapshot to convert if there's a new one + snapshotAndTxn = getNextSnapshot + } + } finally { + // shuttingdown thread + asyncThreadLock.synchronized { + asyncConverterThreadActive = false + } + } + + // Get a snapshot to convert from the icebergQueue. Sets the queue to null after. + private def getNextSnapshot: (Snapshot, OptimisticTransactionImpl) = + asyncThreadLock.synchronized { + val potentialSnapshotAndTxn = standbyConversion.get() + currentConversion.set(potentialSnapshotAndTxn) + standbyConversion.compareAndSet(potentialSnapshotAndTxn, null) + if (potentialSnapshotAndTxn == null) { + asyncConverterThreadActive = false + } + potentialSnapshotAndTxn + } + } + asyncConverterThread.start() + asyncConverterThreadActive = true + } + } + + // If there already was a snapshot waiting to be converted, log that snapshot info. 
+ if (previouslyQueued != null) { +// previouslyQueued._1.uncache() + recordDeltaEvent( + snapshotToConvert.deltaLog, + "delta.iceberg.conversion.async.backlog", + data = Map( + "newVersion" -> snapshotToConvert.version, + "replacedVersion" -> previouslyQueued._1.version) + ) + } + } + + /** + * Convert the specified snapshot into Iceberg for the given catalogTable + * @param snapshotToConvert the snapshot that needs to be converted to Iceberg + * @param catalogTable the catalogTable this conversion targets. + * @return Converted Delta version and commit timestamp + */ + override def convertSnapshot( + snapshotToConvert: Snapshot, catalogTable: CatalogTable): Option[(Long, Long)] = { + if (!UniversalFormat.icebergEnabled(snapshotToConvert.metadata)) { + return None + } + convertSnapshot(snapshotToConvert, None, catalogTable) + } + + /** + * Convert the specified snapshot into Iceberg when performing an OptimisticTransaction + * on a delta table. + * @param snapshotToConvert the snapshot that needs to be converted to Iceberg + * @param txn the transaction that triggers the conversion. It must + * contain the catalogTable this conversion targets. + * @return Converted Delta version and commit timestamp + */ + override def convertSnapshot( + snapshotToConvert: Snapshot, txn: OptimisticTransactionImpl): Option[(Long, Long)] = { + if (!UniversalFormat.icebergEnabled(snapshotToConvert.metadata)) { + return None + } + txn.catalogTable match { + case Some(table) => convertSnapshot(snapshotToConvert, Some(txn), table) + case _ => + logWarning(s"CatalogTable for table ${snapshotToConvert.deltaLog.tableId} " + + s"is empty in txn. Skip iceberg conversion.") + recordDeltaEvent( + snapshotToConvert.deltaLog, + "delta.iceberg.conversion.skipped.emptyCatalogTable", + data = Map( + "version" -> snapshotToConvert.version + ) + ) + None + } + } + + /** + * Convert the specified snapshot into Iceberg. NOTE: This operation is blocking. Call + * enqueueSnapshotForConversion to run the operation asynchronously. + * @param snapshotToConvert the snapshot that needs to be converted to Iceberg + * @param txnOpt the OptimisticTransaction that created snapshotToConvert. + * Used as a hint to avoid recomputing old metadata. + * @param catalogTable the catalogTable this conversion targets + * @return Converted Delta version and commit timestamp + */ + private def convertSnapshot( + snapshotToConvert: Snapshot, + txnOpt: Option[OptimisticTransactionImpl], + catalogTable: CatalogTable): Option[(Long, Long)] = + recordFrameProfile("Delta", "IcebergConverter.convertSnapshot") { + val log = snapshotToConvert.deltaLog + val lastDeltaVersionConverted: Option[Long] = + loadLastDeltaVersionConverted(snapshotToConvert, catalogTable) + val maxCommitsToConvert = + spark.sessionState.conf.getConf(DeltaSQLConf.ICEBERG_MAX_COMMITS_TO_CONVERT) + + // Nth to convert + if (lastDeltaVersionConverted.exists(_ == snapshotToConvert.version)) { + return None + } + + // Get the most recently converted delta snapshot, if applicable + val prevConvertedSnapshotOpt = (lastDeltaVersionConverted, txnOpt) match { + case (Some(version), Some(txn)) if version == txn.snapshot.version => + Some(txn.snapshot) + // Check how long it has been since we last converted to Iceberg. If outside the threshold, + // fall back to state reconstruction to get the actions, to protect driver from OOMing. 
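+        // For example (illustrative numbers): with ICEBERG_MAX_COMMITS_TO_CONVERT = 100,
+        // converting version 150 when version 120 was last converted replays the 30 intervening
+        // commit files, while a last converted version of 20 falls through to the None case
+        // below and triggers a full state reconstruction.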
+ case (Some(version), _) if snapshotToConvert.version - version <= maxCommitsToConvert => + try { + // TODO: We can optimize this by providing a checkpointHint to getSnapshotAt. Check if + // txn.snapshot.version < version. If true, use txn.snapshot's checkpoint as a hint. + Some(log.getSnapshotAt(version)) + } catch { + // If we can't load the file since the last time Iceberg was converted, it's likely that + // the commit file expired. Treat this like a new Iceberg table conversion. + case _: DeltaFileNotFoundException => None + } + case (_, _) => None + } + + val tableOp = (lastDeltaVersionConverted, prevConvertedSnapshotOpt) match { + case (Some(_), Some(_)) => WRITE_TABLE + case (Some(_), None) => REPLACE_TABLE + case (None, None) => CREATE_TABLE + } + + UniversalFormat.enforceSupportInCatalog(catalogTable, snapshotToConvert.metadata) match { + case Some(updatedTable) => spark.sessionState.catalog.alterTable(updatedTable) + case _ => + } + + val icebergTxn = new IcebergConversionTransaction( + catalogTable, log.newDeltaHadoopConf(), snapshotToConvert, tableOp, lastDeltaVersionConverted) + + // Write out the actions taken since the last conversion (or since table creation). + // This is done in batches, with each batch corresponding either to one delta file, + // or to the specified batch size. + val actionBatchSize = + spark.sessionState.conf.getConf(DeltaSQLConf.ICEBERG_MAX_ACTIONS_TO_CONVERT) + prevConvertedSnapshotOpt match { + case Some(prevSnapshot) => + // Read the actions directly from the delta json files. + // TODO: Run this as a spark job on executors + val deltaFiles = DeltaFileProviderUtils.getDeltaFilesInVersionRange( + spark, log, prevSnapshot.version + 1, snapshotToConvert.version) + + recordDeltaEvent( + snapshotToConvert.deltaLog, + "delta.iceberg.conversion.deltaCommitRange", + data = Map( + "fromVersion" -> (prevSnapshot.version + 1), + "toVersion" -> snapshotToConvert.version, + "numDeltaFiles" -> deltaFiles.length + ) + ) + + val actionsToConvert = DeltaFileProviderUtils.parallelReadAndParseDeltaFilesAsIterator( + log, spark, deltaFiles) + actionsToConvert.foreach { actionsIter => + try { + actionsIter.grouped(actionBatchSize).foreach { actionStrs => + runIcebergConversionForActions( + icebergTxn, + actionStrs.map(Action.fromJson), + log.dataPath, + prevConvertedSnapshotOpt) + } + } finally { + actionsIter.close() + } + } + // If the metadata hasn't changed, this will no-op. + icebergTxn.updateTableMetadata(snapshotToConvert.metadata, prevSnapshot.metadata) + + // If we don't have a snapshot of the last converted version, get all the table addFiles + // (via state reconstruction). 
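+        // This happens for a brand new conversion (CREATE_TABLE) and also when the previously
+        // converted version could not be reloaded or is too far behind (REPLACE_TABLE).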
+ case None => + val actionsToConvert = snapshotToConvert.allFiles.toLocalIterator().asScala + + recordDeltaEvent( + snapshotToConvert.deltaLog, + "delta.iceberg.conversion.batch", + data = Map( + "version" -> snapshotToConvert.version, + "numDeltaFiles" -> snapshotToConvert.numOfFiles + ) + ) + + actionsToConvert.grouped(actionBatchSize) + .foreach { actions => + runIcebergConversionForActions(icebergTxn, actions, log.dataPath, None) + } + } + icebergTxn.commit() + Some(snapshotToConvert.version, snapshotToConvert.timestamp) + } + + override def loadLastDeltaVersionConverted( + snapshot: Snapshot, catalogTable: CatalogTable): Option[Long] = + recordFrameProfile("Delta", "IcebergConverter.loadLastDeltaVersionConverted") { + catalogTable.properties.get(IcebergConverter.DELTA_VERSION_PROPERTY).map(_.toLong) + } + + /** + * Build an iceberg TransactionHelper from the provided txn, and commit the set of changes + * specified by the actionsToCommit. + */ + private[delta] def runIcebergConversionForActions( + icebergTxn: IcebergConversionTransaction, + actionsToCommit: Seq[Action], + dataPath: Path, + prevSnapshotOpt: Option[Snapshot]): Unit = { + prevSnapshotOpt match { + case None => + // If we don't have a previous snapshot, that implies that the table is either being + // created or replaced. We can assume that the actions have already been deduped, and + // only addFiles are present. + val appendHelper = icebergTxn.getAppendOnlyHelper() + actionsToCommit.foreach { + case a: AddFile => appendHelper.add(a) + case _ => throw new IllegalStateException(s"Must provide only AddFiles when creating " + + s"or replacing an Iceberg Table $dataPath.") + } + appendHelper.commit() + + case Some(_) => + // We have to go through the seq of actions twice, once to figure out the TransactionHelper + // to use, and then again to commit the actions. This is not too expensive, since the max # + // of actions is <= min(max # actions in delta json, ICEBERG_MAX_ACTIONS_TO_CONVERT) + var hasAdds = false + var hasRemoves = false + var hasDataChange = false + var hasCommitInfo = false + breakable { + for (action <- actionsToCommit) { + action match { + case a: AddFile => + hasAdds = true + if (a.dataChange) hasDataChange = true + case r: RemoveFile => + hasRemoves = true + if (r.dataChange) hasDataChange = true + case _: CommitInfo => hasCommitInfo = true + case _ => // Do nothing + } + if (hasAdds && hasRemoves && hasDataChange && hasCommitInfo) break // Short-circuit + } + } + + // We want to know whether all actions in the commit are contained in this `actionsToCommit` + // group. If yes, then we can safely determine whether the operation is a rewrite, delete, + // append, overwrite, etc. If not, then we can't make any assumptions since we have + // incomplete information, and we default to a rewrite. 
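+        // (Concretely, that conservative default is the overwrite branch below, which can express
+        // any mix of adds and removes without assuming the commit's intent.)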
+ val allDeltaActionsCaptured = hasCommitInfo && actionsToCommit.size < + spark.sessionState.conf.getConf(DeltaSQLConf.ICEBERG_MAX_ACTIONS_TO_CONVERT) + + val addsAndRemoves = actionsToCommit + .map(_.wrap) + .filter(sa => sa.remove != null || sa.add != null) + + if (hasAdds && hasRemoves && !hasDataChange && allDeltaActionsCaptured) { + val rewriteHelper = icebergTxn.getRewriteHelper() + val split = addsAndRemoves.partition(_.add == null) + rewriteHelper.rewrite(removes = split._1.map(_.remove), adds = split._2.map(_.add)) + rewriteHelper.commit() + } else if ((hasAdds && hasRemoves) || !allDeltaActionsCaptured) { + val overwriteHelper = icebergTxn.getOverwriteHelper() + addsAndRemoves.foreach { action => + if (action.add != null) { + overwriteHelper.add(action.add) + } else { + overwriteHelper.remove(action.remove) + } + } + overwriteHelper.commit() + } else if (hasAdds) { + val appendHelper = icebergTxn.getAppendOnlyHelper() + addsAndRemoves.foreach(action => appendHelper.add(action.add)) + appendHelper.commit() + } else if (hasRemoves) { + val removeHelper = icebergTxn.getRemoveOnlyHelper() + addsAndRemoves.foreach(action => removeHelper.remove(action.remove)) + removeHelper.commit() + } + } + } +} diff --git a/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergSchemaUtils.scala b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergSchemaUtils.scala new file mode 100644 index 00000000000..4e61231c64d --- /dev/null +++ b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergSchemaUtils.scala @@ -0,0 +1,145 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.icebergShaded + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.DeltaColumnMapping +import org.apache.spark.sql.delta.metering.DeltaLogging +import shadedForDelta.org.apache.iceberg.{Schema => IcebergSchema} +import shadedForDelta.org.apache.iceberg.types.{Type => IcebergType, Types => IcebergTypes} + +import org.apache.spark.sql.types._ + +object IcebergSchemaUtils extends DeltaLogging { + + ///////////////// + // Public APIs // + ///////////////// + + // scalastyle:off line.size.limit + /** + * Delta types are defined here: https://github.com/delta-io/delta/blob/master/PROTOCOL.md#schema-serialization-format + * + * Iceberg types are defined here: https://iceberg.apache.org/spec/#schemas-and-data-types + */ + // scalastyle:on line.size.limit + def convertDeltaSchemaToIcebergSchema(deltaSchema: StructType): IcebergSchema = { + val icebergStruct = convertStruct(deltaSchema) + new IcebergSchema(icebergStruct.fields()) + } + + private[delta] def getNestedFieldId(field: Option[StructField], path: Seq[String]): Int = { + field.get.metadata + .getMetadata(DeltaColumnMapping.COLUMN_MAPPING_METADATA_NESTED_IDS_KEY) + .getLong(path.mkString(".")) + .toInt + } + + //////////////////// + // Helper Methods // + //////////////////// + + /** Visible for testing */ + private[delta] def convertStruct(deltaSchema: StructType): IcebergTypes.StructType = { + /** + * Recursively (i.e. for all nested elements) transforms the delta DataType `elem` into its + * corresponding Iceberg type. + * + * - StructType -> IcebergTypes.StructType + * - ArrayType -> IcebergTypes.ListType + * - MapType -> IcebergTypes.MapType + * - primitive -> IcebergType.PrimitiveType + */ + def transform[E <: DataType](elem: E, field: Option[StructField], name: Seq[String]) + : IcebergType = elem match { + case StructType(fields) => + IcebergTypes.StructType.of(fields.map { f => + if (!DeltaColumnMapping.hasColumnId(f)) { + throw new UnsupportedOperationException("UniForm requires Column Mapping") + } + + IcebergTypes.NestedField.of( + DeltaColumnMapping.getColumnId(f), + f.nullable, + f.name, + transform(f.dataType, Some(f), Seq(DeltaColumnMapping.getPhysicalName(f))), + f.getComment().orNull + ) + }.toList.asJava) + + case ArrayType(elementType, containsNull) => + val currName = name :+ DeltaColumnMapping.PARQUET_LIST_ELEMENT_FIELD_NAME + val id = getNestedFieldId(field, currName) + if (containsNull) { + IcebergTypes.ListType.ofOptional(id, transform(elementType, field, currName)) + } else { + IcebergTypes.ListType.ofRequired(id, transform(elementType, field, currName)) + } + + case MapType(keyType, valueType, valueContainsNull) => + val currKeyName = name :+ DeltaColumnMapping.PARQUET_MAP_KEY_FIELD_NAME + val currValName = name :+ DeltaColumnMapping.PARQUET_MAP_VALUE_FIELD_NAME + val keyId = getNestedFieldId(field, currKeyName) + val valId = getNestedFieldId(field, currValName) + if (valueContainsNull) { + IcebergTypes.MapType.ofOptional( + keyId, + valId, + transform(keyType, field, currKeyName), + transform(valueType, field, currValName) + ) + } else { + IcebergTypes.MapType.ofRequired( + keyId, + valId, + transform(keyType, field, currKeyName), + transform(valueType, field, currValName) + ) + } + + case atomicType: AtomicType => convertAtomic(atomicType) + + case other => + throw new UnsupportedOperationException(s"Cannot convert Delta type $other to Iceberg") + } + + transform(deltaSchema, None, Seq.empty).asStructType() + } + + /** + * Converts delta atomic 
into an iceberg primitive. + * + * Visible for testing. + * + * https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types + */ + private[delta] def convertAtomic[E <: DataType](elem: E): IcebergType.PrimitiveType = elem match { + case StringType => IcebergTypes.StringType.get() + case LongType => IcebergTypes.LongType.get() + case IntegerType | ShortType | ByteType => IcebergTypes.IntegerType.get() + case FloatType => IcebergTypes.FloatType.get() + case DoubleType => IcebergTypes.DoubleType.get() + case d: DecimalType => IcebergTypes.DecimalType.of(d.precision, d.scale) + case BooleanType => IcebergTypes.BooleanType.get() + case BinaryType => IcebergTypes.BinaryType.get() + case DateType => IcebergTypes.DateType.get() + case TimestampType => IcebergTypes.TimestampType.withZone() + case TimestampNTZType => IcebergTypes.TimestampType.withoutZone() + case _ => throw new UnsupportedOperationException(s"Could not convert atomic type $elem") + } +} diff --git a/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergStatsConverter.scala b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergStatsConverter.scala new file mode 100644 index 00000000000..a43b5039c85 --- /dev/null +++ b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergStatsConverter.scala @@ -0,0 +1,211 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.icebergShaded + +import java.lang.{Long => JLong} +import java.nio.ByteBuffer + +import org.apache.spark.sql.delta.DeltaColumnMapping +import org.apache.spark.sql.delta.stats.{DeltaStatistics, SkippingEligibleDataType} +import shadedForDelta.org.apache.iceberg.types.Conversions + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +/** + * Converts Delta stats to Iceberg stats given an Internal Row representing Delta stats and the + * row's schema. + * + * Iceberg stores stats as a map from column ID to the statistic. For example, lower/upper bound + * statistics are represented as a map from column ID to byte buffer where the byte buffer stores + * any type. + * + * For example, given the following Delta stats schema with column IDs: + * | -- id(0): INT + * | -- person(1): STRUCT + * | name(2): STRUCT + * | -- first(3): STRING + * | -- last(4): STRING + * | height(5): LONG + * + * Iceberg's upper bound statistic map will be: + * {0 -> MAX_ID, 3 -> MAX_FIRST, 4 -> MAX_LAST, 5 -> MAX_HEIGHT} + * + * Iceberg requires the "record count" stat while the "upper bounds", "lower bounds", and + * "null value counts" are optional. See iceberg/DataFile.java. + * Iceberg's "record count" metric is set in `convertFileAction` before the stats conversion. + * If additional metrics are attached to the Iceberg data file, the "record count" metric must be + * left non-null. 
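+ *
+ * A usage sketch mirroring the conversion path in IcebergTransactionUtils, where `statsRow` and
+ * `statsSchema` come from parsing an AddFile's stats JSON:
+ * {{{
+ *   val converter = IcebergStatsConverter(statsRow, statsSchema)
+ *   val metrics = new Metrics(
+ *     converter.numRecordsStat,                               // rowCount (required)
+ *     null,                                                   // columnSizes
+ *     null,                                                   // valueCounts
+ *     converter.nullValueCountsStat.getOrElse(null).asJava,   // nullValueCounts
+ *     null,                                                   // nanValueCounts
+ *     converter.lowerBoundsStat.getOrElse(null).asJava,       // lowerBounds
+ *     converter.upperBoundsStat.getOrElse(null).asJava)       // upperBounds
+ * }}}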
+ */ +case class IcebergStatsConverter(statsRow: InternalRow, statsSchema: StructType) { + + val numRecordsStat: JLong = statsSchema.getFieldIndex(DeltaStatistics.NUM_RECORDS) match { + case Some(fieldIndex) => new JLong(statsRow.getLong(fieldIndex)) + case None => throw new IllegalArgumentException("Delta is missing the 'num records' stat. " + + "Iceberg requires this stat when attaching statistics to the output data file.") + } + + val lowerBoundsStat: Option[Map[Integer, ByteBuffer]] = + getByteBufferBackedColStats(DeltaStatistics.MIN) + + val upperBoundsStat: Option[Map[Integer, ByteBuffer]] = + getByteBufferBackedColStats(DeltaStatistics.MAX) + + val nullValueCountsStat: Option[Map[Integer, JLong]] = + statsSchema.getFieldIndex(DeltaStatistics.NULL_COUNT) match { + case Some(nullCountFieldIdx) => + val nullCountStatSchema = + statsSchema.fields(nullCountFieldIdx).dataType.asInstanceOf[StructType] + Some( + generateIcebergLongMetricMap( + statsRow.getStruct(nullCountFieldIdx, nullCountStatSchema.fields.length), + nullCountStatSchema + ) + ) + case None => None + } + + /** + * Generates Iceberg's metric representation by recursively flattening the Delta stat struct + * (represented as an internal row) and converts the column's physical name to its ID. + * + * Ignores null Delta stats. + * + * @param stats An internal row holding the `ByteBuffer`-based Delta column stats + * (i.e. lower bound). + * @param statsSchema The schema of the `stats` internal row. + * @return Iceberg's ByteBuffer-backed metric representation. + */ + private def generateIcebergByteBufferMetricMap( + stats: InternalRow, + statsSchema: StructType): Map[Integer, ByteBuffer] = { + statsSchema.fields.zipWithIndex.flatMap { case (field, idx) => + field.dataType match { + // Iceberg statistics cannot be null. + case _ if stats.isNullAt(idx) => Map[Integer, ByteBuffer]().empty + // If the stats schema contains a struct type, there is a corresponding struct in the data + // schema. The struct's per-field stats are also stored in the Delta stats struct. See the + // `StatisticsCollection` trait comment for more. + case st: StructType => + generateIcebergByteBufferMetricMap(stats.getStruct(idx, st.fields.length), st) + // Ignore the Delta statistic if the conversion doesn't support the given data type or the + // column ID for this field is missing. + case dt if !DeltaColumnMapping.hasColumnId(field) || + !IcebergStatsConverter.isMinMaxStatTypeSupported(dt) => Map[Integer, ByteBuffer]().empty + case b: ByteType => + // Iceberg stores bytes using integers. + val statVal = stats.getByte(idx).toInt + Map[Integer, ByteBuffer](Integer.valueOf(DeltaColumnMapping.getColumnId(field)) -> + Conversions.toByteBuffer(IcebergSchemaUtils.convertAtomic(b), statVal)) + case s: ShortType => + // Iceberg stores shorts using integers. + val statVal = stats.getShort(idx).toInt + Map[Integer, ByteBuffer](Integer.valueOf(DeltaColumnMapping.getColumnId(field)) -> + Conversions.toByteBuffer(IcebergSchemaUtils.convertAtomic(s), statVal)) + case dt if IcebergStatsConverter.isMinMaxStatTypeSupported(dt) => + val statVal = stats.get(idx, dt) + + // Iceberg's `Conversions.toByteBuffer` method expects the Java object representation + // for string and decimal types. + // Other types supported by Delta's min/max stat such as int, long, boolean, etc., do not + // require a different representation. 
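+          // e.g. UTF8String -> java.lang.String and Decimal -> java.math.BigDecimal, as mapped below.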
+ val compatibleStatsVal = statVal match { + case u: UTF8String => u.toString + case d: Decimal => d.toJavaBigDecimal + case _ => statVal + } + Map[Integer, ByteBuffer](Integer.valueOf(DeltaColumnMapping.getColumnId(field)) -> + Conversions.toByteBuffer(IcebergSchemaUtils.convertAtomic(dt), compatibleStatsVal)) + } + }.toMap + } + + /** + * Generates Iceberg's metric representation by recursively flattening the Delta stat struct + * (represented as an internal row) and converts the column's physical name to its ID. + * + * @param stats An internal row holding the long-backed Delta column stats (i.e. null counts). + * @param statsSchema The schema of the `stats` internal row. + * @return a map in Iceberg's metric representation. + */ + private def generateIcebergLongMetricMap( + stats: InternalRow, + statsSchema: StructType): Map[Integer, JLong] = { + statsSchema.fields.zipWithIndex.flatMap { case (field, idx) => + field.dataType match { + // If the stats schema contains a struct type, there is a corresponding struct in the data + // schema. The struct's per-field stats are also stored in the Delta stats struct. See the + // `StatisticsCollection` trait comment for more. + case st: StructType => + generateIcebergLongMetricMap(stats.getStruct(idx, st.fields.length), st) + case lt: LongType => + if (DeltaColumnMapping.hasColumnId(field)) { + Map[Integer, JLong](Integer.valueOf(DeltaColumnMapping.getColumnId(field)) -> + new JLong(stats.getLong(idx))) + } else { + Map[Integer, JLong]().empty + } + case _ => throw new UnsupportedOperationException("Expected metric to be a long type.") + } + }.toMap + } + + /** + * @param statName The name of the Delta stat that is being converted. Must be one of the field + * names in the `DeltaStatistics` object. + * @return An option holding Iceberg's statistic representation. Returns `None` if the output + * would otherwise be empty. + */ + private def getByteBufferBackedColStats(statName: String): Option[Map[Integer, ByteBuffer]] = { + statsSchema.getFieldIndex(statName) match { + case Some(statFieldIdx) => + val colStatSchema = statsSchema.fields(statFieldIdx).dataType.asInstanceOf[StructType] + val icebergMetricsMap = generateIcebergByteBufferMetricMap( + statsRow.getStruct(statFieldIdx, colStatSchema.fields.length), + colStatSchema + ) + if (icebergMetricsMap.nonEmpty) { + Some(icebergMetricsMap) + } else { + // The iceberg metrics map may be empty when all Delta stats are null. + None + } + case None => None + } + } +} + +object IcebergStatsConverter { + /** + * Returns true if a min/max statistic of the given Delta data type can be converted into an + * Iceberg metric of equivalent data type. + * + * Currently, nested types and null types are unsupported. 
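+   * For example, StringType and DateType are convertible, while nested types (StructType,
+   * ArrayType, MapType) and, for now, LongType are not.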
+ */ + def isMinMaxStatTypeSupported(dt: DataType): Boolean = { + if (!SkippingEligibleDataType(dt)) return false + + dt match { + case _: StringType | _: IntegerType | _: FloatType | _: DoubleType | + _: DoubleType | _: DecimalType | _: BooleanType | _: DateType | _: TimestampType | + // _: LongType TODO: enable after https://github.com/apache/spark/pull/42083 is released + _: TimestampNTZType | _: ByteType | _: ShortType => true + case _ => false + } + } +} diff --git a/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergTransactionUtils.scala b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergTransactionUtils.scala new file mode 100644 index 00000000000..9d77cccb269 --- /dev/null +++ b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergTransactionUtils.scala @@ -0,0 +1,229 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.icebergShaded + +import scala.collection.JavaConverters._ +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.{DeltaColumnMapping, DeltaConfigs, DeltaLog} +import org.apache.spark.sql.delta.actions.{AddFile, FileAction, RemoveFile} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import shadedForDelta.org.apache.iceberg.{DataFile, DataFiles, FileFormat, PartitionSpec, Schema => IcebergSchema} +import shadedForDelta.org.apache.iceberg.Metrics +// scalastyle:off import.ordering.noEmptyLine +import shadedForDelta.org.apache.iceberg.catalog.{Namespace, TableIdentifier => IcebergTableIdentifier} +// scalastyle:on import.ordering.noEmptyLine +import shadedForDelta.org.apache.iceberg.hive.HiveCatalog + +import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier => SparkTableIdentifier} +import org.apache.spark.sql.types.StructType + +object IcebergTransactionUtils + extends DeltaLogging + { + + ///////////////// + // Public APIs // + ///////////////// + + def createPartitionSpec( + icebergSchema: IcebergSchema, + partitionColumns: Seq[String]): PartitionSpec = { + if (partitionColumns.isEmpty) { + PartitionSpec.unpartitioned + } else { + val builder = PartitionSpec.builderFor(icebergSchema) + for (partitionName <- partitionColumns) { + builder.identity(partitionName) + } + builder.build() + } + } + + def convertDeltaAddFileToIcebergDataFile( + add: AddFile, + tablePath: Path, + partitionSpec: PartitionSpec, + logicalToPhysicalPartitionNames: Map[String, String], + statsSchema: StructType, + statsParser: String => InternalRow, + deltaLog: DeltaLog): DataFile = { + if (add.deletionVector != null) { + throw new UnsupportedOperationException("No support yet for DVs") + } + + var dataFileBuilder = + convertFileAction(add, tablePath, partitionSpec, logicalToPhysicalPartitionNames) + // Attempt to attach the number of records metric regardless of whether the Delta stats + // string is null/empty or 
not because this metric is required by Iceberg. If the number + // of records is both unavailable here and unavailable in the Delta stats, Iceberg will + // throw an exception when building the data file. + .withRecordCount(add.numLogicalRecords.getOrElse(-1L)) + + if (add.stats != null && add.stats.nonEmpty) { + try { + val statsRow = statsParser(add.stats) + + val metricsConverter = IcebergStatsConverter(statsRow, statsSchema) + val metrics = new Metrics( + metricsConverter.numRecordsStat, // rowCount + null, // columnSizes + null, // valueCounts + metricsConverter.nullValueCountsStat.getOrElse(null).asJava, // nullValueCounts + null, // nanValueCounts + metricsConverter.lowerBoundsStat.getOrElse(null).asJava, // lowerBounds + metricsConverter.upperBoundsStat.getOrElse(null).asJava // upperBounds + ) + + dataFileBuilder = dataFileBuilder.withMetrics(metrics) + } catch { + case NonFatal(e) => + logWarning("Failed to convert Delta stats to Iceberg stats. Iceberg conversion will " + + "attempt to proceed without stats.", e) + } + } + + dataFileBuilder.build() + } + + /** + * Note that APIs like [[shadedForDelta.org.apache.iceberg.OverwriteFiles#deleteFile]] take + * a DataFile, and not a DeleteFile as you might have expected. + */ + def convertDeltaRemoveFileToIcebergDataFile( + remove: RemoveFile, + tablePath: Path, + partitionSpec: PartitionSpec, + logicalToPhysicalPartitionNames: Map[String, String]): DataFile = { + convertFileAction(remove, tablePath, partitionSpec, logicalToPhysicalPartitionNames) + .withRecordCount(remove.numLogicalRecords.getOrElse(0L)) + .build() + } + + /** + * We expose this as a public API since APIs like + * [[shadedForDelta.org.apache.iceberg.DeleteFiles#deleteFile]] actually only need to take in + * a file path String, thus we don't need to actually convert a [[RemoveFile]] into a [[DataFile]] + * in this case. + */ + def canonicalizeFilePath(f: FileAction, tablePath: Path): String = { + // Recall that FileActions can have either relative paths or absolute paths (i.e. from shallow- + // cloned files). + // Iceberg spec requires path be fully qualified path, suitable for constructing a Hadoop Path + if (f.pathAsUri.isAbsolute) f.path else new Path(tablePath, f.path).toString + } + + /** Returns the (deletions, additions) iceberg table property changes. */ + def detectPropertiesChange( + newProperties: Map[String, String], + prevPropertiesOpt: Map[String, String]): (Set[String], Map[String, String]) = { + val newPropertiesIcebergOnly = getIcebergPropertiesFromDeltaProperties(newProperties) + val prevPropertiesOptIcebergOnly = getIcebergPropertiesFromDeltaProperties(prevPropertiesOpt) + + if (prevPropertiesOptIcebergOnly == newPropertiesIcebergOnly) return (Set.empty, Map.empty) + + ( + prevPropertiesOptIcebergOnly.keySet.diff(newPropertiesIcebergOnly.keySet), + newPropertiesIcebergOnly + ) + } + + /** + * Only keep properties whose key starts with "delta.universalformat.config.iceberg" + * and strips the prefix from the key; Note the key is already normalized to lower case. 
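+   * For example (assuming the prefix constant includes its trailing dot), a Delta property
+   * "delta.universalformat.config.iceberg.write.format.default" -> "parquet" is surfaced to
+   * Iceberg as "write.format.default" -> "parquet".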
+ */ + def getIcebergPropertiesFromDeltaProperties( + properties: Map[String, String]): Map[String, String] = { + val prefix = DeltaConfigs.DELTA_UNIVERSAL_FORMAT_ICEBERG_CONFIG_PREFIX + properties.filterKeys(_.startsWith(prefix)).map(kv => (kv._1.stripPrefix(prefix), kv._2)).toMap + } + + /** Returns the mapping of logicalPartitionColName -> physicalPartitionColName */ + def getPartitionPhysicalNameMapping(partitionSchema: StructType): Map[String, String] = { + partitionSchema.fields.map(f => f.name -> DeltaColumnMapping.getPhysicalName(f)).toMap + } + + //////////////////// + // Helper Methods // + //////////////////// + + /** Visible for testing. */ + private[delta] def convertFileAction( + f: FileAction, + tablePath: Path, + partitionSpec: PartitionSpec, + logicalToPhysicalPartitionNames: Map[String, String]): DataFiles.Builder = { + val absPath = canonicalizeFilePath(f, tablePath) + + var builder = DataFiles + .builder(partitionSpec) + .withPath(absPath) + .withFileSizeInBytes(f.getFileSize) + .withFormat(FileFormat.PARQUET) + + if (partitionSpec.isPartitioned) { + val ICEBERG_NULL_PARTITION_VALUE = "__HIVE_DEFAULT_PARTITION__" + val partitionPath = partitionSpec + .fields() + .asScala + .map(_.name) + .map { logicalPartCol => + // The Iceberg Schema and PartitionSpec all use the column logical names. + // Delta FileAction::partitionValues, however, uses physical names. + val physicalPartKey = logicalToPhysicalPartitionNames(logicalPartCol) + + // ICEBERG_NULL_PARTITION_VALUE is referred in Iceberg lib to mark NULL partition value + val partValue = Option(f.partitionValues(physicalPartKey)) + .getOrElse(ICEBERG_NULL_PARTITION_VALUE) + s"$logicalPartCol=$partValue" + } + .mkString("/") + + builder = builder.withPartitionPath(partitionPath) + } + + builder + } + + /** + * Create an Iceberg HiveCatalog + * @param conf: Hadoop Configuration + * @return + */ + def createHiveCatalog(conf : Configuration) : HiveCatalog = { + val catalog = new HiveCatalog() + catalog.setConf(conf) + catalog.initialize("spark_catalog", Map.empty[String, String].asJava) + catalog + } + + /** + * Encode Spark table identifier to Iceberg table identifier by putting + * only "database" to the "namespace" in Iceberg table identifier. + * See [[HiveCatalog.isValidateNamespace]] + */ + def convertSparkTableIdentifierToIcebergHive( + identifier: SparkTableIdentifier): IcebergTableIdentifier = { + val namespace = (identifier.database) match { + case Some(database) => Namespace.of(database) + case _ => Namespace.empty() + } + IcebergTableIdentifier.of(namespace, identifier.table) + } +} diff --git a/iceberg/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/iceberg/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister new file mode 100644 index 00000000000..9402b69a2d4 --- /dev/null +++ b/iceberg/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -0,0 +1 @@ +org.apache.iceberg.spark.source.IcebergSource diff --git a/iceberg/src/test/scala/org/apache/spark/sql/delta/CloneIcebergSuite.scala b/iceberg/src/test/scala/org/apache/spark/sql/delta/CloneIcebergSuite.scala new file mode 100644 index 00000000000..db031b38e0d --- /dev/null +++ b/iceberg/src/test/scala/org/apache/spark/sql/delta/CloneIcebergSuite.scala @@ -0,0 +1,448 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.sql.Date + +import scala.collection.JavaConverters._ +import scala.util.Try + +import org.apache.spark.sql.delta.commands.convert.ConvertUtils +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.StatisticsCollection +import org.apache.iceberg.hadoop.HadoopTables + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{stringToDate, toJavaDate} +import org.apache.spark.sql.functions.{col, expr, from_json, lit, struct, substring} +import org.apache.spark.sql.types.{Decimal, DecimalType, LongType, StringType, StructField, StructType, TimestampType} +import org.apache.spark.unsafe.types.UTF8String +// scalastyle:on import.ordering.noEmptyLine + +trait CloneIcebergSuiteBase extends QueryTest + with ConvertIcebergToDeltaUtils { + + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set(DeltaSQLConf.DELTA_CONVERT_ICEBERG_PARTITION_EVOLUTION_ENABLED.key, "true") + } + + protected val cloneTable = "clone" + + // The identifier of clone source, can be either path-based or name-based. + protected def sourceIdentifier: String + protected def supportedModes: Seq[String] = Seq("SHALLOW") + + protected def toDate(date: String): Date = { + toJavaDate(stringToDate(UTF8String.fromString(date)).get) + } + + protected def physicalNamesAreEqual( + sourceSchema: StructType, targetSchema: StructType): Boolean = { + + val sourcePathToPhysicalName = SchemaMergingUtils.explode(sourceSchema).map { + case (path, field) => path -> DeltaColumnMapping.getPhysicalName(field) + }.toMap + + val targetPathToPhysicalName = SchemaMergingUtils.explode(targetSchema).map { + case (path, field) => path -> DeltaColumnMapping.getPhysicalName(field) + }.toMap + + targetPathToPhysicalName.foreach { + case (path, physicalName) => + if (!sourcePathToPhysicalName.contains(path) || + physicalName != sourcePathToPhysicalName(path)) { + return false + } + } + + sourcePathToPhysicalName.size == targetPathToPhysicalName.size + } + + protected def testClone(testName: String)(f: String => Unit): Unit = + supportedModes.foreach { mode => test(s"$testName - $mode") { f(mode) } } + + testClone("table with deleted files") { mode => + withTable(table, cloneTable) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data) + |TBLPROPERTIES ('write.format.default' = 'PARQUET')""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')") + spark.sql(s"DELETE FROM $table WHERE data > 'a'") + checkAnswer(spark.sql(s"SELECT * from $table"), Row(1, "a") :: Nil) + + spark.sql(s"CREATE TABLE $cloneTable $mode CLONE $sourceIdentifier") + + assert(SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability( + DeltaLog.forTable(spark, TableIdentifier(cloneTable)).snapshot.schema, + new StructType().add("id", 
LongType).add("data", StringType))) + + checkAnswer(spark.table(cloneTable), Row(1, "a") :: Nil) + } + } + + protected def runCreateOrReplace(mode: String, source: String): DataFrame = { + Try(spark.sql(s"DELETE FROM $cloneTable")) + spark.sql(s"CREATE OR REPLACE TABLE $cloneTable $mode CLONE $source") + } + + testClone("table with renamed columns") { mode => + withTable(table, cloneTable) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b')") + spark.sql("ALTER TABLE local.db.table RENAME COLUMN id TO id2") + spark.sql(s"INSERT INTO $table VALUES (3, 'c')") + + // Parquet files still have the old schema + assert( + SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability( + spark.read.format("parquet").load(tablePath + "/data").schema, + new StructType().add("id", LongType).add("data", StringType))) + + runCreateOrReplace(mode, sourceIdentifier) + // The converted delta table will get the updated schema + assert( + SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability( + DeltaLog.forTable(spark, TableIdentifier(cloneTable)).snapshot.schema, + new StructType().add("id2", LongType).add("data", StringType))) + + checkAnswer(spark.table(cloneTable), Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil) + } + } + + testClone("create or replace table - same schema") { mode => + withTable(table, cloneTable) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data)""".stripMargin) + + // Add some rows to check the initial CLONE. + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b')") + runCreateOrReplace(mode, sourceIdentifier) + checkAnswer(spark.table(cloneTable), Row(1, "a") :: Row(2, "b") :: Nil) + + // Add more rows to check incremental update with REPLACE. + spark.sql(s"INSERT INTO $table VALUES (3, 'c')") + runCreateOrReplace(mode, sourceIdentifier) + checkAnswer(spark.table(cloneTable), Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil) + } + } + + testClone("create or replace table - renamed column") { mode => + withTable(table, cloneTable) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data)""".stripMargin) + + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b')") + runCreateOrReplace(mode, sourceIdentifier) + assert( + SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability( + DeltaLog.forTable(spark, TableIdentifier(cloneTable)).snapshot.schema, + new StructType().add("id", LongType).add("data", StringType))) + + checkAnswer(spark.table(cloneTable), Row(1, "a") :: Row(2, "b") :: Nil) + + // Rename column 'id' into column 'id2'. + spark.sql("ALTER TABLE local.db.table RENAME COLUMN id TO id2") + spark.sql(s"INSERT INTO $table VALUES (3, 'c')") + + // Update the cloned delta table with REPLACE. 
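+      // (A REPLACE clone re-reads the current Iceberg snapshot, so the schema check below
+      // expects the renamed column "id2" and the data check expects the newly inserted row.)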
+ runCreateOrReplace(mode, sourceIdentifier) + assert( + SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability( + DeltaLog.forTable(spark, TableIdentifier(cloneTable)).snapshot.schema, + new StructType().add("id2", LongType).add("data", StringType))) + + checkAnswer(spark.table(cloneTable), Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil) + } + } + + testClone("create or replace table - deleted rows") { mode => + withTable(table, cloneTable) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data)""".stripMargin) + + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')") + runCreateOrReplace(mode, sourceIdentifier) + checkAnswer(spark.table(cloneTable), Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil) + + // Delete some rows from the iceberg table. + spark.sql(s"DELETE FROM $table WHERE data > 'a'") + checkAnswer( + spark.sql(s"SELECT * from $table"), Row(1, "a") :: Nil) + + runCreateOrReplace(mode, sourceIdentifier) + checkAnswer(spark.table(cloneTable), Row(1, "a") :: Nil) + } + } + + testClone("create or replace table - schema with nested column") { mode => + withTable(table, cloneTable) { + spark.sql( + s"""CREATE TABLE $table (id bigint, person struct) + |USING iceberg PARTITIONED BY (truncate(person.name, 2))""".stripMargin) + + spark.sql(s"INSERT INTO $table VALUES (1, ('AaAaAa', 10)), (2, ('BbBbBb', 20))") + runCreateOrReplace(mode, sourceIdentifier) + checkAnswer( + spark.table(cloneTable), + Row(1, Row("AaAaAa", 10), "Aa") :: Row(2, Row("BbBbBb", 20), "Bb") :: Nil) + + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(cloneTable)) + val schemaBefore = deltaLog.update().schema + + spark.sql(s"INSERT INTO $table VALUES (3, ('AaZzZz', 30)), (4, ('CcCcCc', 40))") + runCreateOrReplace(mode, sourceIdentifier) + checkAnswer( + spark.table(cloneTable), + Row(1, Row("AaAaAa", 10), "Aa") :: Row(2, Row("BbBbBb", 20), "Bb") :: + Row(3, Row("AaZzZz", 30), "Aa") :: Row(4, Row("CcCcCc", 40), "Cc") :: Nil) + + assert(physicalNamesAreEqual(schemaBefore, deltaLog.update().schema)) + } + } + + testClone("create or replace table - add partition field") { mode => + withTable(table, cloneTable) { + spark.sql( + s"""CREATE TABLE $table (date date, id bigint, category string, price double) + | USING iceberg PARTITIONED BY (date)""".stripMargin) + + // scalastyle:off deltahadoopconfiguration + val hadoopTables = new HadoopTables(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + val icebergTable = hadoopTables.load(tablePath) + val icebergTableSchema = + org.apache.iceberg.spark.SparkSchemaUtil.convert(icebergTable.schema()) + + val df1 = spark.createDataFrame( + Seq( + Row(toDate("2022-01-01"), 1L, "toy", 2.5D), + Row(toDate("2022-01-01"), 2L, "food", 0.6D), + Row(toDate("2022-02-05"), 3L, "food", 1.4D), + Row(toDate("2022-02-05"), 4L, "toy", 10.2D)).asJava, + icebergTableSchema) + + df1.writeTo(table).append() + + runCreateOrReplace(mode, sourceIdentifier) + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(cloneTable)) + assert(deltaLog.snapshot.metadata.partitionColumns == Seq("date")) + checkAnswer(spark.table(cloneTable), df1) + + // Add a new partition field from the existing column "category" + icebergTable.refresh() + icebergTable.updateSpec().addField("category").commit() + + // Invalidate cache and load the updated partition spec + spark.sql(s"REFRESH TABLE $table") + val df2 = spark.createDataFrame( + Seq( + Row(toDate("2022-02-05"), 5L, "toy", 5.8D), + Row(toDate("2022-06-04"), 
6L, "toy", 20.1D)).asJava, + icebergTableSchema) + + df2.writeTo(table).append() + + runCreateOrReplace(mode, sourceIdentifier) + assert(deltaLog.update().metadata.partitionColumns == Seq("date", "category")) + // Old data of cloned Delta table has null on the new partition field. + checkAnswer(spark.table(cloneTable), df1.withColumn("category", lit(null)).union(df2)) + // Iceberg table projects existing value of old data to the new partition field though. + checkAnswer(spark.sql(s"SELECT * FROM $table"), df1.union(df2)) + } + } + + testClone("create or replace table - remove partition field") { mode => + withTable(table, cloneTable) { + spark.sql( + s"""CREATE TABLE $table (date date, id bigint, category string, price double) + | USING iceberg PARTITIONED BY (date)""".stripMargin) + + // scalastyle:off deltahadoopconfiguration + val hadoopTables = new HadoopTables(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + val icebergTable = hadoopTables.load(tablePath) + val icebergTableSchema = + org.apache.iceberg.spark.SparkSchemaUtil.convert(icebergTable.schema()) + + val df1 = spark.createDataFrame( + Seq( + Row(toDate("2022-01-01"), 1L, "toy", 2.5D), + Row(toDate("2022-01-01"), 2L, "food", 0.6D), + Row(toDate("2022-02-05"), 3L, "food", 1.4D), + Row(toDate("2022-02-05"), 4L, "toy", 10.2D)).asJava, + icebergTableSchema) + + df1.writeTo(table).append() + + runCreateOrReplace(mode, sourceIdentifier) + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(cloneTable)) + assert(deltaLog.snapshot.metadata.partitionColumns == Seq("date")) + checkAnswer(spark.table(cloneTable), df1) + + // Remove the partition field "date" + icebergTable.refresh() + icebergTable.updateSpec().removeField("date").commit() + + // Invalidate cache and load the updated partition spec + spark.sql(s"REFRESH TABLE $table") + val df2 = spark.createDataFrame( + Seq( + Row(toDate("2022-02-05"), 5L, "toy", 5.8D), + Row(toDate("2022-06-04"), 6L, "toy", 20.1D)).asJava, + icebergTableSchema) + + df2.writeTo(table).append() + + runCreateOrReplace(mode, sourceIdentifier) + assert(deltaLog.update().metadata.partitionColumns.isEmpty) + // Both cloned Delta table and Iceberg table has data for the removed partition field. 
+ checkAnswer(spark.table(cloneTable), df1.union(df2)) + checkAnswer(spark.table(cloneTable), spark.sql(s"SELECT * FROM $table")) + } + } + + testClone("create or replace table - replace partition field") { mode => + withTable(table, cloneTable) { + spark.sql( + s"""CREATE TABLE $table (date date, id bigint, category string, price double) + | USING iceberg PARTITIONED BY (date)""".stripMargin) + + // scalastyle:off deltahadoopconfiguration + val hadoopTables = new HadoopTables(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + val icebergTable = hadoopTables.load(tablePath) + val icebergTableSchema = + org.apache.iceberg.spark.SparkSchemaUtil.convert(icebergTable.schema()) + + val df1 = spark.createDataFrame( + Seq( + Row(toDate("2022-01-01"), 1L, "toy", 2.5D), + Row(toDate("2022-01-01"), 2L, "food", 0.6D), + Row(toDate("2022-02-05"), 3L, "food", 1.4D), + Row(toDate("2022-02-05"), 4L, "toy", 10.2D)).asJava, + icebergTableSchema) + + df1.writeTo(table).append() + + runCreateOrReplace(mode, sourceIdentifier) + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(cloneTable)) + assert(deltaLog.snapshot.metadata.partitionColumns == Seq("date")) + checkAnswer(spark.table(cloneTable), df1) + + // Replace the partition field "date" with a transformed field "month(date)" + icebergTable.refresh() + icebergTable.updateSpec().removeField("date") + .addField(org.apache.iceberg.expressions.Expressions.month("date")) + .commit() + + // Invalidate cache and load the updated partition spec + spark.sql(s"REFRESH TABLE $table") + val df2 = spark.createDataFrame( + Seq( + Row(toDate("2022-02-05"), 5L, "toy", 5.8D), + Row(toDate("2022-06-04"), 6L, "toy", 20.1D)).asJava, + icebergTableSchema) + + df2.writeTo(table).append() + + runCreateOrReplace(mode, sourceIdentifier) + assert(deltaLog.update().metadata.partitionColumns == Seq("date_month")) + // Old data of cloned Delta table has null on the new partition field. + checkAnswer(spark.table(cloneTable), + df1.withColumn("date_month", lit(null)) + .union(df2.withColumn("date_month", substring(col("date") cast "String", 1, 7)))) + // The new partition field is a hidden metadata column in Iceberg. 
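+      // Drop it from the Delta side so the remaining columns can be compared against
+      // SELECT * on the Iceberg table.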
+ checkAnswer( + spark.table(cloneTable).drop("date_month"), + spark.sql(s"SELECT * FROM $table")) + } + } + + testClone("Enables column mapping table feature") { mode => + withTable(table, cloneTable) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data)""".stripMargin) + + spark.sql(s"CREATE TABLE $cloneTable $mode CLONE $sourceIdentifier") + val log = DeltaLog.forTable(spark, TableIdentifier(cloneTable)) + val protocol = log.update().protocol + assert(protocol.isFeatureSupported(ColumnMappingTableFeature)) + } + } +} + +class CloneIcebergByPathSuite extends CloneIcebergSuiteBase +{ + override def sourceIdentifier: String = s"iceberg.`$tablePath`" + + test("negative case: select from iceberg table using path") { + withTable(table) { + val ae = intercept[AnalysisException] { + sql(s"SELECT * FROM $sourceIdentifier") + } + assert(ae.getMessage.contains("does not support batch scan")) + } + } +} + +class CloneIcebergByNameSuite extends CloneIcebergSuiteBase +{ + override def sourceIdentifier: String = table + + test("missing iceberg library should throw a sensical error") { + val validIcebergSparkTableClassPath = ConvertUtils.icebergSparkTableClassPath + val validIcebergLibTableClassPath = ConvertUtils.icebergLibTableClassPath + + Seq( + () => { + ConvertUtils.icebergSparkTableClassPath = validIcebergSparkTableClassPath + "2" + }, + () => { + ConvertUtils.icebergLibTableClassPath = validIcebergLibTableClassPath + "2" + } + ).foreach { makeInvalid => + try { + makeInvalid() + withTable(table, cloneTable) { + spark.sql( + s"""CREATE TABLE $table (`1 id` bigint, 2data string) + |USING iceberg PARTITIONED BY (2data)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')") + val e = intercept[DeltaIllegalStateException] { + runCreateOrReplace("SHALLOW", sourceIdentifier) + } + assert(e.getErrorClass == "DELTA_MISSING_ICEBERG_CLASS") + } + } finally { + ConvertUtils.icebergSparkTableClassPath = validIcebergSparkTableClassPath + ConvertUtils.icebergLibTableClassPath = validIcebergLibTableClassPath + } + } + } +} + diff --git a/iceberg/src/test/scala/org/apache/spark/sql/delta/ConvertIcebergToDeltaPartitionSuite.scala b/iceberg/src/test/scala/org/apache/spark/sql/delta/ConvertIcebergToDeltaPartitionSuite.scala new file mode 100644 index 00000000000..517a998bac0 --- /dev/null +++ b/iceberg/src/test/scala/org/apache/spark/sql/delta/ConvertIcebergToDeltaPartitionSuite.scala @@ -0,0 +1,652 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.File +import java.sql.Timestamp +import java.util.concurrent.TimeUnit + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.sql.delta.commands.ConvertToDeltaCommand +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.Path +import org.apache.iceberg.Table + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.types._ +// scalastyle:on import.ordering.noEmptyLine + +abstract class ConvertIcebergToDeltaPartitioningUtils + extends QueryTest + with ConvertIcebergToDeltaUtils { + + override protected val schemaDDL = "id bigint, data string, size int, ts timestamp, dt date" + + protected lazy val schemaColumnNames: Seq[String] = schema.map(_.name) + + /** Original iceberg data used to check the correctness of conversion. */ + protected def initRows: Seq[String] = Seq( + "1L, 'abc', 100, cast('2021-06-01 18:00:00' as timestamp), cast('2021-06-01' as date)", + "2L, 'ace', 200, cast('2022-07-01 20:00:00' as timestamp), cast('2022-07-01' as date)" + ) + + /** Data added into both iceberg and converted delta to check post-conversion consistency. */ + protected def incrRows: Seq[String] = Seq( + "3L, 'acf', 300, cast('2023-07-01 03:00:00' as timestamp), cast('2023-07-01' as date)" + ) + + protected override def test(testName: String, testTags: org.scalatest.Tag*) + (testFun: => Any) + (implicit pos: org.scalactic.source.Position): Unit = { + Seq("true", "false").foreach { flag => + val msg = if (flag == "true") "- with native partition values" + else "- with inferred partition values" + super.test(testName + msg, testTags : _*) { + withSQLConf(DeltaSQLConf.DELTA_CONVERT_ICEBERG_USE_NATIVE_PARTITION_VALUES.key -> flag) { + testFun + } + }(pos) + } + } + + /** + * Creates an iceberg table with the default schema and the provided partition columns, writes + * some original rows into the iceberg table for conversion. + */ + protected def createIcebergTable( + tableName: String, + partitionColumns: Seq[String], + withRows: Seq[String] = initRows): Unit = { + val partitionClause = + if (partitionColumns.nonEmpty) s"PARTITIONED BY (${partitionColumns.mkString(",")})" else "" + spark.sql(s"CREATE TABLE $tableName ($schemaDDL) USING iceberg $partitionClause") + + withRows.foreach{ row => spark.sql(s"INSERT INTO $tableName VALUES ($row)") } + } + + /** + * Tests ConvertToDelta on the provided iceberg table, and checks both schema and data of the + * converted delta table. + * + * @param tableName: the iceberg table name. + * @param tablePath: the iceberg table path. + * @param partitionSchemaDDL: the expected partition schema DDL. + * @param deltaPath: the location for the converted delta table. + */ + protected def testConvertToDelta( + tableName: String, + tablePath: String, + partitionSchemaDDL: String, + deltaPath: String): Unit = { + // Convert at an external location to ease testing. + ConvertToDeltaCommand( + tableIdentifier = TableIdentifier(tablePath, Some("iceberg")), + partitionSchema = None, + collectStats = true, + Some(deltaPath)).run(spark) + + // Check the converted table schema. 
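+    // validateConvertedSchema below also verifies the column mapping metadata (column ids and
+    // physical names) as well as the partition columns recorded in the Delta metadata.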
+ validateConvertedSchema( + readIcebergHadoopTable(tablePath), + DeltaLog.forTable(spark, new Path(deltaPath)), + StructType.fromDDL(partitionSchemaDDL)) + + // Check converted data. + checkAnswer( + // The converted delta table will have partition columns. + spark.sql(s"select ${schemaColumnNames.mkString(",")} from delta.`$deltaPath`"), + spark.sql(s"select * from $tableName")) + } + + /** + * Checks partition-based file skipping on the iceberg table (as parquet) and the converted delta + * table to verify post-conversion partition consistency. + * + * @param icebergTableName: the iceberg table name. + * @param icebergTablePath: the iceberg table path. + * @param deltaTablePath: the converted delta table path. + * @param filterAndFiles: a map from filter expression to the expected number of scanned files. + */ + protected def checkSkipping( + icebergTableName: String, + icebergTablePath: String, + deltaTablePath: String, + filterAndFiles: Map[String, Int] = Map.empty[String, Int]): Unit = { + // Add the same data into both iceberg table and converted delta table. + writeRows(icebergTableName, deltaTablePath, incrRows) + + // Disable file stats to check file skipping solely based on partition, please note this only + // works for optimizable partition expressions, check 'optimizablePartitionExpressions.scala' + // for the whole list of supported partition expressions. + sql( + s""" + |ALTER TABLE delta.`$deltaTablePath` + |SET TBLPROPERTIES ( + | '${DeltaConfigs.DATA_SKIPPING_NUM_INDEXED_COLS.key}' = '0')""".stripMargin) + + // Always check full scan. + (filterAndFiles ++ Map("" -> 3)).foreach { case (filter, numFilesScanned) => + val filterExpr = if (filter == "") "" else s"where $filter" + checkAnswer( + // The converted delta table will have partition columns. + spark.sql( + s"""SELECT ${schemaColumnNames.mkString(",")} FROM delta.`$deltaTablePath` + | WHERE $filter""".stripMargin), + spark.sql(s"SELECT * FROM $icebergTableName $filterExpr")) + + // Check the raw parquet partition directories written out by Iceberg + checkAnswer( + spark.sql(s"select * from parquet.`$icebergTablePath/data` $filterExpr"), + spark.sql(s"select * from delta.`$deltaTablePath` $filterExpr")) + + assert( + spark.sql(s"select * from delta.`$deltaTablePath` $filterExpr").inputFiles.length == + numFilesScanned) + } + } + + /** + * Validates the table schema and partition schema of the iceberg table and the converted delta + * table. 
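+   * In addition to the merged data and partition schema, every converted column is expected to
+   * carry Delta column mapping metadata (a column id and a physical name).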
+ */ + private def validateConvertedSchema( + icebergTable: Table, + convertedDeltaLog: DeltaLog, + expectedPartitionSchema: StructType): Unit = { + + def mergeSchema(dataSchema: StructType, partitionSchema: StructType): StructType = { + StructType(dataSchema.fields ++ + partitionSchema.fields.filter { partField => + !dataSchema.fields.exists(f => spark.sessionState.conf.resolver(partField.name, f.name))}) + } + + val columnIds = mutable.Set[Long]() + val schemaWithoutMetadata = + SchemaMergingUtils.transformColumns(convertedDeltaLog.update().schema) { (_, field, _) => + // all columns should have the columnID metadata + assert(DeltaColumnMapping.hasColumnId(field)) + // all columns should have physical name metadata + assert(DeltaColumnMapping.hasPhysicalName(field)) + // nest column ids should be distinct + val id = DeltaColumnMapping.getColumnId(field) + assert(!columnIds.contains(id)) + columnIds.add(id) + // the id can either be a data schema id or a identity transform partition field + // or it is generated because it's a non-identity transform partition field + assert( + Option(icebergTable.schema().findField(id)).map(_.name()).contains(field.name) || + icebergTable.spec().fields().asScala.map(_.name()).contains(field.name) + ) + field.copy(metadata = Metadata.empty) + } + + assert(schemaWithoutMetadata == mergeSchema(schema, expectedPartitionSchema)) + + // check partition columns + assert( + expectedPartitionSchema.map(_.name) == convertedDeltaLog.update().metadata.partitionColumns) + } + + /** + * Writes the same rows into both the iceberg table and the converted delta table using the + * default schema. + */ + protected def writeRows( + icebergTableName: String, + deltaTablePath: String, + rows: Seq[String]): Unit = { + + // Write Iceberg + rows.foreach { row => spark.sql(s"INSERT INTO $icebergTableName VALUES ($row)") } + + // Write Delta + rows.foreach { row => + val values = row.split(",") + assert(values.length == schemaColumnNames.length) + val valueAsColumns = + values.zip(schemaColumnNames).map { case (value, column) => s"$value AS $column" } + + val df = spark.sql(valueAsColumns.mkString("SELECT ", ",", "")) + df.write.format("delta").mode("append").save(deltaTablePath) + } + } +} + +class ConvertIcebergToDeltaPartitioningSuite extends ConvertIcebergToDeltaPartitioningUtils { + + import testImplicits._ + + test("partition by timestamp year") { + withTable(table) { + createIcebergTable(table, Seq("years(ts)")) + + withTempDir { dir => + testConvertToDelta(table, tablePath, "ts_year int", dir.getCanonicalPath) + checkSkipping( + table, tablePath, dir.getCanonicalPath, + Map( + "ts < cast('2021-06-01 00:00:00' as timestamp)" -> 1, + "ts <= cast('2021-06-01 00:00:00' as timestamp)" -> 1, + "ts > cast('2021-06-01 00:00:00' as timestamp)" -> 3, + "ts > cast('2022-01-01 00:00:00' as timestamp)" -> 2) + ) + } + } + } + + test("partition by date year") { + withTable(table) { + createIcebergTable(table, Seq("years(dt)")) + + withTempDir { dir => + testConvertToDelta(table, tablePath, "dt_year int", dir.getCanonicalPath) + checkSkipping( + table, tablePath, dir.getCanonicalPath, + Map( + "dt < cast('2021-06-01' as date)" -> 1, + "dt <= cast('2021-06-01' as date)" -> 1, + "dt > cast('2021-06-01' as date)" -> 3, + "dt = cast('2022-08-01' as date)" -> 1) + ) + } + } + } + + test("partition by timestamp day") { + withTable(table) { + createIcebergTable(table, Seq("days(ts)")) + + withTempDir { dir => + testConvertToDelta(table, tablePath, "ts_day date", dir.getCanonicalPath) + 
checkSkipping( + table, tablePath, dir.getCanonicalPath, + Map("ts < cast('2021-07-01 00:00:00' as timestamp)" -> 1)) + } + } + } + + test("partition by date day") { + withTable(table) { + createIcebergTable(table, Seq("days(dt)")) + + withTempDir { dir => + testConvertToDelta(table, tablePath, "dt_day date", dir.getCanonicalPath) + checkSkipping( + table, tablePath, dir.getCanonicalPath, + Map( + "dt < cast('2021-06-01' as date)" -> 1, + "dt <= cast('2021-06-01' as date)" -> 1, + "dt > cast('2021-06-01' as date)" -> 3, + "dt = cast('2022-07-01' as date)" -> 1) + ) + } + } + } + + test("partition by truncate string") { + withTable(table) { + createIcebergTable(table, Seq("truncate(data, 2)")) + + withTempDir { dir => + testConvertToDelta(table, tablePath, "data_trunc string", dir.getCanonicalPath) + checkSkipping( + table, tablePath, dir.getCanonicalPath, + Map( + "data >= 'ac'" -> 2, + "data >= 'ad'" -> 0 + ) + ) + } + } + } + + test("partition by truncate long and int") { + withTable(table) { + // Include both positive and negative long values in the rows: positive will be rounded up + // while negative will be rounded down. + val sampleRows = Seq( + "111L, 'abc', 100, cast('2021-06-01 18:00:00' as timestamp), cast('2021-06-01' as date)", + "-11L, 'ace', -10, cast('2022-07-01 20:00:00' as timestamp), cast('2022-07-01' as date)") + createIcebergTable(table, Seq("truncate(id, 10)", "truncate(size, 8)"), sampleRows) + + withTempDir { dir => + val deltaPath = dir.getCanonicalPath + testConvertToDelta(table, tablePath, "id_trunc long, size_trunc int", deltaPath) + // TODO: make iceberg truncate partition expression optimizable and check file skipping. + + // Write the same rows again into the converted delta table and make sure the partition + // value computed by delta are the same with iceberg. 
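+        // For example, truncate(id, 10) maps 111 -> 110 and -11 -> -20, and truncate(size, 8)
+        // maps 100 -> 96 and -10 -> -16, matching the expected partition values below.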
+ writeRows(table, deltaPath, sampleRows) + checkAnswer( + spark.sql(s"SELECT id_trunc, size_trunc FROM delta.`$deltaPath`"), + Row(110L, 96) :: Row(-20L, -16) :: Row(110L, 96) :: Row(-20L, -16) :: Nil) + } + } + } + + test("partition by identity") { + withTable(table) { + createIcebergTable(table, Seq("data")) + + withTempDir { dir => + val deltaPath = new File(dir, "delta-table").getCanonicalPath + testConvertToDelta(table, tablePath, "data string", deltaPath) + checkSkipping(table, tablePath, deltaPath) + + spark.read.format("delta").load(deltaPath).inputFiles.foreach { fileName => + val sourceFile = new File(fileName.stripPrefix("file:")) + val targetFile = new File(dir, sourceFile.getName) + FileUtils.copyFile(sourceFile, targetFile) + val parquetFileSchema = + spark.read.format("parquet").load(targetFile.getCanonicalPath).schema + if (fileName.contains("acf")) { // new file written by delta + SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability( + parquetFileSchema, StructType(schema.fields.filter(_.name != "data"))) + } else { + SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability(parquetFileSchema, schema) + } + } + } + } + } + + test("df writes and Insert Into with composite partitioning") { + withTable(table) { + createIcebergTable(table, Seq("years(dt), truncate(data, 3), id")) + + withTempDir { dir => + val deltaPath = new File(dir, "/delta").getCanonicalPath + testConvertToDelta( + table, + tablePath, + "dt_year int, data_trunc string, id bigint", + deltaPath) + + checkSkipping( + table, tablePath, deltaPath, + Map( + "data >= 'ac'" -> 2, + "data >= 'acg'" -> 0, + "dt = cast('2022-07-01' as date) and data >= 'ac'" -> 1 + ) + ) + + // for Dataframe, we don't need to explicitly mention partition columns + Seq((4L, "bcddddd", 400, + new Timestamp(TimeUnit.DAYS.toMillis(10)), + new java.sql.Date(TimeUnit.DAYS.toMillis(10)))) + .toDF(schemaColumnNames: _*) + .write.format("delta").mode("append").save(deltaPath) + + checkAnswer( + spark.read.format("delta").load(deltaPath).where("id = 4") + .select("id", "data", "dt_year", "data_trunc"), + Row( + 4, + "bcddddd", + // generated partition columns + 1970, "bcd") :: Nil) + + val tempTablePath = dir.getCanonicalPath + "/temp" + Seq((5, "c", 500, + new Timestamp(TimeUnit.DAYS.toMillis(20)), + new java.sql.Date(TimeUnit.DAYS.toMillis(20))) + ).toDF(schemaColumnNames: _*) + .write.format("delta").save(tempTablePath) + + val e = intercept[AnalysisException] { + spark.sql( + s""" + | INSERT INTO delta.`$deltaPath` + | SELECT * from delta.`$tempTablePath` + |""".stripMargin) + } + assert(e.getMessage.contains("not enough data columns")) + } + } + } + + test("partition by timestamp month") { + withTable(table) { + createIcebergTable(table, Seq("months(ts)")) + + withTempDir { dir => + testConvertToDelta(table, tablePath, "ts_month string", dir.getCanonicalPath) + // Do NOT infer partition column type for ts_month and dt_month since: 2020-01 will be + // inferred as a date and cast it to 2020-01-01. 
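+        // (Month partition values are plain strings such as '2021-06', which partition column
+        // type inference would otherwise coerce to a date.)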
+ withSQLConf("spark.sql.sources.partitionColumnTypeInference.enabled" -> "false") { + checkSkipping( + table, + tablePath, + dir.getCanonicalPath, + Map( + "ts < cast('2021-06-01 00:00:00' as timestamp)" -> 1, + "ts <= cast('2021-06-01 00:00:00' as timestamp)" -> 1, + "ts > cast('2021-06-01 00:00:00' as timestamp)" -> 3, + "ts >= cast('2021-06-01 00:00:00' as timestamp)" -> 3, + "ts < cast('2021-05-01 00:00:00' as timestamp)" -> 0, + "ts > cast('2021-07-01 00:00:00' as timestamp)" -> 2, + "ts = cast('2023-07-30 00:00:00' as timestamp)" -> 1, + "ts > cast('2023-08-01 00:00:00' as timestamp)" -> 0)) + } + } + } + } + + test("partition by date month") { + withTable(table) { + createIcebergTable(table, Seq("months(dt)")) + + withTempDir { dir => + testConvertToDelta(table, tablePath, "dt_month string", dir.getCanonicalPath) + // Do NOT infer partition column type for ts_month and dt_month since: 2020-01 will be + // inferred as a date and cast it to 2020-01-01. + withSQLConf("spark.sql.sources.partitionColumnTypeInference.enabled" -> "false") { + checkSkipping( + table, tablePath, dir.getCanonicalPath, + Map( + "dt < cast('2021-06-01' as date)" -> 1, + "dt <= cast('2021-06-01' as date)" -> 1, + "dt > cast('2021-06-01' as date)" -> 3, + "dt >= cast('2021-06-01' as date)" -> 3, + "dt < cast('2021-05-01' as date)" -> 0, + "dt > cast('2021-07-01' as date)" -> 2, + "dt = cast('2023-07-30' as date)" -> 1, + "dt > cast('2023-08-01' as date)" -> 0)) + } + } + } + } + + test("partition by timestamp hour") { + withTable(table) { + createIcebergTable(table, Seq("hours(ts)")) + + withTempDir { dir => + testConvertToDelta(table, tablePath, "ts_hour string", dir.getCanonicalPath) + checkSkipping(table, tablePath, dir.getCanonicalPath, + Map( + "ts < cast('2021-06-01 18:00:00' as timestamp)" -> 1, + "ts <= cast('2021-06-01 18:00:00' as timestamp)" -> 1, + "ts > cast('2021-06-01 18:00:00' as timestamp)" -> 3, + "ts >= cast('2021-06-01 18:30:00' as timestamp)" -> 3, + "ts < cast('2021-06-01 17:59:59' as timestamp)" -> 0, + "ts = cast('2021-06-01 18:30:10' as timestamp)" -> 1, + "ts > cast('2022-07-01 20:00:00' as timestamp)" -> 2, + "ts > cast('2023-07-01 02:00:00' as timestamp)" -> 1, + "ts > cast('2023-07-01 04:00:00' as timestamp)" -> 0)) + } + } + } +} + +///////////////////////////////// +// 5-DIGIT-YEAR TIMESTAMP TEST // +///////////////////////////////// +class ConvertIcebergToDeltaPartitioningFiveDigitYearSuite + extends ConvertIcebergToDeltaPartitioningUtils { + + override protected def initRows: Seq[String] = Seq( + "1, 'abc', 100, cast('13168-11-15 18:00:00' as timestamp), cast('13168-11-15' as date)", + "2, 'abc', 200, cast('2021-08-24 18:00:00' as timestamp), cast('2021-08-24' as date)" + ) + + override protected def incrRows: Seq[String] = Seq( + "3, 'acf', 300, cast('11267-07-15 18:00:00' as timestamp), cast('11267-07-15' as date)", + "4, 'acf', 400, cast('2008-07-15 18:00:00' as timestamp), cast('2008-07-15' as date)" + ) + + /** + * Checks filtering on 5-digit year based on different policies. + * + * @param icebergTableName: the iceberg table name. + * @param deltaTablePath: the converted delta table path. + * @param partitionSchemaDDL: the partition schema DDL. + * @param policy: time parser policy to determine 5-digit year handling. + * @param filters: a list of filter expressions to check. 
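+   * With the EXCEPTION policy, filters on the string-typed month/hour partition columns
+   * (which contain 5-digit years here) are expected to fail with an error mentioning
+   * spark.sql.legacy.timeParserPolicy.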
+ */ + private def checkFiltering( + icebergTableName: String, + deltaTablePath: String, + partitionSchemaDDL: String, + policy: String, + filters: Seq[String]): Unit = { + filters.foreach { filter => + val filterExpr = if (filter == "") "" else s"where $filter" + if (policy == "EXCEPTION" && filterExpr != "" && + partitionSchemaDDL != "ts_year int" && partitionSchemaDDL != "ts_day date") { + var thrownError = false + val msg = try { + spark.sql(s"select * from delta.`$deltaTablePath` $filterExpr").collect() + } catch { + case e: Throwable if e.isInstanceOf[org.apache.spark.SparkThrowable] && + e.getMessage.contains("spark.sql.legacy.timeParserPolicy") => + thrownError = true + case other: Throwable => throw other + } + assert(thrownError, s"Error message $msg is incorrect.") + } else { + // check results of iceberg == delta + checkAnswer( + // the converted delta table will have partition columns + spark.sql( + s"select ${schema.fields.map(_.name).mkString(",")} from delta.`$deltaTablePath`"), + spark.sql(s"select * from $icebergTableName")) + } + } + } + + Seq("EXCEPTION", "CORRECTED", "LEGACY").foreach { policy => + test(s"future timestamp: partition by month when timeParserPolicy is: $policy") { + withSQLConf("spark.sql.legacy.timeParserPolicy" -> policy) { + withTable(table) { + createIcebergTable(table, Seq("months(ts)")) + + withTempDir { dir => + val partitionSchemaDDL = "ts_month string" + testConvertToDelta(table, tablePath, partitionSchemaDDL, dir.getCanonicalPath) + checkFiltering( + table, dir.getCanonicalPath, partitionSchemaDDL, policy, + Seq("", + "ts > cast('2021-06-01 00:00:00' as timestamp)", + "ts < cast('12000-06-01 00:00:00' as timestamp)", + "ts >= cast('13000-06-01 00:00:00' as timestamp)", + "ts <= cast('2009-06-01 00:00:00' as timestamp)", + "ts = cast('11267-07-15 00:00:00' as timestamp)" + ) + ) + } + } + } + } + + test(s"future timestamp: partition by hour when timeParserPolicy is: $policy") { + withSQLConf("spark.sql.legacy.timeParserPolicy" -> policy) { + withTable(table) { + createIcebergTable(table, Seq("hours(ts)")) + + withTempDir { dir => + val partitionSchemaDDL = "ts_hour string" + testConvertToDelta(table, tablePath, partitionSchemaDDL, dir.getCanonicalPath) + checkFiltering( + table, dir.getCanonicalPath, partitionSchemaDDL, policy, + Seq("", + "ts > cast('2021-06-01 18:00:00' as timestamp)", + "ts < cast('12000-06-01 18:00:00' as timestamp)", + "ts >= cast('13000-06-01 19:00:00' as timestamp)", + "ts <= cast('2009-06-01 16:00:00' as timestamp)", + "ts = cast('11267-07-15 18:30:00' as timestamp)" + ) + ) + } + } + } + } + + test(s"future timestamp: partition by year when timeParserPolicy is: $policy") { + withSQLConf("spark.sql.legacy.timeParserPolicy" -> policy) { + withTable(table) { + createIcebergTable(table, Seq("years(ts)")) + + withTempDir { dir => + val partitionSchemaDDL = "ts_year int" + testConvertToDelta(table, tablePath, partitionSchemaDDL, dir.getCanonicalPath) + checkFiltering( + table, dir.getCanonicalPath, partitionSchemaDDL, policy, + Seq("", + "ts > cast('2021-06-01 18:00:00' as timestamp)", + "ts < cast('12000-06-01 18:00:00' as timestamp)", + "ts >= cast('13000-06-01 19:00:00' as timestamp)", + "ts <= cast('2009-06-01 16:00:00' as timestamp)", + "ts = cast('11267-07-15 18:30:00' as timestamp)" + ) + ) + } + } + } + } + + test(s"future timestamp: partition by day when timeParserPolicy is: $policy") { + withSQLConf("spark.sql.legacy.timeParserPolicy" -> policy) { + withTable(table) { + createIcebergTable(table, Seq("days(ts)")) + + 
withTempDir { dir => + val partitionSchemaDDL = "ts_day date" + testConvertToDelta(table, tablePath, partitionSchemaDDL, dir.getCanonicalPath) + checkFiltering( + table, dir.getCanonicalPath, partitionSchemaDDL, policy, + Seq("", + "ts > cast('2021-06-01 18:00:00' as timestamp)", + "ts < cast('12000-06-01 18:00:00' as timestamp)", + "ts >= cast('13000-06-01 19:00:00' as timestamp)", + "ts <= cast('2009-06-01 16:00:00' as timestamp)", + "ts = cast('11267-07-15 18:30:00' as timestamp)" + ) + ) + } + } + } + } + } +} diff --git a/iceberg/src/test/scala/org/apache/spark/sql/delta/ConvertIcebergToDeltaSuite.scala b/iceberg/src/test/scala/org/apache/spark/sql/delta/ConvertIcebergToDeltaSuite.scala new file mode 100644 index 00000000000..519e5a399d8 --- /dev/null +++ b/iceberg/src/test/scala/org/apache/spark/sql/delta/ConvertIcebergToDeltaSuite.scala @@ -0,0 +1,989 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.File +import java.text.SimpleDateFormat +import java.util.TimeZone + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.sql.delta.catalog.DeltaCatalog +import org.apache.spark.sql.delta.commands.ConvertToDeltaCommand +import org.apache.spark.sql.delta.commands.convert.{ConvertUtils, IcebergTable} +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.StatsUtils +import io.delta.sql.DeltaSparkSessionExtension +import org.apache.hadoop.fs.Path +import org.apache.iceberg.{Table, TableProperties} +import org.apache.iceberg.hadoop.HadoopTables +import org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SparkSession, SparkSessionExtensions} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.{col, expr, from_json} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, TestSparkSession} +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils +// scalastyle:on import.ordering.noEmptyLine + +class IcebergCompatibleDeltaTestSparkSession(sparkConf: SparkConf) + extends TestSparkSession(sparkConf) { + override val extensions: SparkSessionExtensions = { + val extensions = new SparkSessionExtensions + new DeltaSparkSessionExtension().apply(extensions) + new IcebergSparkSessionExtensions().apply(extensions) + extensions + } +} + +trait ConvertIcebergToDeltaUtils extends SharedSparkSession { + + protected var warehousePath: File = null + protected lazy val table: String = "local.db.table" + protected lazy val tablePath: String = "file://" + warehousePath.getCanonicalPath + "/db/table" + protected lazy val nestedTable: String = "local.db.nested_table" + protected 
lazy val nestedTablePath: String = + "file://" + warehousePath.getCanonicalPath + "/db/nested_table" + + protected def collectStatisticsStringOption(collectStats: Boolean): String = Option(collectStats) + .filterNot(identity).map(_ => "NO STATISTICS").getOrElse("") + + + override def beforeAll(): Unit = { + warehousePath = Utils.createTempDir() + super.beforeAll() + } + + override def afterAll(): Unit = { + super.afterAll() + if (warehousePath != null) Utils.deleteRecursively(warehousePath) + } + + override def afterEach(): Unit = { + sql(s"DROP TABLE IF EXISTS $table") + super.afterEach() + } + + /** + * Setting the java default timezone, as we use java.util.TimeZone.getDefault for partition + * values... + * + * In production clusters, the default timezone is always set as UTC. + */ + def withDefaultTimeZone(timeZoneId: String)(func: => Unit): Unit = { + val previousTimeZone = TimeZone.getDefault() + try { + TimeZone.setDefault(TimeZone.getTimeZone(timeZoneId)) + func + } finally { + TimeZone.setDefault(previousTimeZone) + } + } + + override protected def createSparkSession: TestSparkSession = { + SparkSession.cleanupAnyExistingSession() + val session = new IcebergCompatibleDeltaTestSparkSession(sparkConf) + session.conf.set(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[DeltaCatalog].getName) + session + } + + protected override def sparkConf = super.sparkConf + .set( + "spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") + .set( + "spark.sql.catalog.local.type", "hadoop") + .set( + "spark.sql.catalog.local.warehouse", warehousePath.getCanonicalPath) + .set("spark.sql.session.timeZone", "UTC") + + protected val schemaDDL = "id bigint, data string, ts timestamp, dt date" + protected lazy val schema = StructType.fromDDL(schemaDDL) + + protected def readIcebergHadoopTable(tablePath: String): Table = { + // scalastyle:off deltahadoopconfiguration + new HadoopTables(spark.sessionState.newHadoopConf).load(tablePath) + // scalastyle:on deltahadoopconfiguration + } +} + +trait ConvertIcebergToDeltaSuiteBase + extends QueryTest + with ConvertIcebergToDeltaUtils + with StatsUtils { + + import testImplicits._ + + protected def convert(tableIdentifier: String, partitioning: Option[String] = None, + collectStats: Boolean = true): Unit + + test("convert with statistics") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b')") + spark.sql(s"INSERT INTO $table VALUES (3, 'c')") + convert(s"iceberg.`$tablePath`", collectStats = true) + + // Check statistics + val deltaLog = DeltaLog.forTable(spark, new Path(tablePath)) + val statsDf = deltaLog.unsafeVolatileSnapshot.allFiles + .select( + from_json(col("stats"), deltaLog.unsafeVolatileSnapshot.statsSchema).as("stats")) + .select("stats.*") + assert(statsDf.filter(col("numRecords").isNull).count == 0) + val history = io.delta.tables.DeltaTable.forPath(tablePath).history() + assert(history.count == 1) + } + } + + + test("table with deleted files") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')") + spark.sql(s"DELETE FROM $table WHERE data > 'a'") + checkAnswer( + spark.sql(s"SELECT * from $table"), Row(1, "a") :: Nil) + + convert(s"iceberg.`$tablePath`") + assert(SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability( + 
spark.read.format("delta").load(tablePath).schema, + new StructType().add("id", LongType).add("data", StringType))) + checkAnswer( + spark.read.format("delta").load(tablePath), + Row(1, "a") :: Nil) + } + } + + + test("missing iceberg library should throw a sensical error") { + val validIcebergSparkTableClassPath = ConvertUtils.icebergSparkTableClassPath + + Seq( + () => { + ConvertUtils.icebergSparkTableClassPath = validIcebergSparkTableClassPath + "2" + }).foreach { makeInvalid => + try { + makeInvalid() + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (`1 id` bigint, 2data string) + |USING iceberg PARTITIONED BY (2data)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')") + val e = intercept[DeltaIllegalStateException] { + convert(s"iceberg.`$tablePath`") + } + assert(e.getErrorClass == "DELTA_MISSING_ICEBERG_CLASS") + } + } finally { + ConvertUtils.icebergSparkTableClassPath = validIcebergSparkTableClassPath + } + } + } + + test("non-parquet table") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data) + |TBLPROPERTIES ('write.format.default'='orc') + |""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')") + val e = intercept[UnsupportedOperationException] { + convert(s"iceberg.`$tablePath`") + } + assert(e.getMessage.contains("Cannot convert") && e.getMessage.contains("orc")) + } + } + + test("external location") { + withTempDir { dir => + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b')") + spark.sql(s"INSERT INTO $table VALUES (3, 'c')") + ConvertToDeltaCommand( + TableIdentifier(tablePath, Some("iceberg")), + None, + collectStats = true, + Some(dir.getCanonicalPath)).run(spark) + + checkAnswer( + spark.read.format("delta").load(dir.getCanonicalPath), + Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil) + } + } + } + + test("table with renamed columns") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b')") + spark.sql("ALTER TABLE local.db.table RENAME COLUMN id TO id2") + spark.sql(s"INSERT INTO $table VALUES (3, 'c')") + convert(s"iceberg.`$tablePath`") + + // The converted delta table will get the updated schema + assert( + SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability( + spark.read.format("delta").load(tablePath).schema, + new StructType().add("id2", LongType).add("data", StringType))) + + // Parquet files still have the old schema + assert( + SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability( + spark.read.format("parquet").load(tablePath + "/data").schema, + new StructType().add("id", LongType).add("data", StringType))) + + val properties = readIcebergHadoopTable(tablePath).properties() + + // This confirms that name mapping is not used for this case + assert(properties.get(TableProperties.DEFAULT_NAME_MAPPING) == null) + + // As of right now, the data added before rename will be nulls. 
+      checkAnswer(
+        spark.read.format("delta").load(tablePath),
+        Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil)
+    }
+  }
+
+  test("columns starting with numbers") {
+    val table2 = "local.db.table2"
+    val tablePath2 = tablePath + "2"
+    withTable(table2) {
+      spark.sql(
+        s"""CREATE TABLE $table2 (1id bigint, 2data string)
+           |USING iceberg PARTITIONED BY (2data)""".stripMargin)
+      spark.sql(s"INSERT INTO $table2 VALUES (1, 'a'), (2, 'b')")
+      spark.sql(s"INSERT INTO $table2 VALUES (3, 'c')")
+      assert(spark.sql(s"select * from $table2").schema ==
+        new StructType().add("1id", LongType).add("2data", StringType))
+
+      checkAnswer(
+        spark.sql(s"select * from $table2"),
+        Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil)
+
+      val properties = readIcebergHadoopTable(tablePath2).properties()
+
+      // This confirms that name mapping is not used for this case
+      assert(properties.get(TableProperties.DEFAULT_NAME_MAPPING) == null)
+
+      convert(s"iceberg.`$tablePath2`")
+      // The converted delta table gets the updated schema
+      assert(
+        SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability(
+          spark.read.format("delta").load(tablePath2).schema,
+          new StructType().add("1id", LongType).add("2data", StringType)))
+
+      // parquet file schema has been modified
+      assert(
+        spark.read.format("parquet").load(tablePath2 + "/data").schema ==
+          new StructType()
+            .add("_1id", LongType)
+            .add("_2data", StringType)
+            // this is the partition column, which stays as-is
+            .add("2data", StringType))
+
+      checkAnswer(
+        spark.read.format("delta").load(tablePath2),
+        Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil)
+    }
+  }
+
+  test("nested schema") {
+    withTable(table) {
+      def createDDL(tname: String): String =
+        s"""CREATE TABLE $tname (id bigint, person struct<name:string, phone:int>)
+           |USING iceberg PARTITIONED BY (truncate(person.name, 2))""".stripMargin
+      def insertDDL(tname: String): String =
+        s"INSERT INTO $tname VALUES (1, ('aaaaa', 10)), (2, ('bbbbb', 20))"
+      testNestedColumnIDs(createDDL(nestedTable), insertDDL(nestedTable))
+
+      spark.sql(createDDL(table))
+
+      spark.sql(s"INSERT INTO $table VALUES (1, ('aaaaa', 10)), (2, ('bbbbb', 20))")
+      checkAnswer(
+        spark.sql(s"SELECT * from $table"),
+        Row(1, Row("aaaaa", 10)) :: Row(2, Row("bbbbb", 20)) :: Nil)
+
+      convert(s"iceberg.`$tablePath`")
+
+      val tblSchema = spark.read.format("delta").load(tablePath).schema
+
+      val expectedSchema = new StructType()
+        .add("id", LongType)
+        .add("person", new StructType().add("name", StringType).add("phone", IntegerType))
+        .add("person.name_trunc", StringType)
+
+      assert(SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability(expectedSchema, tblSchema))
+
+      checkAnswer(
+        spark.read.format("delta").load(tablePath),
+        Row(1, Row("aaaaa", 10), "aa") :: Row(2, Row("bbbbb", 20), "bb") :: Nil)
+
+      assert(
+        spark.sql(s"select * from delta.`$tablePath` where person.name > 'b'")
+          .inputFiles.length == 1)
+
+      spark.sql(
+        s"""
+           |insert into $table (id, person)
+           |values (3, struct("ccccc", 30))
+           |""".stripMargin)
+
+      val insertDataSchema = StructType.fromDDL("id bigint, person struct<name:string, phone:int>")
+      val df = spark.createDataFrame(Seq(Row(3L, Row("ccccc", 30))).asJava, insertDataSchema)
+      df.write.format("delta").mode("append").save(tablePath)
+
+      checkAnswer(
+        // check the raw parquet partition directories written out by Iceberg
+        spark.sql(s"select * from parquet.`$tablePath/data`"),
+        spark.sql(s"select * from delta.`$tablePath`")
+      )
+      assert(
+        spark.sql(s"select * from delta.`$tablePath` where person.name > 'b'")
+          .inputFiles.length == 2)
+    }
+  }
+
+  private def schemaTestNoDataSkipping(
+      createTableSql: String,
+      initialInsertValuesSql: String,
+      expectedInitialRows: Seq[Row],
+      expectedSchema: StructType,
+      finalInsertValuesSql: String): Unit = {
+    withTable(table) {
+      spark.sql(s"DROP TABLE IF EXISTS $table")
+      spark.sql(s"CREATE TABLE $table $createTableSql USING iceberg")
+      spark.sql(s"INSERT INTO $table VALUES $initialInsertValuesSql")
+      checkAnswer(spark.sql(s"SELECT * FROM $table"), expectedInitialRows)
+
+      convert(s"iceberg.`$tablePath`")
+
+      val tblSchema = spark.read.format("delta").load(tablePath).schema
+
+      assert(SchemaMergingUtils.equalsIgnoreCaseAndCompatibleNullability(expectedSchema, tblSchema))
+
+      checkAnswer(spark.read.format("delta").load(tablePath), expectedInitialRows)
+
+      spark.sql(
+        s"""
+           |INSERT INTO $table
+           |VALUES $finalInsertValuesSql
+           |""".stripMargin)
+
+      spark.sql(
+        s"""
+           |INSERT INTO delta.`$tablePath`
+           |VALUES $finalInsertValuesSql
+           |""".stripMargin)
+
+      checkAnswer(
+        // check the raw parquet partition directories written out by Iceberg
+        spark.sql(s"SELECT * FROM parquet.`$tablePath/data`"),
+        spark.sql(s"SELECT * FROM delta.`$tablePath`")
+      )
+    }
+  }
+
+  test("array of struct schema") {
+    val createTableSql = "(id bigint, grades array<struct<class:string, score:int>>)"
+    val initialInsertValuesSql = "(1, array(('mat', 10), ('cs', 90))), (2, array(('eng', 80)))"
+    val expectedInitialRows = Row(1, Seq(Row("mat", 10), Row("cs", 90))) ::
+      Row(2, Seq(Row("eng", 80))) :: Nil
+    val arrayType = ArrayType(new StructType().add("class", StringType).add("score", IntegerType))
+    val expectedSchema = new StructType()
+      .add("id", LongType)
+      .add("grades", arrayType)
+    val finalInsertValuesSql = "(3, array(struct(\"mat\", 100), struct(\"cs\", 100)))"
+
+    schemaTestNoDataSkipping(createTableSql, initialInsertValuesSql, expectedInitialRows,
+      expectedSchema, finalInsertValuesSql)
+  }
+
+  test("map schema") {
+    val createTableSql = "(id bigint, grades map<string, int>)"
+    val initialInsertValuesSql = "(1, map('mat', 10, 'cs', 90)), (2, map('eng', 80))"
+    val expectedInitialRows = Row(1, Map[String, Int]("mat" -> 10, "cs" -> 90)) ::
+      Row(2, Map[String, Int]("eng" -> 80)) :: Nil
+    val expectedSchema = new StructType()
+      .add("id", LongType)
+      .add("grades", MapType(StringType, IntegerType))
+    val finalInsertValuesSql = "(3, map(\"mat\", 100, \"cs\", 100))"
+
+    schemaTestNoDataSkipping(createTableSql, initialInsertValuesSql, expectedInitialRows,
+      expectedSchema, finalInsertValuesSql)
+  }
+
+  test("partition schema is not allowed") {
+    withTable(table) {
+      spark.sql(
+        s"""CREATE TABLE $table (id bigint, data string)
+           |USING iceberg PARTITIONED BY (data)
+           |""".stripMargin)
+      spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      val e = intercept[IllegalArgumentException] {
+        convert(s"iceberg.`$tablePath`", Some("data string"))
+      }
+      assert(e.getMessage.contains("Partition schema cannot be specified"))
+    }
+  }
+
+  test("copy over Iceberg table properties") {
+    withTable(table) {
+      spark.sql(
+        s"""CREATE TABLE $table (id bigint, data string)
+           |USING iceberg PARTITIONED BY (data)""".stripMargin)
+      spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      spark.sql(
+        s"""ALTER TABLE $table SET TBLPROPERTIES(
+           | 'read.split.target-size'='268435456'
+           |)""".stripMargin)
+      convert(s"iceberg.`$tablePath`")
+      checkAnswer(
+        spark.sql(s"SHOW TBLPROPERTIES delta.`$tablePath`")
+          .filter(col("key").startsWith("read.")),
+        Row("read.split.target-size", "268435456") :: Nil
+      )
+    }
+  }
+
+  test("converted table columns have metadata 
containing iceberg column ids") { + + val nested1 = s"""CREATE TABLE $nestedTable (name string, age int, + |pokemon array>) + |USING iceberg""".stripMargin + + val insert1 = s"""INSERT INTO $nestedTable VALUES ('Ash', 10, + |array(struct('Charizard', 'Fire/Flying'), struct('Pikachu', 'Electric'))) + """.stripMargin + testNestedColumnIDs(nested1, insert1) + + val nested2 = s"""CREATE TABLE $nestedTable (name string, + |info struct, id:int>) + |USING iceberg""".stripMargin + + val insert2 = s"""INSERT INTO $nestedTable VALUES ('Zigzagoon', + |struct(struct('Hoenn', 'Common'), 263)) + """.stripMargin + testNestedColumnIDs(nested2, insert2) + + val nested3 = s"""CREATE TABLE $nestedTable (name string, + |moves map>) + |USING iceberg""".stripMargin + + val insert3 = s"""INSERT INTO $nestedTable VALUES ('Heatran', + |map('Fire Fang', struct(17, 7))) + """.stripMargin + testNestedColumnIDs(nested3, insert3) + } + + test("comments are retained from Iceberg") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint comment "myexample", data string comment "myexample") + |USING iceberg PARTITIONED BY (data)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')") + + convert(s"iceberg.`$tablePath`") + + val readSchema = spark.read.format("delta").load(tablePath).schema + readSchema.foreach { field => + assert(field.getComment().contains("myexample")) + } + } + } + + private def testNestedColumnIDs(createString: String, insertString: String): Unit = { + // Nested schema + withTable(nestedTable) { + // Create table and insert into it + spark.sql(createString) + + spark.sql(insertString) + + // Convert to Delta + convert(s"iceberg.`$nestedTablePath`") + + // Check Delta schema + val schema = DeltaLog.forTable(spark, new Path(nestedTablePath)).update().schema + + // Get initial Iceberg schema + val icebergTable = readIcebergHadoopTable(nestedTablePath) + val icebergSchema = icebergTable.schema() + + // Check all nested fields to see if they all have a column ID then check the iceberg schema + // for whether that column ID corresponds to the same column name + val columnIds = mutable.Set[Long]() + SchemaMergingUtils.transformColumns(schema) { (_, field, _) => + assert(DeltaColumnMapping.hasColumnId(field)) + // nest column ids should be distinct + val id = DeltaColumnMapping.getColumnId(field) + assert(!columnIds.contains(id)) + columnIds.add(id) + // the id can either be a data schema id or a identity transform partition field + // or it is generated bc it's a non-identity transform partition field + assert( + Option(icebergSchema.findField(id)).map(_.name()).contains(field.name) || + icebergTable.spec().fields().asScala.map(_.name()).contains(field.name) + ) + field + } + } + } + + test("conversion should fail if had partition evolution / multiple partition specs") { + /** + * Per https://iceberg.apache.org/evolution/#partition-evolution, if partition evolution happens + * in Iceberg, multiple partition specs are persisted, thus convert to Delta cannot be + * supported w/o repartitioning because Delta only supports one consistent spec + */ + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string, data2 string) + |USING iceberg PARTITIONED BY (data)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a', 'x'), (2, 'b', 'y'), (3, 'c', 'z')") + // add new partition spec + readIcebergHadoopTable(tablePath).updateSpec().addField("data2").commit() + spark.sql(s"INSERT INTO $table VALUES (1, 'a', 'x'), (2, 'b', 'y'), (3, 'c', 
'z')") + // partition evolution happens, convert will fail + val e1 = intercept[UnsupportedOperationException] { + convert(s"iceberg.`$tablePath`") + } + assert(e1.getMessage.contains(IcebergTable.ERR_MULTIPLE_PARTITION_SPECS)) + + // drop old partition spec + readIcebergHadoopTable(tablePath).updateSpec().removeField("data2").commit() + spark.sql(s"INSERT INTO $table VALUES (1, 'a', 'x'), (2, 'b', 'y'), (3, 'c', 'z')") + // partition spec is reverted, but partition evolution happens already + // use assert explicitly bc we do not want checks in IcebergPartitionUtils to run first + assert(readIcebergHadoopTable(tablePath).specs().size() > 1) + } + } + + test("convert Iceberg table with not null columns") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint NOT NULL, data string, name string NOT NULL) + |USING iceberg PARTITIONED BY (id)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a', 'b'), (2, 'b', 'c'), (3, 'c', 'd')") + convert(s"iceberg.`$tablePath`") + val data = spark.read.format("delta").load(tablePath) + // verify data is converted properly + checkAnswer(data, Seq(Row(1, "a", "b"), Row(2, "b", "c"), Row(3, "c", "d"))) + + // Verify schema contains not null constraint where appropriate + val dataSchema = data.schema + dataSchema.foreach { field => + // both partition columns and data columns should have the correct nullability + if (field.name == "id" || field.name == "name") { + assert(!field.nullable) + } else { + assert(field.nullable) + } + } + + // Should not be able to write nulls to not null data column + var ex = intercept[Exception] { + spark.sql(s"INSERT INTO $table VALUES (4, 'd', null)") + } + assert(ex.getMessage.contains("Null value appeared in non-nullable field") || + // TODO: remove it after OSS 3.4 release. + ex.getMessage.contains("""Cannot write nullable values to non-null column 'name'""")) + + // Should not be able to write nulls to not null partition column + ex = intercept[Exception] { + spark.sql(s"INSERT INTO $table VALUES (null, 'e', 'e')") + } + assert(ex.getMessage.contains("Null value appeared in non-nullable field") || + // TODO: remove it after OSS 3.4 release. 
+ ex.getMessage.contains("""Cannot write nullable values to non-null column 'id'""")) + + // Should be able to write nulls to nullable column + spark.sql(s"INSERT INTO $table VALUES (5, null, 'e')") + } + } + + test("convert Iceberg table with case sensitive columns") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (i bigint NOT NULL, I string) + |USING iceberg PARTITIONED BY (I)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')") + val ex = intercept[UnsupportedOperationException] { + convert(s"iceberg.`$tablePath`") + } + + assert(ex.getMessage.contains("contains column names that only differ by case")) + } + } + } + + test("should block converting Iceberg table with name mapping") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data) + |""".stripMargin + ) + spark.sql( + s"""ALTER TABLE $table SET TBLPROPERTIES( + | 'schema.name-mapping.default' = + | '[{"field-id": 1, "names": ["my_id"]},{"field-id": 2, "names": ["my_data"]}]' + |)""".stripMargin) + + val e = intercept[UnsupportedOperationException] { + convert(s"iceberg.`$tablePath`") + } + assert(e.getMessage.contains(IcebergTable.ERR_CUSTOM_NAME_MAPPING)) + + } + } + + private def testNullPartitionValues(): Unit = { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string, dt date) + |USING iceberg PARTITIONED BY (dt)""".stripMargin) + spark.sql(s"INSERT INTO $table" + + s" VALUES (1, 'a', null), (2, 'b', null), (3, 'c', cast('2021-01-03' as date))") + convert(s"iceberg.`$tablePath`") + val data = spark.read.format("delta").load(tablePath) + val fmt = new SimpleDateFormat("yyyy-MM-dd") + checkAnswer(data, + Seq( + Row(1, "a", null), + Row(2, "b", null), + Row(3, "c", new java.sql.Date(fmt.parse("2021-01-03").getTime)))) + } + } + + test("partition columns are null") { + withSQLConf(DeltaSQLConf.DELTA_CONVERT_ICEBERG_USE_NATIVE_PARTITION_VALUES.key -> "false") { + val e = intercept[RuntimeException] { + testNullPartitionValues() + } + assert(e.getMessage.contains("Failed to cast partition value")) + } + + withSQLConf( + DeltaSQLConf.DELTA_CONVERT_PARTITION_VALUES_IGNORE_CAST_FAILURE.key -> "true", + DeltaSQLConf.DELTA_CONVERT_ICEBERG_USE_NATIVE_PARTITION_VALUES.key -> "false") { + testNullPartitionValues() + } + + // default setting should work + testNullPartitionValues() + } + + test("arbitrary name") { + def col(name: String): String = name + "with_special_chars_;{}()\n\t=" + + // turns out Iceberg would fail when partition col names have special chars + def partCol(name: String): String = "0123" + name + + withTable(table) { + spark.sql( + s"""CREATE TABLE $table ( + | `${col("data")}` int, + | `${partCol("part1")}` bigint, + | `${partCol("part2")}` string) + |USING iceberg + |PARTITIONED BY ( + | `${partCol("part1")}`, + | truncate(`${partCol("part2")}`, 4)) + |""".stripMargin) + + spark.sql( + s""" + |INSERT INTO $table + |VALUES (123, 1234567890123, 'str11') + |""".stripMargin) + + convert(s"iceberg.`$tablePath`") + + spark.sql( + s""" + |INSERT INTO delta.`$tablePath` + |VALUES (456, 4567890123456, 'str22', 'str2') + |""".stripMargin) + + checkAnswer(spark.sql(s"select * from delta.`$tablePath`"), + Seq( + Row(123, 1234567890123L, "str11", "str1"), + Row(456, 4567890123456L, "str22", "str2"))) + + // projection and filter + checkAnswer( + spark.table(s"delta.`$tablePath`") + .select(s"`${col("data")}`", s"`${partCol("part1")}`") 
+ .where(s"`${partCol("part2")}` = 'str22'"), + Seq(Row(456, 4567890123456L))) + } + } + + test("partition by identity, using native partition values") { + withDefaultTimeZone("UTC") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table ( + | data_binary binary, + | part_ts timestamp, + | part_date date, + | part_bool boolean, + | part_int integer, + | part_long long, + | part_float float, + | part_double double, + | part_decimal decimal(3, 2), + | part_string string + | ) + |USING iceberg PARTITIONED BY (part_ts, part_date, part_bool, part_int, part_long, + | part_float, part_double, part_decimal, part_string)""".stripMargin) + + def insertData(targetTable: String): Unit = { + spark.sql( + s""" + |INSERT INTO $targetTable + |VALUES (cast('this is binary' as binary), + | cast(1635728400000 as timestamp), + | cast('2021-11-15' as date), + | true, + | 123, + | 12345678901234, + | 123.4, + | 123.4, + | 1.23, + | 'this is a string')""".stripMargin) + } + + insertData(table) + withTempDir { dir => + val deltaPath = dir.getCanonicalPath + ConvertToDeltaCommand( + tableIdentifier = TableIdentifier(tablePath, Some("iceberg")), + partitionSchema = None, + collectStats = true, + Some(deltaPath)).run(spark) + // check that all the partition value types can be converted correctly + checkAnswer(spark.table(s"delta.`$deltaPath`"), spark.table(table)) + + insertData(s"delta.`$deltaPath`") + insertData(table) + // check that new writes to both Delta and Iceberg can be read back the same + checkAnswer(spark.table(s"delta.`$deltaPath`"), spark.table(table)) + } + } + } + } + + test("mor table without deletion files") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg + |TBLPROPERTIES ( + | "format-version" = "2", + | "write.delete.mode" = "merge-on-read" + |) + |""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a')") + spark.sql(s"INSERT INTO $table VALUES (2, 'b')") + spark.sql(s"DELETE FROM $table WHERE id = 1") + // The two rows above should've been in separate files, and DELETE will remove all rows from + // one file completely, in this case, we could still convert the table as Spark scan will + // ignore the completely deleted file. + convert(s"iceberg.`$tablePath`") + checkAnswer( + spark.read.format("delta").load(tablePath), + Row(2, "b") :: Nil + ) + } + } + + test("block convert: mor table with deletion files") { + def setupBulkMorTable(): Unit = { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg + |TBLPROPERTIES ( + | "format-version" = "2", + | "write.delete.mode" = "merge-on-read", + | "write.update.mode" = "merge-on-read", + | "write.merge.mode" = "merge-on-read" + |) + |""".stripMargin) + // Now we need to write a considerable amount of data in a dataframe fashion so Iceberg can + // combine multiple records in one Parquet file. + (0 until 100).map(i => (i.toLong, s"name_$i")).toDF("id", "data") + .write.format("iceberg").mode("append").saveAsTable(table) + } + + def assertConversionFailed(): Unit = { + // By default, conversion should fail because it is unsafe. + val e = intercept[UnsupportedOperationException] { + convert(s"iceberg.`$tablePath`") + } + assert(e.getMessage.contains("merge-on-read")) + } + + // --- DELETE + withTable(table) { + setupBulkMorTable() + // This should touch part of one Parquet file + spark.sql(s"DELETE FROM $table WHERE id = 1") + // By default, conversion should fail because it is unsafe. 
+ assertConversionFailed() + // Force escape should work + withSQLConf(DeltaSQLConf.DELTA_CONVERT_ICEBERG_UNSAFE_MOR_TABLE_ENABLE.key -> "true") { + convert(s"iceberg.`$tablePath`") + // ... but with data duplication + checkAnswer( + spark.read.format("delta").load(tablePath), + (0 until 100).map(i => Row(i.toLong, s"name_$i")) + ) + } + } + + // --- UPDATE + withTable(table) { + setupBulkMorTable() + // This should touch part of one Parquet file + spark.sql(s"UPDATE $table SET id = id * 2 WHERE id = 1") + // By default, conversion should fail because it is unsafe. + assertConversionFailed() + } + + // --- MERGE + withTable(table) { + setupBulkMorTable() + (0 until 100).filter(_ % 2 == 0) + .toDF("id") + .createOrReplaceTempView("tempdata") + + // This should touch part of one Parquet file + spark.sql( + s""" + |MERGE INTO $table t + |USING tempdata s + |ON t.id = s.id + |WHEN MATCHED THEN UPDATE SET t.data = "some_other" + |""".stripMargin) + // By default, conversion should fail because it is unsafe. + assertConversionFailed() + } + } + + test("block convert: binary type partition columns") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table ( + | data int, + | part binary) + |USING iceberg + |PARTITIONED BY (part) + |""".stripMargin) + spark.sql(s"insert into $table values (123, cast('str1' as binary))") + val e = intercept[UnsupportedOperationException] { + convert(s"iceberg.`$tablePath`") + } + assert(e.getMessage.contains("Unsupported partition transform expression")) + } + } + + test("block convert: partition transform truncate decimal type") { + withTable(table) { + spark.sql( + s"""CREATE TABLE $table ( + | data int, + | part decimal) + |USING iceberg + |PARTITIONED BY (truncate(part, 3)) + |""".stripMargin) + spark.sql(s"insert into $table values (123, 123456)") + val e = intercept[UnsupportedOperationException] { + convert(s"iceberg.`$tablePath`") + } + assert(e.getMessage.contains("Unsupported partition transform expression")) + } + } +} + +class ConvertIcebergToDeltaScalaSuite extends ConvertIcebergToDeltaSuiteBase { + override protected def convert( + tableIdentifier: String, + partitioning: Option[String] = None, + collectStats: Boolean = true): Unit = { + if (partitioning.isDefined) { + io.delta.tables.DeltaTable.convertToDelta(spark, tableIdentifier, partitioning.get) + } else { + io.delta.tables.DeltaTable.convertToDelta(spark, tableIdentifier) + } + } +} + +class ConvertIcebergToDeltaSQLSuite extends ConvertIcebergToDeltaSuiteBase { + override protected def convert( + tableIdentifier: String, + partitioning: Option[String] = None, + collectStats: Boolean = true): Unit = { + val statement = partitioning.map(p => s" PARTITIONED BY ($p)").getOrElse("") + spark.sql(s"CONVERT TO DELTA ${tableIdentifier}${statement} " + + s"${collectStatisticsStringOption(collectStats)}") + } + + // TODO: Move to base once DeltaAPI support collectStats parameter + test("convert without statistics") { + withTempDir { dir => + withTable(table) { + spark.sql( + s"""CREATE TABLE $table (id bigint, data string) + |USING iceberg PARTITIONED BY (data)""".stripMargin) + spark.sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b')") + spark.sql(s"INSERT INTO $table VALUES (3, 'c')") + ConvertToDeltaCommand( + TableIdentifier(tablePath, Some("iceberg")), + None, + collectStats = false, + Some(dir.getCanonicalPath)).run(spark) + + // Check statistics + val deltaLog = DeltaLog.forTable(spark, new Path(dir.getPath)) + val statsDf = deltaLog.unsafeVolatileSnapshot.allFiles + 
.select(from_json(col("stats"), deltaLog.unsafeVolatileSnapshot.statsSchema).as("stats")) + .select("stats.*") + assert(statsDf.filter(col("numRecords").isNotNull).count == 0) + val history = io.delta.tables.DeltaTable.forPath(dir.getPath).history() + assert(history.count == 1) + } + } + } +} diff --git a/iceberg/src/test/scala/org/apache/spark/sql/delta/ConvertToIcebergSuite.scala b/iceberg/src/test/scala/org/apache/spark/sql/delta/ConvertToIcebergSuite.scala new file mode 100644 index 00000000000..1f645a30db4 --- /dev/null +++ b/iceberg/src/test/scala/org/apache/spark/sql/delta/ConvertToIcebergSuite.scala @@ -0,0 +1,236 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.{File, IOException} +import java.net.ServerSocket + +import org.scalatest.concurrent.Eventually +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkContext +import org.apache.spark.sql.{QueryTest, Row, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, CatalogStorageFormat} +import org.apache.spark.sql.delta.actions.Metadata +import org.apache.spark.sql.types.{IntegerType, StringType, StructType, StructField} +import org.apache.spark.util.Utils + +/** + * This test suite relies on an external Hive metastore (HMS) instance to run. + * + * A standalone HMS can be created using the following docker command. + * ************************************************************ + * docker run -d -p 9083:9083 --env SERVICE_NAME=metastore \ + * --name metastore-standalone apache/hive:4.0.0-beta-1 + * ************************************************************ + * The URL of this standalone HMS is thrift://localhost:9083 + * + * By default this hms will use `/opt/hive/data/warehouse` as warehouse path. + * Please make sure this path exists prior to running the suite. 
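+ * For example (illustrative only; the exact layout depends on the image), the warehouse
+ * directory can be pre-created inside the container started above with:
+ *   docker exec metastore-standalone mkdir -p /opt/hive/data/warehouse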
+ */ +class ConvertToIcebergSuite extends QueryTest with Eventually { + + private var _sparkSession: SparkSession = null + private var _sparkSessionWithDelta: SparkSession = null + private var _sparkSessionWithIceberg: SparkSession = null + + private val PORT = 9083 + private val WAREHOUSE_PATH = "/opt/hive/data/warehouse/" + + private val testTableName: String = "deltatable" + private var testTablePath: String = s"$WAREHOUSE_PATH$testTableName" + + override def spark: SparkSession = _sparkSession + + override def beforeAll(): Unit = { + super.beforeAll() + if (hmsReady(PORT)) { + _sparkSessionWithDelta = createSparkSessionWithDelta() + _sparkSessionWithIceberg = createSparkSessionWithIceberg() + require(!_sparkSessionWithDelta.eq(_sparkSessionWithIceberg), "separate sessions expected") + } + } + + override def afterEach(): Unit = { + super.afterEach() + if (hmsReady(PORT)) { + _sparkSessionWithDelta.sql(s"DROP TABLE IF EXISTS $testTableName") + } + Utils.deleteRecursively(new File(testTablePath)) + } + + override def afterAll(): Unit = { + super.afterAll() + SparkContext.getActive.foreach(_.stop()) + } + + test("enforceSupportInCatalog") { + var testTable = new CatalogTable( + TableIdentifier("table"), + CatalogTableType.EXTERNAL, + CatalogStorageFormat(None, None, None, None, compressed = false, Map.empty), + new StructType(Array(StructField("col1", IntegerType), StructField("col2", StringType)))) + var testMetadata = Metadata() + + assert(UniversalFormat.enforceSupportInCatalog(testTable, testMetadata).isEmpty) + + testTable = testTable.copy(properties = Map("table_type" -> "iceberg")) + var resultTable = UniversalFormat.enforceSupportInCatalog(testTable, testMetadata) + assert(resultTable.nonEmpty) + assert(!resultTable.get.properties.contains("table_type")) + + testMetadata = testMetadata.copy( + configuration = Map("delta.universalFormat.enabledFormats" -> "iceberg")) + assert(UniversalFormat.enforceSupportInCatalog(testTable, testMetadata).isEmpty) + + testTable = testTable.copy(properties = Map.empty) + resultTable = UniversalFormat.enforceSupportInCatalog(testTable, testMetadata) + assert(resultTable.nonEmpty) + assert(resultTable.get.properties("table_type") == "iceberg") + } + + test("basic test - managed table created with SQL") { + if (hmsReady(PORT)) { + runDeltaSql( + s"""CREATE TABLE `${testTableName}` (col1 INT) USING DELTA + |TBLPROPERTIES ( + | 'delta.columnMapping.mode' = 'name', + | 'delta.universalFormat.enabledFormats' = 'iceberg' + |)""".stripMargin) + runDeltaSql(s"INSERT INTO `$testTableName` VALUES (123)") + verifyReadWithIceberg(testTableName, Seq(Row(123))) + } + } + + test("basic test - catalog table created with DataFrame") { + if (hmsReady(PORT)) { + withDeltaSparkSession { deltaSpark => + withDefaultTablePropsInSQLConf { + deltaSpark.range(10).write.format("delta") + .option("path", testTablePath) + .saveAsTable(testTableName) + } + } + withDeltaSparkSession { deltaSpark => + deltaSpark.range(10, 20, 1) + .write.format("delta").mode("append") + .option("path", testTablePath) + .saveAsTable(testTableName) + } + verifyReadWithIceberg(testTableName, 0 to 19 map (Row(_))) + } + } + + def runDeltaSql(sqlStr: String): Unit = { + withDeltaSparkSession { deltaSpark => + deltaSpark.sql(sqlStr) + } + } + + def verifyReadWithIceberg(tableName: String, expectedAnswer: Seq[Row]): Unit = { + withIcebergSparkSession { icebergSparkSession => + eventually(timeout(10.seconds)) { + icebergSparkSession.sql(s"REFRESH TABLE ${tableName}") + val icebergDf = 
icebergSparkSession.read.format("iceberg").load(tableName) + checkAnswer(icebergDf, expectedAnswer) + } + } + } + + + def withDefaultTablePropsInSQLConf(f: => Unit): Unit = { + withSQLConf( + DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey -> "name", + DeltaConfigs.UNIVERSAL_FORMAT_ENABLED_FORMATS.defaultTablePropertyKey -> "iceberg" + ) { f } + } + + def withDeltaSparkSession[T](f: SparkSession => T): T = { + withSparkSession(_sparkSessionWithDelta, f) + } + + def withIcebergSparkSession[T](f: SparkSession => T): T = { + withSparkSession(_sparkSessionWithIceberg, f) + } + + def withSparkSession[T](sessionToUse: SparkSession, f: SparkSession => T): T = { + try { + SparkSession.setDefaultSession(sessionToUse) + SparkSession.setActiveSession(sessionToUse) + _sparkSession = sessionToUse + f(sessionToUse) + } finally { + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + _sparkSession = null + } + } + + protected def createSparkSessionWithDelta(): SparkSession = { + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + val sparkSession = SparkSession.builder() + .master("local[*]") + .appName("DeltaSession") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .config("hive.metastore.uris", s"thrift://localhost:$PORT") + .config("spark.sql.catalogImplementation", "hive") + .getOrCreate() + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + sparkSession + } + + protected def createSparkSessionWithIceberg(): SparkSession = { + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + val sparkSession = SparkSession.builder() + .master("local[*]") + .appName("IcebergSession") + .config("spark.sql.extensions", + "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") + .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog") + .config("hive.metastore.uris", s"thrift://localhost:$PORT") + .config("spark.sql.catalogImplementation", "hive") + .getOrCreate() + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + sparkSession + } + + def hmsReady(port: Int): Boolean = { + var ss: ServerSocket = null + try { + ss = new ServerSocket(port) + ss.setReuseAddress(true) + logWarning("No HMS detected, test suite will not run") + return false + } catch { + case e: IOException => + } finally { + if (ss != null) { + try ss.close() + catch { + case e: IOException => + } + } + } + true + } +} diff --git a/iceberg/src/test/scala/org/apache/spark/sql/delta/uniform/UniFormE2ESuite.scala b/iceberg/src/test/scala/org/apache/spark/sql/delta/uniform/UniFormE2ESuite.scala new file mode 100644 index 00000000000..1b914c3509a --- /dev/null +++ b/iceberg/src/test/scala/org/apache/spark/sql/delta/uniform/UniFormE2ESuite.scala @@ -0,0 +1,43 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.uniform + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.uniform.{UniFormE2EIcebergSuiteBase, UniFormE2ETest} + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.uniform.hms.HMSTest + +/** + * This trait allows the tests to write with Delta + * using a in-memory HiveMetaStore as catalog, + * and read from the same HiveMetaStore with Iceberg. + */ +trait WriteDeltaHMSReadIceberg extends UniFormE2ETest with DeltaSQLCommandTest with HMSTest { + + override protected def sparkConf: SparkConf = + setupSparkConfWithHMS(super.sparkConf) + .set(DeltaSQLConf.DELTA_UNIFORM_ICEBERG_SYNC_CONVERT_ENABLED.key, "true") + + override protected def createReaderSparkSession: SparkSession = createIcebergSparkSession +} + +class UniFormE2EIcebergSuite + extends UniFormE2EIcebergSuiteBase + with WriteDeltaHMSReadIceberg diff --git a/icebergShaded/.gitignore b/icebergShaded/.gitignore new file mode 100644 index 00000000000..efaced7fb6f --- /dev/null +++ b/icebergShaded/.gitignore @@ -0,0 +1,2 @@ +iceberg_src +lib \ No newline at end of file diff --git a/icebergShaded/generate_iceberg_jars.py b/icebergShaded/generate_iceberg_jars.py new file mode 100644 index 00000000000..0c2579eb4e3 --- /dev/null +++ b/icebergShaded/generate_iceberg_jars.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 + +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import os +import glob +import subprocess +import shlex +import shutil +from os import path + +iceberg_lib_dir_name = "lib" +iceberg_src_dir_name = "iceberg_src" # this is a git dir +iceberg_patches_dir_name = "iceberg_src_patches" + +iceberg_src_commit_hash = "ede085d0f7529f24acd0c81dd0a43f7bb969b763" +iceberg_src_branch = "main" # only this branch will be downloaded + +# Relative to iceberg_src directory. +# We use * because after applying the patches, a random git hash will be appended to each jar name. 
+# Thus, for all usages below, we must search for these jar files using `glob.glob(pattern)` +iceberg_src_compiled_jar_rel_glob_patterns = [ + "bundled-guava/build/libs/iceberg-bundled-guava-*.jar", + "common/build/libs/iceberg-common-*.jar", + "api/build/libs/iceberg-api-*.jar", + "core/build/libs/iceberg-core-*.jar", + "parquet/build/libs/iceberg-parquet-*.jar", + "hive-metastore/build/libs/iceberg-hive-*.jar", + "data/build/libs/iceberg-data-*.jar" +] + +iceberg_root_dir = path.abspath(path.dirname(__file__)) # this is NOT a git dir +iceberg_src_dir = path.join(iceberg_root_dir, iceberg_src_dir_name) +iceberg_patches_dir = path.join(iceberg_root_dir, iceberg_patches_dir_name) +iceberg_lib_dir = path.join(iceberg_root_dir, iceberg_lib_dir_name) + + +def iceberg_jars_exists(): + for compiled_jar_rel_glob_pattern in iceberg_src_compiled_jar_rel_glob_patterns: + jar_file_name_pattern = path.basename(path.normpath(compiled_jar_rel_glob_pattern)) + lib_jar_abs_pattern = path.join(iceberg_lib_dir, jar_file_name_pattern) + results = glob.glob(lib_jar_abs_pattern) + + if len(results) > 1: + raise Exception("More jars than expected: " + str(results)) + + if len(results) == 0: + return False + + return True + + +def prepare_iceberg_source(): + with WorkingDirectory(iceberg_root_dir): + print(">>> Cloning Iceberg repo") + shutil.rmtree(iceberg_src_dir_name, ignore_errors=True) + + # We just want the shallowest, smallest iceberg clone. We will check out the commit later. + run_cmd("git clone --depth 1 --branch %s https://github.com/apache/iceberg.git %s" % + (iceberg_src_branch, iceberg_src_dir_name)) + + with WorkingDirectory(iceberg_src_dir): + run_cmd("git config user.email \"<>\"") + run_cmd("git config user.name \"Anonymous\"") + + # Fetch just the single commit (shallow) + run_cmd("git fetch origin %s --depth 1" % iceberg_src_commit_hash) + run_cmd("git checkout %s" % iceberg_src_commit_hash) + + print(">>> Applying patch files") + patch_files = glob.glob(path.join(iceberg_patches_dir, "*.patch")) + patch_files.sort() + + for patch_file in patch_files: + print(">>> Applying '%s'" % patch_file) + run_cmd("git apply %s" % patch_file) + run_cmd("git add .") + run_cmd("git commit -a -m 'applied %s'" % path.basename(patch_file)) + + +def generate_iceberg_jars(): + print(">>> Compiling JARs") + with WorkingDirectory(iceberg_src_dir): + # disable style checks (can fail with patches) and tests + build_args = "-x spotlessCheck -x checkstyleMain -x test -x integrationTest" + run_cmd("./gradlew :iceberg-core:build %s" % build_args) + run_cmd("./gradlew :iceberg-parquet:build %s" % build_args) + run_cmd("./gradlew :iceberg-hive-metastore:build %s" % build_args) + run_cmd("./gradlew :iceberg-data:build %s" % build_args) + + print(">>> Copying JARs to lib directory") + shutil.rmtree(iceberg_lib_dir, ignore_errors=True) + os.mkdir(iceberg_lib_dir) + + # For each relative pattern p ...
+ for compiled_jar_rel_glob_pattern in iceberg_src_compiled_jar_rel_glob_patterns: + # Get the absolute pattern + compiled_jar_abs_pattern = path.join(iceberg_src_dir, compiled_jar_rel_glob_pattern) + # Search for all glob results + results = glob.glob(compiled_jar_abs_pattern) + # Compiled jars will include tests, sources, javadocs; exclude them + results = list(filter(lambda result: all(x not in result for x in ["tests.jar", "sources.jar", "javadoc.jar"]), results)) + + if len(results) == 0: + raise Exception("Could not find the jar: " + compiled_jar_rel_glob_pattern) + if len(results) > 1: + raise Exception("More jars created than expected: " + str(results)) + + # Copy the one jar result into the /lib directory + compiled_jar_abs_path = results[0] + compiled_jar_name = path.basename(path.normpath(compiled_jar_abs_path)) + lib_jar_abs_path = path.join(iceberg_lib_dir, compiled_jar_name) + shutil.copyfile(compiled_jar_abs_path, lib_jar_abs_path) + + if not iceberg_jars_exists(): + raise Exception("JAR copying failed") + + +def run_cmd(cmd, throw_on_error=True, env=None, stream_output=False, **kwargs): + if isinstance(cmd, str): + cmd = shlex.split(cmd) + cmd_env = os.environ.copy() + if env: + cmd_env.update(env) + + if stream_output: + child = subprocess.Popen(cmd, env=cmd_env, **kwargs) + exit_code = child.wait() + if throw_on_error and exit_code != 0: + raise Exception("Non-zero exitcode: %s" % (exit_code)) + print("----\n") + return exit_code + else: + child = subprocess.Popen( + cmd, + env=cmd_env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **kwargs) + (stdout, stderr) = child.communicate() + exit_code = child.wait() + if throw_on_error and exit_code != 0: + raise Exception( + "Non-zero exitcode: %s\n\nSTDOUT:\n%s\n\nSTDERR:%s" % + (exit_code, stdout, stderr)) + return (exit_code, stdout, stderr) + + +# pylint: disable=too-few-public-methods +class WorkingDirectory(object): + def __init__(self, working_directory): + self.working_directory = working_directory + self.old_workdir = os.getcwd() + + def __enter__(self): + os.chdir(self.working_directory) + + def __exit__(self, tpe, value, traceback): + os.chdir(self.old_workdir) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--force", + required=False, + default=False, + action="store_true", + help="Force the generation even if already generated, useful for testing.") + args = parser.parse_args() + + if args.force or not iceberg_jars_exists(): + prepare_iceberg_source() + generate_iceberg_jars() diff --git a/icebergShaded/iceberg_src_patches/0001-schema-evolution-with-correct-field.patch b/icebergShaded/iceberg_src_patches/0001-schema-evolution-with-correct-field.patch new file mode 100644 index 00000000000..8be6a077109 --- /dev/null +++ b/icebergShaded/iceberg_src_patches/0001-schema-evolution-with-correct-field.patch @@ -0,0 +1,186 @@ +Creates a new `SetSchema` pending update that will let us set the latest iceberg schema instead of having to apply incremental/delta changes to the existing schema. + +This PR requires that column mapping ID mode be enabled, and uses the same fieldId on the iceberg schema using the delta schema columnIds. + +This PR also blocks MapType or ArrayType (on the iceberg side). Doing so requires more complicated fieldId calculation, which is out of scope of this PR and of the first milestone.
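+
+For illustration only (not part of this patch): a rough sketch of how the new API is intended to be driven, assuming the patched iceberg-core is on the classpath and using hard-coded field IDs 1 and 2 to stand in for the IDs that Delta column mapping assigned:
+
+  import org.apache.iceberg.{Schema, Table, Transaction}
+  import org.apache.iceberg.types.Types
+
+  def overwriteIcebergSchema(table: Table): Unit = {
+    // The latest converted schema, with Iceberg field IDs taken from the Delta column mapping IDs.
+    val newSchema = new Schema(
+      Types.NestedField.required(1, "id", Types.LongType.get()),
+      Types.NestedField.optional(2, "data", Types.StringType.get()))
+    val txn: Transaction = table.newTransaction()
+    txn.setSchema(newSchema).commit() // stages the full schema replacement
+    txn.commitTransaction()           // persists it, with no incremental updateSchema calls
+  }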
TLDR Delta Map and Array types have their inner elements as DataTypes, but iceberg Map and List types have their inner elements as actual fields (which need a field ID). So even though delta column mapping ID mode will assign IDs to each delta field, this is insufficient as it won't assign IDs for these maps/array types. + +--- + .../java/org/apache/iceberg/SetSchema.java | 25 ++ + .../java/org/apache/iceberg/Transaction.java | 7 + + .../org/apache/iceberg/BaseTransaction.java | 8 + + .../iceberg/CommitCallbackTransaction.java | 5 + + .../org/apache/iceberg/SetSchemaImpl.java | 45 ++++ + .../org/apache/iceberg/TableMetadata.java | 14 +- + .../IcebergConversionTransaction.scala | 232 +++++++++--------- + .../tahoe/iceberg/IcebergSchemaUtils.scala | 55 +++-- + .../iceberg/IcebergTransactionUtils.scala | 16 +- + .../IcebergConversionTransactionSuite.scala | 224 ++++++++++++++++- + .../tahoe/iceberg/IcebergConverterSuite.scala | 3 +- + .../iceberg/IcebergSchemaUtilsSuite.scala | 200 ++++++++------- + .../IcebergTransactionUtilsSuite.scala | 25 +- + 13 files changed, 595 insertions(+), 264 deletions(-) + create mode 100644 api/src/main/java/org/apache/iceberg/SetSchema.java + create mode 100644 core/src/main/java/org/apache/iceberg/SetSchemaImpl.java + +diff --git a/api/src/main/java/org/apache/iceberg/SetSchema.java b/connector/iceberg-core/api/src/main/java/org/apache/iceberg/SetSchema.java +new file mode 100644 +index 00000000000..042a594ae5b +--- /dev/null ++++ b/api/src/main/java/org/apache/iceberg/SetSchema.java +@@ -0,0 +1,25 @@ ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, ++ * software distributed under the License is distributed on an ++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY ++ * KIND, either express or implied. See the License for the ++ * specific language governing permissions and limitations ++ * under the License. ++ */ ++ ++package org.apache.iceberg; ++ ++/** ++ * API to set the new, latest Iceberg schema. ++ */ ++public interface SetSchema extends PendingUpdate { } +diff --git a/api/src/main/java/org/apache/iceberg/Transaction.java b/connector/iceberg-core/api/src/main/java/org/apache/iceberg/Transaction.java +index 090b5dfe37c..3879c9a9146 100644 +--- a/api/src/main/java/org/apache/iceberg/Transaction.java ++++ b/api/src/main/java/org/apache/iceberg/Transaction.java +@@ -37,6 +37,13 @@ public interface Transaction { + */ + UpdateSchema updateSchema(); + ++ /** ++ * Create a new {@link SetSchema} to set the new table schema. ++ * ++ * @return a new {@link SetSchema} ++ */ ++ SetSchema setSchema(Schema newSchema); ++ + /** + * Create a new {@link UpdatePartitionSpec} to alter the partition spec of this table. 
+ * +diff --git a/core/src/main/java/org/apache/iceberg/BaseTransaction.java b/connector/iceberg-core/core/src/main/java/org/apache/iceberg/BaseTransaction.java +index 241738fedab..e299d04ebbd 100644 +--- a/core/src/main/java/org/apache/iceberg/BaseTransaction.java ++++ b/core/src/main/java/org/apache/iceberg/BaseTransaction.java +@@ -113,6 +113,14 @@ public class BaseTransaction implements Transaction { + return schemaChange; + } + ++ @Override ++ public SetSchema setSchema(Schema newSchema) { ++ checkLastOperationCommitted("SetSchema"); ++ SetSchema setSchema = new SetSchemaImpl(transactionOps, transactionOps.current(), newSchema); ++ updates.add(setSchema); ++ return setSchema; ++ } ++ + @Override + public UpdatePartitionSpec updateSpec() { + checkLastOperationCommitted("UpdateSpec"); +diff --git a/core/src/main/java/org/apache/iceberg/CommitCallbackTransaction.java b/connector/iceberg-core/core/src/main/java/org/apache/iceberg/CommitCallbackTransaction.java +index 19b74a65eca..6a2d7614a82 100644 +--- a/core/src/main/java/org/apache/iceberg/CommitCallbackTransaction.java ++++ b/core/src/main/java/org/apache/iceberg/CommitCallbackTransaction.java +@@ -41,6 +41,11 @@ class CommitCallbackTransaction implements Transaction { + return wrapped.updateSchema(); + } + ++ @Override ++ public SetSchema setSchema(Schema newSchema) { ++ return wrapped.setSchema(newSchema); ++ } ++ + @Override + public UpdatePartitionSpec updateSpec() { + return wrapped.updateSpec(); +diff --git a/core/src/main/java/org/apache/iceberg/SetSchemaImpl.java b/connector/iceberg-core/core/src/main/java/org/apache/iceberg/SetSchemaImpl.java +new file mode 100644 +index 00000000000..ce6731a4e13 +--- /dev/null ++++ b/core/src/main/java/org/apache/iceberg/SetSchemaImpl.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (2021) The Delta Lake Project Authors. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. 
++ */ ++ ++ ++ ++ ++package org.apache.iceberg; ++ ++public class SetSchemaImpl implements SetSchema { ++ ++ private final TableOperations ops; ++ private final TableMetadata base; ++ private final Schema newSchema; ++ ++ public SetSchemaImpl(TableOperations ops, TableMetadata base, Schema newSchema) { ++ this.ops = ops; ++ this.base = base; ++ this.newSchema = newSchema; ++ } ++ ++ @Override ++ public Schema apply() { ++ return newSchema; ++ } ++ ++ @Override ++ public void commit() { ++ // This will override the current schema ++ TableMetadata update = base.updateSchema(apply(), newSchema.highestFieldId()); ++ ops.commit(base, update); ++ } ++} +diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/connector/iceberg-core/core/src/main/java/org/apache/iceberg/TableMetadata.java +index afa2c7ac2d5..52546f02a75 100644 +--- a/core/src/main/java/org/apache/iceberg/TableMetadata.java ++++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java +@@ -1339,11 +1339,15 @@ public class TableMetadata implements Serializable { + } + + private int addSchemaInternal(Schema schema, int newLastColumnId) { +- Preconditions.checkArgument( +- newLastColumnId >= lastColumnId, +- "Invalid last column ID: %s < %s (previous last column ID)", +- newLastColumnId, +- lastColumnId); ++ // Since we use txn.setSchema instead of txn.updateSchema, we are manually setting the new ++ // schema. Thus, if we drop the last column, it is clearly possible and valid for the ++ // newLastColumnId to be < the previous lastColumnId. Thus, we ignore this check. ++ // ++ // Preconditions.checkArgument( ++ // newLastColumnId >= lastColumnId, ++ // "Invalid last column ID: %s < %s (previous last column ID)", ++ // newLastColumnId, ++ // lastColumnId); + + int newSchemaId = reuseOrCreateNewSchemaId(schema); + boolean schemaFound = schemasById.containsKey(newSchemaId); +-- +2.39.2 (Apple Git-143) diff --git a/icebergShaded/iceberg_src_patches/0002-iceberg-core-must-not-delete-any-delta-data-files.patch b/icebergShaded/iceberg_src_patches/0002-iceberg-core-must-not-delete-any-delta-data-files.patch new file mode 100644 index 00000000000..a181f065040 --- /dev/null +++ b/icebergShaded/iceberg_src_patches/0002-iceberg-core-must-not-delete-any-delta-data-files.patch @@ -0,0 +1,177 @@ +iceberg core must NOT delete any delta data files + +--- + .../iceberg/IncrementalFileCleanup.java | 8 +-- + .../apache/iceberg/ReachableFileCleanup.java | 5 +- + .../apache/iceberg/TestRemoveSnapshots.java | 57 +++++++++++-------- + 3 files changed, 40 insertions(+), 30 deletions(-) + +diff --git a/core/src/main/java/org/apache/iceberg/IncrementalFileCleanup.java b/connector/iceberg-core/core/src/main/java/org/apache/iceberg/IncrementalFileCleanup.java +index d894dcbf36d..ead7ea6b076 100644 +--- a/core/src/main/java/org/apache/iceberg/IncrementalFileCleanup.java ++++ b/core/src/main/java/org/apache/iceberg/IncrementalFileCleanup.java +@@ -256,10 +256,10 @@ class IncrementalFileCleanup extends FileCleanupStrategy { + } + }); + +- Set filesToDelete = +- findFilesToDelete(manifestsToScan, manifestsToRevert, validIds, afterExpiration); +- +- deleteFiles(filesToDelete, "data"); ++ // iceberg core MUST NOT delete any data files which are managed by delta ++ // Set filesToDelete = ++ // findFilesToDelete(manifestsToScan, manifestsToRevert, validIds, afterExpiration); ++ // deleteFiles(filesToDelete, "data"); + LOG.warn("Manifests to delete: {}", Joiner.on(", ").join(manifestsToDelete)); + LOG.warn("Manifests Lists to delete: {}", Joiner.on(", 
").join(manifestListsToDelete)); + deleteFiles(manifestsToDelete, "manifest"); +diff --git a/core/src/main/java/org/apache/iceberg/ReachableFileCleanup.java b/connector/iceberg-core/core/src/main/java/org/apache/iceberg/ReachableFileCleanup.java +index ccbee78e27b..da888a63b3d 100644 +--- a/core/src/main/java/org/apache/iceberg/ReachableFileCleanup.java ++++ b/core/src/main/java/org/apache/iceberg/ReachableFileCleanup.java +@@ -72,8 +72,9 @@ class ReachableFileCleanup extends FileCleanupStrategy { + snapshotsAfterExpiration, deletionCandidates, currentManifests::add); + + if (!manifestsToDelete.isEmpty()) { +- Set dataFilesToDelete = findFilesToDelete(manifestsToDelete, currentManifests); +- deleteFiles(dataFilesToDelete, "data"); ++ // iceberg core MUST NOT delete any data files which are managed by delta ++ // Set dataFilesToDelete = findFilesToDelete(manifestsToDelete, currentManifests); ++ // deleteFiles(dataFilesToDelete, "data"); + Set manifestPathsToDelete = + manifestsToDelete.stream().map(ManifestFile::path).collect(Collectors.toSet()); + deleteFiles(manifestPathsToDelete, "manifest"); +diff --git a/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java b/connector/iceberg-core/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java +index 53e5af520d9..95fa8e41de1 100644 +--- a/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java ++++ b/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java +@@ -147,8 +147,9 @@ public class TestRemoveSnapshots extends TableTestBase { + secondSnapshot + .allManifests(table.io()) + .get(0) +- .path(), // manifest contained only deletes, was dropped +- FILE_A.path()), // deleted ++ .path() // manifest contained only deletes, was dropped ++ // FILE_A.path() should NOT delete data files ++ ), // deleted + deletedFiles); + } + +@@ -209,8 +210,9 @@ public class TestRemoveSnapshots extends TableTestBase { + .allManifests(table.io()) + .get(0) + .path(), // manifest was rewritten for delete +- secondSnapshot.manifestListLocation(), // snapshot expired +- FILE_A.path()), // deleted ++ secondSnapshot.manifestListLocation() // snapshot expired ++ // FILE_A.path() should not delete any data files ++ ), + deletedFiles); + } + +@@ -309,8 +311,9 @@ public class TestRemoveSnapshots extends TableTestBase { + Sets.newHashSet( + secondSnapshot.manifestListLocation(), // snapshot expired + Iterables.getOnlyElement(secondSnapshotManifests) +- .path(), // manifest is no longer referenced +- FILE_B.path()), // added, but rolled back ++ .path() // manifest is no longer referenced ++ // FILE_B.path() should not delete any data files ++ ), + deletedFiles); + } + +@@ -686,7 +689,8 @@ public class TestRemoveSnapshots extends TableTestBase { + + removeSnapshots(table).expireOlderThan(t3).deleteWith(deletedFiles::add).commit(); + +- Assert.assertTrue("FILE_A should be deleted", deletedFiles.contains(FILE_A.path().toString())); ++ Assert.assertTrue("FILE_A should NOT be deleted", ++ !deletedFiles.contains(FILE_A.path().toString())); + } + + @Test +@@ -712,7 +716,8 @@ public class TestRemoveSnapshots extends TableTestBase { + + removeSnapshots(table).expireOlderThan(t3).deleteWith(deletedFiles::add).commit(); + +- Assert.assertTrue("FILE_A should be deleted", deletedFiles.contains(FILE_A.path().toString())); ++ Assert.assertTrue("FILE_A should NOT be deleted", ++ !deletedFiles.contains(FILE_A.path().toString())); + } + + @Test +@@ -749,8 +754,10 @@ public class TestRemoveSnapshots extends TableTestBase { + + 
removeSnapshots(table).expireOlderThan(t4).deleteWith(deletedFiles::add).commit(); + +- Assert.assertTrue("FILE_A should be deleted", deletedFiles.contains(FILE_A.path().toString())); +- Assert.assertTrue("FILE_B should be deleted", deletedFiles.contains(FILE_B.path().toString())); ++ Assert.assertTrue("FILE_A should NOT be deleted", ++ !deletedFiles.contains(FILE_A.path().toString())); ++ Assert.assertTrue("FILE_B should NOT be deleted", ++ !deletedFiles.contains(FILE_B.path().toString())); + } + + @Test +@@ -824,9 +831,11 @@ public class TestRemoveSnapshots extends TableTestBase { + Sets.newHashSet( + "remove-snapshot-0", "remove-snapshot-1", "remove-snapshot-2", "remove-snapshot-3")); + +- Assert.assertTrue("FILE_A should be deleted", deletedFiles.contains(FILE_A.path().toString())); +- Assert.assertTrue("FILE_B should be deleted", deletedFiles.contains(FILE_B.path().toString())); +- Assert.assertTrue("Thread should be created in provided pool", planThreadsIndex.get() > 0); ++ Assert.assertTrue("FILE_A should NOT be deleted", ++ !deletedFiles.contains(FILE_A.path().toString())); ++ Assert.assertTrue("FILE_B should NOT be deleted", ++ !deletedFiles.contains(FILE_B.path().toString())); ++ // Assert.assertTrue("Thread should be created in provided pool", planThreadsIndex.get() > 0); + } + + @Test +@@ -885,13 +894,13 @@ public class TestRemoveSnapshots extends TableTestBase { + Set expectedDeletes = Sets.newHashSet(); + expectedDeletes.add(snapshotA.manifestListLocation()); + +- // Files should be deleted of dangling staged snapshot +- snapshotB +- .addedDataFiles(table.io()) +- .forEach( +- i -> { +- expectedDeletes.add(i.path().toString()); +- }); ++ // Files should NOT be deleted of dangling staged snapshot ++ // snapshotB ++ // .addedDataFiles(table.io()) ++ // .forEach( ++ // i -> { ++ // expectedDeletes.add(i.path().toString()); ++ // }); + + // ManifestList should be deleted too + expectedDeletes.add(snapshotB.manifestListLocation()); +@@ -1144,10 +1153,10 @@ public class TestRemoveSnapshots extends TableTestBase { + removeSnapshots(table).expireOlderThan(fourthSnapshotTs).deleteWith(deletedFiles::add).commit(); + + Assert.assertEquals( +- "Should remove old delete files and delete file manifests", ++ "Should only delete file manifests", + ImmutableSet.builder() +- .add(FILE_A.path()) +- .add(FILE_A_DELETES.path()) ++ // .add(FILE_A.path()) ++ // .add(FILE_A_DELETES.path()) + .add(firstSnapshot.manifestListLocation()) + .add(secondSnapshot.manifestListLocation()) + .add(thirdSnapshot.manifestListLocation()) +@@ -1501,7 +1510,7 @@ public class TestRemoveSnapshots extends TableTestBase { + expectedDeletes.addAll(manifestPaths(appendA, table.io())); + expectedDeletes.add(branchDelete.manifestListLocation()); + expectedDeletes.addAll(manifestPaths(branchDelete, table.io())); +- expectedDeletes.add(FILE_A.path().toString()); ++ // expectedDeletes.add(FILE_A.path().toString()); + + Assert.assertEquals(2, Iterables.size(table.snapshots())); + Assert.assertEquals(expectedDeletes, deletedFiles); +-- +2.39.2 (Apple Git-143) diff --git a/icebergShaded/iceberg_src_patches/0003-iceberg-hive-metastore-must-not-remove-unknown-table-data.patch b/icebergShaded/iceberg_src_patches/0003-iceberg-hive-metastore-must-not-remove-unknown-table-data.patch new file mode 100644 index 00000000000..23386853c2d --- /dev/null +++ b/icebergShaded/iceberg_src_patches/0003-iceberg-hive-metastore-must-not-remove-unknown-table-data.patch @@ -0,0 +1,45 @@ +HiveTableOperations should have its catalog operations 
compatible with Delta + +This patch prevent Iceberg HiveTableOperations to overwrite catalog table properties used by Delta. It also writes a dummy schema to metastore to be aligned with Delta's behavior. +--- +Index: hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +=================================================================== +diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +--- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java (revision ede085d0f7529f24acd0c81dd0a43f7bb969b763) ++++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java (revision 4470b919dd6a97b0f6d6b7d57d1d57348a40c025) +@@ -43,6 +43,7 @@ + import org.apache.hadoop.hive.metastore.IMetaStoreClient; + import org.apache.hadoop.hive.metastore.TableType; + import org.apache.hadoop.hive.metastore.api.InvalidObjectException; ++import org.apache.hadoop.hive.metastore.api.FieldSchema; + import org.apache.hadoop.hive.metastore.api.LockComponent; + import org.apache.hadoop.hive.metastore.api.LockLevel; + import org.apache.hadoop.hive.metastore.api.LockRequest; +@@ -286,7 +287,9 @@ + LOG.debug("Committing new table: {}", fullName); + } + +- tbl.setSd(storageDescriptor(metadata, hiveEngineEnabled)); // set to pickup any schema changes ++ StorageDescriptor newsd = storageDescriptor(metadata, hiveEngineEnabled); ++ newsd.getSerdeInfo().setParameters(tbl.getSd().getSerdeInfo().getParameters()); ++ tbl.setSd(newsd); // set to pickup any schema changes + + String metadataLocation = tbl.getParameters().get(METADATA_LOCATION_PROP); + String baseMetadataLocation = base != null ? base.metadataFileLocation() : null; +@@ -393,6 +396,7 @@ + @VisibleForTesting + void persistTable(Table hmsTable, boolean updateHiveTable) + throws TException, InterruptedException { ++ hmsTable.getSd().setCols(Collections.singletonList(new FieldSchema("col", "array", ""))); + if (updateHiveTable) { + metaClients.run( + client -> { +@@ -468,7 +472,7 @@ + } + + // remove any props from HMS that are no longer present in Iceberg table props +- obsoleteProps.forEach(parameters::remove); ++ // obsoleteProps.forEach(parameters::remove); + + parameters.put(TABLE_TYPE_PROP, ICEBERG_TABLE_TYPE_VALUE.toUpperCase(Locale.ENGLISH)); + parameters.put(METADATA_LOCATION_PROP, newMetadataLocation); diff --git a/icebergShaded/iceberg_src_patches/0004-uniform-support-timestamp-as-partition-value.patch b/icebergShaded/iceberg_src_patches/0004-uniform-support-timestamp-as-partition-value.patch new file mode 100644 index 00000000000..f0a0f8fb410 --- /dev/null +++ b/icebergShaded/iceberg_src_patches/0004-uniform-support-timestamp-as-partition-value.patch @@ -0,0 +1,57 @@ +Uniform support timestamp as partition value +--- +Index: api/src/main/java/org/apache/iceberg/types/Conversions.java +=================================================================== +diff --git a/api/src/main/java/org/apache/iceberg/types/Conversions.java b/api/src/main/java/org/apache/iceberg/types/Conversions.java +--- a/api/src/main/java/org/apache/iceberg/types/Conversions.java ++++ b/api/src/main/java/org/apache/iceberg/types/Conversions.java +@@ -27,6 +27,7 @@ + import java.nio.charset.CharsetDecoder; + import java.nio.charset.CharsetEncoder; + import java.nio.charset.StandardCharsets; ++import java.sql.Timestamp; + import java.util.Arrays; + import java.util.UUID; + import 
org.apache.iceberg.exceptions.RuntimeIOException; +@@ -68,6 +69,8 @@ + return new BigDecimal(asString); + case DATE: + return Literal.of(asString).to(Types.DateType.get()).value(); ++ case TIMESTAMP: ++ return Timestamp.valueOf(asString); + default: + throw new UnsupportedOperationException( + "Unsupported type for fromPartitionString: " + type); +diff --git a/api/src/test/java/org/apache/iceberg/types/TestConversions.java b/api/src/test/java/org/apache/iceberg/types/TestConversions.java +--- a/api/src/test/java/org/apache/iceberg/types/TestConversions.java ++++ b/api/src/test/java/org/apache/iceberg/types/TestConversions.java +@@ -22,7 +22,9 @@ + import java.nio.ByteBuffer; + import java.nio.CharBuffer; + import java.nio.charset.StandardCharsets; ++import java.sql.Timestamp; + import java.util.UUID; ++ + import org.apache.iceberg.expressions.Literal; + import org.apache.iceberg.types.Types.BinaryType; + import org.apache.iceberg.types.Types.BooleanType; +@@ -182,4 +184,19 @@ + Assert.assertArrayEquals(expectedBinary, byteBuffer.array()); + Assert.assertEquals(value, Conversions.fromByteBuffer(type, byteBuffer)); + } ++ ++ @Test ++ public void testPartitionString() { ++ // timestamps are stored as {year}-{month}-{day} {hour}:{minute}:{second} or ++ // {year}-{month}-{day} {hour}:{minute}:{second}.{microsecond} ++ assertPartitionConversion("1970-01-01 00:00:00.001", TimestampType.withoutZone(), new Timestamp(1L)); ++ assertPartitionConversion("1970-01-01 00:00:00.001", TimestampType.withZone(), new Timestamp(1L)); ++ assertPartitionConversion("1970-01-01 00:00:01", TimestampType.withoutZone(), new Timestamp(1000L)); ++ assertPartitionConversion("1970-01-01 00:00:01", TimestampType.withZone(), new Timestamp(1000L)); ++ } ++ ++ private void assertPartitionConversion(String asString, Type type, Object expectedObject) { ++ Object resultObject = Conversions.fromPartitionString(type, asString); ++ Assert.assertEquals(expectedObject, resultObject); ++ } + } diff --git a/project/FlinkMimaExcludes.scala b/project/FlinkMimaExcludes.scala new file mode 100644 index 00000000000..f9a6296e4d7 --- /dev/null +++ b/project/FlinkMimaExcludes.scala @@ -0,0 +1,24 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.typesafe.tools.mima.core._ + +/** + * The list of Mima errors to exclude in the Flink project. + */ +object FlinkMimaExcludes { + val ignoredABIProblems = Seq() +} diff --git a/project/Mima.scala b/project/Mima.scala new file mode 100644 index 00000000000..32dc5077f3b --- /dev/null +++ b/project/Mima.scala @@ -0,0 +1,111 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.typesafe.tools.mima.plugin.MimaPlugin.autoImport.{mimaBinaryIssueFilters, mimaPreviousArtifacts, mimaReportBinaryIssues} +import sbt._ +import sbt.Keys._ + +/** + * Mima settings + */ +object Mima { + + /** + * @return tuple of (major, minor, patch) versions extracted from a version string. + * e.g. "1.2.3" would return (1, 2, 3) + */ + def getMajorMinorPatch(versionStr: String): (Int, Int, Int) = { + implicit def extractInt(str: String): Int = { + """\d+""".r.findFirstIn(str).map(java.lang.Integer.parseInt).getOrElse { + throw new Exception(s"Could not extract version number from $str in $version") + } + } + + versionStr.split("\\.").toList match { + case majorStr :: minorStr :: patchStr :: _ => + (majorStr, minorStr, patchStr) + case _ => throw new Exception(s"Could not parse version for $version.") + } + } + + def getPrevSparkName(currentVersion: String): String = { + val (major, minor, patch) = getMajorMinorPatch(currentVersion) + // name change in version 3.0.0, so versions > 3.0.0 should have delta-spark are prev version. + if (major >= 3 && (minor > 0 || patch > 0)) "delta-spark" else "delta-core" + } + + def getPrevSparkVersion(currentVersion: String): String = { + val (major, minor, patch) = getMajorMinorPatch(currentVersion) + + val lastVersionInMajorVersion = Map( + 0 -> "0.8.0", + 1 -> "1.2.1", + 2 -> "2.4.0" + ) + if (minor == 0) { // 1.0.0 or 2.0.0 or 3.0.0 + lastVersionInMajorVersion.getOrElse(major - 1, { + throw new Exception(s"Last version of ${major - 1}.x.x not configured.") + }) + } else if (patch == 0) { + s"$major.${minor - 1}.0" // 1.1.0 -> 1.0.0 + } else { + s"$major.$minor.${patch - 1}" // 1.1.1 -> 1.1.0 + } + } + + def getPrevConnectorVersion(currentVersion: String): String = { + val (major, minor, patch) = getMajorMinorPatch(currentVersion) + + val majorToLastMinorVersions: Map[Int, String] = Map( + // We skip from 0.6.0 to 3.0.0 when migrating connectors to the main delta repo + 0 -> "0.6.0", + 1 -> "0.6.0", + 2 -> "0.6.0" + ) + if (minor == 0) { // 1.0.0 + majorToLastMinorVersions.getOrElse(major - 1, { + throw new Exception(s"Last minor version of ${major - 1}.x.x not configured.") + }) + } else if (patch == 0) { + s"$major.${minor - 1}.0" // 1.1.0 -> 1.0.0 + } else { + s"$major.$minor.${patch - 1}" // 1.1.1 -> 1.1.0 + } + } + + lazy val sparkMimaSettings = Seq( + Test / test := ((Test / test) dependsOn mimaReportBinaryIssues).value, + mimaPreviousArtifacts := + Set("io.delta" %% getPrevSparkName(version.value) % getPrevSparkVersion(version.value)), + mimaBinaryIssueFilters ++= SparkMimaExcludes.ignoredABIProblems + ) + + lazy val standaloneMimaSettings = Seq( + Test / test := ((Test / test) dependsOn mimaReportBinaryIssues).value, + mimaPreviousArtifacts := { + Set("io.delta" %% "delta-standalone" % getPrevConnectorVersion(version.value)) + }, + mimaBinaryIssueFilters ++= StandaloneMimaExcludes.ignoredABIProblems + ) + + lazy val flinkMimaSettings = Seq( + Test / test := ((Test / test) dependsOn mimaReportBinaryIssues).value, + mimaPreviousArtifacts := { + Set("io.delta" % "delta-flink" % 
getPrevConnectorVersion(version.value)) + }, + mimaBinaryIssueFilters ++= FlinkMimaExcludes.ignoredABIProblems + ) +} diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala new file mode 100644 index 00000000000..6297a410213 --- /dev/null +++ b/project/MimaExcludes.scala @@ -0,0 +1,90 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.typesafe.tools.mima.core._ +import com.typesafe.tools.mima.core.ProblemFilters._ + +/** + * The list of Mima errors to exclude. + */ +object MimaExcludes { + val ignoredABIProblems = Seq( + // scalastyle:off line.size.limit + ProblemFilters.exclude[Problem]("org.*"), + ProblemFilters.exclude[Problem]("io.delta.sql.parser.*"), + ProblemFilters.exclude[Problem]("io.delta.tables.execution.*"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.apply"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.executeHistory"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.executeVacuum"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.this"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.deltaLog"), + + // Changes in 0.6.0 + ProblemFilters.exclude[IncompatibleResultTypeProblem]("io.delta.tables.DeltaTable.makeUpdateTable"), + ProblemFilters.exclude[IncompatibleMethTypeProblem]("io.delta.tables.DeltaMergeBuilder.withClause"), + ProblemFilters.exclude[IncompatibleMethTypeProblem]("io.delta.tables.DeltaTable.this"), + + // ... 
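The getPrevSparkVersion and getPrevConnectorVersion helpers above encode a simple rule for choosing the release MiMa compares against: an x.0.0 release falls back to a hard-coded last version of the previous major line, an x.y.0 release to x.(y-1).0, and anything else to x.y.(z-1); getPrevSparkName additionally switches the compared artifact from delta-core to delta-spark for releases after 3.0.0. A minimal Python sketch of that selection rule, with a hypothetical prev_version helper that is not part of the build:

```python
def prev_version(current: str, last_of_prev_major: dict) -> str:
    """Pick the prior release to compare against, mirroring getPrevSparkVersion above."""
    major, minor, patch = (int(p) for p in current.split("-")[0].split(".")[:3])
    if minor == 0:                           # x.0.0 -> last release of the previous major line
        return last_of_prev_major[major - 1]
    if patch == 0:                           # x.y.0 -> x.(y-1).0
        return f"{major}.{minor - 1}.0"
    return f"{major}.{minor}.{patch - 1}"    # x.y.z -> x.y.(z-1)

assert prev_version("2.4.0", {1: "1.2.1"}) == "2.3.0"
assert prev_version("3.0.0-SNAPSHOT", {2: "2.4.0"}) == "2.4.0"
```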
removed unnecessarily public methods in DeltaMergeBuilder + ProblemFilters.exclude[MissingTypesProblem]("io.delta.tables.DeltaMergeBuilder"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage$default$3"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$7"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage$default$6"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logError"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.log"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordDeltaOperation$default$3"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$4"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordEvent$default$3"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logName"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordDeltaEvent"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.withStatusCode$default$3"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.isTraceEnabled"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.withStatusCode"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordEvent"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordDeltaEvent$default$4"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logDebug"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logInfo"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logInfo"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage$default$5"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$6"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logTrace"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.initializeLogIfNecessary"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$9"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordEvent$default$2"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage$default$4"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logWarning"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage$default$7"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordDeltaEvent$default$3"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$2"), + 
ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordDeltaOperation"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logConsole"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$5"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordEvent$default$4"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$8"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.initializeLogIfNecessary$default$2"), + + // Changes in 0.7.0 + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.makeUpdateTable"), + + // Changes in 1.2.0 + ProblemFilters.exclude[MissingClassProblem]("io.delta.storage.LogStore"), + ProblemFilters.exclude[MissingClassProblem]("io.delta.storage.CloseableIterator") + + // scalastyle:on line.size.limit + ) +} + diff --git a/project/SparkMimaExcludes.scala b/project/SparkMimaExcludes.scala new file mode 100644 index 00000000000..07a09dc2f8c --- /dev/null +++ b/project/SparkMimaExcludes.scala @@ -0,0 +1,90 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.typesafe.tools.mima.core._ +import com.typesafe.tools.mima.core.ProblemFilters._ + +/** + * The list of Mima errors to exclude. + */ +object SparkMimaExcludes { + val ignoredABIProblems = Seq( + // scalastyle:off line.size.limit + ProblemFilters.exclude[Problem]("org.*"), + ProblemFilters.exclude[Problem]("io.delta.sql.parser.*"), + ProblemFilters.exclude[Problem]("io.delta.tables.execution.*"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.apply"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.executeHistory"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.executeVacuum"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.this"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.deltaLog"), + + // Changes in 0.6.0 + ProblemFilters.exclude[IncompatibleResultTypeProblem]("io.delta.tables.DeltaTable.makeUpdateTable"), + ProblemFilters.exclude[IncompatibleMethTypeProblem]("io.delta.tables.DeltaMergeBuilder.withClause"), + ProblemFilters.exclude[IncompatibleMethTypeProblem]("io.delta.tables.DeltaTable.this"), + + // ... 
removed unnecessarily public methods in DeltaMergeBuilder + ProblemFilters.exclude[MissingTypesProblem]("io.delta.tables.DeltaMergeBuilder"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage$default$3"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$7"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage$default$6"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logError"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.log"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordDeltaOperation$default$3"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$4"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordEvent$default$3"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logName"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordDeltaEvent"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.withStatusCode$default$3"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.isTraceEnabled"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.withStatusCode"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordEvent"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordDeltaEvent$default$4"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logDebug"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logInfo"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logInfo"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage$default$5"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$6"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logTrace"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.initializeLogIfNecessary"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$9"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordEvent$default$2"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage$default$4"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logWarning"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordUsage$default$7"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordDeltaEvent$default$3"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$2"), + 
ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordDeltaOperation"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.logConsole"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$5"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordEvent$default$4"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.recordOperation$default$8"), + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaMergeBuilder.initializeLogIfNecessary$default$2"), + + // Changes in 0.7.0 + ProblemFilters.exclude[DirectMissingMethodProblem]("io.delta.tables.DeltaTable.makeUpdateTable"), + + // Changes in 1.2.0 + ProblemFilters.exclude[MissingClassProblem]("io.delta.storage.LogStore"), + ProblemFilters.exclude[MissingClassProblem]("io.delta.storage.CloseableIterator") + + // scalastyle:on line.size.limit + ) +} + diff --git a/project/StandaloneMimaExcludes.scala b/project/StandaloneMimaExcludes.scala new file mode 100644 index 00000000000..9e8043aa7aa --- /dev/null +++ b/project/StandaloneMimaExcludes.scala @@ -0,0 +1,54 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.typesafe.tools.mima.core._ + +/** + * The list of Mima errors to exclude in the Standalone project. + */ +object StandaloneMimaExcludes { + val ignoredABIProblems = Seq( + // scalastyle:off line.size.limit + + // Ignore changes to internal Scala codes + ProblemFilters.exclude[Problem]("io.delta.standalone.internal.*"), + + // Public API changes in 0.2.0 -> 0.3.0 + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.DeltaLog.getChanges"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.DeltaLog.startTransaction"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.Snapshot.scan"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.DeltaLog.tableExists"), + + // Switch to using delta-storage LogStore API in 0.4.0 -> 0.5.0 + ProblemFilters.exclude[MissingClassProblem]("io.delta.standalone.storage.LogStore"), + + // Ignore missing shaded attributes + ProblemFilters.exclude[Problem]("shadedelta.*"), + + // Public API changes in 0.4.0 -> 0.5.0 + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.DeltaLog.getVersionBeforeOrAtTimestamp"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.DeltaLog.getVersionAtOrAfterTimestamp"), + + // ParquetSchemaConverter etc. 
were moved to project standalone-parquet + ProblemFilters.exclude[MissingClassProblem]("io.delta.standalone.util.ParquetSchemaConverter"), + ProblemFilters.exclude[MissingClassProblem]("io.delta.standalone.util.ParquetSchemaConverter$ParquetOutputTimestampType"), + + // Public API changes in 0.5.0 -> 0.6.0 + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.OptimisticTransaction.readVersion"), + + // scalastyle:on line.size.limit + ) +} diff --git a/project/TestParallelization.scala b/project/TestParallelization.scala new file mode 100644 index 00000000000..a0c9ce45d6f --- /dev/null +++ b/project/TestParallelization.scala @@ -0,0 +1,146 @@ +import sbt.Keys._ +import sbt._ + +object TestParallelization { + + lazy val settings = { + val parallelismCount = sys.env.get("TEST_PARALLELISM_COUNT") + if (parallelismCount.exists( _.toInt > 1)) { + customTestGroupingSettings ++ simpleGroupingStrategySettings + } + else { + Seq.empty[Setting[_]] + } + } + + /** + Replace the default value for Test / testGrouping settingKey + and set it to a new value calculated by using the custom Task + [[testGroupingStrategy]]. Adding these settings to the build + will require to separately provide a value for the TaskKey + [[testGroupingStrategy]] + */ + lazy val customTestGroupingSettings = { + Seq( + Test / testGrouping := { + val tests = (Test / definedTests).value + val groupingStrategy = (Test / testGroupingStrategy).value + val grouping = tests.foldLeft(groupingStrategy) { + case (strategy, testDefinition) => strategy.add(testDefinition) + } + val logger = streams.value.log + logger.info(s"Tests will be grouped in ${grouping.testGroups.size} groups") + val groups = grouping.testGroups + groups.foreach{ + group => + logger.info(s"${group.name} contains ${group.tests.size} tests") + } + groups + } + ) + } + + + + /** + Sets the Test / testGroupingStrategy Task to an instance of the + SimpleHashStrategy + */ + lazy val simpleGroupingStrategySettings = Seq( + Test / forkTestJVMCount := { + sys.env.get("TEST_PARALLELISM_COUNT").map(_.toInt).getOrElse(4) + }, + Test / testGroupingStrategy := { + val groupsCount = (Test / forkTestJVMCount).value + val baseJvmDir = baseDirectory.value + SimpleHashStrategy(groupsCount, baseJvmDir, defaultForkOptions.value) + }, + Test / parallelExecution := true, + Global / concurrentRestrictions := { + Seq(Tags.limit(Tags.ForkedTestGroup, (Test / forkTestJVMCount).value)) + } + ) + + + val forkTestJVMCount = SettingKey[Int]("fork test jvm count", + "The number of separate JVM to use for tests" + ) + + val testGroupingStrategy = TaskKey[GroupingStrategy]("test grouping strategy", + "The strategy to allocate different tests into groups," + + "potentially using multiple JVMS for their execution" + ) + private val defaultForkOptions = Def.task { + ForkOptions( + javaHome = javaHome.value, + outputStrategy = outputStrategy.value, + bootJars = Vector.empty, + workingDirectory = Some(baseDirectory.value), + runJVMOptions = (Test / javaOptions).value.toVector, + connectInput = connectInput.value, + envVars = (Test / envVars).value + ) + } + /** + * Base trait to group tests. + * + * By default SBT will run all tests as if they belong to a single group, + * but allows tests to be grouped. Setting [[sbt.Keys.testGrouping]] to + * a list of groups replace the default single-group definition. 
+ * + * When creating an instance of [[sbt.Tests.Group]] it is possible to specify + * an [[sbt.Tests.TestRunPolicy]]: this parameter can be used to use multiple + * subprocesses for test execution + * + */ + sealed trait GroupingStrategy { + + /** + * Adds an [[sbt.TestDefinition]] to this GroupingStrategy and + * returns an updated Grouping Strategy + */ + def add(testDefinition: TestDefinition): GroupingStrategy + + /** + * Returns the test groups built from this GroupingStrategy + */ + def testGroups: List[Tests.Group] + } + + class SimpleHashStrategy private(groups: Map[Int, Tests.Group]) extends GroupingStrategy { + + lazy val testGroups = groups.values.toList + val groupCount = groups.size + + override def add(testDefinition: TestDefinition): GroupingStrategy = { + val groupIdx = math.abs(testDefinition.name.hashCode % groupCount) + val currentGroup = groups(groupIdx) + val updatedGroup = currentGroup.withTests( + currentGroup.tests :+ testDefinition + ) + new SimpleHashStrategy(groups + (groupIdx -> updatedGroup)) + } + } + + object SimpleHashStrategy { + + def apply(groupCount: Int, + baseDir: File, forkOptionsTemplate: ForkOptions): GroupingStrategy = { + val testGroups = (0 until groupCount).map { + groupIdx => + val forkOptions = forkOptionsTemplate.withRunJVMOptions( + runJVMOptions = forkOptionsTemplate.runJVMOptions ++ + Seq(s"-Djava.io.tmpdir=${baseDir}/target/tmp/$groupIdx") + ) + val group = Tests.Group( + name = s"Test group ${groupIdx}", + tests = Nil, + runPolicy = Tests.SubProcess(forkOptions) + ) + groupIdx -> group + } + new SimpleHashStrategy(testGroups.toMap) + } + } + +} diff --git a/project/Unidoc.scala b/project/Unidoc.scala new file mode 100644 index 00000000000..81ef10d5fcb --- /dev/null +++ b/project/Unidoc.scala @@ -0,0 +1,162 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import sbt._ +import sbt.Keys._ +import sbtunidoc._ +import sbtunidoc.BaseUnidocPlugin.autoImport._ +import sbtunidoc.ScalaUnidocPlugin.autoImport._ +import sbtunidoc.JavaUnidocPlugin.autoImport._ + +object Unidoc { + + /** + * Patterns are strings to do simple substring matches on the full path of every source file. + */ + case class SourceFilePattern(patterns: Seq[String], project: Option[Project] = None) + + object SourceFilePattern { + def apply(patterns: String*): SourceFilePattern = SourceFilePattern(patterns.toSeq, None) + } + + val unidocSourceFilePatterns = settingKey[Seq[SourceFilePattern]]( + "Patterns to match (simple substring match) against full source file paths. 
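TestParallelization above splits the discovered suites into TEST_PARALLELISM_COUNT groups and runs each group in its own forked JVM with a dedicated java.io.tmpdir; SimpleHashStrategy assigns each suite to a group by hashing its name. A rough Python sketch of that bucketing idea, with hypothetical suite names and a deterministic stand-in hash:

```python
from collections import defaultdict

def group_tests(test_names, group_count):
    """Bucket suites by a stable hash of their name, one bucket per forked JVM."""
    groups = defaultdict(list)
    for name in test_names:
        # Java's String.hashCode is deterministic; Python's hash() is salted per process,
        # so this sketch uses a simple deterministic stand-in instead.
        bucket = sum(ord(c) for c in name) % group_count
        groups[bucket].append(name)
    return groups

for idx, tests in sorted(group_tests(
        ["DeltaSuite", "MergeIntoSuite", "UpdateSuite", "VacuumSuite"], 2).items()):
    print(f"Test group {idx}: {tests}")
```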
" + + "Matched files will be selected for generating API docs.") + + implicit class PatternsHelper(patterns: Seq[SourceFilePattern]) { + def scopeToProject(projectToAdd: Project): Seq[SourceFilePattern] = { + patterns.map(_.copy(project = Some(projectToAdd))) + } + } + + implicit class UnidocHelper(val projectToUpdate: Project) { + def configureUnidoc( + docTitle: String = null, + generateScalaDoc: Boolean = false + ): Project = { + + var updatedProject: Project = projectToUpdate + if (generateScalaDoc) { + updatedProject = updatedProject.enablePlugins(ScalaUnidocPlugin) + } + updatedProject + .enablePlugins(GenJavadocPlugin, JavaUnidocPlugin) + // TODO: Allows maven publishing to use unidoc doc jar, but it currently throws errors. + // .enablePlugins(PublishJavadocPlugin) + .settings( + libraryDependencies ++= Seq( + // Ensure genJavaDoc plugin is of the right version that works with Scala 2.12 + compilerPlugin( + "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full) + ), + + generateUnidocSettings(docTitle, generateScalaDoc), + + // Ensure unidoc is run with tests. + (Test / test) := ((Test / test) dependsOn (Compile / unidoc)).value + ) + } + + private def generateUnidocSettings( + customDocTitle: String, + generateScalaDoc: Boolean): Def.SettingsDefinition = { + + val internalFilePattern = Seq("/internal/", "/execution/", "$") + + // Generate the full doc title + def fullDocTitle(projectName: String, version: String, isScalaDoc: Boolean): String = { + val namePart = Option(customDocTitle).getOrElse { + projectName.split("-").map(_.capitalize).mkString(" ") + } + val versionPart = version.replaceAll("-SNAPSHOT", "") + val langPart = if (isScalaDoc) "Scala API Docs" else "Java API Docs" + s"$namePart $versionPart - $langPart" + } + + // Remove source files that does not match the pattern + def ignoreUndocumentedSources( + allSourceFiles: Seq[Seq[java.io.File]], + sourceFilePatternsToKeep: Seq[SourceFilePattern] + ): Seq[Seq[java.io.File]] = { + if (sourceFilePatternsToKeep.isEmpty) return Nil + + val projectSrcDirToFilePatternsToKeep = sourceFilePatternsToKeep.map { + case SourceFilePattern(dirs, projOption) => + val projectPath = projOption.getOrElse(projectToUpdate).base.getCanonicalPath + projectPath -> dirs + }.toMap + + def shouldKeep(path: String): Boolean = { + projectSrcDirToFilePatternsToKeep.foreach { case (projBaseDir, filePatterns) => + def isInProjectSrcDir = + path.contains(s"$projBaseDir/src") || path.contains(s"$projBaseDir/target/java/") + def matchesFilePattern = filePatterns.exists(path.contains(_)) + def matchesInternalFilePattern = internalFilePattern.exists(path.contains(_)) + if (isInProjectSrcDir && matchesFilePattern && !matchesInternalFilePattern) return true + } + false + } + allSourceFiles.map {_.filter(f => shouldKeep(f.getCanonicalPath))} + } + + val javaUnidocSettings = Seq( + // Configure Java unidoc + JavaUnidoc / unidoc / javacOptions := Seq( + "-public", + "-windowtitle", + fullDocTitle((projectToUpdate / name).value, version.value, isScalaDoc = false), + "-noqualifier", "java.lang", + "-tag", "implNote:a:Implementation Note:", + "-tag", "apiNote:a:API Note:", + "-tag", "return:X", + "-Xdoclint:none" + ), + + JavaUnidoc / unidoc / unidocAllSources := { + ignoreUndocumentedSources( + allSourceFiles = (JavaUnidoc / unidoc / unidocAllSources).value, + sourceFilePatternsToKeep = unidocSourceFilePatterns.value) + }, + + // Settings for plain, old Java doc needed for successful doc generation during publishing. 
+ Compile / doc / javacOptions ++= Seq( + "-public", + "-noqualifier", "java.lang", + "-tag", "implNote:a:Implementation Note:", + "-tag", "apiNote:a:API Note:", + "-Xdoclint:all") + ) + + val scalaUnidocSettings = if (generateScalaDoc) Seq( + // Configure Scala unidoc + ScalaUnidoc / unidoc / scalacOptions ++= Seq( + "-doc-title", + fullDocTitle((projectToUpdate / name).value, version.value, isScalaDoc = true), + ), + + ScalaUnidoc / unidoc / unidocAllSources := { + ignoreUndocumentedSources( + allSourceFiles = (ScalaUnidoc / unidoc / unidocAllSources).value, + sourceFilePatternsToKeep = unidocSourceFilePatterns.value + ) + }, + ) else Nil + + javaUnidocSettings ++ scalaUnidocSettings + } + } +} + diff --git a/project/build.properties b/project/build.properties new file mode 100644 index 00000000000..dfeb6098c5f --- /dev/null +++ b/project/build.properties @@ -0,0 +1,36 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This file contains code from the Apache Spark project (original license above). +# It contains modifications, which are licensed as follows: +# + +# +# Copyright (2021) The Delta Lake Project Authors. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +sbt.version=1.5.5 diff --git a/project/plugins.sbt b/project/plugins.sbt new file mode 100644 index 00000000000..88cd4edccc6 --- /dev/null +++ b/project/plugins.sbt @@ -0,0 +1,51 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
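For reference, the ignoreUndocumentedSources helper in Unidoc.scala above keeps a source file only if it sits under a configured project's src (or generated target/java) directory, matches one of the configured substring patterns, and does not match an internal pattern ("/internal/", "/execution/", "$"). A small Python sketch of that filter, with hypothetical paths and patterns:

```python
INTERNAL_PATTERNS = ["/internal/", "/execution/", "$"]

def keep_for_docs(path: str, project_dir: str, patterns: list) -> bool:
    """Mirror the ignoreUndocumentedSources check: keep only public, matching sources."""
    in_src = f"{project_dir}/src" in path or f"{project_dir}/target/java/" in path
    matches = any(p in path for p in patterns)
    internal = any(p in path for p in INTERNAL_PATTERNS)
    return in_src and matches and not internal

assert keep_for_docs("/repo/spark/src/main/scala/io/delta/tables/DeltaTable.scala",
                     "/repo/spark", ["io/delta/tables/"])
assert not keep_for_docs("/repo/spark/src/main/scala/io/delta/tables/execution/Impl.scala",
                         "/repo/spark", ["io/delta/tables/"])
```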
+ */ + +addSbtPlugin("com.github.sbt" % "sbt-release" % "1.1.0") + +addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2") + +addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") + +addSbtPlugin("com.github.sbt" % "sbt-unidoc" % "0.5.0") + +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.0") + +addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.9.2") + +addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.3") + +addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.9.15") + +addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.6") +//Upgrade sbt-scoverage to 2.0.3+ because 2.0.0 is not compatible to Scala 2.12.17: +//sbt.librarymanagement.ResolveException: Error downloading org.scoverage:scalac-scoverage-plugin_2.12.17:2.0.0 + +//It caused a conflict issue: +//[error] java.lang.RuntimeException: found version conflict(s) in library dependencies; some are suspected to be binary incompatible: +//[error] +//[error] * org.scala-lang.modules:scala-xml_2.12:2.1.0 (early-semver) is selected over 1.0.6 +//[error] +- org.scoverage:scalac-scoverage-reporter_2.12:2.0.7 (depends on 2.1.0) +//[error] +- org.scalariform:scalariform_2.12:0.2.0 (depends on 1.0.6) +//The following fix the conflict: +libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always % "test" + +addSbtPlugin("net.aichler" % "sbt-jupiter-interface" % "0.9.1") + +addSbtPlugin("com.etsy" % "sbt-checkstyle-plugin" % "3.1.1") +// By default, sbt-checkstyle-plugin uses checkstyle version 6.15, but we should set it to use the +// same version as Spark +dependencyOverrides += "com.puppycrawl.tools" % "checkstyle" % "8.43" diff --git a/project/project/plugins.sbt b/project/project/plugins.sbt new file mode 100644 index 00000000000..822a0e43d6c --- /dev/null +++ b/project/project/plugins.sbt @@ -0,0 +1,17 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.9") diff --git a/python/README.md b/python/README.md new file mode 100644 index 00000000000..87a46b9b857 --- /dev/null +++ b/python/README.md @@ -0,0 +1,14 @@ +# Delta Lake + +[Delta Lake](https://delta.io) is an open source storage layer that brings reliability to data lakes. Delta Lake provides ACID transactions, scalable metadata handling, and unifies streaming and batch data processing. Delta Lake runs on top of your existing data lake and is fully compatible with Apache Spark APIs. + +This PyPi package contains the Python APIs for using Delta Lake with Apache Spark. + +## Installation and usage + +1. Install using `pip install delta-spark` +2. To use the Delta Lake with Apache Spark, you have to set additional configurations when creating the SparkSession. See the online [project web page](https://docs.delta.io/latest/delta-intro.html) for details. + +## Documentation + +This README file only contains basic information related to pip installed Delta Lake. 
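As a hedged quick-start for the pip package described above (the configuration keys shown are the usual Delta Lake Spark settings, and the table path is only an example; see the linked documentation for the authoritative setup):

```python
import pyspark
from delta import configure_spark_with_delta_pip

builder = (
    pyspark.sql.SparkSession.builder.appName("quickstart")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog",
            "org.apache.spark.sql.delta.catalog.DeltaCatalog")
)
# configure_spark_with_delta_pip only adds the Maven coordinates for the Delta JARs;
# the extension and catalog settings above still have to be supplied by the caller.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

spark.range(5).write.format("delta").save("/tmp/delta-quickstart")
spark.read.format("delta").load("/tmp/delta-quickstart").show()
```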
You can find the full documentation on the [project web page](https://docs.delta.io/latest/delta-intro.html) diff --git a/python/delta/__init__.py b/python/delta/__init__.py new file mode 100644 index 00000000000..ff99835025c --- /dev/null +++ b/python/delta/__init__.py @@ -0,0 +1,20 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from delta.tables import DeltaTable +from delta.pip_utils import configure_spark_with_delta_pip + +__all__ = ['DeltaTable', 'configure_spark_with_delta_pip'] diff --git a/python/delta/_typing.py b/python/delta/_typing.py new file mode 100644 index 00000000000..05e9007bd44 --- /dev/null +++ b/python/delta/_typing.py @@ -0,0 +1,23 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Dict, Optional, Union +from pyspark.sql.column import Column + +ExpressionOrColumn = Union[str, Column] +OptionalExpressionOrColumn = Optional[ExpressionOrColumn] +ColumnMapping = Dict[str, ExpressionOrColumn] +OptionalColumnMapping = Optional[ColumnMapping] diff --git a/python/delta/exceptions.py b/python/delta/exceptions.py new file mode 100644 index 00000000000..b64e06d14ec --- /dev/null +++ b/python/delta/exceptions.py @@ -0,0 +1,167 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import TYPE_CHECKING, Optional + +from pyspark import SparkContext +from pyspark.errors.exceptions import captured +from pyspark.errors.exceptions.captured import CapturedException +from pyspark.sql.utils import ( + AnalysisException, + IllegalArgumentException, + ParseException +) +from py4j.java_gateway import is_instance_of # type: ignore[import] + +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject, JVMView # type: ignore[import] + + +class DeltaConcurrentModificationException(CapturedException): + """ + The basic class for all Delta commit conflict exceptions. + + .. 
versionadded:: 1.0 + + .. note:: Evolving + """ + + +class ConcurrentWriteException(CapturedException): + """ + Thrown when a concurrent transaction has written data after the current transaction read the + table. + + .. versionadded:: 1.0 + + .. note:: Evolving + """ + + +class MetadataChangedException(CapturedException): + """ + Thrown when the metadata of the Delta table has changed between the time of read + and the time of commit. + + .. versionadded:: 1.0 + + .. note:: Evolving + """ + + +class ProtocolChangedException(CapturedException): + """ + Thrown when the protocol version has changed between the time of read + and the time of commit. + + .. versionadded:: 1.0 + + .. note:: Evolving + """ + + +class ConcurrentAppendException(CapturedException): + """ + Thrown when files are added that would have been read by the current transaction. + + .. versionadded:: 1.0 + + .. note:: Evolving + """ + + +class ConcurrentDeleteReadException(CapturedException): + """ + Thrown when the current transaction reads data that was deleted by a concurrent transaction. + + .. versionadded:: 1.0 + + .. note:: Evolving + """ + + +class ConcurrentDeleteDeleteException(CapturedException): + """ + Thrown when the current transaction deletes data that was deleted by a concurrent transaction. + + .. versionadded:: 1.0 + + .. note:: Evolving + """ + + +class ConcurrentTransactionException(CapturedException): + """ + Thrown when concurrent transaction both attempt to update the same idempotent transaction. + + .. versionadded:: 1.0 + + .. note:: Evolving + """ + + +_delta_exception_patched = False + + +def _convert_delta_exception(e: "JavaObject") -> Optional[CapturedException]: + """ + Convert Delta's Scala concurrent exceptions to the corresponding Python exceptions. + """ + s: str = e.toString() + c: "JavaObject" = e.getCause() + + jvm: "JVMView" = SparkContext._jvm # type: ignore[attr-defined] + gw = SparkContext._gateway # type: ignore[attr-defined] + stacktrace = jvm.org.apache.spark.util.Utils.exceptionString(e) + + if s.startswith('io.delta.exceptions.DeltaConcurrentModificationException: '): + return DeltaConcurrentModificationException(s.split(': ', 1)[1], stacktrace, c) + if s.startswith('io.delta.exceptions.ConcurrentWriteException: '): + return ConcurrentWriteException(s.split(': ', 1)[1], stacktrace, c) + if s.startswith('io.delta.exceptions.MetadataChangedException: '): + return MetadataChangedException(s.split(': ', 1)[1], stacktrace, c) + if s.startswith('io.delta.exceptions.ProtocolChangedException: '): + return ProtocolChangedException(s.split(': ', 1)[1], stacktrace, c) + if s.startswith('io.delta.exceptions.ConcurrentAppendException: '): + return ConcurrentAppendException(s.split(': ', 1)[1], stacktrace, c) + if s.startswith('io.delta.exceptions.ConcurrentDeleteReadException: '): + return ConcurrentDeleteReadException(s.split(': ', 1)[1], stacktrace, c) + if s.startswith('io.delta.exceptions.ConcurrentDeleteDeleteException: '): + return ConcurrentDeleteDeleteException(s.split(': ', 1)[1], stacktrace, c) + if s.startswith('io.delta.exceptions.ConcurrentTransactionException: '): + return ConcurrentTransactionException(s.split(': ', 1)[1], stacktrace, c) + return None + + +def _patch_convert_exception() -> None: + """ + Patch PySpark's exception convert method to convert Delta's Scala concurrent exceptions to the + corresponding Python exceptions. 
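This patch is applied on module import (see below), so Delta's commit conflicts surface as the exception classes defined above. A hedged usage sketch, where delta_table and updates_df are assumed to exist already:

```python
from delta.exceptions import (
    ConcurrentAppendException,
    DeltaConcurrentModificationException,
)

try:
    # Hypothetical upsert that may race with another writer on the same files.
    (delta_table.alias("t")
        .merge(updates_df.alias("u"), "t.id = u.id")
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute())
except ConcurrentAppendException:
    pass   # another transaction added files we would have read; safe to retry the merge
except DeltaConcurrentModificationException:
    raise  # some other commit conflict; surface it
```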
+ """ + original_convert_sql_exception = captured.convert_exception + + def convert_delta_exception(e: "JavaObject") -> CapturedException: + delta_exception = _convert_delta_exception(e) + if delta_exception is not None: + return delta_exception + return original_convert_sql_exception(e) + + captured.convert_exception = convert_delta_exception + + +if not _delta_exception_patched: + _patch_convert_exception() + _delta_exception_patched = True diff --git a/python/delta/pip_utils.py b/python/delta/pip_utils.py new file mode 100644 index 00000000000..99055215d54 --- /dev/null +++ b/python/delta/pip_utils.py @@ -0,0 +1,84 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from typing import List, Optional + +from pyspark.sql import SparkSession + + +def configure_spark_with_delta_pip( + spark_session_builder: SparkSession.Builder, + extra_packages: Optional[List[str]] = None +) -> SparkSession.Builder: + """ + Utility function to configure a SparkSession builder such that the generated SparkSession + will automatically download the required Delta Lake JARs from Maven. This function is + required when you want to + + 1. Install Delta Lake locally using pip, and + + 2. Execute your Python code using Delta Lake + Pyspark directly, that is, not using + `spark-submit --packages io.delta:...` or `pyspark --packages io.delta:...`. + + builder = SparkSession.builder \ + .master("local[*]") \ + .appName("test") + + spark = configure_spark_with_delta_pip(builder).getOrCreate() + + 3. If you would like to add more packages, use the `extra_packages` parameter. + + builder = SparkSession.builder \ + .master("local[*]") \ + .appName("test") + my_packages = ["org.apache.spark:spark-sql-kafka-0-10_2.12:x.y.z"] + spark = configure_spark_with_delta_pip(builder, extra_packages=my_packages).getOrCreate() + + :param spark_session_builder: SparkSession.Builder object being used to configure and + create a SparkSession. + :param extra_packages: Set other packages to add to Spark session besides Delta Lake. + :return: Updated SparkSession.Builder object + + .. versionadded:: 1.0 + + .. note:: Evolving + """ + import importlib_metadata # load this library only when this function is called + + if type(spark_session_builder) is not SparkSession.Builder: + msg = f''' +This function must be called with a SparkSession builder as the argument. +The argument found is of type {str(type(spark_session_builder))}. +See the online documentation for the correct usage of this function. + ''' + raise TypeError(msg) + + try: + delta_version = importlib_metadata.version("delta_spark") + except Exception as e: + msg = ''' +This function can be used only when Delta Lake has been locally installed with pip. +See the online documentation for the correct usage of this function. 
+ ''' + raise Exception(msg) from e + + scala_version = "2.12" + maven_artifact = f"io.delta:delta-spark_{scala_version}:{delta_version}" + + extra_packages = extra_packages if extra_packages is not None else [] + all_artifacts = [maven_artifact] + extra_packages + packages_str = ",".join(all_artifacts) + + return spark_session_builder.config("spark.jars.packages", packages_str) diff --git a/python/delta/py.typed b/python/delta/py.typed new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/delta/tables.py b/python/delta/tables.py new file mode 100644 index 00000000000..6ee6fa83c2e --- /dev/null +++ b/python/delta/tables.py @@ -0,0 +1,1370 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import ( + TYPE_CHECKING, cast, overload, Any, Dict, Iterable, Optional, Union, NoReturn, List, Tuple +) + +import delta.exceptions # noqa: F401; pylint: disable=unused-variable +from delta._typing import ( + ColumnMapping, OptionalColumnMapping, ExpressionOrColumn, OptionalExpressionOrColumn +) + +from pyspark import since +from pyspark.sql import Column, DataFrame, functions, SparkSession +from pyspark.sql.column import _to_seq # type: ignore[attr-defined] +from pyspark.sql.types import DataType, StructType, StructField + + +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject, JVMView # type: ignore[import] + from py4j.java_collections import JavaMap # type: ignore[import] + + +class DeltaTable(object): + """ + Main class for programmatically interacting with Delta tables. + You can create DeltaTable instances using the path of the Delta table.:: + + deltaTable = DeltaTable.forPath(spark, "/path/to/table") + + In addition, you can convert an existing Parquet table in place into a Delta table.:: + + deltaTable = DeltaTable.convertToDelta(spark, "parquet.`/path/to/table`") + + .. versionadded:: 0.4 + """ + def __init__(self, spark: SparkSession, jdt: "JavaObject"): + self._spark = spark + self._jdt = jdt + + @since(0.4) # type: ignore[arg-type] + def toDF(self) -> DataFrame: + """ + Get a DataFrame representation of this Delta table. + """ + return DataFrame( + self._jdt.toDF(), + # Simple trick to avoid warnings from Spark 3.3.0. `_wrapped` + # in SparkSession is removed in Spark 3.3.0, see also SPARK-38121. + getattr(self._spark, "_wrapped", self._spark) # type: ignore[attr-defined] + ) + + @since(0.4) # type: ignore[arg-type] + def alias(self, aliasName: str) -> "DeltaTable": + """ + Apply an alias to the Delta table. + """ + jdt = self._jdt.alias(aliasName) + return DeltaTable(self._spark, jdt) + + @since(0.5) # type: ignore[arg-type] + def generate(self, mode: str) -> None: + """ + Generate manifest files for the given delta table. + + :param mode: mode for the type of manifest file to be generated + The valid modes are as follows (not case sensitive): + + - "symlink_format_manifest": This will generate manifests in symlink format + for Presto and Athena read support. 
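A minimal sketch of calling generate() with the mode described above, assuming a Delta table already exists at a hypothetical path:

```python
from delta.tables import DeltaTable

dt = DeltaTable.forPath(spark, "/tmp/delta-quickstart")  # hypothetical existing table
dt.generate("symlink_format_manifest")  # writes manifest files under the table directory
```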
+ + See the online documentation for more information. + """ + self._jdt.generate(mode) + + @since(0.4) # type: ignore[arg-type] + def delete(self, condition: OptionalExpressionOrColumn = None) -> None: + """ + Delete data from the table that match the given ``condition``. + + Example:: + + deltaTable.delete("date < '2017-01-01'") # predicate using SQL formatted string + + deltaTable.delete(col("date") < "2017-01-01") # predicate using Spark SQL functions + + :param condition: condition of the update + :type condition: str or pyspark.sql.Column + """ + if condition is None: + self._jdt.delete() + else: + self._jdt.delete(DeltaTable._condition_to_jcolumn(condition)) + + @overload + def update( + self, condition: ExpressionOrColumn, set: ColumnMapping + ) -> None: + ... + + @overload + def update(self, *, set: ColumnMapping) -> None: + ... + + def update( + self, + condition: OptionalExpressionOrColumn = None, + set: OptionalColumnMapping = None + ) -> None: + """ + Update data from the table on the rows that match the given ``condition``, + which performs the rules defined by ``set``. + + Example:: + + # condition using SQL formatted string + deltaTable.update( + condition = "eventType = 'clck'", + set = { "eventType": "'click'" } ) + + # condition using Spark SQL functions + deltaTable.update( + condition = col("eventType") == "clck", + set = { "eventType": lit("click") } ) + + :param condition: Optional condition of the update + :type condition: str or pyspark.sql.Column + :param set: Defines the rules of setting the values of columns that need to be updated. + *Note: This param is required.* Default value None is present to allow + positional args in same order across languages. + :type set: dict with str as keys and str or pyspark.sql.Column as values + + .. versionadded:: 0.4 + """ + jmap = DeltaTable._dict_to_jmap(self._spark, set, "'set'") + jcolumn = DeltaTable._condition_to_jcolumn(condition) + if condition is None: + self._jdt.update(jmap) + else: + self._jdt.update(jcolumn, jmap) + + @since(0.4) # type: ignore[arg-type] + def merge( + self, source: DataFrame, condition: ExpressionOrColumn + ) -> "DeltaMergeBuilder": + """ + Merge data from the `source` DataFrame based on the given merge `condition`. This returns + a :class:`DeltaMergeBuilder` object that can be used to specify the update, delete, or + insert actions to be performed on rows based on whether the rows matched the condition or + not. See :class:`DeltaMergeBuilder` for a full description of this operation and what + combinations of update, delete and insert operations are allowed. 
+ + Example 1 with conditions and update expressions as SQL formatted string:: + + deltaTable.alias("events").merge( + source = updatesDF.alias("updates"), + condition = "events.eventId = updates.eventId" + ).whenMatchedUpdate(set = + { + "data": "updates.data", + "count": "events.count + 1" + } + ).whenNotMatchedInsert(values = + { + "date": "updates.date", + "eventId": "updates.eventId", + "data": "updates.data", + "count": "1" + } + ).execute() + + Example 2 with conditions and update expressions as Spark SQL functions:: + + from pyspark.sql.functions import * + + deltaTable.alias("events").merge( + source = updatesDF.alias("updates"), + condition = expr("events.eventId = updates.eventId") + ).whenMatchedUpdate(set = + { + "data" : col("updates.data"), + "count": col("events.count") + 1 + } + ).whenNotMatchedInsert(values = + { + "date": col("updates.date"), + "eventId": col("updates.eventId"), + "data": col("updates.data"), + "count": lit("1") + } + ).execute() + + :param source: Source DataFrame + :type source: pyspark.sql.DataFrame + :param condition: Condition to match sources rows with the Delta table rows. + :type condition: str or pyspark.sql.Column + + :return: builder object to specify whether to update, delete or insert rows based on + whether the condition matched or not + :rtype: :py:class:`delta.tables.DeltaMergeBuilder` + """ + if source is None: + raise ValueError("'source' in merge cannot be None") + elif type(source) is not DataFrame: + raise TypeError("Type of 'source' in merge must be DataFrame.") + if condition is None: + raise ValueError("'condition' in merge cannot be None") + + jbuilder = self._jdt.merge(source._jdf, DeltaTable._condition_to_jcolumn(condition)) + return DeltaMergeBuilder(self._spark, jbuilder) + + @since(0.4) # type: ignore[arg-type] + def vacuum(self, retentionHours: Optional[float] = None) -> DataFrame: + """ + Recursively delete files and directories in the table that are not needed by the table for + maintaining older versions up to the given retention threshold. This method will return an + empty DataFrame on successful completion. + + Example:: + + deltaTable.vacuum() # vacuum files not required by versions more than 7 days old + + deltaTable.vacuum(100) # vacuum files not required by versions more than 100 hours old + + :param retentionHours: Optional number of hours retain history. If not specified, then the + default retention period of 168 hours (7 days) will be used. + """ + jdt = self._jdt + if retentionHours is None: + return DataFrame( + jdt.vacuum(), + getattr(self._spark, "_wrapped", self._spark) # type: ignore[attr-defined] + ) + else: + return DataFrame( + jdt.vacuum(float(retentionHours)), + getattr(self._spark, "_wrapped", self._spark) # type: ignore[attr-defined] + ) + + @since(0.4) # type: ignore[arg-type] + def history(self, limit: Optional[int] = None) -> DataFrame: + """ + Get the information of the latest `limit` commits on this table as a Spark DataFrame. + The information is in reverse chronological order. + + Example:: + + fullHistoryDF = deltaTable.history() # get the full history of the table + + lastOperationDF = deltaTable.history(1) # get the last operation + + :param limit: Optional, number of latest commits to returns in the history. + :return: Table's commit history. See the online Delta Lake documentation for more details. 
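The DataFrame returned by history() carries the commit-log fields; a hedged sketch selecting a few of the commonly used columns (delta_table is assumed to exist, and the full schema is described in the online documentation):

```python
# Five most recent commits, newest first.
(delta_table.history(5)
    .select("version", "timestamp", "operation", "operationMetrics")
    .show(truncate=False))
```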
+ :rtype: pyspark.sql.DataFrame + """ + jdt = self._jdt + if limit is None: + return DataFrame( + jdt.history(), + getattr(self._spark, "_wrapped", self._spark) # type: ignore[attr-defined] + ) + else: + return DataFrame( + jdt.history(limit), + getattr(self._spark, "_wrapped", self._spark) # type: ignore[attr-defined] + ) + + @since(2.1) # type: ignore[arg-type] + def detail(self) -> DataFrame: + """ + Get the details of a Delta table such as the format, name, and size. + + Example:: + + detailDF = deltaTable.detail() # get the full details of the table + + :return Information of the table (format, name, size, etc.) + :rtype: pyspark.sql.DataFrame + + .. note:: Evolving + """ + return DataFrame( + self._jdt.detail(), + getattr(self._spark, "_wrapped", self._spark) # type: ignore[attr-defined] + ) + + @classmethod + @since(0.4) # type: ignore[arg-type] + def convertToDelta( + cls, + sparkSession: SparkSession, + identifier: str, + partitionSchema: Optional[Union[str, StructType]] = None + ) -> "DeltaTable": + """ + Create a DeltaTable from the given parquet table. Takes an existing parquet table and + constructs a delta transaction log in the base path of the table. + Note: Any changes to the table during the conversion process may not result in a consistent + state at the end of the conversion. Users should stop any changes to the table before the + conversion is started. + + Example:: + + # Convert unpartitioned parquet table at path 'path/to/table' + deltaTable = DeltaTable.convertToDelta( + spark, "parquet.`path/to/table`") + + # Convert partitioned parquet table at path 'path/to/table' and partitioned by + # integer column named 'part' + partitionedDeltaTable = DeltaTable.convertToDelta( + spark, "parquet.`path/to/table`", "part int") + + :param sparkSession: SparkSession to use for the conversion + :type sparkSession: pyspark.sql.SparkSession + :param identifier: Parquet table identifier formatted as "parquet.`path`" + :type identifier: str + :param partitionSchema: Hive DDL formatted string, or pyspark.sql.types.StructType + :return: DeltaTable representing the converted Delta table + :rtype: :py:class:`~delta.tables.DeltaTable` + """ + assert sparkSession is not None + + jvm: "JVMView" = sparkSession._sc._jvm # type: ignore[attr-defined] + jsparkSession: "JavaObject" = sparkSession._jsparkSession # type: ignore[attr-defined] + + if partitionSchema is None: + jdt = jvm.io.delta.tables.DeltaTable.convertToDelta( + jsparkSession, identifier + ) + else: + if not isinstance(partitionSchema, str): + partitionSchema = jsparkSession.parseDataType(partitionSchema.json()) + jdt = jvm.io.delta.tables.DeltaTable.convertToDelta( + jsparkSession, identifier, + partitionSchema) + return DeltaTable(sparkSession, jdt) + + @classmethod + @since(0.4) # type: ignore[arg-type] + def forPath( + cls, + sparkSession: SparkSession, + path: str, + hadoopConf: Dict[str, str] = dict() + ) -> "DeltaTable": + """ + Instantiate a :class:`DeltaTable` object representing the data at the given path, + If the given path is invalid (i.e. either no table exists or an existing table is + not a Delta table), it throws a `not a Delta table` error. + + :param sparkSession: SparkSession to use for loading the table + :type sparkSession: pyspark.sql.SparkSession + :param hadoopConf: Hadoop configuration starting with "fs." or "dfs." will be picked + up by `DeltaTable` to access the file system when executing queries. + Other configurations will not be allowed. 
+ :type hadoopConf: optional dict with str as key and str as value. + :return: loaded Delta table + :rtype: :py:class:`~delta.tables.DeltaTable` + + Example:: + + hadoopConf = {"fs.s3a.access.key" : "", + "fs.s3a.secret.key": "secret-key"} + deltaTable = DeltaTable.forPath( + spark, + "/path/to/table", + hadoopConf) + """ + assert sparkSession is not None + + jvm: "JVMView" = sparkSession._sc._jvm # type: ignore[attr-defined] + jsparkSession: "JavaObject" = sparkSession._jsparkSession # type: ignore[attr-defined] + + jdt = jvm.io.delta.tables.DeltaTable.forPath(jsparkSession, path, hadoopConf) + return DeltaTable(sparkSession, jdt) + + @classmethod + @since(0.7) # type: ignore[arg-type] + def forName( + cls, sparkSession: SparkSession, tableOrViewName: str + ) -> "DeltaTable": + """ + Instantiate a :class:`DeltaTable` object using the given table name. If the given + tableOrViewName is invalid (i.e. either no table exists or an existing table is not a + Delta table), it throws a `not a Delta table` error. Note: Passing a view name will + also result in this error as views are not supported. + + The given tableOrViewName can also be the absolute path of a delta datasource (i.e. + delta.`path`), If so, instantiate a :class:`DeltaTable` object representing the data at + the given path (consistent with the `forPath`). + + :param sparkSession: SparkSession to use for loading the table + :param tableOrViewName: name of the table or view + :return: loaded Delta table + :rtype: :py:class:`~delta.tables.DeltaTable` + + Example:: + + deltaTable = DeltaTable.forName(spark, "tblName") + """ + assert sparkSession is not None + + jvm: "JVMView" = sparkSession._sc._jvm # type: ignore[attr-defined] + jsparkSession: "JavaObject" = sparkSession._jsparkSession # type: ignore[attr-defined] + + jdt = jvm.io.delta.tables.DeltaTable.forName(jsparkSession, tableOrViewName) + return DeltaTable(sparkSession, jdt) + + @classmethod + @since(1.0) # type: ignore[arg-type] + def create( + cls, sparkSession: Optional[SparkSession] = None + ) -> "DeltaTableBuilder": + """ + Return :class:`DeltaTableBuilder` object that can be used to specify + the table name, location, columns, partitioning columns, table comment, + and table properties to create a Delta table, error if the table exists + (the same as SQL `CREATE TABLE`). + + See :class:`DeltaTableBuilder` for a full description and examples + of this operation. + + :param sparkSession: SparkSession to use for creating the table + :return: an instance of DeltaTableBuilder + :rtype: :py:class:`~delta.tables.DeltaTableBuilder` + + .. note:: Evolving + """ + if sparkSession is None: + sparkSession = SparkSession.getActiveSession() + assert sparkSession is not None + + jvm: "JVMView" = sparkSession._sc._jvm # type: ignore[attr-defined] + jsparkSession: "JavaObject" = sparkSession._jsparkSession # type: ignore[attr-defined] + + jdt = jvm.io.delta.tables.DeltaTable.create(jsparkSession) + return DeltaTableBuilder(sparkSession, jdt) + + @classmethod + @since(1.0) # type: ignore[arg-type] + def createIfNotExists( + cls, sparkSession: Optional[SparkSession] = None + ) -> "DeltaTableBuilder": + """ + Return :class:`DeltaTableBuilder` object that can be used to specify + the table name, location, columns, partitioning columns, table comment, + and table properties to create a Delta table, + if it does not exists (the same as SQL `CREATE TABLE IF NOT EXISTS`). + + See :class:`DeltaTableBuilder` for a full description and examples + of this operation. 
+
+        :param sparkSession: SparkSession to use for creating the table
+        :return: an instance of DeltaTableBuilder
+        :rtype: :py:class:`~delta.tables.DeltaTableBuilder`
+
+        .. note:: Evolving
+        """
+        if sparkSession is None:
+            sparkSession = SparkSession.getActiveSession()
+        assert sparkSession is not None
+
+        jvm: "JVMView" = sparkSession._sc._jvm  # type: ignore[attr-defined]
+        jsparkSession: "JavaObject" = sparkSession._jsparkSession  # type: ignore[attr-defined]
+
+        jdt = jvm.io.delta.tables.DeltaTable.createIfNotExists(jsparkSession)
+        return DeltaTableBuilder(sparkSession, jdt)
+
+    @classmethod
+    @since(1.0)  # type: ignore[arg-type]
+    def replace(
+        cls, sparkSession: Optional[SparkSession] = None
+    ) -> "DeltaTableBuilder":
+        """
+        Return :class:`DeltaTableBuilder` object that can be used to specify
+        the table name, location, columns, partitioning columns, table comment,
+        and table properties to replace a Delta table,
+        error if the table doesn't exist (the same as SQL `REPLACE TABLE`).
+
+        See :class:`DeltaTableBuilder` for a full description and examples
+        of this operation.
+
+        :param sparkSession: SparkSession to use for creating the table
+        :return: an instance of DeltaTableBuilder
+        :rtype: :py:class:`~delta.tables.DeltaTableBuilder`
+
+        .. note:: Evolving
+        """
+        if sparkSession is None:
+            sparkSession = SparkSession.getActiveSession()
+        assert sparkSession is not None
+
+        jvm: "JVMView" = sparkSession._sc._jvm  # type: ignore[attr-defined]
+        jsparkSession: "JavaObject" = sparkSession._jsparkSession  # type: ignore[attr-defined]
+
+        jdt = jvm.io.delta.tables.DeltaTable.replace(jsparkSession)
+        return DeltaTableBuilder(sparkSession, jdt)
+
+    @classmethod
+    @since(1.0)  # type: ignore[arg-type]
+    def createOrReplace(
+        cls, sparkSession: Optional[SparkSession] = None
+    ) -> "DeltaTableBuilder":
+        """
+        Return :class:`DeltaTableBuilder` object that can be used to specify
+        the table name, location, columns, partitioning columns, table comment,
+        and table properties to create or replace a Delta table,
+        replacing the table if it already exists and creating it otherwise
+        (the same as SQL `CREATE OR REPLACE TABLE`).
+
+        See :class:`DeltaTableBuilder` for a full description and examples
+        of this operation.
+
+        :param sparkSession: SparkSession to use for creating the table
+        :return: an instance of DeltaTableBuilder
+        :rtype: :py:class:`~delta.tables.DeltaTableBuilder`
+
+        .. note:: Evolving
+        """
+        if sparkSession is None:
+            sparkSession = SparkSession.getActiveSession()
+        assert sparkSession is not None
+
+        jvm: "JVMView" = sparkSession._sc._jvm  # type: ignore[attr-defined]
+        jsparkSession: "JavaObject" = sparkSession._jsparkSession  # type: ignore[attr-defined]
+
+        jdt = jvm.io.delta.tables.DeltaTable.createOrReplace(jsparkSession)
+        return DeltaTableBuilder(sparkSession, jdt)
+
+    @classmethod
+    @since(0.4)  # type: ignore[arg-type]
+    def isDeltaTable(cls, sparkSession: SparkSession, identifier: str) -> bool:
+        """
+        Check if the provided `identifier` string, in this case a file path,
+        is the root of a Delta table using the given SparkSession.
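+
+        For illustration, a caller might use this check to decide whether a plain
+        Parquet directory needs to be converted before loading it (the path below
+        is hypothetical)::
+
+            if not DeltaTable.isDeltaTable(spark, "/data/events"):
+                DeltaTable.convertToDelta(spark, "parquet.`/data/events`")
+            deltaTable = DeltaTable.forPath(spark, "/data/events")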
+
+        :param sparkSession: SparkSession to use to perform the check
+        :param identifier: location of the table
+        :return: True if the table is a Delta table, False otherwise
+        :rtype: bool
+
+        Example::
+
+            DeltaTable.isDeltaTable(spark, "/path/to/table")
+        """
+        assert sparkSession is not None
+
+        jvm: "JVMView" = sparkSession._sc._jvm  # type: ignore[attr-defined]
+        jsparkSession: "JavaObject" = sparkSession._jsparkSession  # type: ignore[attr-defined]
+
+        return jvm.io.delta.tables.DeltaTable.isDeltaTable(jsparkSession, identifier)
+
+    @since(0.8)  # type: ignore[arg-type]
+    def upgradeTableProtocol(self, readerVersion: int, writerVersion: int) -> None:
+        """
+        Updates the protocol version of the table to leverage new features. Upgrading the reader
+        version will prevent all clients that have an older version of Delta Lake from accessing
+        this table. Upgrading the writer version will prevent older versions of Delta Lake from
+        writing to this table. The reader or writer version cannot be downgraded.
+
+        See online documentation and Delta's protocol specification at PROTOCOL.md for more details.
+        """
+        jdt = self._jdt
+        if not isinstance(readerVersion, int):
+            raise ValueError("The readerVersion needs to be an integer but got '%s'." %
+                             type(readerVersion))
+        if not isinstance(writerVersion, int):
+            raise ValueError("The writerVersion needs to be an integer but got '%s'." %
+                             type(writerVersion))
+        jdt.upgradeTableProtocol(readerVersion, writerVersion)
+
+    @since(1.2)  # type: ignore[arg-type]
+    def restoreToVersion(self, version: int) -> DataFrame:
+        """
+        Restore the DeltaTable to an older version of the table specified by version number.
+
+        Example::
+
+            deltaTable.restoreToVersion(1)
+
+        :param version: target version of restored table
+        :return: DataFrame with metrics of the restore operation.
+        :rtype: pyspark.sql.DataFrame
+        """
+
+        DeltaTable._verify_type_int(version, "version")
+        return DataFrame(
+            self._jdt.restoreToVersion(version),
+            getattr(self._spark, "_wrapped", self._spark)  # type: ignore[attr-defined]
+        )
+
+    @since(1.2)  # type: ignore[arg-type]
+    def restoreToTimestamp(self, timestamp: str) -> DataFrame:
+        """
+        Restore the DeltaTable to an older version of the table specified by a timestamp.
+        The timestamp can be of the format yyyy-MM-dd or yyyy-MM-dd HH:mm:ss.
+
+        Example::
+
+            deltaTable.restoreToTimestamp('2021-01-01')
+            deltaTable.restoreToTimestamp('2021-01-01 01:01:01')
+
+        :param timestamp: target timestamp of restored table
+        :return: DataFrame with metrics of the restore operation.
+        :rtype: pyspark.sql.DataFrame
+        """
+
+        DeltaTable._verify_type_str(timestamp, "timestamp")
+        return DataFrame(
+            self._jdt.restoreToTimestamp(timestamp),
+            getattr(self._spark, "_wrapped", self._spark)  # type: ignore[attr-defined]
+        )
+
+    @since(2.0)  # type: ignore[arg-type]
+    def optimize(self) -> "DeltaOptimizeBuilder":
+        """
+        Optimize the data layout of the table. This returns
+        a :py:class:`~delta.tables.DeltaOptimizeBuilder` object that can
+        be used to specify the partition filter to limit the scope of
+        optimize and also execute different optimization techniques
+        such as file compaction or order data using Z-Order curves.
+
+        See the :py:class:`~delta.tables.DeltaOptimizeBuilder` for a
+        full description of this operation.
+
+        Example::
+
+            deltaTable.optimize().where("date='2021-11-18'").executeCompaction()
+
+        :return: an instance of DeltaOptimizeBuilder.
+ :rtype: :py:class:`~delta.tables.DeltaOptimizeBuilder` + """ + jbuilder = self._jdt.optimize() + return DeltaOptimizeBuilder(self._spark, jbuilder) + + @staticmethod # type: ignore[arg-type] + def _verify_type_str(variable: str, name: str) -> None: + if not isinstance(variable, str) or variable is None: + raise ValueError("%s needs to be a string but got '%s'." % (name, type(variable))) + + @staticmethod # type: ignore[arg-type] + def _verify_type_int(variable: int, name: str) -> None: + if not isinstance(variable, int) or variable is None: + raise ValueError("%s needs to be an int but got '%s'." % (name, type(variable))) + + @staticmethod + def _dict_to_jmap( + sparkSession: SparkSession, + pydict: OptionalColumnMapping, + argname: str, + ) -> "JavaObject": + """ + convert dict to Map + """ + # Get the Java map for pydict + if pydict is None: + raise ValueError("%s cannot be None" % argname) + elif type(pydict) is not dict: + e = "%s must be a dict, found to be %s" % (argname, str(type(pydict))) + raise TypeError(e) + + jvm: "JVMView" = sparkSession._sc._jvm # type: ignore[attr-defined] + + jmap: "JavaMap" = jvm.java.util.HashMap() + for col, expr in pydict.items(): + if type(col) is not str: + e = ("Keys of dict in %s must contain only strings with column names" % argname) + \ + (", found '%s' of type '%s" % (str(col), str(type(col)))) + raise TypeError(e) + if type(expr) is Column: + jmap.put(col, expr._jc) + elif type(expr) is str: + jmap.put(col, functions.expr(expr)._jc) + else: + e = ("Values of dict in %s must contain only Spark SQL Columns " % argname) + \ + "or strings (expressions in SQL syntax) as values, " + \ + ("found '%s' of type '%s'" % (str(expr), str(type(expr)))) + raise TypeError(e) + return jmap + + @staticmethod + def _condition_to_jcolumn( + condition: OptionalExpressionOrColumn, argname: str = "'condition'" + ) -> "JavaObject": + if condition is None: + jcondition = None + elif type(condition) is Column: + jcondition = condition._jc + elif type(condition) is str: + jcondition = functions.expr(condition)._jc + else: + e = ("%s must be a Spark SQL Column or a string (expression in SQL syntax)" % argname) \ + + ", found to be of type %s" % str(type(condition)) + raise TypeError(e) + return jcondition + + +class DeltaMergeBuilder(object): + """ + Builder to specify how to merge data from source DataFrame into the target Delta table. + Use :py:meth:`delta.tables.DeltaTable.merge` to create an object of this class. + Using this builder, you can specify any number of ``whenMatched``, ``whenNotMatched`` and + ``whenNotMatchedBySource`` clauses. Here are the constraints on these clauses. + + - Constraints in the ``whenMatched`` clauses: + + - The condition in a ``whenMatched`` clause is optional. However, if there are multiple + ``whenMatched`` clauses, then only the last one may omit the condition. + + - When there are more than one ``whenMatched`` clauses and there are conditions (or the lack + of) such that a row satisfies multiple clauses, then the action for the first clause + satisfied is executed. In other words, the order of the ``whenMatched`` clauses matters. + + - If none of the ``whenMatched`` clauses match a source-target row pair that satisfy + the merge condition, then the target rows will not be updated or deleted. + + - If you want to update all the columns of the target Delta table with the + corresponding column of the source DataFrame, then you can use the + ``whenMatchedUpdateAll()``. 
This is equivalent to:: + + whenMatchedUpdate(set = { + "col1": "source.col1", + "col2": "source.col2", + ... # for all columns in the delta table + }) + + - Constraints in the ``whenNotMatched`` clauses: + + - The condition in a ``whenNotMatched`` clause is optional. However, if there are + multiple ``whenNotMatched`` clauses, then only the last one may omit the condition. + + - When there are more than one ``whenNotMatched`` clauses and there are conditions (or the + lack of) such that a row satisfies multiple clauses, then the action for the first clause + satisfied is executed. In other words, the order of the ``whenNotMatched`` clauses matters. + + - If no ``whenNotMatched`` clause is present or if it is present but the non-matching source + row does not satisfy the condition, then the source row is not inserted. + + - If you want to insert all the columns of the target Delta table with the + corresponding column of the source DataFrame, then you can use + ``whenNotMatchedInsertAll()``. This is equivalent to:: + + whenNotMatchedInsert(values = { + "col1": "source.col1", + "col2": "source.col2", + ... # for all columns in the delta table + }) + + - Constraints in the ``whenNotMatchedBySource`` clauses: + + - The condition in a ``whenNotMatchedBySource`` clause is optional. However, if there are + multiple ``whenNotMatchedBySource`` clauses, then only the last ``whenNotMatchedBySource`` + clause may omit the condition. + + - Conditions and update expressions in ``whenNotMatchedBySource`` clauses may only refer to + columns from the target Delta table. + + - When there are more than one ``whenNotMatchedBySource`` clauses and there are conditions (or + the lack of) such that a row satisfies multiple clauses, then the action for the first + clause satisfied is executed. In other words, the order of the ``whenNotMatchedBySource`` + clauses matters. + + - If no ``whenNotMatchedBySource`` clause is present or if it is present but the + non-matching target row does not satisfy any of the ``whenNotMatchedBySource`` clause + condition, then the target row will not be updated or deleted. + + Example 1 with conditions and update expressions as SQL formatted string:: + + deltaTable.alias("events").merge( + source = updatesDF.alias("updates"), + condition = "events.eventId = updates.eventId" + ).whenMatchedUpdate(set = + { + "data": "updates.data", + "count": "events.count + 1" + } + ).whenNotMatchedInsert(values = + { + "date": "updates.date", + "eventId": "updates.eventId", + "data": "updates.data", + "count": "1", + "missed_count": "0" + } + ).whenNotMatchedBySourceUpdate(set = + { + "missed_count": "events.missed_count + 1" + } + ).execute() + + Example 2 with conditions and update expressions as Spark SQL functions:: + + from pyspark.sql.functions import * + + deltaTable.alias("events").merge( + source = updatesDF.alias("updates"), + condition = expr("events.eventId = updates.eventId") + ).whenMatchedUpdate(set = + { + "data" : col("updates.data"), + "count": col("events.count") + 1 + } + ).whenNotMatchedInsert(values = + { + "date": col("updates.date"), + "eventId": col("updates.eventId"), + "data": col("updates.data"), + "count": lit("1"), + "missed_count": lit("0") + } + ).whenNotMatchedBySourceUpdate(set = + { + "missed_count": col("events.missed_count") + 1 + } + ).execute() + + .. 
versionadded:: 0.4 + """ + def __init__(self, spark: SparkSession, jbuilder: "JavaObject"): + self._spark = spark + self._jbuilder = jbuilder + + @overload + def whenMatchedUpdate( + self, condition: OptionalExpressionOrColumn, set: ColumnMapping + ) -> "DeltaMergeBuilder": + ... + + @overload + def whenMatchedUpdate( + self, *, set: ColumnMapping + ) -> "DeltaMergeBuilder": + ... + + def whenMatchedUpdate( + self, + condition: OptionalExpressionOrColumn = None, + set: OptionalColumnMapping = None + ) -> "DeltaMergeBuilder": + """ + Update a matched table row based on the rules defined by ``set``. + If a ``condition`` is specified, then it must evaluate to true for the row to be updated. + + See :py:class:`~delta.tables.DeltaMergeBuilder` for complete usage details. + + :param condition: Optional condition of the update + :type condition: str or pyspark.sql.Column + :param set: Defines the rules of setting the values of columns that need to be updated. + *Note: This param is required.* Default value None is present to allow + positional args in same order across languages. + :type set: dict with str as keys and str or pyspark.sql.Column as values + :return: this builder + + .. versionadded:: 0.4 + """ + jset = DeltaTable._dict_to_jmap(self._spark, set, "'set' in whenMatchedUpdate") + new_jbuilder = self.__getMatchedBuilder(condition).update(jset) + return DeltaMergeBuilder(self._spark, new_jbuilder) + + @since(0.4) # type: ignore[arg-type] + def whenMatchedUpdateAll( + self, condition: OptionalExpressionOrColumn = None + ) -> "DeltaMergeBuilder": + """ + Update all the columns of the matched table row with the values of the corresponding + columns in the source row. If a ``condition`` is specified, then it must be + true for the new row to be updated. + + See :py:class:`~delta.tables.DeltaMergeBuilder` for complete usage details. + + :param condition: Optional condition of the insert + :type condition: str or pyspark.sql.Column + :return: this builder + """ + new_jbuilder = self.__getMatchedBuilder(condition).updateAll() + return DeltaMergeBuilder(self._spark, new_jbuilder) + + @since(0.4) # type: ignore[arg-type] + def whenMatchedDelete( + self, condition: OptionalExpressionOrColumn = None + ) -> "DeltaMergeBuilder": + """ + Delete a matched row from the table only if the given ``condition`` (if specified) is + true for the matched row. + + See :py:class:`~delta.tables.DeltaMergeBuilder` for complete usage details. + + :param condition: Optional condition of the delete + :type condition: str or pyspark.sql.Column + :return: this builder + """ + new_jbuilder = self.__getMatchedBuilder(condition).delete() + return DeltaMergeBuilder(self._spark, new_jbuilder) + + @overload + def whenNotMatchedInsert( + self, condition: ExpressionOrColumn, values: ColumnMapping + ) -> "DeltaMergeBuilder": + ... + + @overload + def whenNotMatchedInsert( + self, *, values: ColumnMapping = ... + ) -> "DeltaMergeBuilder": + ... + + def whenNotMatchedInsert( + self, + condition: OptionalExpressionOrColumn = None, + values: OptionalColumnMapping = None + ) -> "DeltaMergeBuilder": + """ + Insert a new row to the target table based on the rules defined by ``values``. If a + ``condition`` is specified, then it must evaluate to true for the new row to be inserted. + + See :py:class:`~delta.tables.DeltaMergeBuilder` for complete usage details. 
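+
+        For example, inserting only those non-matching source rows that pass a
+        filter (the alias and column names, and ``sourceDF``, are illustrative)::
+
+            (deltaTable.alias("t")
+                .merge(sourceDF.alias("s"), "t.key = s.key")
+                .whenNotMatchedInsert(
+                    condition="s.value > 0",
+                    values={"key": "s.key", "value": "s.value"})
+                .execute())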
+ + :param condition: Optional condition of the insert + :type condition: str or pyspark.sql.Column + :param values: Defines the rules of setting the values of columns that need to be updated. + *Note: This param is required.* Default value None is present to allow + positional args in same order across languages. + :type values: dict with str as keys and str or pyspark.sql.Column as values + :return: this builder + + .. versionadded:: 0.4 + """ + jvalues = DeltaTable._dict_to_jmap(self._spark, values, "'values' in whenNotMatchedInsert") + new_jbuilder = self.__getNotMatchedBuilder(condition).insert(jvalues) + return DeltaMergeBuilder(self._spark, new_jbuilder) + + @since(0.4) # type: ignore[arg-type] + def whenNotMatchedInsertAll( + self, condition: OptionalExpressionOrColumn = None + ) -> "DeltaMergeBuilder": + """ + Insert a new target Delta table row by assigning the target columns to the values of the + corresponding columns in the source row. If a ``condition`` is specified, then it must + evaluate to true for the new row to be inserted. + + See :py:class:`~delta.tables.DeltaMergeBuilder` for complete usage details. + + :param condition: Optional condition of the insert + :type condition: str or pyspark.sql.Column + :return: this builder + """ + new_jbuilder = self.__getNotMatchedBuilder(condition).insertAll() + return DeltaMergeBuilder(self._spark, new_jbuilder) + + @overload + def whenNotMatchedBySourceUpdate( + self, condition: OptionalExpressionOrColumn, set: ColumnMapping + ) -> "DeltaMergeBuilder": + ... + + @overload + def whenNotMatchedBySourceUpdate( + self, *, set: ColumnMapping + ) -> "DeltaMergeBuilder": + ... + + def whenNotMatchedBySourceUpdate( + self, + condition: OptionalExpressionOrColumn = None, + set: OptionalColumnMapping = None + ) -> "DeltaMergeBuilder": + """ + Update a target row that has no matches in the source based on the rules defined by ``set``. + If a ``condition`` is specified, then it must evaluate to true for the row to be updated. + + See :py:class:`~delta.tables.DeltaMergeBuilder` for complete usage details. + + :param condition: Optional condition of the update + :type condition: str or pyspark.sql.Column + :param set: Defines the rules of setting the values of columns that need to be updated. + *Note: This param is required.* Default value None is present to allow + positional args in same order across languages. + :type set: dict with str as keys and str or pyspark.sql.Column as values + :return: this builder + + .. versionadded:: 2.3 + """ + jset = DeltaTable._dict_to_jmap(self._spark, set, "'set' in whenNotMatchedBySourceUpdate") + new_jbuilder = self.__getNotMatchedBySourceBuilder(condition).update(jset) + return DeltaMergeBuilder(self._spark, new_jbuilder) + + @since(2.3) # type: ignore[arg-type] + def whenNotMatchedBySourceDelete( + self, condition: OptionalExpressionOrColumn = None + ) -> "DeltaMergeBuilder": + """ + Delete a target row that has no matches in the source from the table only if the given + ``condition`` (if specified) is true for the target row. + + See :py:class:`~delta.tables.DeltaMergeBuilder` for complete usage details. 
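+
+        For example, a sketch of a sync that drops target rows that no longer
+        appear in the source (the alias and column names, and ``sourceDF``, are
+        illustrative)::
+
+            (deltaTable.alias("t")
+                .merge(sourceDF.alias("s"), "t.key = s.key")
+                .whenMatchedUpdateAll()
+                .whenNotMatchedInsertAll()
+                .whenNotMatchedBySourceDelete()
+                .execute())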
+ + :param condition: Optional condition of the delete + :type condition: str or pyspark.sql.Column + :return: this builder + """ + new_jbuilder = self.__getNotMatchedBySourceBuilder(condition).delete() + return DeltaMergeBuilder(self._spark, new_jbuilder) + + @since(0.4) # type: ignore[arg-type] + def execute(self) -> None: + """ + Execute the merge operation based on the built matched and not matched actions. + + See :py:class:`~delta.tables.DeltaMergeBuilder` for complete usage details. + """ + self._jbuilder.execute() + + def __getMatchedBuilder( + self, condition: OptionalExpressionOrColumn = None + ) -> "JavaObject": + if condition is None: + return self._jbuilder.whenMatched() + else: + return self._jbuilder.whenMatched(DeltaTable._condition_to_jcolumn(condition)) + + def __getNotMatchedBuilder( + self, condition: OptionalExpressionOrColumn = None + ) -> "JavaObject": + if condition is None: + return self._jbuilder.whenNotMatched() + else: + return self._jbuilder.whenNotMatched(DeltaTable._condition_to_jcolumn(condition)) + + def __getNotMatchedBySourceBuilder( + self, condition: OptionalExpressionOrColumn = None + ) -> "JavaObject": + if condition is None: + return self._jbuilder.whenNotMatchedBySource() + else: + return self._jbuilder.whenNotMatchedBySource( + DeltaTable._condition_to_jcolumn(condition)) + + +class DeltaTableBuilder(object): + """ + Builder to specify how to create / replace a Delta table. + You must specify the table name or the path before executing the builder. + You can specify the table columns, the partitioning columns, + the location of the data, the table comment and the property, + and how you want to create / replace the Delta table. + + After executing the builder, a :py:class:`~delta.tables.DeltaTable` + object is returned. + + Use :py:meth:`delta.tables.DeltaTable.create`, + :py:meth:`delta.tables.DeltaTable.createIfNotExists`, + :py:meth:`delta.tables.DeltaTable.replace`, + :py:meth:`delta.tables.DeltaTable.createOrReplace` to create an object of this class. + + Example 1 to create a Delta table with separate columns, using the table name:: + + deltaTable = DeltaTable.create(sparkSession) + .tableName("testTable") + .addColumn("c1", dataType = "INT", nullable = False) + .addColumn("c2", dataType = IntegerType(), generatedAlwaysAs = "c1 + 1") + .partitionedBy("c1") + .execute() + + Example 2 to replace a Delta table with existing columns, using the location:: + + df = spark.createDataFrame([('a', 1), ('b', 2), ('c', 3)], ["key", "value"]) + + deltaTable = DeltaTable.replace(sparkSession) + .tableName("testTable") + .addColumns(df.schema) + .execute() + + .. versionadded:: 1.0 + + .. note:: Evolving + """ + def __init__(self, spark: SparkSession, jbuilder: "JavaObject"): + self._spark = spark + self._jbuilder = jbuilder + + def _raise_type_error(self, msg: str, objs: Iterable[Any]) -> NoReturn: + errorMsg = msg + for obj in objs: + errorMsg += " Found %s with type %s" % ((str(obj)), str(type(obj))) + raise TypeError(errorMsg) + + @since(1.0) # type: ignore[arg-type] + def tableName(self, identifier: str) -> "DeltaTableBuilder": + """ + Specify the table name. + Optionally qualified with a database name [database_name.] table_name. + + :param identifier: the table name + :type identifier: str + :return: this builder + + .. 
note:: Evolving + """ + if type(identifier) is not str: + self._raise_type_error("Identifier must be str.", [identifier]) + self._jbuilder = self._jbuilder.tableName(identifier) + return self + + @since(1.0) # type: ignore[arg-type] + def location(self, location: str) -> "DeltaTableBuilder": + """ + Specify the path to the directory where table data is stored, + which could be a path on distributed storage. + + :param location: the data stored location + :type location: str + :return: this builder + + .. note:: Evolving + """ + if type(location) is not str: + self._raise_type_error("Location must be str.", [location]) + self._jbuilder = self._jbuilder.location(location) + return self + + @since(1.0) # type: ignore[arg-type] + def comment(self, comment: str) -> "DeltaTableBuilder": + """ + Comment to describe the table. + + :param comment: the table comment + :type comment: str + :return: this builder + + .. note:: Evolving + """ + if type(comment) is not str: + self._raise_type_error("Table comment must be str.", [comment]) + self._jbuilder = self._jbuilder.comment(comment) + return self + + @since(1.0) # type: ignore[arg-type] + def addColumn( + self, + colName: str, + dataType: Union[str, DataType], + nullable: bool = True, + generatedAlwaysAs: Optional[str] = None, + comment: Optional[str] = None, + ) -> "DeltaTableBuilder": + """ + Specify a column in the table + + :param colName: the column name + :type colName: str + :param dataType: the column data type + :type dataType: str or pyspark.sql.types.DataType + :param nullable: whether column is nullable + :type nullable: bool + :param generatedAlwaysAs: a SQL expression if the column is always generated + as a function of other columns. + See online documentation for details on Generated Columns. + :type generatedAlwaysAs: str + :param comment: the column comment + :type comment: str + + :return: this builder + + .. note:: Evolving + """ + if type(colName) is not str: + self._raise_type_error("Column name must be str.", [colName]) + if type(dataType) is not str and not isinstance(dataType, DataType): + self._raise_type_error("Column data type must be str or DataType.", + [dataType]) + + jvm: "JVMView" = self._spark._sc._jvm # type: ignore[attr-defined] + jsparkSession: "JavaObject" = self._spark._jsparkSession # type: ignore[attr-defined] + + _col_jbuilder = jvm.io.delta.tables.DeltaTable.columnBuilder(jsparkSession, colName) + if isinstance(dataType, DataType): + dataType = jsparkSession.parseDataType(dataType.json()) + _col_jbuilder = _col_jbuilder.dataType(dataType) + if type(nullable) is not bool: + self._raise_type_error("Column nullable must be bool.", [nullable]) + _col_jbuilder = _col_jbuilder.nullable(nullable) + if generatedAlwaysAs is not None: + if type(generatedAlwaysAs) is not str: + self._raise_type_error("Column generation expression must be str.", + [generatedAlwaysAs]) + _col_jbuilder = _col_jbuilder.generatedAlwaysAs(generatedAlwaysAs) + if comment is not None: + if type(comment) is not str: + self._raise_type_error("Column comment must be str.", [comment]) + _col_jbuilder = _col_jbuilder.comment(comment) + self._jbuilder = self._jbuilder.addColumn(_col_jbuilder.build()) + return self + + @since(1.0) # type: ignore[arg-type] + def addColumns( + self, cols: Union[StructType, List[StructField]] + ) -> "DeltaTableBuilder": + """ + Specify columns in the table using an existing schema + + :param cols: the columns in the existing schema + :type cols: pyspark.sql.types.StructType + or a list of pyspark.sql.types.StructType. 
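+
+        For example, building the schema explicitly (the table name ``demo`` is
+        illustrative)::
+
+            from pyspark.sql.types import StructType, StructField, IntegerType, StringType
+
+            schema = StructType([
+                StructField("id", IntegerType(), False),
+                StructField("data", StringType(), True),
+            ])
+            DeltaTable.create(spark).tableName("demo").addColumns(schema).execute()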
+ + :return: this builder + + .. note:: Evolving + """ + if isinstance(cols, list): + for col in cols: + if type(col) is not StructField: + self._raise_type_error( + "Column in existing schema must be StructField.", [col]) + cols = StructType(cols) + if type(cols) is not StructType: + self._raise_type_error("Schema must be StructType " + + "or a list of StructField.", + [cols]) + + jsparkSession: "JavaObject" = self._spark._jsparkSession # type: ignore[attr-defined] + + scalaSchema = jsparkSession.parseDataType(cols.json()) + self._jbuilder = self._jbuilder.addColumns(scalaSchema) + return self + + @overload + def partitionedBy( + self, *cols: str + ) -> "DeltaTableBuilder": + ... + + @overload + def partitionedBy( + self, __cols: Union[List[str], Tuple[str, ...]] + ) -> "DeltaTableBuilder": + ... + + @since(1.0) # type: ignore[arg-type] + def partitionedBy( + self, *cols: Union[str, List[str], Tuple[str, ...]] + ) -> "DeltaTableBuilder": + """ + Specify columns for partitioning + + :param cols: the partitioning cols + :type cols: str or list name of columns + + :return: this builder + + .. note:: Evolving + """ + if len(cols) == 1 and isinstance(cols[0], (list, tuple)): + cols = cols[0] # type: ignore[assignment] + for c in cols: + if type(c) is not str: + self._raise_type_error("Partitioning column must be str.", [c]) + self._jbuilder = self._jbuilder.partitionedBy(_to_seq( + self._spark._sc, # type: ignore[attr-defined] + cast(Iterable[Union[Column, str]], cols) + )) + return self + + @since(1.0) # type: ignore[arg-type] + def property(self, key: str, value: str) -> "DeltaTableBuilder": + """ + Specify a table property + + :param key: the table property key + :type value: the table property value + + :return: this builder + + .. note:: Evolving + """ + if type(key) is not str or type(value) is not str: + self._raise_type_error("Key and value of property must be string.", + [key, value]) + self._jbuilder = self._jbuilder.property(key, value) + return self + + @since(1.0) # type: ignore[arg-type] + def execute(self) -> DeltaTable: + """ + Execute Table Creation. + + :rtype: :py:class:`~delta.tables.DeltaTable` + + .. note:: Evolving + """ + jdt = self._jbuilder.execute() + return DeltaTable(self._spark, jdt) + + +class DeltaOptimizeBuilder(object): + """ + Builder class for constructing OPTIMIZE command and executing. + + Use :py:meth:`delta.tables.DeltaTable.optimize` to create an instance of this class. + + .. versionadded:: 2.0.0 + """ + def __init__(self, spark: SparkSession, jbuilder: "JavaObject"): + self._spark = spark + self._jbuilder = jbuilder + + @since(2.0) # type: ignore[arg-type] + def where(self, partitionFilter: str) -> "DeltaOptimizeBuilder": + """ + Apply partition filter on this optimize command builder to limit + the operation on selected partitions. + + :param partitionFilter: The partition filter to apply + :type partitionFilter: str + :return: DeltaOptimizeBuilder with partition filter applied + :rtype: :py:class:`~delta.tables.DeltaOptimizeBuilder` + """ + self._jbuilder = self._jbuilder.where(partitionFilter) + return self + + @since(2.0) # type: ignore[arg-type] + def executeCompaction(self) -> DataFrame: + """ + Compact the small files in selected partitions. 
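+
+        For example (the partition predicate below is illustrative)::
+
+            metricsDF = (deltaTable.optimize()
+                         .where("date = '2021-11-18'")
+                         .executeCompaction())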
+ + :return: DataFrame containing the OPTIMIZE execution metrics + :rtype: pyspark.sql.DataFrame + """ + return DataFrame( + self._jbuilder.executeCompaction(), + getattr(self._spark, "_wrapped", self._spark) # type: ignore[attr-defined] + ) + + @since(2.0) # type: ignore[arg-type] + def executeZOrderBy(self, *cols: Union[str, List[str], Tuple[str, ...]]) -> DataFrame: + """ + Z-Order the data in selected partitions using the given columns. + + :param cols: the Z-Order cols + :type cols: str or list name of columns + + :return: DataFrame containing the OPTIMIZE execution metrics + :rtype: pyspark.sql.DataFrame + """ + if len(cols) == 1 and isinstance(cols[0], (list, tuple)): + cols = cols[0] # type: ignore[assignment] + for c in cols: + if type(c) is not str: + errorMsg = "Z-order column must be str. " + errorMsg += "Found %s with type %s" % ((str(c)), str(type(c))) + raise TypeError(errorMsg) + + return DataFrame( + self._jbuilder.executeZOrderBy(_to_seq( + self._spark._sc, # type: ignore[attr-defined] + cast(Iterable[Union[Column, str]], cols) + )), + getattr(self._spark, "_wrapped", self._spark) # type: ignore[attr-defined] + ) diff --git a/python/delta/testing/__init__.py b/python/delta/testing/__init__.py new file mode 100644 index 00000000000..f1ee8ecb1b9 --- /dev/null +++ b/python/delta/testing/__init__.py @@ -0,0 +1,17 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +__all__ = ['utils'] diff --git a/python/delta/testing/log4j2.properties b/python/delta/testing/log4j2.properties new file mode 100644 index 00000000000..7d6ac63632e --- /dev/null +++ b/python/delta/testing/log4j2.properties @@ -0,0 +1,56 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Set everything to be logged to the console +rootLogger.level = warn +rootLogger.appenderRef.stdout.ref = STDOUT + +appender.console.type = Console +appender.console.name = STDOUT +appender.console.target = SYSTEM_OUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Settings to quiet third party logs that are too verbose +logger.jetty.name = org.sparkproject.jetty +logger.jetty.level = warn +logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle +logger.jetty2.level = error +logger.repl1.name = org.apache.spark.repl.SparkIMain$exprTyper +logger.repl1.level = info +logger.repl2.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter +logger.repl2.level = info + +# Set the default spark-shell log level to WARN. When running the spark-shell, the +# log level for this class is used to overwrite the root logger's log level, so that +# the user can have different defaults for the shell and regular Spark apps. +logger.repl.name = org.apache.spark.repl.Main +logger.repl.level = warn + +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs +# in SparkSQL with Hive support +logger.metastore.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler +logger.metastore.level = fatal +logger.hive_functionregistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry +logger.hive_functionregistry.level = error + +# Parquet related logging +logger.parquet.name = org.apache.parquet.CorruptStatistics +logger.parquet.level = error +logger.parquet2.name = parquet.CorruptStatistics +logger.parquet2.level = error + diff --git a/python/delta/testing/utils.py b/python/delta/testing/utils.py new file mode 100644 index 00000000000..aefb2c54c59 --- /dev/null +++ b/python/delta/testing/utils.py @@ -0,0 +1,53 @@ +# +# Copyright (2023) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import shutil +import sys +import tempfile +import unittest + +from pyspark import SparkConf +from pyspark.testing.sqlutils import ReusedSQLTestCase # type: ignore[import] + + +class DeltaTestCase(ReusedSQLTestCase): + """Test class base that sets up a correctly configured SparkSession for querying Delta tables. 
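+
+    A minimal sketch of a test built on this base class (the test name and data
+    are illustrative)::
+
+        class MyDeltaTest(DeltaTestCase):
+            def test_roundtrip(self) -> None:
+                df = self.spark.createDataFrame([("a", 1)], ["key", "value"])
+                df.write.format("delta").save(self.tempFile)
+                self.assertEqual(
+                    self.spark.read.format("delta").load(self.tempFile).count(), 1)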
+ """ + + @classmethod + def conf(cls) -> SparkConf: + _conf = super(DeltaTestCase, cls).conf() + _conf.set("spark.app.name", cls.__name__) + _conf.set("spark.master", "local[4]") + _conf.set("spark.ui.enabled", "false") + _conf.set("spark.databricks.delta.snapshotPartitions", "2") + _conf.set("spark.sql.shuffle.partitions", "5") + _conf.set("delta.log.cacheSize", "3") + _conf.set("spark.sql.sources.parallelPartitionDiscovery.parallelism", "5") + _conf.set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + _conf.set("spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog") + return _conf + + def setUp(self) -> None: + super(DeltaTestCase, self).setUp() + self.tempPath = tempfile.mkdtemp() + self.tempFile = os.path.join(self.tempPath, "tempFile") + + def tearDown(self) -> None: + super(DeltaTestCase, self).tearDown() + shutil.rmtree(self.tempPath) diff --git a/python/delta/tests/__init__.py b/python/delta/tests/__init__.py new file mode 100644 index 00000000000..0601709918e --- /dev/null +++ b/python/delta/tests/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/delta/tests/test_deltatable.py b/python/delta/tests/test_deltatable.py new file mode 100644 index 00000000000..fafbc488a4a --- /dev/null +++ b/python/delta/tests/test_deltatable.py @@ -0,0 +1,1253 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# mypy: disable-error-code="union-attr" +# mypy: disable-error-code="attr-defined" +# type: ignore[union-attr] + +import unittest +import os +from multiprocessing.pool import ThreadPool +from typing import List, Set, Dict, Optional, Any, Callable, Union, Tuple + +from pyspark.sql import DataFrame, Row +from pyspark.sql.column import _to_seq # type: ignore[attr-defined] +from pyspark.sql.functions import col, lit, expr, floor +from pyspark.sql.types import StructType, StructField, StringType, IntegerType, LongType, DataType +from pyspark.sql.utils import AnalysisException, ParseException + +from delta.tables import DeltaTable, DeltaTableBuilder, DeltaOptimizeBuilder +from delta.testing.utils import DeltaTestCase + + +class DeltaTableTestsMixin: + + def test_forPath(self) -> None: + self.__writeDeltaTable([('a', 1), ('b', 2), ('c', 3)]) + dt = DeltaTable.forPath(self.spark, self.tempFile).toDF() + self.__checkAnswer(dt, [('a', 1), ('b', 2), ('c', 3)]) + + def test_forPathWithOptions(self) -> None: + path = self.tempFile + fsOptions = {"fs.fake.impl": "org.apache.spark.sql.delta.FakeFileSystem", + "fs.fake.impl.disable.cache": "true"} + self.__writeDeltaTable([('a', 1), ('b', 2), ('c', 3)]) + dt = DeltaTable.forPath(self.spark, path, fsOptions).toDF() + self.__checkAnswer(dt, [('a', 1), ('b', 2), ('c', 3)]) + + def test_forName(self) -> None: + with self.table("test"): + self.__writeAsTable([('a', 1), ('b', 2), ('c', 3)], "test") + df = DeltaTable.forName(self.spark, "test").toDF() + self.__checkAnswer(df, [('a', 1), ('b', 2), ('c', 3)]) + + def test_alias_and_toDF(self) -> None: + self.__writeDeltaTable([('a', 1), ('b', 2), ('c', 3)]) + dt = DeltaTable.forPath(self.spark, self.tempFile).toDF() + self.__checkAnswer( + dt.alias("myTable").select('myTable.key', 'myTable.value'), + [('a', 1), ('b', 2), ('c', 3)]) + + def test_delete(self) -> None: + self.__writeDeltaTable([('a', 1), ('b', 2), ('c', 3), ('d', 4)]) + dt = DeltaTable.forPath(self.spark, self.tempFile) + + # delete with condition as str + dt.delete("key = 'a'") + self.__checkAnswer(dt.toDF(), [('b', 2), ('c', 3), ('d', 4)]) + + # delete with condition as Column + dt.delete(col("key") == lit("b")) + self.__checkAnswer(dt.toDF(), [('c', 3), ('d', 4)]) + + # delete without condition + dt.delete() + self.__checkAnswer(dt.toDF(), []) + + # bad args + with self.assertRaises(TypeError): + dt.delete(condition=1) # type: ignore[arg-type] + + def test_generate(self) -> None: + # create a delta table + numFiles = 10 + self.spark.range(100).repartition(numFiles).write.format("delta").save(self.tempFile) + dt = DeltaTable.forPath(self.spark, self.tempFile) + + # Generate the symlink format manifest + dt.generate("symlink_format_manifest") + + # check the contents of the manifest + # NOTE: this is not a correctness test, we are testing correctness in the scala suite + manifestPath = os.path.join(self.tempFile, + os.path.join("_symlink_format_manifest", "manifest")) + files = [] + with open(manifestPath) as f: + files = f.readlines() + + # the number of files we write should equal the number of lines in the manifest + self.assertEqual(len(files), numFiles) + + def test_update(self) -> None: + self.__writeDeltaTable([('a', 1), ('b', 2), ('c', 3), ('d', 4)]) + dt = DeltaTable.forPath(self.spark, self.tempFile) + + # update with condition as str and with set exprs as str + dt.update("key = 'a' or key = 'b'", {"value": "1"}) + self.__checkAnswer(dt.toDF(), [('a', 1), ('b', 1), ('c', 3), ('d', 4)]) + + # update with condition as Column and with 
set exprs as Columns + dt.update(expr("key = 'a' or key = 'b'"), {"value": expr("0")}) + self.__checkAnswer(dt.toDF(), [('a', 0), ('b', 0), ('c', 3), ('d', 4)]) + + # update without condition + dt.update(set={"value": "200"}) + self.__checkAnswer(dt.toDF(), [('a', 200), ('b', 200), ('c', 200), ('d', 200)]) + + # bad args + with self.assertRaisesRegex(ValueError, "cannot be None"): + dt.update({"value": "200"}) # type: ignore[call-overload] + + with self.assertRaisesRegex(ValueError, "cannot be None"): + dt.update(condition='a') # type: ignore[call-overload] + + with self.assertRaisesRegex(TypeError, "must be a dict"): + dt.update(set=1) # type: ignore[call-overload] + + with self.assertRaisesRegex(TypeError, "must be a Spark SQL Column or a string"): + dt.update(1, {}) # type: ignore[call-overload] + + with self.assertRaisesRegex(TypeError, "Values of dict in .* must contain only"): + dt.update(set={"value": 1}) # type: ignore[dict-item] + + with self.assertRaisesRegex(TypeError, "Keys of dict in .* must contain only"): + dt.update(set={1: ""}) # type: ignore[dict-item] + + with self.assertRaises(TypeError): + dt.update(set=1) # type: ignore[call-overload] + + def test_merge(self) -> None: + self.__writeDeltaTable([('a', 1), ('b', 2), ('c', 3), ('d', 4)]) + source = self.spark.createDataFrame([('a', -1), ('b', 0), ('e', -5), ('f', -6)], ["k", "v"]) + + def reset_table() -> None: + self.__overwriteDeltaTable([('a', 1), ('b', 2), ('c', 3), ('d', 4)]) + + dt = DeltaTable.forPath(self.spark, self.tempFile) + + # ============== Test basic syntax ============== + + # String expressions in merge condition and dicts + reset_table() + dt.merge(source, "key = k") \ + .whenMatchedUpdate(set={"value": "v + 0"}) \ + .whenNotMatchedInsert(values={"key": "k", "value": "v + 0"}) \ + .whenNotMatchedBySourceUpdate(set={"value": "value + 0"}) \ + .execute() + self.__checkAnswer(dt.toDF(), + ([('a', -1), ('b', 0), ('c', 3), ('d', 4), ('e', -5), ('f', -6)])) + + # Column expressions in merge condition and dicts + reset_table() + dt.merge(source, expr("key = k")) \ + .whenMatchedUpdate(set={"value": col("v") + 0}) \ + .whenNotMatchedInsert(values={"key": "k", "value": col("v") + 0}) \ + .whenNotMatchedBySourceUpdate(set={"value": col("value") + 0}) \ + .execute() + self.__checkAnswer(dt.toDF(), + ([('a', -1), ('b', 0), ('c', 3), ('d', 4), ('e', -5), ('f', -6)])) + + # Multiple matched update clauses + reset_table() + dt.merge(source, expr("key = k")) \ + .whenMatchedUpdate(condition="key = 'a'", set={"value": "5"}) \ + .whenMatchedUpdate(set={"value": "0"}) \ + .execute() + self.__checkAnswer(dt.toDF(), ([('a', 5), ('b', 0), ('c', 3), ('d', 4)])) + + # Multiple matched delete clauses + reset_table() + dt.merge(source, expr("key = k")) \ + .whenMatchedDelete(condition="key = 'a'") \ + .whenMatchedDelete() \ + .execute() + self.__checkAnswer(dt.toDF(), ([('c', 3), ('d', 4)])) + + # Redundant matched update and delete clauses + reset_table() + dt.merge(source, expr("key = k")) \ + .whenMatchedUpdate(condition="key = 'a'", set={"value": "5"}) \ + .whenMatchedUpdate(condition="key = 'a'", set={"value": "0"}) \ + .whenMatchedUpdate(condition="key = 'b'", set={"value": "6"}) \ + .whenMatchedDelete(condition="key = 'b'") \ + .execute() + self.__checkAnswer(dt.toDF(), ([('a', 5), ('b', 6), ('c', 3), ('d', 4)])) + + # Interleaved matched update and delete clauses + reset_table() + dt.merge(source, expr("key = k")) \ + .whenMatchedDelete(condition="key = 'a'") \ + .whenMatchedUpdate(condition="key = 'a'", set={"value": 
"5"}) \ + .whenMatchedDelete(condition="key = 'b'") \ + .whenMatchedUpdate(set={"value": "6"}) \ + .execute() + self.__checkAnswer(dt.toDF(), ([('c', 3), ('d', 4)])) + + # Multiple not matched insert clauses + reset_table() + dt.alias("t")\ + .merge(source.toDF("key", "value").alias("s"), expr("t.key = s.key")) \ + .whenNotMatchedInsert(condition="s.key = 'e'", + values={"t.key": "s.key", "t.value": "5"}) \ + .whenNotMatchedInsertAll() \ + .execute() + self.__checkAnswer(dt.toDF(), + ([('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5), ('f', -6)])) + + # Redundant not matched update and delete clauses + reset_table() + dt.merge(source, expr("key = k")) \ + .whenNotMatchedInsert(condition="k = 'e'", values={"key": "k", "value": "5"}) \ + .whenNotMatchedInsert(condition="k = 'e'", values={"key": "k", "value": "6"}) \ + .whenNotMatchedInsert(condition="k = 'f'", values={"key": "k", "value": "7"}) \ + .whenNotMatchedInsert(condition="k = 'f'", values={"key": "k", "value": "8"}) \ + .execute() + self.__checkAnswer(dt.toDF(), + ([('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5), ('f', 7)])) + + # Multiple not matched by source update clauses + reset_table() + dt.merge(source, expr("key = k")) \ + .whenNotMatchedBySourceUpdate(condition="key = 'c'", set={"value": "5"}) \ + .whenNotMatchedBySourceUpdate(set={"value": "0"}) \ + .execute() + self.__checkAnswer(dt.toDF(), ([('a', 1), ('b', 2), ('c', 5), ('d', 0)])) + + # Multiple not matched by source delete clauses + reset_table() + dt.merge(source, expr("key = k")) \ + .whenNotMatchedBySourceDelete(condition="key = 'c'") \ + .whenNotMatchedBySourceDelete() \ + .execute() + self.__checkAnswer(dt.toDF(), ([('a', 1), ('b', 2)])) + + # Redundant not matched by source update and delete clauses + reset_table() + dt.merge(source, expr("key = k")) \ + .whenNotMatchedBySourceUpdate(condition="key = 'c'", set={"value": "5"}) \ + .whenNotMatchedBySourceUpdate(condition="key = 'c'", set={"value": "0"}) \ + .whenNotMatchedBySourceUpdate(condition="key = 'd'", set={"value": "6"}) \ + .whenNotMatchedBySourceDelete(condition="key = 'd'") \ + .execute() + self.__checkAnswer(dt.toDF(), ([('a', 1), ('b', 2), ('c', 5), ('d', 6)])) + + # Interleaved update and delete clauses + reset_table() + dt.merge(source, expr("key = k")) \ + .whenNotMatchedBySourceDelete(condition="key = 'c'") \ + .whenNotMatchedBySourceUpdate(condition="key = 'c'", set={"value": "5"}) \ + .whenNotMatchedBySourceDelete(condition="key = 'd'") \ + .whenNotMatchedBySourceUpdate(set={"value": "6"}) \ + .execute() + self.__checkAnswer(dt.toDF(), ([('a', 1), ('b', 2)])) + + # ============== Test clause conditions ============== + + # String expressions in all conditions and dicts + reset_table() + dt.merge(source, "key = k") \ + .whenMatchedUpdate(condition="k = 'a'", set={"value": "v + 0"}) \ + .whenMatchedDelete(condition="k = 'b'") \ + .whenNotMatchedInsert(condition="k = 'e'", values={"key": "k", "value": "v + 0"}) \ + .whenNotMatchedBySourceUpdate(condition="key = 'c'", set={"value": col("value") + 0}) \ + .whenNotMatchedBySourceDelete(condition="key = 'd'") \ + .execute() + self.__checkAnswer(dt.toDF(), ([('a', -1), ('c', 3), ('e', -5)])) + + # Column expressions in all conditions and dicts + reset_table() + dt.merge(source, expr("key = k")) \ + .whenMatchedUpdate( + condition=expr("k = 'a'"), + set={"value": col("v") + 0}) \ + .whenMatchedDelete(condition=expr("k = 'b'")) \ + .whenNotMatchedInsert( + condition=expr("k = 'e'"), + values={"key": "k", "value": col("v") + 0}) \ + 
.whenNotMatchedBySourceUpdate( + condition=expr("key = 'c'"), + set={"value": col("value") + 0}) \ + .whenNotMatchedBySourceDelete(condition=expr("key = 'd'")) \ + .execute() + self.__checkAnswer(dt.toDF(), ([('a', -1), ('c', 3), ('e', -5)])) + + # Positional arguments + reset_table() + dt.merge(source, "key = k") \ + .whenMatchedUpdate("k = 'a'", {"value": "v + 0"}) \ + .whenMatchedDelete("k = 'b'") \ + .whenNotMatchedInsert("k = 'e'", {"key": "k", "value": "v + 0"}) \ + .whenNotMatchedBySourceUpdate("key = 'c'", {"value": "value + 0"}) \ + .whenNotMatchedBySourceDelete("key = 'd'") \ + .execute() + self.__checkAnswer(dt.toDF(), ([('a', -1), ('c', 3), ('e', -5)])) + + # ============== Test updateAll/insertAll ============== + + # No clause conditions and insertAll/updateAll + aliases + reset_table() + dt.alias("t") \ + .merge(source.toDF("key", "value").alias("s"), expr("t.key = s.key")) \ + .whenMatchedUpdateAll() \ + .whenNotMatchedInsertAll() \ + .execute() + self.__checkAnswer(dt.toDF(), + ([('a', -1), ('b', 0), ('c', 3), ('d', 4), ('e', -5), ('f', -6)])) + + # String expressions in all clause conditions and insertAll/updateAll + aliases + reset_table() + dt.alias("t") \ + .merge(source.toDF("key", "value").alias("s"), "s.key = t.key") \ + .whenMatchedUpdateAll("s.key = 'a'") \ + .whenNotMatchedInsertAll("s.key = 'e'") \ + .execute() + self.__checkAnswer(dt.toDF(), ([('a', -1), ('b', 2), ('c', 3), ('d', 4), ('e', -5)])) + + # Column expressions in all clause conditions and insertAll/updateAll + aliases + reset_table() + dt.alias("t") \ + .merge(source.toDF("key", "value").alias("s"), expr("t.key = s.key")) \ + .whenMatchedUpdateAll(expr("s.key = 'a'")) \ + .whenNotMatchedInsertAll(expr("s.key = 'e'")) \ + .execute() + self.__checkAnswer(dt.toDF(), ([('a', -1), ('b', 2), ('c', 3), ('d', 4), ('e', -5)])) + + # ============== Test bad args ============== + # ---- bad args in merge() + with self.assertRaisesRegex(TypeError, "must be DataFrame"): + dt.merge(1, "key = k") # type: ignore[arg-type] + + with self.assertRaisesRegex(TypeError, "must be a Spark SQL Column or a string"): + dt.merge(source, 1) # type: ignore[arg-type] + + # ---- bad args in whenMatchedUpdate() + with self.assertRaisesRegex(ValueError, "cannot be None"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenMatchedUpdate({"value": "v"})) + + with self.assertRaisesRegex(ValueError, "cannot be None"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenMatchedUpdate(1)) + + with self.assertRaisesRegex(ValueError, "cannot be None"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenMatchedUpdate(condition="key = 'a'")) + + with self.assertRaisesRegex(TypeError, "must be a Spark SQL Column or a string"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenMatchedUpdate(1, {"value": "v"})) + + with self.assertRaisesRegex(TypeError, "must be a dict"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenMatchedUpdate("k = 'a'", 1)) + + with self.assertRaisesRegex(TypeError, "Values of dict in .* must contain only"): + (dt + .merge(source, "key = k") + .whenMatchedUpdate(set={"value": 1})) # type: ignore[dict-item] + + with self.assertRaisesRegex(TypeError, "Keys of dict in .* must contain only"): + (dt + .merge(source, "key = k") + .whenMatchedUpdate(set={1: ""})) # type: ignore[dict-item] + + with self.assertRaises(TypeError): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenMatchedUpdate(set="k = 'a'", 
condition={"value": 1})) + + # bad args in whenMatchedDelete() + with self.assertRaisesRegex(TypeError, "must be a Spark SQL Column or a string"): + dt.merge(source, "key = k").whenMatchedDelete(1) # type: ignore[arg-type] + + # ---- bad args in whenNotMatchedInsert() + with self.assertRaisesRegex(ValueError, "cannot be None"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenNotMatchedInsert({"value": "v"})) + + with self.assertRaisesRegex(ValueError, "cannot be None"): + dt.merge(source, "key = k").whenNotMatchedInsert(1) # type: ignore[call-overload] + + with self.assertRaisesRegex(ValueError, "cannot be None"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenNotMatchedInsert(condition="key = 'a'")) + + with self.assertRaisesRegex(TypeError, "must be a Spark SQL Column or a string"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenNotMatchedInsert(1, {"value": "v"})) + + with self.assertRaisesRegex(TypeError, "must be a dict"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenNotMatchedInsert("k = 'a'", 1)) + + with self.assertRaisesRegex(TypeError, "Values of dict in .* must contain only"): + (dt + .merge(source, "key = k") + .whenNotMatchedInsert(values={"value": 1})) # type: ignore[dict-item] + + with self.assertRaisesRegex(TypeError, "Keys of dict in .* must contain only"): + (dt + .merge(source, "key = k") + .whenNotMatchedInsert(values={1: "value"})) # type: ignore[dict-item] + + with self.assertRaises(TypeError): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenNotMatchedInsert(values="k = 'a'", condition={"value": 1})) + + # ---- bad args in whenNotMatchedBySourceUpdate() + with self.assertRaisesRegex(ValueError, "cannot be None"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenNotMatchedBySourceUpdate({"value": "value"})) + + with self.assertRaisesRegex(ValueError, "cannot be None"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenNotMatchedBySourceUpdate(1)) + + with self.assertRaisesRegex(ValueError, "cannot be None"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenNotMatchedBySourceUpdate(condition="key = 'a'")) + + with self.assertRaisesRegex(TypeError, "must be a Spark SQL Column or a string"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenNotMatchedBySourceUpdate(1, {"value": "value"})) + + with self.assertRaisesRegex(TypeError, "must be a dict"): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenNotMatchedBySourceUpdate("key = 'a'", 1)) + + with self.assertRaisesRegex(TypeError, "Values of dict in .* must contain only"): + (dt + .merge(source, "key = k") + .whenNotMatchedBySourceUpdate(set={"value": 1})) # type: ignore[dict-item] + + with self.assertRaisesRegex(TypeError, "Keys of dict in .* must contain only"): + (dt + .merge(source, "key = k") + .whenNotMatchedBySourceUpdate(set={1: ""})) # type: ignore[dict-item] + + with self.assertRaises(TypeError): + (dt # type: ignore[call-overload] + .merge(source, "key = k") + .whenNotMatchedBySourceUpdate(set="key = 'a'", condition={"value": 1})) + + # bad args in whenNotMatchedBySourceDelete() + with self.assertRaisesRegex(TypeError, "must be a Spark SQL Column or a string"): + dt.merge(source, "key = k").whenNotMatchedBySourceDelete(1) # type: ignore[arg-type] + + def test_merge_with_inconsistent_sessions(self) -> None: + source_path = os.path.join(self.tempFile, "source") + target_path = 
os.path.join(self.tempFile, "target") + spark = self.spark + + def f(spark): + spark.range(20) \ + .withColumn("x", col("id")) \ + .withColumn("y", col("id")) \ + .write.mode("overwrite").format("delta").save(source_path) + spark.range(1) \ + .withColumn("x", col("id")) \ + .write.mode("overwrite").format("delta").save(target_path) + target = DeltaTable.forPath(spark, target_path) + source = spark.read.format("delta").load(source_path).alias("s") + target.alias("t") \ + .merge(source, "t.id = s.id") \ + .whenMatchedUpdate(set={"t.x": "t.x + 1"}) \ + .whenNotMatchedInsertAll() \ + .execute() + assert(spark.read.format("delta").load(target_path).count() == 20) + + pool = ThreadPool(3) + spark.conf.set("spark.databricks.delta.schema.autoMerge.enabled", "true") + try: + pool.starmap(f, [(spark,)]) + finally: + spark.conf.unset("spark.databricks.delta.schema.autoMerge.enabled") + + def test_history(self) -> None: + self.__writeDeltaTable([('a', 1), ('b', 2), ('c', 3)]) + self.__overwriteDeltaTable([('a', 3), ('b', 2), ('c', 1)]) + dt = DeltaTable.forPath(self.spark, self.tempFile) + operations = dt.history().select('operation') + self.__checkAnswer(operations, + [Row("WRITE"), Row("WRITE")], + StructType([StructField( + "operation", StringType(), True)])) + + lastMode = dt.history(1).select('operationParameters.mode') + self.__checkAnswer( + lastMode, + [Row("Overwrite")], + StructType([StructField("operationParameters.mode", StringType(), True)])) + + def test_detail(self) -> None: + self.__writeDeltaTable([('a', 1), ('b', 2), ('c', 3)]) + dt = DeltaTable.forPath(self.spark, self.tempFile) + details = dt.detail() + self.__checkAnswer( + details.select('format'), + [Row('delta')], + StructType([StructField('format', StringType(), True)]) + ) + + def test_vacuum(self) -> None: + self.__writeDeltaTable([('a', 1), ('b', 2), ('c', 3)]) + dt = DeltaTable.forPath(self.spark, self.tempFile) + self.__createFile('abc.txt', 'abcde') + self.__createFile('bac.txt', 'abcdf') + self.assertEqual(True, self.__checkFileExists('abc.txt')) + dt.vacuum() # will not delete files as default retention is used. 
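+        # Neither the default-retention vacuum above nor the 1000-hour vacuum below
+        # is expected to delete the untracked files, since both horizons are far
+        # longer than the files' age; the files are only removed after the retention
+        # check is disabled and vacuum(0.0) is called further down.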
+ dt.vacuum(1000) # test whether integers work + + self.assertEqual(True, self.__checkFileExists('bac.txt')) + retentionConf = "spark.databricks.delta.retentionDurationCheck.enabled" + self.spark.conf.set(retentionConf, "false") + dt.vacuum(0.0) + self.spark.conf.set(retentionConf, "true") + self.assertEqual(False, self.__checkFileExists('bac.txt')) + self.assertEqual(False, self.__checkFileExists('abc.txt')) + + def test_convertToDelta(self) -> None: + df = self.spark.createDataFrame([('a', 1), ('b', 2), ('c', 3)], ["key", "value"]) + df.write.format("parquet").save(self.tempFile) + dt = DeltaTable.convertToDelta(self.spark, "parquet.`%s`" % self.tempFile) + self.__checkAnswer( + self.spark.read.format("delta").load(self.tempFile), + [('a', 1), ('b', 2), ('c', 3)]) + + # test if convert to delta with partition columns work + tempFile2 = self.tempFile + "_2" + df.write.partitionBy("value").format("parquet").save(tempFile2) + schema = StructType() + schema.add("value", IntegerType(), True) + dt = DeltaTable.convertToDelta( + self.spark, + "parquet.`%s`" % tempFile2, + schema) + self.__checkAnswer( + self.spark.read.format("delta").load(tempFile2), + [('a', 1), ('b', 2), ('c', 3)]) + self.assertEqual(type(dt), type(DeltaTable.forPath(self.spark, tempFile2))) + + # convert to delta with partition column provided as a string + tempFile3 = self.tempFile + "_3" + df.write.partitionBy("value").format("parquet").save(tempFile3) + dt = DeltaTable.convertToDelta( + self.spark, + "parquet.`%s`" % tempFile3, + "value int") + self.__checkAnswer( + self.spark.read.format("delta").load(tempFile3), + [('a', 1), ('b', 2), ('c', 3)]) + self.assertEqual(type(dt), type(DeltaTable.forPath(self.spark, tempFile3))) + + def test_isDeltaTable(self) -> None: + df = self.spark.createDataFrame([('a', 1), ('b', 2), ('c', 3)], ["key", "value"]) + df.write.format("parquet").save(self.tempFile) + tempFile2 = self.tempFile + '_2' + df.write.format("delta").save(tempFile2) + self.assertEqual(DeltaTable.isDeltaTable(self.spark, self.tempFile), False) + self.assertEqual(DeltaTable.isDeltaTable(self.spark, tempFile2), True) + + def __verify_table_schema(self, tableName: str, schema: StructType, cols: List[str], + types: List[DataType], nullables: Set[str] = set(), + comments: Dict[str, str] = {}, + properties: Dict[str, str] = {}, + partitioningColumns: List[str] = [], + tblComment: Optional[str] = None) -> None: + fields = [] + for i in range(len(cols)): + col = cols[i] + dataType = types[i] + metadata = {} + if col in comments: + metadata["comment"] = comments[col] + fields.append(StructField(col, dataType, col in nullables, metadata)) + self.assertEqual(StructType(fields), schema) + if len(properties) > 0: + result = ( + self.spark.sql( # type: ignore[assignment, misc] + "SHOW TBLPROPERTIES {}".format(tableName) + ) + .collect()) + tablePropertyMap = {row.key: row.value for row in result} + for key in properties: + self.assertIn(key, tablePropertyMap) + self.assertEqual(tablePropertyMap[key], properties[key]) + tableDetails = self.spark.sql("DESCRIBE DETAIL {}".format(tableName))\ + .collect()[0] + self.assertEqual(tableDetails.format, "delta") + actualComment = tableDetails.description + self.assertEqual(actualComment, tblComment) + partitionCols = tableDetails.partitionColumns + self.assertEqual(sorted(partitionCols), sorted((partitioningColumns))) + + def __verify_generated_column(self, tableName: str, deltaTable: DeltaTable) -> None: + cmd = "INSERT INTO {table} (col1, col2) VALUES (1, 11)".format(table=tableName) + 
self.spark.sql(cmd) + deltaTable.update(expr("col2 = 11"), {"col1": expr("2")}) + self.__checkAnswer(deltaTable.toDF(), [(2, 12)], schema=["col1", "col2"]) + + def __build_delta_table(self, builder: DeltaTableBuilder) -> DeltaTable: + return builder.addColumn("col1", "int", comment="foo", nullable=False) \ + .addColumn("col2", IntegerType(), generatedAlwaysAs="col1 + 10") \ + .property("foo", "bar") \ + .comment("comment") \ + .partitionedBy("col1").execute() + + def __create_table(self, ifNotExists: bool, + tableName: Optional[str] = None, + location: Optional[str] = None) -> DeltaTable: + builder = DeltaTable.createIfNotExists(self.spark) if ifNotExists \ + else DeltaTable.create(self.spark) + if tableName: + builder = builder.tableName(tableName) + if location: + builder = builder.location(location) + return self.__build_delta_table(builder) + + def __replace_table(self, + orCreate: bool, + tableName: Optional[str] = None, + location: Optional[str] = None) -> DeltaTable: + builder = DeltaTable.createOrReplace(self.spark) if orCreate \ + else DeltaTable.replace(self.spark) + if tableName: + builder = builder.tableName(tableName) + if location: + builder = builder.location(location) + return self.__build_delta_table(builder) + + def test_create_table_with_existing_schema(self) -> None: + df = self.spark.createDataFrame([('a', 1), ('b', 2), ('c', 3)], ["key", "value"]) + with self.table("test"): + deltaTable = DeltaTable.create(self.spark).tableName("test") \ + .addColumns(df.schema) \ + .addColumn("value2", dataType="int")\ + .partitionedBy(["value2", "value"])\ + .execute() + self.__verify_table_schema("test", + deltaTable.toDF().schema, + ["key", "value", "value2"], + [StringType(), LongType(), IntegerType()], + nullables={"key", "value", "value2"}, + partitioningColumns=["value", "value2"]) + + with self.table("test2"): + # verify creating table with list of structFields + deltaTable2 = DeltaTable.create(self.spark).tableName("test2").addColumns( + df.schema.fields) \ + .addColumn("value2", dataType="int") \ + .partitionedBy("value2", "value")\ + .execute() + self.__verify_table_schema("test2", + deltaTable2.toDF().schema, + ["key", "value", "value2"], + [StringType(), LongType(), IntegerType()], + nullables={"key", "value", "value2"}, + partitioningColumns=["value", "value2"]) + + def test_create_replace_table_with_no_spark_session_passed(self) -> None: + with self.table("test"): + # create table. 
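+ # No SparkSession is passed to the builder here; it should resolve the active session on its own.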
+ deltaTable = DeltaTable.create().tableName("test")\ + .addColumn("value", dataType="int").execute() + self.__verify_table_schema("test", + deltaTable.toDF().schema, + ["value"], + [IntegerType()], + nullables={"value"}) + + # ignore existence with createIfNotExists + deltaTable = DeltaTable.createIfNotExists().tableName("test") \ + .addColumn("value2", dataType="int").execute() + self.__verify_table_schema("test", + deltaTable.toDF().schema, + ["value"], + [IntegerType()], + nullables={"value"}) + + # replace table with replace + deltaTable = DeltaTable.replace().tableName("test") \ + .addColumn("key", dataType="int").execute() + self.__verify_table_schema("test", + deltaTable.toDF().schema, + ["key"], + [IntegerType()], + nullables={"key"}) + + # replace with a new column again + deltaTable = DeltaTable.createOrReplace().tableName("test") \ + .addColumn("col1", dataType="int").execute() + + self.__verify_table_schema("test", + deltaTable.toDF().schema, + ["col1"], + [IntegerType()], + nullables={"col1"}) + + def test_create_table_with_name_only(self) -> None: + for ifNotExists in (False, True): + tableName = "testTable{}".format(ifNotExists) + with self.table(tableName): + deltaTable = self.__create_table(ifNotExists, tableName=tableName) + + self.__verify_table_schema(tableName, + deltaTable.toDF().schema, + ["col1", "col2"], + [IntegerType(), IntegerType()], + nullables={"col2"}, + comments={"col1": "foo"}, + properties={"foo": "bar"}, + partitioningColumns=["col1"], + tblComment="comment") + # verify generated columns. + self.__verify_generated_column(tableName, deltaTable) + + def test_create_table_with_location_only(self) -> None: + for ifNotExists in (False, True): + path = self.tempFile + str(ifNotExists) + deltaTable = self.__create_table(ifNotExists, location=path) + + self.__verify_table_schema("delta.`{}`".format(path), + deltaTable.toDF().schema, + ["col1", "col2"], + [IntegerType(), IntegerType()], + nullables={"col2"}, + comments={"col1": "foo"}, + partitioningColumns=["col1"], + tblComment="comment") + # verify generated columns. + self.__verify_generated_column("delta.`{}`".format(path), deltaTable) + + def test_create_table_with_name_and_location(self) -> None: + for ifNotExists in (False, True): + path = self.tempFile + str(ifNotExists) + tableName = "testTable{}".format(ifNotExists) + with self.table(tableName): + deltaTable = self.__create_table( + ifNotExists, tableName=tableName, location=path) + + self.__verify_table_schema(tableName, + deltaTable.toDF().schema, + ["col1", "col2"], + [IntegerType(), IntegerType()], + nullables={"col2"}, + comments={"col1": "foo"}, + properties={"foo": "bar"}, + partitioningColumns=["col1"], + tblComment="comment") + # verify generated columns. + self.__verify_generated_column(tableName, deltaTable) + + def test_create_table_behavior(self) -> None: + with self.table("testTable"): + self.spark.sql("CREATE TABLE testTable (c1 int) USING DELTA") + + # Errors out if doesn't ignore. + with self.assertRaises(AnalysisException) as error_ctx: + self.__create_table(False, tableName="testTable") + msg = str(error_ctx.exception) + assert ("testTable" in msg and "already exists" in msg) + + # ignore table creation. 
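+ # createIfNotExists is expected to be a no-op and leave the existing (c1 int) schema unchanged.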
+ self.__create_table(True, tableName="testTable") + schema = self.spark.read.format("delta").table("testTable").schema + self.__verify_table_schema("testTable", + schema, + ["c1"], + [IntegerType()], + nullables={"c1"}) + + def test_replace_table_with_name_only(self) -> None: + for orCreate in (False, True): + tableName = "testTable{}".format(orCreate) + with self.table(tableName): + self.spark.sql("CREATE TABLE {} (c1 int) USING DELTA".format(tableName)) + deltaTable = self.__replace_table(orCreate, tableName=tableName) + + self.__verify_table_schema(tableName, + deltaTable.toDF().schema, + ["col1", "col2"], + [IntegerType(), IntegerType()], + nullables={"col2"}, + comments={"col1": "foo"}, + properties={"foo": "bar"}, + partitioningColumns=["col1"], + tblComment="comment") + # verify generated columns. + self.__verify_generated_column(tableName, deltaTable) + + def test_replace_table_with_location_only(self) -> None: + for orCreate in (False, True): + path = self.tempFile + str(orCreate) + self.__create_table(False, location=path) + deltaTable = self.__replace_table(orCreate, location=path) + + self.__verify_table_schema("delta.`{}`".format(path), + deltaTable.toDF().schema, + ["col1", "col2"], + [IntegerType(), IntegerType()], + nullables={"col2"}, + comments={"col1": "foo"}, + properties={"foo": "bar"}, + partitioningColumns=["col1"], + tblComment="comment") + # verify generated columns. + self.__verify_generated_column("delta.`{}`".format(path), deltaTable) + + def test_replace_table_with_name_and_location(self) -> None: + for orCreate in (False, True): + path = self.tempFile + str(orCreate) + tableName = "testTable{}".format(orCreate) + with self.table(tableName): + self.spark.sql("CREATE TABLE {} (col int) USING DELTA LOCATION '{}'" + .format(tableName, path)) + deltaTable = self.__replace_table( + orCreate, tableName=tableName, location=path) + + self.__verify_table_schema(tableName, + deltaTable.toDF().schema, + ["col1", "col2"], + [IntegerType(), IntegerType()], + nullables={"col2"}, + comments={"col1": "foo"}, + properties={"foo": "bar"}, + partitioningColumns=["col1"], + tblComment="comment") + # verify generated columns. 
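+ # (col2 is declared generatedAlwaysAs 'col1 + 10', so updating col1 must regenerate col2.)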
+ self.__verify_generated_column(tableName, deltaTable) + + def test_replace_table_behavior(self) -> None: + with self.table("testTable"): + with self.assertRaises(AnalysisException) as error_ctx: + self.__replace_table(False, tableName="testTable") + msg = str(error_ctx.exception) + self.assertIn("testtable", msg.lower()) + self.assertTrue("did not exist" in msg or "cannot be found" in msg) + deltaTable = self.__replace_table(True, tableName="testTable") + self.__verify_table_schema("testTable", + deltaTable.toDF().schema, + ["col1", "col2"], + [IntegerType(), IntegerType()], + nullables={"col2"}, + comments={"col1": "foo"}, + properties={"foo": "bar"}, + partitioningColumns=["col1"], + tblComment="comment") + + def test_verify_paritionedBy_compatibility(self) -> None: + with self.table("testTable"): + tableBuilder = DeltaTable.create(self.spark).tableName("testTable") \ + .addColumn("col1", "int", comment="foo", nullable=False) \ + .addColumn("col2", IntegerType(), generatedAlwaysAs="col1 + 10") \ + .property("foo", "bar") \ + .comment("comment") + tableBuilder._jbuilder = tableBuilder._jbuilder.partitionedBy( + _to_seq(self.spark._sc, ["col1"]) # type: ignore[attr-defined] + ) + deltaTable = tableBuilder.execute() + self.__verify_table_schema("testTable", + deltaTable.toDF().schema, + ["col1", "col2"], + [IntegerType(), IntegerType()], + nullables={"col2"}, + comments={"col1": "foo"}, + properties={"foo": "bar"}, + partitioningColumns=["col1"], + tblComment="comment") + + def test_delta_table_builder_with_bad_args(self) -> None: + builder = DeltaTable.create(self.spark).location(self.tempFile) + + # bad table name + with self.assertRaises(TypeError): + builder.tableName(1) # type: ignore[arg-type] + + # bad location + with self.assertRaises(TypeError): + builder.location(1) # type: ignore[arg-type] + + # bad comment + with self.assertRaises(TypeError): + builder.comment(1) # type: ignore[arg-type] + + # bad column name + with self.assertRaises(TypeError): + builder.addColumn(1, "int") # type: ignore[arg-type] + + # bad datatype. + with self.assertRaises(TypeError): + builder.addColumn("a", 1) # type: ignore[arg-type] + + # bad column datatype - can't be pared + with self.assertRaises(ParseException): + builder.addColumn("a", "1") + builder.execute() + + # bad comment + with self.assertRaises(TypeError): + builder.addColumn("a", "int", comment=1) # type: ignore[arg-type] + + # bad generatedAlwaysAs + with self.assertRaises(TypeError): + builder.addColumn("a", "int", generatedAlwaysAs=1) # type: ignore[arg-type] + + # bad nullable + with self.assertRaises(TypeError): + builder.addColumn("a", "int", nullable=1) # type: ignore[arg-type] + + # bad existing schema + with self.assertRaises(TypeError): + builder.addColumns(1) # type: ignore[arg-type] + + # bad existing schema. 
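+ # A list mixing StructField entries with other types should be rejected as well.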
+ with self.assertRaises(TypeError): + builder.addColumns([StructField("1", IntegerType()), 1]) # type: ignore[list-item] + + # bad partitionedBy col name + with self.assertRaises(TypeError): + builder.partitionedBy(1) # type: ignore[call-overload] + + with self.assertRaises(TypeError): + builder.partitionedBy(1, "1") # type: ignore[call-overload] + + with self.assertRaises(TypeError): + builder.partitionedBy([1]) # type: ignore[list-item] + + # bad property key + with self.assertRaises(TypeError): + builder.property(1, "1") # type: ignore[arg-type] + + # bad property value + with self.assertRaises(TypeError): + builder.property("1", 1) # type: ignore[arg-type] + + def test_protocolUpgrade(self) -> None: + try: + self.spark.conf.set('spark.databricks.delta.minWriterVersion', '2') + self.spark.conf.set('spark.databricks.delta.minReaderVersion', '1') + self.__writeDeltaTable([('a', 1), ('b', 2), ('c', 3), ('d', 4)]) + dt = DeltaTable.forPath(self.spark, self.tempFile) + dt.upgradeTableProtocol(1, 3) + finally: + self.spark.conf.unset('spark.databricks.delta.minWriterVersion') + self.spark.conf.unset('spark.databricks.delta.minReaderVersion') + + # cannot downgrade once upgraded + dt.upgradeTableProtocol(1, 2) + dt_details = dt.detail().collect()[0].asDict() + self.assertTrue(dt_details["minReaderVersion"] == 1, + "The upgrade should be a no-op, because downgrades aren't allowed") + self.assertTrue(dt_details["minWriterVersion"] == 3, + "The upgrade should be a no-op, because downgrades aren't allowed") + + # bad args + with self.assertRaisesRegex(ValueError, "readerVersion"): + dt.upgradeTableProtocol("abc", 3) # type: ignore[arg-type] + with self.assertRaisesRegex(ValueError, "readerVersion"): + dt.upgradeTableProtocol([1], 3) # type: ignore[arg-type] + with self.assertRaisesRegex(ValueError, "readerVersion"): + dt.upgradeTableProtocol([], 3) # type: ignore[arg-type] + with self.assertRaisesRegex(ValueError, "readerVersion"): + dt.upgradeTableProtocol({}, 3) # type: ignore[arg-type] + with self.assertRaisesRegex(ValueError, "writerVersion"): + dt.upgradeTableProtocol(1, "abc") # type: ignore[arg-type] + with self.assertRaisesRegex(ValueError, "writerVersion"): + dt.upgradeTableProtocol(1, [3]) # type: ignore[arg-type] + with self.assertRaisesRegex(ValueError, "writerVersion"): + dt.upgradeTableProtocol(1, []) # type: ignore[arg-type] + with self.assertRaisesRegex(ValueError, "writerVersion"): + dt.upgradeTableProtocol(1, {}) # type: ignore[arg-type] + + def test_restore_to_version(self) -> None: + self.__writeDeltaTable([('a', 1), ('b', 2)]) + self.__overwriteDeltaTable([('a', 3), ('b', 2)], + schema=["key_new", "value_new"], + overwriteSchema='true') + + overwritten = DeltaTable.forPath(self.spark, self.tempFile).toDF() + self.__checkAnswer(overwritten, + [Row(key_new='a', value_new=3), Row(key_new='b', value_new=2)]) + + DeltaTable.forPath(self.spark, self.tempFile).restoreToVersion(0) + restored = DeltaTable.forPath(self.spark, self.tempFile).toDF() + + self.__checkAnswer(restored, [Row(key='a', value=1), Row(key='b', value=2)]) + + def test_restore_to_timestamp(self) -> None: + self.__writeDeltaTable([('a', 1), ('b', 2)]) + timestampToRestore = DeltaTable.forPath(self.spark, self.tempFile) \ + .history() \ + .head() \ + .timestamp \ + .strftime('%Y-%m-%d %H:%M:%S.%f') + + self.__overwriteDeltaTable([('a', 3), ('b', 2)], + schema=["key_new", "value_new"], + overwriteSchema='true') + + overwritten = DeltaTable.forPath(self.spark, self.tempFile).toDF() + self.__checkAnswer(overwritten, + 
[Row(key_new='a', value_new=3), Row(key_new='b', value_new=2)]) + + DeltaTable.forPath(self.spark, self.tempFile).restoreToTimestamp(timestampToRestore) + + restored = DeltaTable.forPath(self.spark, self.tempFile).toDF() + self.__checkAnswer(restored, [Row(key='a', value=1), Row(key='b', value=2)]) + + # we cannot test the actual working of restore to timestamp here but we can make sure + # that the api is being called at least + def runRestore() -> None: + DeltaTable.forPath(self.spark, self.tempFile).restoreToTimestamp('05/04/1999') + self.__intercept(runRestore, "The provided timestamp ('05/04/1999') " + "cannot be converted to a valid timestamp") + + def test_restore_invalid_inputs(self) -> None: + df = self.spark.createDataFrame([('a', 1), ('b', 2), ('c', 3)], ["key", "value"]) + df.write.format("delta").save(self.tempFile) + + dt = DeltaTable.forPath(self.spark, self.tempFile) + + def runRestoreToTimestamp() -> None: + dt.restoreToTimestamp(12342323232) # type: ignore[arg-type] + self.__intercept(runRestoreToTimestamp, + "timestamp needs to be a string but got ''") + + def runRestoreToVersion() -> None: + dt.restoreToVersion("0") # type: ignore[arg-type] + self.__intercept(runRestoreToVersion, + "version needs to be an int but got ''") + + def test_optimize(self) -> None: + # write an unoptimized delta table + df = self.spark.createDataFrame([("a", 1), ("a", 2)], ["key", "value"]).repartition(1) + df.write.format("delta").save(self.tempFile) + df = self.spark.createDataFrame([("a", 3), ("a", 4)], ["key", "value"]).repartition(1) + df.write.format("delta").save(self.tempFile, mode="append") + df = self.spark.createDataFrame([("b", 1), ("b", 2)], ["key", "value"]).repartition(1) + df.write.format("delta").save(self.tempFile, mode="append") + + # create DeltaTable + dt = DeltaTable.forPath(self.spark, self.tempFile) + + # execute bin compaction + optimizer = dt.optimize() + res = optimizer.executeCompaction() + op_params = dt.history().first().operationParameters + + # assertions + self.assertEqual(1, res.first().metrics.numFilesAdded) + self.assertEqual(3, res.first().metrics.numFilesRemoved) + self.assertEqual('[]', op_params['predicate']) + + # test non-partition column + def optimize() -> None: + dt.optimize().where("key = 'a'").executeCompaction() + self.__intercept(optimize, + "Predicate references non-partition column 'key'. 
" + "Only the partition columns may be referenced: []") + + def test_optimize_w_partition_filter(self) -> None: + # write an unoptimized delta table + df = self.spark.createDataFrame([("a", 1), ("a", 2)], ["key", "value"]).repartition(1) + df.write.partitionBy("key").format("delta").save(self.tempFile) + df = self.spark.createDataFrame([("a", 3), ("a", 4)], ["key", "value"]).repartition(1) + df.write.partitionBy("key").format("delta").save(self.tempFile, mode="append") + df = self.spark.createDataFrame([("b", 1), ("b", 2)], ["key", "value"]).repartition(1) + df.write.partitionBy("key").format("delta").save(self.tempFile, mode="append") + + # create DeltaTable + dt = DeltaTable.forPath(self.spark, self.tempFile) + + # execute bin compaction + optimizer = dt.optimize().where("key = 'a'") + res = optimizer.executeCompaction() + op_params = dt.history().first().operationParameters + + # assertions + self.assertEqual(1, res.first().metrics.numFilesAdded) + self.assertEqual(2, res.first().metrics.numFilesRemoved) + self.assertEqual('''["('key = a)"]''', op_params['predicate']) + + # test non-partition column + def optimize() -> None: + dt.optimize().where("value = 1").executeCompaction() + self.__intercept(optimize, + "Predicate references non-partition column 'value'. " + "Only the partition columns may be referenced: [key]") + + def test_optimize_zorder_by(self) -> None: + # write an unoptimized delta table + self.spark.createDataFrame([i for i in range(0, 100)], IntegerType()) \ + .withColumn("col1", floor(col("value") % 7)) \ + .withColumn("col2", floor(col("value") % 27)) \ + .withColumn("p", floor(col("value") % 10)) \ + .repartition(4).write.partitionBy("p").format("delta").save(self.tempFile) + + # get the number of data files in the current version + numDataFilesPreZOrder = self.spark.read.format("delta").load(self.tempFile) \ + .select("_metadata.file_path").distinct().count() + + # create DeltaTable + dt = DeltaTable.forPath(self.spark, self.tempFile) + + # execute Z-Order Optimization + optimizer = dt.optimize() + result = optimizer.executeZOrderBy(["col1", "col2"]) + metrics = result.select("metrics.*").head() + + # expect there is only one file after the Z-Order as Z-Order also + # does the compaction implicitly and all small files are written to one file + # for each partition. Ther are 10 partitions in the table, so expect 10 final files + numDataFilesPostZOrder = 10 + + self.assertEqual(numDataFilesPostZOrder, metrics.numFilesAdded) + self.assertEqual(numDataFilesPreZOrder, metrics.numFilesRemoved) + self.assertEqual(0, metrics.totalFilesSkipped) + self.assertEqual(numDataFilesPreZOrder, metrics.totalConsideredFiles) + self.assertEqual('all', metrics.zOrderStats.strategyName) + self.assertEqual(10, metrics.zOrderStats.numOutputCubes) # one for each partition + + # negative test: Z-Order on partition column + def optimize() -> None: + dt.optimize().where("p = 1").executeZOrderBy(["p"]) + self.__intercept(optimize, + "p is a partition column. 
" + "Z-Ordering can only be performed on data columns") + + def test_optimize_zorder_by_w_partition_filter(self) -> None: + # write an unoptimized delta table + df = self.spark.createDataFrame([i for i in range(0, 100)], IntegerType()) \ + .withColumn("col1", floor(col("value") % 7)) \ + .withColumn("col2", floor(col("value") % 27)) \ + .withColumn("p", floor(col("value") % 10)) \ + .repartition(4).write.partitionBy("p") + + df.format("delta").save(self.tempFile) + + # get the number of data files in the current version in partition p = 2 + numDataFilesPreZOrder = self.spark.read.format("delta").load(self.tempFile) \ + .filter("p=2").select("_metadata.file_path").distinct().count() + + # create DeltaTable + dt = DeltaTable.forPath(self.spark, self.tempFile) + + # execute Z-OrderBy + optimizer = dt.optimize().where("p = 2") + result = optimizer.executeZOrderBy(["col1", "col2"]) + metrics = result.select("metrics.*").head() + + # expect there is only one file after the Z-Order as Z-Order also + # does the compaction implicitly and all small files are written to one file + numDataFilesPostZOrder = 1 + + self.assertEqual(numDataFilesPostZOrder, metrics.numFilesAdded) + self.assertEqual(numDataFilesPreZOrder, metrics.numFilesRemoved) + self.assertEqual(0, metrics.totalFilesSkipped) + # expected to consider all input files for Z-Order + self.assertEqual(numDataFilesPreZOrder, metrics.totalConsideredFiles) + self.assertEqual('all', metrics.zOrderStats.strategyName) + self.assertEqual(1, metrics.zOrderStats.numOutputCubes) # one per each affected partition + + def __checkAnswer(self, df: DataFrame, + expectedAnswer: List[Any], + schema: Union[StructType, List[str]] = ["key", "value"]) -> None: + if not expectedAnswer: + self.assertEqual(df.count(), 0) + return + expectedDF = self.spark.createDataFrame(expectedAnswer, schema) + try: + self.assertEqual(df.count(), expectedDF.count()) + self.assertEqual(len(df.columns), len(expectedDF.columns)) + self.assertEqual([], df.subtract(expectedDF).take(1)) + self.assertEqual([], expectedDF.subtract(df).take(1)) + except AssertionError: + print("Expected:") + expectedDF.show() + print("Found:") + df.show() + raise + + def __writeDeltaTable(self, datalist: List[Tuple[Any, Any]]) -> None: + df = self.spark.createDataFrame(datalist, ["key", "value"]) + df.write.format("delta").save(self.tempFile) + + def __writeAsTable(self, datalist: List[Tuple[Any, Any]], tblName: str) -> None: + df = self.spark.createDataFrame(datalist, ["key", "value"]) + df.write.format("delta").saveAsTable(tblName) + + def __overwriteDeltaTable(self, datalist: List[Tuple[Any, Any]], + schema: Union[StructType, List[str]] = ["key", "value"], + overwriteSchema: str = 'false') -> None: + df = self.spark.createDataFrame(datalist, schema) + df.write.format("delta") \ + .option('overwriteSchema', overwriteSchema) \ + .mode("overwrite") \ + .save(self.tempFile) + + def __createFile(self, fileName: str, content: Any) -> None: + with open(os.path.join(self.tempFile, fileName), 'w') as f: + f.write(content) + + def __checkFileExists(self, fileName: str) -> bool: + return os.path.exists(os.path.join(self.tempFile, fileName)) + + def __intercept(self, func: Callable[[], None], exceptionMsg: str) -> None: + seenTheRightException = False + try: + func() + except Exception as e: + if exceptionMsg in str(e): + seenTheRightException = True + assert seenTheRightException, ("Did not catch expected Exception:" + exceptionMsg) + + +class DeltaTableTests(DeltaTableTestsMixin, DeltaTestCase): + pass + + +if 
__name__ == "__main__": + try: + import xmlrunner + testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=4) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=4) diff --git a/python/delta/tests/test_exceptions.py b/python/delta/tests/test_exceptions.py new file mode 100644 index 00000000000..12981ae8534 --- /dev/null +++ b/python/delta/tests/test_exceptions.py @@ -0,0 +1,92 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Any, Callable, TYPE_CHECKING +import unittest + +import delta.exceptions as exceptions + +from delta.testing.utils import DeltaTestCase +from pyspark.sql.utils import AnalysisException, IllegalArgumentException + +if TYPE_CHECKING: + from py4j.java_gateway import JVMView # type: ignore[import] + + +class DeltaExceptionTests(DeltaTestCase): + + def setUp(self) -> None: + super(DeltaExceptionTests, self).setUp() + self.jvm: "JVMView" = self.spark.sparkContext._jvm # type: ignore[attr-defined] + + def _raise_concurrent_exception(self, exception_type: Callable[[Any], Any]) -> None: + e = exception_type("") + self.jvm.scala.util.Failure(e).get() + + def test_capture_concurrent_write_exception(self) -> None: + e = self.jvm.io.delta.exceptions.ConcurrentWriteException + self.assertRaises(exceptions.ConcurrentWriteException, + lambda: self._raise_concurrent_exception(e)) + + def test_capture_metadata_changed_exception(self) -> None: + e = self.jvm.io.delta.exceptions.MetadataChangedException + self.assertRaises(exceptions.MetadataChangedException, + lambda: self._raise_concurrent_exception(e)) + + def test_capture_protocol_changed_exception(self) -> None: + e = self.jvm.io.delta.exceptions.ProtocolChangedException + self.assertRaises(exceptions.ProtocolChangedException, + lambda: self._raise_concurrent_exception(e)) + + def test_capture_concurrent_append_exception(self) -> None: + e = self.jvm.io.delta.exceptions.ConcurrentAppendException + self.assertRaises(exceptions.ConcurrentAppendException, + lambda: self._raise_concurrent_exception(e)) + + def test_capture_concurrent_delete_read_exception(self) -> None: + e = self.jvm.io.delta.exceptions.ConcurrentDeleteReadException + self.assertRaises(exceptions.ConcurrentDeleteReadException, + lambda: self._raise_concurrent_exception(e)) + + def test_capture_concurrent_delete_delete_exception(self) -> None: + e = self.jvm.io.delta.exceptions.ConcurrentDeleteDeleteException + self.assertRaises(exceptions.ConcurrentDeleteDeleteException, + lambda: self._raise_concurrent_exception(e)) + + def test_capture_concurrent_transaction_exception(self) -> None: + e = self.jvm.io.delta.exceptions.ConcurrentTransactionException + self.assertRaises(exceptions.ConcurrentTransactionException, + lambda: self._raise_concurrent_exception(e)) + + def test_capture_delta_analysis_exception(self) -> None: + e = self.jvm.org.apache.spark.sql.delta.DeltaErrors.invalidColumnName + self.assertRaises(AnalysisException, + 
lambda: self.jvm.scala.util.Failure(e("invalid")).get()) + + def test_capture_delta_illegal_argument_exception(self) -> None: + e = self.jvm.org.apache.spark.sql.delta.DeltaErrors + method = e.throwDeltaIllegalArgumentException + self.assertRaises(IllegalArgumentException, + lambda: self.jvm.scala.util.Failure(method()).get()) + + +if __name__ == "__main__": + try: + import xmlrunner + testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=4) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=4) diff --git a/python/delta/tests/test_pip_utils.py b/python/delta/tests/test_pip_utils.py new file mode 100644 index 00000000000..a6303c9f0ba --- /dev/null +++ b/python/delta/tests/test_pip_utils.py @@ -0,0 +1,94 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import shutil +import tempfile +import unittest +from typing import List, Optional + +from pyspark.sql import SparkSession +import delta + + +class PipUtilsTests(unittest.TestCase): + + def setUp(self) -> None: + builder = SparkSession.builder \ + .appName("pip-test") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog") + + self.spark = delta.configure_spark_with_delta_pip(builder).getOrCreate() + self.tempPath = tempfile.mkdtemp() + self.tempFile = os.path.join(self.tempPath, "tempFile") + + def tearDown(self) -> None: + self.spark.stop() + shutil.rmtree(self.tempPath) + + def test_maven_jar_loaded(self) -> None: + # Read and write Delta table to check that the maven jars are loaded and Delta works. 
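+ # These calls fail if configure_spark_with_delta_pip did not resolve the Delta Maven artifacts.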
+ self.spark.range(0, 5).write.format("delta").save(self.tempFile) + self.spark.read.format("delta").load(self.tempFile) + + +class PipUtilsCustomJarsTests(unittest.TestCase): + + def setUp(self) -> None: + builder = SparkSession.builder \ + .appName("pip-test") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog") + + import importlib_metadata + scala_version = "2.12" + delta_version = importlib_metadata.version("delta_spark") + maven_artifacts = [f"io.delta:delta-spark_{scala_version}:{delta_version}"] + # configure extra packages + self.spark = delta.configure_spark_with_delta_pip(builder, maven_artifacts).getOrCreate() + + self.tempPath = tempfile.mkdtemp() + self.tempFile = os.path.join(self.tempPath, "tempFile") + + def tearDown(self) -> None: + self.spark.stop() + shutil.rmtree(self.tempPath) + + def test_maven_jar_loaded(self) -> None: + packagesConf: Optional[str] = self.spark.conf.get("spark.jars.packages") + assert packagesConf is not None # mypi needs this to assign type str from Optional[str] + packages: str = packagesConf + packagesList: List[str] = packages.split(",") + # Check `spark.jars.packages` contains `extra_packages` + self.assertTrue(len(packagesList) == 2, "There should only be 2 packages") + + # Read and write Delta table to check that the maven jars are loaded and Delta works. + self.spark.range(0, 5).write.format("delta").save(self.tempFile) + self.spark.read.format("delta").load(self.tempFile) + + +if __name__ == "__main__": + try: + import xmlrunner + testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=4) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=4) diff --git a/python/delta/tests/test_sql.py b/python/delta/tests/test_sql.py new file mode 100644 index 00000000000..490e81c4795 --- /dev/null +++ b/python/delta/tests/test_sql.py @@ -0,0 +1,177 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# mypy: disable-error-code="union-attr" +# mypy: disable-error-code="attr-defined" + +import unittest +import tempfile +import shutil +import os +from typing import List, Any + +from pyspark.sql import DataFrame + +from delta.testing.utils import DeltaTestCase + + +class DeltaSqlTests(DeltaTestCase): + + def setUp(self) -> None: + super(DeltaSqlTests, self).setUp() + # Create a simple Delta table inside the temp directory to test SQL commands. 
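+ # Writing twice creates two table versions, which the history and vacuum tests below rely on.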
+ df = self.spark.createDataFrame([('a', 1), ('b', 2), ('c', 3)], ["key", "value"]) + df.write.format("delta").save(self.tempFile) + df.write.mode("overwrite").format("delta").save(self.tempFile) + + def test_vacuum(self) -> None: + self.spark.sql("set spark.databricks.delta.retentionDurationCheck.enabled = false") + try: + deleted_files = self.spark.sql("VACUUM '%s' RETAIN 0 HOURS" % self.tempFile).collect() + # Verify `VACUUM` did delete some data files + self.assertTrue(self.tempFile in deleted_files[0][0]) + finally: + self.spark.sql("set spark.databricks.delta.retentionDurationCheck.enabled = true") + + def test_describe_history(self) -> None: + self.assertGreater( + len(self.spark.sql("desc history delta.`%s`" % (self.tempFile)).collect()), 0) + + def test_generate(self) -> None: + # create a delta table + temp_path = tempfile.mkdtemp() + temp_file = os.path.join(temp_path, "delta_sql_test_table") + numFiles = 10 + self.spark.range(100).repartition(numFiles).write.format("delta").save(temp_file) + + # Generate the symlink format manifest + self.spark.sql("GENERATE SYMLINK_FORMAT_MANIFEST FOR TABLE delta.`{}`" + .format(temp_file)) + + # check the contents of the manifest + # NOTE: this is not a correctness test, we are testing correctness in the scala suite + manifestPath = os.path.join(temp_file, + os.path.join("_symlink_format_manifest", "manifest")) + files = [] + with open(manifestPath) as f: + files = f.readlines() + + shutil.rmtree(temp_path) + # the number of files we write should equal the number of lines in the manifest + self.assertEqual(len(files), numFiles) + + def test_convert(self) -> None: + df = self.spark.createDataFrame([('a', 1), ('b', 2), ('c', 3)], ["key", "value"]) + temp_path2 = tempfile.mkdtemp() + temp_path3 = tempfile.mkdtemp() + temp_file2 = os.path.join(temp_path2, "delta_sql_test2") + temp_file3 = os.path.join(temp_path3, "delta_sql_test3") + + df.write.format("parquet").save(temp_file2) + self.spark.sql("CONVERT TO DELTA parquet.`" + temp_file2 + "`") + self.__checkAnswer( + self.spark.read.format("delta").load(temp_file2), + [('a', 1), ('b', 2), ('c', 3)]) + + # test if convert to delta with partition columns work + df.write.partitionBy("value").format("parquet").save(temp_file3) + self.spark.sql("CONVERT TO DELTA parquet.`" + temp_file3 + "` PARTITIONED BY (value INT)") + self.__checkAnswer( + self.spark.read.format("delta").load(temp_file3), + [('a', 1), ('b', 2), ('c', 3)]) + + shutil.rmtree(temp_path2) + shutil.rmtree(temp_path3) + + def test_ddls(self) -> None: + table = "deltaTable" + table2 = "deltaTable2" + with self.table(table, table2): + def read_table() -> DataFrame: + return self.spark.sql(f"SELECT * FROM {table}") + + self.spark.sql(f"DROP TABLE IF EXISTS {table}") + self.spark.sql(f"DROP TABLE IF EXISTS {table2}") + + self.spark.sql(f"CREATE TABLE {table}(a LONG, b String NOT NULL) USING delta") + self.assertEqual(read_table().count(), 0) + self.spark.sql(f"CREATE TABLE {table}_part(a LONG, b String NOT NULL)" + " USING delta PARTITIONED BY (a)") + + # Unpartitioned table does not include partitioning information in Spark 3.4+ + answer = [("a", "bigint"), ("b", "string")] + self.__checkAnswer( + self.spark.sql(f"DESCRIBE TABLE {table}").select("col_name", "data_type"), + answer, + schema=["col_name", "data_type"]) + + answer_part = [("a", "bigint"), ("b", "string"), ("# Partition Information", ""), + ("# col_name", "data_type"), ("a", "bigint")] + self.__checkAnswer( + self.spark.sql(f"DESCRIBE TABLE {table}_part").select("col_name", 
"data_type"), + answer_part, + schema=["col_name", "data_type"]) + + self.spark.sql(f"ALTER TABLE {table} CHANGE COLUMN a a LONG AFTER b") + self.assertSequenceEqual(["b", "a"], [f.name for f in read_table().schema.fields]) + + self.spark.sql(f"ALTER TABLE {table} ALTER COLUMN b DROP NOT NULL") + self.assertIn(True, [f.nullable for f in read_table().schema.fields if f.name == "b"]) + + self.spark.sql(f"ALTER TABLE {table} ADD COLUMNS (x LONG)") + self.assertIn("x", [f.name for f in read_table().schema.fields]) + + self.spark.sql(f"ALTER TABLE {table} SET TBLPROPERTIES ('k' = 'v')") + self.__checkAnswer(self.spark.sql(f"SHOW TBLPROPERTIES {table}"), + [('k', 'v'), + ('delta.minReaderVersion', '1'), + ('delta.minWriterVersion', '2')]) + + self.spark.sql(f"ALTER TABLE {table} UNSET TBLPROPERTIES ('k')") + self.__checkAnswer(self.spark.sql(f"SHOW TBLPROPERTIES {table}"), + [('delta.minReaderVersion', '1'), + ('delta.minWriterVersion', '2')]) + + self.spark.sql(f"ALTER TABLE {table} RENAME TO {table2}") + self.assertEqual(self.spark.sql(f"SELECT * FROM {table2}").count(), 0) + + test_dir = os.path.join(tempfile.mkdtemp(), table2) + self.spark.createDataFrame([("", 0, 0)], ["b", "a", "x"]) \ + .write.format("delta").save(test_dir) + + self.spark.sql(f"ALTER TABLE {table2} SET LOCATION '{test_dir}'") + self.assertEqual(self.spark.sql(f"SELECT * FROM {table2}").count(), 1) + + def __checkAnswer(self, df: DataFrame, + expectedAnswer: List[Any], + schema: List[str] = ["key", "value"]) -> None: + if not expectedAnswer: + self.assertEqual(df.count(), 0) + return + expectedDF = self.spark.createDataFrame(expectedAnswer, schema) + self.assertEqual(df.count(), expectedDF.count()) + self.assertEqual(len(df.columns), len(expectedDF.columns)) + self.assertEqual([], df.subtract(expectedDF).take(1)) + self.assertEqual([], expectedDF.subtract(df).take(1)) + + +if __name__ == "__main__": + try: + import xmlrunner + testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=4) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=4) diff --git a/python/mypy.ini b/python/mypy.ini new file mode 100644 index 00000000000..b57444c2f8c --- /dev/null +++ b/python/mypy.ini @@ -0,0 +1,24 @@ +; +; Copyright (2021) The Delta Lake Project Authors. +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; + +[mypy] +strict_optional = True +no_implicit_optional = True +disallow_untyped_defs = True +show_error_codes = True + +[mypy-xmlrunner.*] +ignore_missing_imports = True diff --git a/python/run-tests.py b/python/run-tests.py new file mode 100755 index 00000000000..5d3e3334ce1 --- /dev/null +++ b/python/run-tests.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 + +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import subprocess +import shutil +from os import path + + +def test(root_dir, package): + # Run all of the test under test/python directory, each of them + # has main entry point to execute, which is python's unittest testing + # framework. + python_root_dir = path.join(root_dir, "python") + test_dir = path.join(python_root_dir, path.join("delta", "tests")) + test_files = [os.path.join(test_dir, f) for f in os.listdir(test_dir) + if os.path.isfile(os.path.join(test_dir, f)) and + f.endswith(".py") and not f.startswith("_")] + extra_class_path = path.join(python_root_dir, path.join("delta", "testing")) + + for test_file in test_files: + try: + cmd = ["spark-submit", + "--driver-class-path=%s" % extra_class_path, + "--packages", package, test_file] + print("Running tests in %s\n=============" % test_file) + print("Command: %s" % str(cmd)) + run_cmd(cmd, stream_output=True) + except: + print("Failed tests in %s" % (test_file)) + raise + + +def delete_if_exists(path): + # if path exists, delete it. + if os.path.exists(path): + shutil.rmtree(path) + print("Deleted %s " % path) + + +def prepare(root_dir): + print("##### Preparing python tests & building packages #####") + # Build package with python files in it + sbt_path = path.join(root_dir, path.join("build", "sbt")) + delete_if_exists(os.path.expanduser("~/.ivy2/cache/io.delta")) + delete_if_exists(os.path.expanduser("~/.m2/repository/io/delta/")) + run_cmd([sbt_path, "clean", "publishM2"], stream_output=True) + + # Get current release which is required to be loaded + version = '0.0.0' + with open(os.path.join(root_dir, "version.sbt")) as fd: + version = fd.readline().split('"')[1] + package = "io.delta:delta-spark_2.12:" + version + return package + + +def run_cmd(cmd, throw_on_error=True, env=None, stream_output=False, print_cmd=True, **kwargs): + if print_cmd: + print("### Executing cmd: " + " ".join(cmd)) + + cmd_env = os.environ.copy() + if env: + cmd_env.update(env) + + if stream_output: + child = subprocess.Popen(cmd, env=cmd_env, **kwargs) + exit_code = child.wait() + if throw_on_error and exit_code != 0: + raise Exception("Non-zero exitcode: %s" % (exit_code)) + return exit_code + else: + child = subprocess.Popen( + cmd, + env=cmd_env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **kwargs) + (stdout, stderr) = child.communicate() + exit_code = child.wait() + if throw_on_error and exit_code != 0: + raise Exception( + "Non-zero exitcode: %s\n\nSTDOUT:\n%s\n\nSTDERR:%s" % + (exit_code, stdout, stderr)) + return (exit_code, stdout, stderr) + + +def run_python_style_checks(root_dir): + print("##### Running python style tests #####") + run_cmd([os.path.join(root_dir, "dev", "lint-python")], stream_output=True) + + +def run_mypy_tests(root_dir): + print("##### Running mypy tests #####") + python_package_root = path.join(root_dir, path.join("python", "delta")) + mypy_config_path = path.join(root_dir, path.join("python", "mypy.ini")) + run_cmd([ + "mypy", + "--config-file", mypy_config_path, + python_package_root + ], stream_output=True) + + +def run_pypi_packaging_tests(root_dir): + """ + We want to test 
that the delta-spark PyPi artifact for this delta version can be generated, + locally installed, and used in python tests. + + We will uninstall any existing local delta-spark PyPi artifact. + We will generate a new local delta-spark PyPi artifact. + We will install it into the local PyPi repository. + And then we will run relevant python tests to ensure everything works as expected. + """ + print("##### Running PyPi Packaging tests #####") + + version = '0.0.0' + with open(os.path.join(root_dir, "version.sbt")) as fd: + version = fd.readline().split('"')[1] + + # uninstall packages if they exist + run_cmd(["pip3", "uninstall", "--yes", "delta-spark"], stream_output=True) + + wheel_dist_dir = path.join(root_dir, "dist") + + print("### Deleting `dist` directory if it exists") + delete_if_exists(wheel_dist_dir) + + # generate artifacts + run_cmd( + ["python3", "setup.py", "bdist_wheel"], + stream_output=True, + stderr=open('/dev/null', 'w')) + + run_cmd(["python3", "setup.py", "sdist"], stream_output=True) + + # we need, for example, 1.1.0_SNAPSHOT not 1.1.0-SNAPSHOT + version_formatted = version.replace("-", "_") + delta_whl_name = "delta_spark-" + version_formatted + "-py3-none-any.whl" + + # this will install delta-spark-$version + install_whl_cmd = ["pip3", "install", path.join(wheel_dist_dir, delta_whl_name)] + run_cmd(install_whl_cmd, stream_output=True) + + # run test python file directly with python and not with spark-submit + test_file = path.join(root_dir, path.join("examples", "python", "using_with_pip.py")) + test_cmd = ["python3", test_file] + try: + print("### Starting tests...") + run_cmd(test_cmd, stream_output=True) + except: + print("Failed pip installation tests in %s" % (test_file)) + raise + + +if __name__ == "__main__": + print("##### Running python tests #####") + root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + package = prepare(root_dir) + + run_python_style_checks(root_dir) + run_mypy_tests(root_dir) + run_pypi_packaging_tests(root_dir) + test(root_dir, package) diff --git a/run-integration-tests.py b/run-integration-tests.py new file mode 100755 index 00000000000..c58374d6d41 --- /dev/null +++ b/run-integration-tests.py @@ -0,0 +1,492 @@ +#!/usr/bin/env python3 + +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import subprocess +from os import path +import shutil +import argparse + + +def delete_if_exists(path): + # if path exists, delete it. 
+ if os.path.exists(path): + shutil.rmtree(path) + print("Deleted %s " % path) + + +def run_scala_integration_tests(root_dir, version, test_name, extra_maven_repo, scala_version, + use_local): + print("\n\n##### Running Scala tests on delta version %s and scala version %s #####" + % (str(version), scala_version)) + clear_artifact_cache() + if use_local: + run_cmd(["build/sbt", "publishM2"]) + + test_dir = path.join(root_dir, "examples", "scala") + test_src_dir = path.join(test_dir, "src", "main", "scala", "example") + test_classes = [f.replace(".scala", "") for f in os.listdir(test_src_dir) + if f.endswith(".scala") and not f.startswith("_")] + env = {"DELTA_VERSION": str(version), "SCALA_VERSION": scala_version} + if extra_maven_repo: + env["EXTRA_MAVEN_REPO"] = extra_maven_repo + with WorkingDirectory(test_dir): + for test_class in test_classes: + if test_name is not None and test_name not in test_class: + print("\nSkipping Scala tests in %s\n=====================" % test_class) + continue + + try: + cmd = ["build/sbt", "runMain example.%s" % test_class] + print("\nRunning Scala tests in %s\n=====================" % test_class) + print("Command: %s" % " ".join(cmd)) + run_cmd(cmd, stream_output=True, env=env) + except: + print("Failed Scala tests in %s" % (test_class)) + raise + + +def get_artifact_name(version): + """ + version: string representation, e.g. 2.3.0 or 3.0.0.rc1 + return: either "core" or "spark" + """ + return "spark" if int(version[0]) >= 3 else "core" + + +def run_python_integration_tests(root_dir, version, test_name, extra_maven_repo, use_local): + print("\n\n##### Running Python tests on version %s #####" % str(version)) + clear_artifact_cache() + if use_local: + run_cmd(["build/sbt", "publishM2"]) + + test_dir = path.join(root_dir, path.join("examples", "python")) + files_to_skip = {"using_with_pip.py", "missing_delta_storage_jar.py", "image_storage.py"} + + test_files = [path.join(test_dir, f) for f in os.listdir(test_dir) + if path.isfile(path.join(test_dir, f)) and + f.endswith(".py") and not f.startswith("_") and + f not in files_to_skip] + + python_root_dir = path.join(root_dir, "python") + extra_class_path = path.join(python_root_dir, path.join("delta", "testing")) + package = "io.delta:delta-%s_2.12:%s" % (get_artifact_name(version), version) + + repo = extra_maven_repo if extra_maven_repo else "" + + for test_file in test_files: + if test_name is not None and test_name not in test_file: + print("\nSkipping Python tests in %s\n=====================" % test_file) + continue + try: + cmd = ["spark-submit", + "--driver-class-path=%s" % extra_class_path, # for less verbose logging + "--packages", package, + "--repositories", repo, test_file] + print("\nRunning Python tests in %s\n=============" % test_file) + print("Command: %s" % " ".join(cmd)) + run_cmd(cmd, stream_output=True) + except: + print("Failed Python tests in %s" % (test_file)) + raise + + +def test_missing_delta_storage_jar(root_dir, version, use_local): + if not use_local: + print("Skipping 'missing_delta_storage_jar' - test should only run in local mode") + return + + print("\n\n##### Running 'missing_delta_storage_jar' on version %s #####" % str(version)) + + clear_artifact_cache() + + run_cmd(["build/sbt", "publishM2"]) + + print("Clearing delta-storage artifact") + delete_if_exists(os.path.expanduser("~/.m2/repository/io/delta/delta-storage")) + delete_if_exists(os.path.expanduser("~/.ivy2/cache/io.delta/delta-storage")) + 
delete_if_exists(os.path.expanduser("~/.ivy2/local/io.delta/delta-storage")) + + python_root_dir = path.join(root_dir, "python") + extra_class_path = path.join(python_root_dir, path.join("delta", "testing")) + test_file = path.join(root_dir, path.join("examples", "python", "missing_delta_storage_jar.py")) + artifact_name = get_artifact_name(version) + jar = path.join( + os.path.expanduser("~/.m2/repository/io/delta/"), + "delta-%s_2.12" % artifact_name, + version, + "delta-%s_2.12-%s.jar" % (artifact_name, str(version))) + + try: + cmd = ["spark-submit", + "--driver-class-path=%s" % extra_class_path, # for less verbose logging + "--jars", jar, test_file] + print("\nRunning Python tests in %s\n=============" % test_file) + print("Command: %s" % " ".join(cmd)) + run_cmd(cmd, stream_output=True) + except: + print("Failed Python tests in %s" % (test_file)) + raise + + +def run_dynamodb_logstore_integration_tests(root_dir, version, test_name, extra_maven_repo, + extra_packages, conf, use_local): + print( + "\n\n##### Running DynamoDB logstore integration tests on version %s #####" % str(version) + ) + clear_artifact_cache() + if use_local: + run_cmd(["build/sbt", "publishM2"]) + + test_dir = path.join(root_dir, path.join("storage-s3-dynamodb", "integration_tests")) + test_files = [path.join(test_dir, f) for f in os.listdir(test_dir) + if path.isfile(path.join(test_dir, f)) and + f.endswith(".py") and not f.startswith("_")] + + python_root_dir = path.join(root_dir, "python") + extra_class_path = path.join(python_root_dir, path.join("delta", "testing")) + packages = "io.delta:delta-%s_2.12:%s" % (get_artifact_name(version), version) + packages += "," + "io.delta:delta-storage-s3-dynamodb:" + version + if extra_packages: + packages += "," + extra_packages + + conf_args = [] + if conf: + for i in conf: + conf_args.extend(["--conf", i]) + + repo_args = ["--repositories", extra_maven_repo] if extra_maven_repo else [] + + for test_file in test_files: + if test_name is not None and test_name not in test_file: + print("\nSkipping DynamoDB logstore integration tests in %s\n============" % test_file) + continue + try: + cmd = ["spark-submit", + "--driver-class-path=%s" % extra_class_path, # for less verbose logging + "--packages", packages] + repo_args + conf_args + [test_file] + print("\nRunning DynamoDB logstore integration tests in %s\n=============" % test_file) + print("Command: %s" % " ".join(cmd)) + run_cmd(cmd, stream_output=True) + except: + print("Failed DynamoDB logstore integration tests tests in %s" % (test_file)) + raise + + +def run_s3_log_store_util_integration_tests(): + print("\n\n##### Running S3LogStoreUtil tests #####") + + env = { "S3_LOG_STORE_UTIL_TEST_ENABLED": "true" } + assert os.environ.get("S3_LOG_STORE_UTIL_TEST_BUCKET") is not None, "S3_LOG_STORE_UTIL_TEST_BUCKET must be set" + assert os.environ.get("S3_LOG_STORE_UTIL_TEST_RUN_UID") is not None, "S3_LOG_STORE_UTIL_TEST_RUN_UID must be set" + + try: + cmd = ["build/sbt", "project storage", "testOnly -- -n IntegrationTest"] + print("\nRunning IntegrationTests of storage\n=====================") + print("Command: %s" % " ".join(cmd)) + run_cmd(cmd, stream_output=True, env=env) + except: + print("Failed IntegrationTests") + raise + + +def run_iceberg_integration_tests(root_dir, version, spark_version, iceberg_version, extra_maven_repo, use_local): + print("\n\n##### Running Iceberg tests on version %s #####" % str(version)) + clear_artifact_cache() + if use_local: + run_cmd(["build/sbt", "publishM2"]) + + test_dir = 
path.join(root_dir, path.join("iceberg", "integration_tests")) + + # Add more Iceberg tests here if needed ... + test_files_names = ["iceberg_converter.py"] + test_files = [path.join(test_dir, f) for f in test_files_names] + + python_root_dir = path.join(root_dir, "python") + extra_class_path = path.join(python_root_dir, path.join("delta", "testing")) + package = ','.join([ + "io.delta:delta-%s_2.12:%s" % (get_artifact_name(version), version), + "io.delta:delta-iceberg_2.12:" + version, + "org.apache.iceberg:iceberg-spark-runtime-{}_2.12:{}".format(spark_version, iceberg_version)]) + + repo = extra_maven_repo if extra_maven_repo else "" + + for test_file in test_files: + try: + cmd = ["spark-submit", + "--driver-class-path=%s" % extra_class_path, # for less verbose logging + "--packages", package, + "--repositories", repo, test_file] + print("\nRunning Iceberg tests in %s\n=============" % test_file) + print("Command: %s" % " ".join(cmd)) + run_cmd(cmd, stream_output=True) + except: + print("Failed Iceberg tests in %s" % (test_file)) + raise + + +def run_pip_installation_tests(root_dir, version, use_testpypi, use_localpypi, extra_maven_repo): + print("\n\n##### Running pip installation tests on version %s #####" % str(version)) + clear_artifact_cache() + delta_pip_name = "delta-spark" + # uninstall packages if they exist + run_cmd(["pip", "uninstall", "--yes", delta_pip_name, "pyspark"], stream_output=True) + + # install packages + delta_pip_name_with_version = "%s==%s" % (delta_pip_name, str(version)) + if use_testpypi: + install_cmd = ["pip", "install", + "--extra-index-url", "https://test.pypi.org/simple/", + delta_pip_name_with_version] + elif use_localpypi: + pip_wheel_file_name = "%s-%s-py3-none-any.whl" % \ + (delta_pip_name.replace("-", "_"), str(version)) + pip_wheel_file_path = os.path.join(use_localpypi, pip_wheel_file_name) + install_cmd = ["pip", "install", pip_wheel_file_path] + else: + install_cmd = ["pip", "install", delta_pip_name_with_version] + print("pip install command: %s" % str(install_cmd)) + run_cmd(install_cmd, stream_output=True) + + # run test python file directly with python and not with spark-submit + env = {} + if extra_maven_repo: + env["EXTRA_MAVEN_REPO"] = extra_maven_repo + tests = ["image_storage.py", "using_with_pip.py"] + for test in tests: + test_file = path.join(root_dir, path.join("examples", "python", test)) + print("\nRunning Python tests in %s\n=============" % test_file) + test_cmd = ["python3", test_file] + print("Test command: %s" % str(test_cmd)) + try: + run_cmd(test_cmd, stream_output=True, env=env) + except: + print("Failed pip installation tests in %s" % (test_file)) + raise + + +def clear_artifact_cache(): + print("Clearing Delta artifacts from ivy2 and mvn cache") + delete_if_exists(os.path.expanduser("~/.ivy2/cache/io.delta")) + delete_if_exists(os.path.expanduser("~/.ivy2/local/io.delta")) + delete_if_exists(os.path.expanduser("~/.m2/repository/io/delta/")) + + +def run_cmd(cmd, throw_on_error=True, env=None, stream_output=False, **kwargs): + cmd_env = os.environ.copy() + if env: + cmd_env.update(env) + + if stream_output: + child = subprocess.Popen(cmd, env=cmd_env, **kwargs) + exit_code = child.wait() + if throw_on_error and exit_code != 0: + raise Exception("Non-zero exitcode: %s" % (exit_code)) + return exit_code + else: + child = subprocess.Popen( + cmd, + env=cmd_env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **kwargs) + (stdout, stderr) = child.communicate() + exit_code = child.wait() + if throw_on_error and 
exit_code != 0: + raise Exception( + "Non-zero exitcode: %s\n\nSTDOUT:\n%s\n\nSTDERR:%s" % + (exit_code, stdout, stderr)) + return (exit_code, stdout, stderr) + + +# pylint: disable=too-few-public-methods +class WorkingDirectory(object): + def __init__(self, working_directory): + self.working_directory = working_directory + self.old_workdir = os.getcwd() + + def __enter__(self): + os.chdir(self.working_directory) + + def __exit__(self, tpe, value, traceback): + os.chdir(self.old_workdir) + + +if __name__ == "__main__": + """ + Script to run integration tests which are located in the examples directory. + call this by running "python run-integration-tests.py" + additionally the version can be provided as a command line argument. + " + """ + + # get the version of the package + root_dir = path.dirname(__file__) + with open(path.join(root_dir, "version.sbt")) as fd: + default_version = fd.readline().split('"')[1] + + parser = argparse.ArgumentParser() + parser.add_argument( + "--version", + required=False, + default=default_version, + help="Delta version to use to run the integration tests") + parser.add_argument( + "--python-only", + required=False, + default=False, + action="store_true", + help="Run only Python tests") + parser.add_argument( + "--scala-only", + required=False, + default=False, + action="store_true", + help="Run only Scala tests") + parser.add_argument( + "--s3-log-store-util-only", + required=False, + default=False, + action="store_true", + help="Run only S3LogStoreUtil tests") + parser.add_argument( + "--scala-version", + required=False, + default="2.12", + help="Specify scala version for scala tests only, valid values are '2.12' and '2.13'") + parser.add_argument( + "--pip-only", + required=False, + default=False, + action="store_true", + help="Run only pip installation tests") + parser.add_argument( + "--no-pip", + required=False, + default=False, + action="store_true", + help="Do not run pip installation tests") + parser.add_argument( + "--test", + required=False, + default=None, + help="Run a specific test by substring-match with Scala/Python file name") + parser.add_argument( + "--maven-repo", + required=False, + default=None, + help="Additional Maven repo to resolve staged new release artifacts") + parser.add_argument( + "--use-testpypi", + required=False, + default=False, + action="store_true", + help="Use testpypi for testing pip installation") + parser.add_argument( + "--use-localpypiartifact", + required=False, + default=None, + help="Directory path where the downloaded pypi artifacts are present. " + + "It should have two files: e.g. 
delta-spark-3.1.0.tar.gz, delta_spark-3.1.0-py3-none-any.whl") + parser.add_argument( + "--use-local", + required=False, + default=False, + action="store_true", + help="Generate JARs from local source code and use to run tests") + parser.add_argument( + "--run-storage-s3-dynamodb-integration-tests", + required=False, + default=False, + action="store_true", + help="Run the DynamoDB integration tests (and only them)") + parser.add_argument( + "--dbb-packages", + required=False, + default=None, + help="Additional packages required for Dynamodb logstore integration tests") + parser.add_argument( + "--dbb-conf", + required=False, + default=None, + nargs="+", + help="All `--conf` values passed to `spark-submit` for DynamoDB logstore integration tests") + parser.add_argument( + "--run-iceberg-integration-tests", + required=False, + default=False, + action="store_true", + help="Run the Iceberg integration tests (and only them)") + parser.add_argument( + "--iceberg-spark-version", + required=False, + default="3.5", + help="Spark version for the Iceberg library") + parser.add_argument( + "--iceberg-lib-version", + required=False, + default="1.4.0", + help="Iceberg Spark Runtime library version") + + args = parser.parse_args() + + if args.scala_version not in ["2.12", "2.13"]: + raise Exception("Scala version can only be specified as --scala-version 2.12 or " + + "--scala-version 2.13") + + if args.pip_only and args.no_pip: + raise Exception("Cannot specify both --pip-only and --no-pip") + + if args.use_local and (args.version != default_version): + raise Exception("Cannot specify --use-local with a --version different than in version.sbt") + + run_python = not args.scala_only and not args.pip_only + run_scala = not args.python_only and not args.pip_only + run_pip = not args.python_only and not args.scala_only and not args.no_pip + + if args.run_iceberg_integration_tests: + run_iceberg_integration_tests( + root_dir, args.version, + args.iceberg_spark_version, args.iceberg_lib_version, args.maven_repo, args.use_local) + quit() + + if args.run_storage_s3_dynamodb_integration_tests: + run_dynamodb_logstore_integration_tests(root_dir, args.version, args.test, args.maven_repo, + args.dbb_packages, args.dbb_conf, args.use_local) + quit() + + if args.s3_log_store_util_only: + run_s3_log_store_util_integration_tests() + quit() + + if run_scala: + run_scala_integration_tests(root_dir, args.version, args.test, args.maven_repo, + args.scala_version, args.use_local) + + if run_python: + run_python_integration_tests(root_dir, args.version, args.test, args.maven_repo, + args.use_local) + + test_missing_delta_storage_jar(root_dir, args.version, args.use_local) + + if run_pip: + if args.use_testpypi and args.use_localpypiartifact is not None: + raise Exception("Cannot specify both --use-testpypi and --use-localpypiartifact.") + + run_pip_installation_tests(root_dir, args.version, args.use_testpypi, + args.use_localpypiartifact, args.maven_repo) diff --git a/run-tests.py b/run-tests.py new file mode 100755 index 00000000000..6c09a4c9b12 --- /dev/null +++ b/run-tests.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python3 + +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
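For reference, the flags above compose in the usual ways; a minimal sketch of two hypothetical invocations of run-integration-tests.py (the staging repo URL and the S3 credential conf values are placeholders, not real endpoints):

    import subprocess

    # Build JARs from the local source tree, then run the Scala and Python integration tests
    # (and, unless --no-pip is given, the pip installation tests).
    subprocess.check_call(["python3", "run-integration-tests.py", "--use-local"])

    # Run only the DynamoDB LogStore integration tests against staged release artifacts.
    subprocess.check_call([
        "python3", "run-integration-tests.py",
        "--run-storage-s3-dynamodb-integration-tests",
        "--maven-repo", "https://staging.example.com/maven",    # placeholder staging repo
        "--dbb-conf",
        "spark.hadoop.fs.s3a.access.key=<access-key>",          # placeholder credentials
        "spark.hadoop.fs.s3a.secret.key=<secret-key>",
    ])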
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import subprocess +import shlex +from os import path +import argparse + +# Define groups of subprojects that can be tested separately from other groups. +# As of now, we have only defined project groups in the SBT build, so these must match +# the group names defined in build.sbt. +valid_project_groups = ["spark", "kernel"] + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--group", + required=False, + default=None, + choices=valid_project_groups, + help="Run tests on a group of SBT projects" + ) + parser.add_argument( + "--coverage", + required=False, + default=False, + action="store_true", + help="Enables test coverage and generates an aggregate report for all subprojects") + return parser.parse_args() + + +def run_sbt_tests(root_dir, test_group, coverage, scala_version=None): + print("##### Running SBT tests #####") + is_running_spark_tests = test_group is None or test_group == "spark" + + sbt_path = path.join(root_dir, path.join("build", "sbt")) + cmd = [sbt_path, "clean"] + + test_cmd = "test" + + if test_group: + # if a test group is specified, then run tests only on that test group + test_cmd = "{}Group/test".format(test_group) + + if coverage: + cmd += ["coverage"] + + if scala_version is None: + # when no scala version is specified, run tests with all scala versions + cmd += ["+ %s" % test_cmd] # build/sbt ... "+ project/test" ... + else: + # when a scala version is specified, run tests with only that scala version + cmd += ["++ %s" % scala_version, test_cmd] # build/sbt ... "++ 2.13.8" "project/test" ...
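To make the command assembly concrete, here is a sketch (not part of the script) of the full list that run_sbt_tests would hand to run_cmd for --group spark --coverage with SCALA_VERSION=2.13.8, once the unidoc, coverage-report and JVM flags appended just below are included:

    # Illustration only; sbt_path is shortened to "build/sbt".
    expected_cmd = [
        "build/sbt", "clean", "coverage",
        "++ 2.13.8", "sparkGroup/test",
        "unidoc",                        # added because the spark group is being tested
        "coverageAggregate", "coverageOff",
        "-v", "-J-XX:+UseG1GC", "-J-Xmx4G",
    ]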
+ + if is_running_spark_tests: + cmd += ["unidoc"] + + if coverage: + cmd += ["coverageAggregate", "coverageOff"] + cmd += ["-v"] # show java options used + + # https://docs.oracle.com/javase/7/docs/technotes/guides/vm/G1.html + # a GC that is optimized for larger multiprocessor machines with large memory + cmd += ["-J-XX:+UseG1GC"] + # 4x the default heap size (set in delta/build.sbt) + cmd += ["-J-Xmx4G"] + run_cmd(cmd, stream_output=True) + +def run_python_tests(root_dir): + print("##### Running Python tests #####") + python_test_script = path.join(root_dir, path.join("python", "run-tests.py")) + print("Calling script %s" % python_test_script) + run_cmd(["python3", python_test_script], stream_output=True) + + +def run_cmd(cmd, throw_on_error=True, env=None, stream_output=False, **kwargs): + if isinstance(cmd, str): + old_cmd = cmd + cmd = shlex.split(cmd) + + cmd_env = os.environ.copy() + if env: + cmd_env.update(env) + print("Running command: " + str(cmd)) + if stream_output: + child = subprocess.Popen(cmd, env=cmd_env, **kwargs) + exit_code = child.wait() + if throw_on_error and exit_code != 0: + raise Exception("Non-zero exitcode: %s" % (exit_code)) + return exit_code + else: + child = subprocess.Popen( + cmd, + env=cmd_env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **kwargs) + (stdout, stderr) = child.communicate() + exit_code = child.wait() + if not isinstance(stdout, str): + # Python 3 produces bytes which need to be converted to str + stdout = stdout.decode("utf-8") + stderr = stderr.decode("utf-8") + if throw_on_error and exit_code != 0: + raise Exception( + "Non-zero exitcode: %s\n\nSTDOUT:\n%s\n\nSTDERR:%s" % + (exit_code, stdout, stderr)) + return (exit_code, stdout, stderr) + + +def pull_or_build_docker_image(root_dir): + """ + This method prepares the docker image for running tests. It uses a hash of the Dockerfile + to generate the image tag/name so that we reuse images until the Dockerfile has changed. + Then it tries to prepare that image by either pulling from a Docker registry + (if configured with environment variable DOCKER_REGISTRY) or by building it from + scratch using the Dockerfile. If pulling from the registry fails, then it will fall back + to building it from scratch, but it will also attempt to push to the registry to + avoid image builds in the future. + """ + + dockerfile_path = os.path.join(root_dir, "Dockerfile") + _, out, _ = run_cmd("md5sum %s" % dockerfile_path) + dockerfile_hash = out.strip().split(" ")[0].strip() + print("Dockerfile hash: %s" % dockerfile_hash) + + test_env_image_tag = "delta_test_env:%s" % dockerfile_hash + print("Test env image: %s" % test_env_image_tag) + + docker_registry = os.getenv("DOCKER_REGISTRY") + print("Docker registry set as " + str(docker_registry)) + + + def build_image(): + print("---\nBuilding image %s ..." % test_env_image_tag) + run_cmd("docker build --tag=%s %s" % (test_env_image_tag, root_dir)) + print("Built image %s" % test_env_image_tag) + + def pull_image(registry_image_tag): + try: + print("---\nPulling image %s ..." % registry_image_tag) + run_cmd("docker pull %s" % registry_image_tag) + run_cmd("docker tag %s %s" % (registry_image_tag, test_env_image_tag)) + print("Pulling image %s succeeded" % registry_image_tag) + return True + except Exception as e: + print("Pulling image %s failed: %s" % (registry_image_tag, repr(e))) + return False + + def push_image(registry_image_tag): + try: + print("---\nPushing image %s ..."
% registry_image_tag) + run_cmd("docker tag %s %s" % (test_env_image_tag, registry_image_tag)) + run_cmd("docker push %s" % registry_image_tag) + print("Pushing image %s succeeded" % registry_image_tag) + return True + except Exception as e: + print("Pushing image %s failed: %s" % (registry_image_tag, repr(e))) + return False + + if docker_registry is not None: + print("Attempting to use the docker registry") + test_env_image_tag_with_registry = docker_registry + "/delta/" + test_env_image_tag + success = pull_image(test_env_image_tag_with_registry) + if not success: + build_image() + push_image(test_env_image_tag_with_registry) + else: + build_image() + return test_env_image_tag + + +def run_tests_in_docker(image_tag, test_group): + """ + Run the necessary tests in a docker container made from the given image. + It starts the container with the delta repo mounted in it, and then + executes this script. + """ + + # Note: Pass only relevant env that the script needs to run in the docker container. + # Do not pass docker related env variable as we want this script to run natively in + # the container and not attempt to recursively another docker container. + envs = "-e JENKINS_URL -e SBT_1_5_5_MIRROR_JAR_URL " + scala_version = os.getenv("SCALA_VERSION") + if scala_version is not None: + envs = envs + "-e SCALA_VERSION=%s " % scala_version + + test_parallelism = os.getenv("TEST_PARALLELISM_COUNT") + if test_parallelism is not None: + envs = envs + "-e TEST_PARALLELISM_COUNT=%s " % test_parallelism + + cwd = os.getcwd() + test_script = os.path.basename(__file__) + + test_script_args = "" + if test_group: + test_script_args += " --group %s" % test_group + + test_run_cmd = "docker run --rm -v %s:%s -w %s %s %s ./%s %s" % ( + cwd, cwd, cwd, envs, image_tag, test_script, test_script_args + ) + run_cmd(test_run_cmd, stream_output=True) + + +if __name__ == "__main__": + root_dir = os.path.dirname(os.path.abspath(__file__)) + args = get_args() + if os.getenv("USE_DOCKER") is not None: + test_env_image_tag = pull_or_build_docker_image(root_dir) + run_tests_in_docker(test_env_image_tag, args.group) + else: + scala_version = os.getenv("SCALA_VERSION") + run_sbt_tests(root_dir, args.group, args.coverage, scala_version) + + # Python tests are run only when spark group of projects are being tested. + is_testing_spark_group = args.group is None or args.group == "spark" + # Python tests are skipped when using Scala 2.13 as PySpark doesn't support it. + is_testing_scala_212 = scala_version is None or scala_version.startswith("2.12") + if is_testing_spark_group and is_testing_scala_212: + run_python_tests(root_dir) diff --git a/scalastyle-config.xml b/scalastyle-config.xml new file mode 100644 index 00000000000..cfc9a903f37 --- /dev/null +++ b/scalastyle-config.xml @@ -0,0 +1,469 @@ + + + + + + + Scalastyle standard configuration + + + + + + + + + + true + + + + + + + + + + + + + + + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW + + + + + + ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW + + + + + + + + + ^FunSuite[A-Za-z]*$ + Tests must extend org.apache.spark.SparkFunSuite instead. 
+ + + + + ^println$ + + + + + spark(.sqlContext)?.sparkContext.hadoopConfiguration + + + + + sessionState.newHadoopConf + + + + + @VisibleForTesting + + + + + Runtime\.getRuntime\.addShutdownHook + + + + + mutable\.SynchronizedBuffer + + + + + Class\.forName + + + + + Await\.result + + + + + Await\.ready + + + + + (\.toUpperCase|\.toLowerCase)(?!(\(|\(Locale.ROOT\))) + + + + + typed[lL]it + + + + + spark(Session)?.implicits._ + + + + + throw new \w+Error\( + + + + + count\(" + + + + + + JavaConversions + Instead of importing implicits in scala.collection.JavaConversions._, import + scala.collection.JavaConverters._ and use .asScala / .asJava methods + + + + org\.apache\.commons\.lang\. + Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead + of Commons Lang 2 (package org.apache.commons.lang.*) + + + + extractOpt + Use jsonOption(x).map(.extract[T]) instead of .extractOpt[T], as the latter + is slower. + + + + + COMMA + + + + + + \)\{ + + + + + (?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*] + Use Javadoc style indentation for multiline comments + + + + case[^\n>]*=>\s*\{ + Omit braces in case clauses. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 800> + + + + + 30 + + + + + 10 + + + + + 50 + + + + + + + + + + + -1,0,1,2,3 + + + diff --git a/setup.py b/setup.py new file mode 100644 index 00000000000..610f4d6e65b --- /dev/null +++ b/setup.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import os +import sys + +from setuptools import setup +from setuptools.command.install import install + + +# delta.io version +def get_version_from_sbt(): + with open("version.sbt") as fp: + version = fp.read().strip() + return version.split('"')[1] + + +VERSION = get_version_from_sbt() + + +class VerifyVersionCommand(install): + """Custom command to verify that the git tag matches our version""" + description = 'verify that the git tag matches our version' + + def run(self): + tag = os.getenv('CIRCLE_TAG') + + if tag != VERSION: + info = "Git tag: {0} does not match the version of this app: {1}".format( + tag, VERSION + ) + sys.exit(info) + + +with open("python/README.md", "r", encoding="utf-8") as fh: + long_description = fh.read() + +setup( + name="delta-spark", + version=VERSION, + description="Python APIs for using Delta Lake with Apache Spark", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/delta-io/delta/", + project_urls={ + 'Source': 'https://github.com/delta-io/delta', + 'Documentation': 'https://docs.delta.io/latest/index.html', + 'Issues': 'https://github.com/delta-io/delta/issues' + }, + author="The Delta Lake Project Authors", + author_email="delta-users@googlegroups.com", + license="Apache-2.0", + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Topic :: Software Development :: Libraries :: Python Modules", + "Programming Language :: Python :: 3", + "Typing :: Typed", + ], + keywords='delta.io', + package_dir={'': 'python'}, + packages=['delta'], + package_data={ + 'delta': ['py.typed'], + }, + install_requires=[ + 'pyspark>=3.5.0,<3.6.0', + 'importlib_metadata>=1.0.0', + ], + python_requires='>=3.6', + cmdclass={ + 'verify': VerifyVersionCommand, + } +) diff --git a/sharing/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister 
b/sharing/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister new file mode 100644 index 00000000000..c81a708f931 --- /dev/null +++ b/sharing/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -0,0 +1 @@ +io.delta.sharing.spark.DeltaSharingDataSource \ No newline at end of file diff --git a/sharing/src/main/scala/io/delta/sharing/spark/DeltaFormatSharingLimitPushDown.scala b/sharing/src/main/scala/io/delta/sharing/spark/DeltaFormatSharingLimitPushDown.scala new file mode 100644 index 00000000000..589df97057a --- /dev/null +++ b/sharing/src/main/scala/io/delta/sharing/spark/DeltaFormatSharingLimitPushDown.scala @@ -0,0 +1,53 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sharing.spark + +import io.delta.sharing.client.util.ConfUtils + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.IntegerLiteral +import org.apache.spark.sql.catalyst.plans.logical.{LocalLimit, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} + +// A spark rule that applies limit pushdown to DeltaSharingFileIndex, when the config is enabled. +// To allow only fetching needed files from delta sharing server. +object DeltaFormatSharingLimitPushDown extends Rule[LogicalPlan] { + + def setup(spark: SparkSession): Unit = synchronized { + if (!spark.experimental.extraOptimizations.contains(DeltaFormatSharingLimitPushDown)) { + spark.experimental.extraOptimizations ++= Seq(DeltaFormatSharingLimitPushDown) + } + } + + def apply(p: LogicalPlan): LogicalPlan = { + p transform { + case localLimit @ LocalLimit( + literalExpr @ IntegerLiteral(limit), + l @ LogicalRelation( + r @ HadoopFsRelation(remoteIndex: DeltaSharingFileIndex, _, _, _, _, _), + _, + _, + _ + ) + ) if (ConfUtils.limitPushdownEnabled(p.conf) && remoteIndex.limitHint.isEmpty) => + val spark = SparkSession.active + val newRel = r.copy(location = remoteIndex.copy(limitHint = Some(limit)))(spark) + LocalLimit(literalExpr, l.copy(relation = newRel)) + } + } +} diff --git a/sharing/src/main/scala/io/delta/sharing/spark/DeltaFormatSharingSource.scala b/sharing/src/main/scala/io/delta/sharing/spark/DeltaFormatSharingSource.scala new file mode 100644 index 00000000000..76e688f036b --- /dev/null +++ b/sharing/src/main/scala/io/delta/sharing/spark/DeltaFormatSharingSource.scala @@ -0,0 +1,548 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
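A usage-level sketch of the limit pushdown rule above, in PySpark; the profile path and table coordinates are placeholders, an active SparkSession with the connector on the classpath is assumed, and the config key that enables the pushdown lives in ConfUtils rather than being shown here:

    # Hypothetical read of a shared table. With limit pushdown enabled,
    # DeltaFormatSharingLimitPushDown rewrites LocalLimit(10) into a limitHint on the
    # DeltaSharingFileIndex, so only the files needed to satisfy the limit are fetched.
    profile_path = "/path/to/profile.share#my_share.my_schema.my_table"  # placeholder
    df = (spark.read.format("deltaSharing")
          .option("responseFormat", "delta")
          .load(profile_path)
          .limit(10))
    df.show()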
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sharing.spark + +import java.lang.ref.WeakReference +import java.util.concurrent.TimeUnit + +import org.apache.spark.sql.delta.{ + DeltaErrors, + DeltaLog, + DeltaOptions, + SnapshotDescriptor +} +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.{ + DeltaDataSource, + DeltaSource, + DeltaSourceOffset +} +import io.delta.sharing.client.DeltaSharingClient +import io.delta.sharing.client.model.{Table => DeltaSharingTable} + +import org.apache.spark.delta.sharing.CachedTableManager +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.connector.read.streaming +import org.apache.spark.sql.connector.read.streaming.{ReadLimit, SupportsAdmissionControl} +import org.apache.spark.sql.execution.streaming.{Offset, Source} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.StructType + +/** + * A streaming source for a Delta Sharing table. + * + * This class wraps a DeltaSource to read data out of locally constructed delta log. + * When a new stream is started, delta sharing starts by fetching delta log from the server side, + * constructing a local delta log, and call delta source apis to compute offset or read data. + * + * TODO: Support CDC Streaming, SupportsTriggerAvailableNow and SupportsConcurrentExecution. + */ +case class DeltaFormatSharingSource( + spark: SparkSession, + client: DeltaSharingClient, + table: DeltaSharingTable, + options: DeltaSharingOptions, + parameters: Map[String, String], + sqlConf: SQLConf, + metadataPath: String) + extends Source + with SupportsAdmissionControl + with DeltaLogging { + + private var tableId: String = "unset_table_id" + + private val tablePath = options.options.getOrElse( + "path", + throw DeltaSharingErrors.pathNotSpecifiedException + ) + + // A unique string composed of a formatted timestamp and an uuid. + // Used as a suffix for the table name and its delta log path of a delta sharing table in a + // streaming job, to avoid overwriting the delta log from multiple references of the same delta + // sharing table in one streaming job. + private val timestampWithUUID = DeltaSharingUtils.getFormattedTimestampWithUUID() + private val customTablePathWithUUIDSuffix = DeltaSharingUtils.getTablePathWithIdSuffix( + client.getProfileProvider.getCustomTablePath(tablePath), + timestampWithUUID + ) + private val deltaLogPath = + s"${DeltaSharingLogFileSystem.encode(customTablePathWithUUIDSuffix).toString}/_delta_log" + + // The latest metadata of the shared table, fetched at the initialization time of the + // DeltaFormatSharingSource, used to initialize the wrapped DeltaSource. 
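To ground the class comment above, a hedged PySpark sketch of the kind of query that ends up constructing this source (profile path, checkpoint location and sink are placeholders):

    # Hypothetical streaming read; responseFormat=delta routes createSource() in
    # DeltaSharingDataSource to DeltaFormatSharingSource, which builds a local _delta_log
    # and wraps a DeltaSource over it.
    profile_path = "/path/to/profile.share#my_share.my_schema.my_table"  # placeholder
    stream = (spark.readStream.format("deltaSharing")
              .option("responseFormat", "delta")
              .load(profile_path))
    query = (stream.writeStream
             .format("console")
             .option("checkpointLocation", "/tmp/delta-sharing-ckpt")  # placeholder
             .start())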
+ private lazy val deltaSharingTableMetadata = + DeltaSharingUtils.getDeltaSharingTableMetadata(client, table) + + private lazy val deltaSource = initDeltaSource() + + private def initDeltaSource(): DeltaSource = { + val (localDeltaLog, snapshotDescriptor) = DeltaSharingUtils.getDeltaLogAndSnapshotDescriptor( + spark, + deltaSharingTableMetadata, + customTablePathWithUUIDSuffix + ) + val schemaTrackingLogOpt = + DeltaDataSource.getMetadataTrackingLogForDeltaSource( + spark, + snapshotDescriptor, + parameters, + // Pass in the metadata path opt so we can use it for validation + sourceMetadataPathOpt = Some(metadataPath) + ) + + val readSchema = schemaTrackingLogOpt + .flatMap(_.getCurrentTrackedMetadata.map(_.dataSchema)) + .getOrElse(snapshotDescriptor.schema) + + if (readSchema.isEmpty) { + throw DeltaErrors.schemaNotSetException + } + + DeltaSource( + spark = spark, + deltaLog = localDeltaLog, + options = new DeltaOptions(parameters, sqlConf), + snapshotAtSourceInit = snapshotDescriptor, + metadataPath = metadataPath, + metadataTrackingLog = schemaTrackingLogOpt + ) + } + + // schema of the streaming source, based on the latest metadata of the shared table. + override val schema: StructType = { + val schemaWithoutCDC = deltaSharingTableMetadata.metadata.schema + tableId = deltaSharingTableMetadata.metadata.deltaMetadata.id + if (options.readChangeFeed) { + CDCReader.cdcReadSchema(schemaWithoutCDC) + } else { + schemaWithoutCDC + } + } + + // Latest endOffset of the getBatch call, used to compute startingOffset which will then be used + // to compare with the the latest table version on server to decide whether to fetch new data. + private var latestProcessedEndOffsetOption: Option[DeltaSourceOffset] = None + + // Latest table version for the data fetched from the delta sharing server, and stored in the + // local delta log. Used to check whether all fetched files are processed by the DeltaSource. + private var latestTableVersionInLocalDeltaLogOpt: Option[Long] = None + + // This is needed because DeltaSource is not advancing the offset to the next version + // automatically when scanning through a snapshot, so DeltaFormatSharingSource needs to count the + // number of files in the min version and advance the offset to the next version when the offset + // is at the last index of the version. + private var numFileActionsInStartingSnapshotOpt: Option[Int] = None + + // Latest timestamp for getTableVersion rpc from the server, used to compare with the current + // timestamp, to ensure the gap QUERY_TABLE_VERSION_INTERVAL_MILLIS between two rpcs, to avoid + // a high traffic load to the server. + private var lastTimestampForGetVersionFromServer: Long = -1 + + // The minimum gap between two getTableVersion rpcs, to avoid a high traffic load to the server. + private val QUERY_TABLE_VERSION_INTERVAL_MILLIS = TimeUnit.SECONDS.toMillis(30) + + // Maximum number of versions of getFiles() rpc when fetching files from the server. Used to + // reduce the number of files returned to avoid timeout of the rpc on the server. + private val maxVersionsPerRpc: Int = options.maxVersionsPerRpc.getOrElse( + DeltaSharingOptions.MAX_VERSIONS_PER_RPC_DEFAULT + ) + + // A variable to store the latest table version on server, returned from the getTableVersion rpc. + // Used to store the latest table version for getOrUpdateLatestTableVersion when not getting + // updates from the server. + // For all other callers, please use getOrUpdateLatestTableVersion instead of this variable. 
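The maxVersionsPerRpc cap described above is easiest to see with a small worked example in plain Python, mirroring the formula in getEndingVersionForRpc further below:

    # If the stream is at version 10, the server is at version 100, and maxVersionsPerRpc is 20,
    # the next getFiles rpc only asks for versions [10, 29]; later batches catch up incrementally.
    starting_version = 10
    latest_table_version = 100
    max_versions_per_rpc = 20
    ending_version_for_rpc = min(latest_table_version,
                                 starting_version + max_versions_per_rpc - 1)
    assert ending_version_for_rpc == 29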
+ private var latestTableVersionOnServer: Long = -1 + + /** + * Check the latest table version from the delta sharing server through the client.getTableVersion + * RPC. Adding a minimum interval of QUERY_TABLE_VERSION_INTERVAL_MILLIS between two consecutive + * rpcs to avoid traffic jam on the delta sharing server. + * + * @return the latest table version on the server. + */ + private def getOrUpdateLatestTableVersion: Long = { + val currentTimeMillis = System.currentTimeMillis() + if ((currentTimeMillis - lastTimestampForGetVersionFromServer) >= + QUERY_TABLE_VERSION_INTERVAL_MILLIS) { + val serverVersion = client.getTableVersion(table) + if (serverVersion < 0) { + throw new IllegalStateException( + s"Delta Sharing Server returning negative table version: " + + s"$serverVersion." + ) + } else if (serverVersion < latestTableVersionOnServer) { + logWarning( + s"Delta Sharing Server returning smaller table version: $serverVersion < " + + s"$latestTableVersionOnServer." + ) + } + logInfo(s"Delta Sharing Server returning $serverVersion for getTableVersion.") + latestTableVersionOnServer = serverVersion + lastTimestampForGetVersionFromServer = currentTimeMillis + } + latestTableVersionOnServer + } + + /** + * NOTE: need to match with the logic in DeltaSource.extractStartingState(). + * + * Get the starting offset used to send rpc to delta sharing server, to fetch needed files. + * Use input startOffset when it's defined, otherwise use user defined starting version, otherwise + * use input endOffset if it's defined, the least option is the latest table version returned from + * the delta sharing server (which is usually used when a streaming query starts from scratch). + * + * @param startOffsetOption optional start offset, return it if defined. It's empty when the + * streaming query starts from scratch. It's set for following calls. + * @param endOffsetOption optional end offset. It's set when the function is called from + * getBatch and is empty when called from latestOffset. + * @return The starting offset. + */ + private def getStartingOffset( + startOffsetOption: Option[DeltaSourceOffset], + endOffsetOption: Option[DeltaSourceOffset]): DeltaSourceOffset = { + if (startOffsetOption.isEmpty) { + val (version, isInitialSnapshot) = getStartingVersion match { + case Some(v) => (v, false) + case None => + if (endOffsetOption.isDefined) { + if (endOffsetOption.get.isInitialSnapshot) { + (endOffsetOption.get.reservoirVersion, true) + } else { + assert( + endOffsetOption.get.reservoirVersion > 0, + s"invalid reservoirVersion in endOffset: ${endOffsetOption.get}" + ) + // Load from snapshot `endOffset.reservoirVersion - 1L` so that `index` in `endOffset` + // is still valid. + // It's OK to use the previous version as the updated initial snapshot, even if the + // initial snapshot might have been different from the last time when this starting + // offset was computed. + (endOffsetOption.get.reservoirVersion - 1L, true) + } + } else { + (getOrUpdateLatestTableVersion, true) + } + } + // Constructed the same way as DeltaSource.buildOffsetFromIndexedFile + DeltaSourceOffset( + reservoirId = tableId, + reservoirVersion = version, + index = DeltaSourceOffset.BASE_INDEX, + isInitialSnapshot = isInitialSnapshot + ) + } else { + startOffsetOption.get + } + } + + /** + * The ending version used in rpc is restricted by both the latest table version and + * maxVersionsPerRpc, to avoid loading too many files from the server to cause a timeout. + * @param startingOffset The start offset used in the rpc. 
+ * @param latestTableVersion The latest table version at the server. + * @return the ending version used in the rpc. + */ + private def getEndingVersionForRpc( + startingOffset: DeltaSourceOffset, + latestTableVersion: Long): Long = { + if (startingOffset.isInitialSnapshot) { + // ending version is the same as starting version for snapshot query. + return startingOffset.reservoirVersion + } + // using "startVersion + maxVersionsPerRpc - 1" because the endingVersion is inclusive. + val endingVersionForQuery = latestTableVersion.min( + startingOffset.reservoirVersion + maxVersionsPerRpc - 1 + ) + if (endingVersionForQuery < latestTableVersion) { + logInfo( + s"Reducing ending version for delta sharing rpc from latestTableVersion(" + + s"$latestTableVersion) to endingVersionForQuery($endingVersionForQuery), " + + s"startVersion:${startingOffset.reservoirVersion}, maxVersionsPerRpc:$maxVersionsPerRpc, " + + s"for table(id:$tableId, name:${table.toString})." + ) + } + endingVersionForQuery + } + + override def getDefaultReadLimit: ReadLimit = { + deltaSource.getDefaultReadLimit + } + + override def latestOffset(startOffset: streaming.Offset, limit: ReadLimit): streaming.Offset = { + val deltaSourceOffset = getStartingOffset(latestProcessedEndOffsetOption, None) + + if (deltaSourceOffset.reservoirVersion < 0) { + return null + } + + maybeGetLatestFileChangesFromServer(deltaSourceOffset) + + maybeMoveToNextVersion(deltaSource.latestOffset(startOffset, limit)) + } + + // Advance the DeltaSourceOffset to the next version when the offset is at the last index of the + // version. + // This is because DeltaSource is not advancing the offset automatically when processing a + // snapshot (isStartingVersion = true), and advancing the offset is necessary for delta sharing + // streaming to fetch new files from the delta sharing server. + private def maybeMoveToNextVersion( + latestOffsetFromDeltaSource: streaming.Offset): DeltaSourceOffset = { + val deltaLatestOffset = deltaSource.toDeltaSourceOffset(latestOffsetFromDeltaSource) + if (deltaLatestOffset.isInitialSnapshot && + (numFileActionsInStartingSnapshotOpt.exists(_ == deltaLatestOffset.index + 1))) { + DeltaSourceOffset( + reservoirId = deltaLatestOffset.reservoirId, + reservoirVersion = deltaLatestOffset.reservoirVersion + 1, + index = DeltaSourceOffset.BASE_INDEX, + isInitialSnapshot = false + ) + } else { + deltaLatestOffset + } + } + + /** + * Whether need to fetch new files from the delta sharing server. + * @param startingOffset the startingOffset of the next batch asked by spark streaming engine. + * @param latestTableVersion the latest table version on the delta sharing server. + * @return whether need to fetch new files from the delta sharing server, this is needed when all + * files are processed in the local delta log, and there are new files on the delta + * sharing server. + * And we avoid fetching new files when files in the delta log are not fully processed. + */ + private def needNewFilesFromServer( + startingOffset: DeltaSourceOffset, + latestTableVersion: Long): Boolean = { + if (latestTableVersionInLocalDeltaLogOpt.isEmpty) { + return true + } + + val allLocalFilesProcessed = latestTableVersionInLocalDeltaLogOpt.exists( + _ < startingOffset.reservoirVersion + ) + val newChangesOnServer = latestTableVersionInLocalDeltaLogOpt.exists(_ < latestTableVersion) + allLocalFilesProcessed && newChangesOnServer + } + + /** + * Check whether we need to fetch new files from the server and calls getTableFileChanges if true. 
+ * + * @param startingOffset the starting offset used to fetch files, the 3 parameters will be useful: + * - reservoirVersion: initially would be the startingVersion or the latest + * table version. + * - index: index of a file within the same version. + * - isInitialSnapshot: If true, will load fromVersion as a table snapshot( + * including files from previous versions). If false, will only load files + * since fromVersion. + * 2 usages: 1) used to compare with latestTableVersionInLocalDeltaLogOpt to + * check whether new files are needed. 2) used for getTableFileChanges, + * check more details in the function header. + */ + private def maybeGetLatestFileChangesFromServer(startingOffset: DeltaSourceOffset): Unit = { + // Use a local variable to avoid a difference in the two usages below. + val latestTableVersion = getOrUpdateLatestTableVersion + + if (needNewFilesFromServer(startingOffset, latestTableVersion)) { + val endingVersionForQuery = + getEndingVersionForRpc(startingOffset, latestTableVersion) + + if (startingOffset.isInitialSnapshot || !options.readChangeFeed) { + getTableFileChanges(startingOffset, endingVersionForQuery) + } else { + throw new UnsupportedOperationException("CDF Streaming is not supported yet.") + } + } + } + + /** + * Fetch the table changes from delta sharing server starting from (version, index) of the + * startingOffset, and store them in locally constructed delta log. + * + * @param startingOffset Includes a reservoirVersion, an index of a file within the same version, + * and an isInitialSnapshot. + * If isInitialSnapshot is true, will load startingOffset.reservoirVersion + * as a table snapshot (including files from previous versions). If false, + * it will only load files since startingOffset.reservoirVersion. + * @param endingVersionForQuery The ending version used for the query, always smaller than + * the latest table version on server. + */ + private def getTableFileChanges( + startingOffset: DeltaSourceOffset, + endingVersionForQuery: Long): Unit = { + logInfo( + s"Fetching files with table version(${startingOffset.reservoirVersion}), " + + s"index(${startingOffset.index}), isInitialSnapshot(${startingOffset.isInitialSnapshot})," + + s" endingVersionForQuery($endingVersionForQuery), for table(id:$tableId, " + + s"name:${table.toString}) with latest version on server($latestTableVersionOnServer)." + ) + + val (tableFiles, refreshFunc) = if (startingOffset.isInitialSnapshot) { + // If isInitialSnapshot is true, it means to fetch the snapshot at the fromVersion, which may + // include table changes from previous versions. + val tableFiles = client.getFiles( + table = table, + predicates = Nil, + limit = None, + versionAsOf = Some(startingOffset.reservoirVersion), + timestampAsOf = None, + jsonPredicateHints = None, + refreshToken = None + ) + val refreshFunc = DeltaSharingUtils.getRefresherForGetFiles( + client = client, + table = table, + predicates = Nil, + limit = None, + versionAsOf = Some(startingOffset.reservoirVersion), + timestampAsOf = None, + jsonPredicateHints = None, + refreshToken = None + ) + logInfo( + s"Fetched ${tableFiles.lines.size} lines for table version ${tableFiles.version} from" + + " delta sharing server." + ) + (tableFiles, refreshFunc) + } else { + // If isStartingVersion is false, it means to fetch files for data changes since fromVersion, + // not including files from previous versions. 
+ val tableFiles = client.getFiles( + table = table, + startingVersion = startingOffset.reservoirVersion, + endingVersion = Some(endingVersionForQuery) + ) + val refreshFunc = DeltaSharingUtils.getRefresherForGetFilesWithStartingVersion( + client = client, + table = table, + startingVersion = startingOffset.reservoirVersion, + endingVersion = Some(endingVersionForQuery) + ) + logInfo( + s"Fetched ${tableFiles.lines.size} lines from startingVersion " + + s"${startingOffset.reservoirVersion} to enedingVersion ${endingVersionForQuery} from " + + "delta sharing server." + ) + (tableFiles, refreshFunc) + } + + val deltaLogMetadata = DeltaSharingLogFileSystem.constructLocalDeltaLogAcrossVersions( + lines = tableFiles.lines, + customTablePath = customTablePathWithUUIDSuffix, + startingVersionOpt = Some(startingOffset.reservoirVersion), + endingVersionOpt = Some(endingVersionForQuery) + ) + assert( + deltaLogMetadata.maxVersion > 0, + s"Invalid table version in delta sharing response: ${tableFiles.lines}." + ) + latestTableVersionInLocalDeltaLogOpt = Some(deltaLogMetadata.maxVersion) + logInfo(s"Setting latestTableVersionInLocalDeltaLogOpt to ${deltaLogMetadata.maxVersion}") + assert( + deltaLogMetadata.numFileActionsInMinVersionOpt.isDefined, + "numFileActionsInMinVersionOpt missing after constructed delta log." + ) + if (startingOffset.isInitialSnapshot) { + numFileActionsInStartingSnapshotOpt = deltaLogMetadata.numFileActionsInMinVersionOpt + } + + CachedTableManager.INSTANCE.register( + tablePath = DeltaSharingUtils.getTablePathWithIdSuffix(tablePath, timestampWithUUID), + idToUrl = deltaLogMetadata.idToUrl, + refs = Seq(new WeakReference(this)), + profileProvider = client.getProfileProvider, + refresher = refreshFunc, + expirationTimestamp = + if (CachedTableManager.INSTANCE + .isValidUrlExpirationTime(deltaLogMetadata.minUrlExpirationTimestamp)) { + deltaLogMetadata.minUrlExpirationTimestamp.get + } else { + System.currentTimeMillis() + CachedTableManager.INSTANCE.preSignedUrlExpirationMs + }, + refreshToken = tableFiles.refreshToken + ) + } + + override def getBatch(startOffsetOption: Option[Offset], end: Offset): DataFrame = { + val endOffset = deltaSource.toDeltaSourceOffset(end) + val startDeltaOffsetOption = startOffsetOption.map(deltaSource.toDeltaSourceOffset) + val startingOffset = getStartingOffset(startDeltaOffsetOption, Some(endOffset)) + + maybeGetLatestFileChangesFromServer(startingOffset = startingOffset) + // Reset latestProcessedEndOffsetOption only when endOffset is larger. + // Because with microbatch pipelining, we may get getBatch requests out of order. + if (latestProcessedEndOffsetOption.isEmpty || + endOffset.reservoirVersion > latestProcessedEndOffsetOption.get.reservoirVersion || + (endOffset.reservoirVersion == latestProcessedEndOffsetOption.get.reservoirVersion && + endOffset.index > latestProcessedEndOffsetOption.get.index)) { + latestProcessedEndOffsetOption = Some(endOffset) + logInfo(s"Setting latestProcessedEndOffsetOption to $endOffset") + } + + deltaSource.getBatch(startOffsetOption, end) + } + + override def getOffset: Option[Offset] = { + throw new UnsupportedOperationException( + "latestOffset(Offset, ReadLimit) should be called instead of this method." + ) + } + + /** + * Extracts whether users provided the option to time travel a relation. If a query restarts from + * a checkpoint and the checkpoint has recorded the offset, this method should never been called. 
+ */ + private lazy val getStartingVersion: Option[Long] = { + + /** DeltaOption validates input and ensures that only one is provided. */ + if (options.startingVersion.isDefined) { + val v = options.startingVersion.get match { + case StartingVersionLatest => + getOrUpdateLatestTableVersion + 1 + case StartingVersion(version) => + version + } + Some(v) + } else if (options.startingTimestamp.isDefined) { + Some(client.getTableVersion(table, options.startingTimestamp)) + } else { + None + } + } + + override def stop(): Unit = { + deltaSource.stop() + + DeltaSharingLogFileSystem.tryToCleanUpDeltaLog(deltaLogPath) + } + + // Calls deltaSource.commit for checks related to column mapping. + override def commit(end: Offset): Unit = { + logInfo(s"Commit end offset: $end.") + deltaSource.commit(end) + + // Clean up previous blocks after commit. + val endOffset = deltaSource.toDeltaSourceOffset(end) + DeltaSharingLogFileSystem.tryToCleanUpPreviousBlocks( + deltaLogPath, + endOffset.reservoirVersion - 1 + ) + } + + override def toString(): String = s"DeltaFormatSharingSource[${table.toString}]" +} diff --git a/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingCDFUtils.scala b/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingCDFUtils.scala new file mode 100644 index 00000000000..7b8a8294c67 --- /dev/null +++ b/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingCDFUtils.scala @@ -0,0 +1,112 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sharing.spark + +import java.lang.ref.WeakReference +import java.nio.charset.StandardCharsets.UTF_8 + +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import com.google.common.hash.Hashing +import io.delta.sharing.client.DeltaSharingClient +import io.delta.sharing.client.model.{Table => DeltaSharingTable} +import org.apache.hadoop.fs.Path + +import org.apache.spark.delta.sharing.CachedTableManager +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.sources.BaseRelation + +object DeltaSharingCDFUtils extends Logging { + + private def getDuration(start: Long): Double = { + (System.currentTimeMillis() - start) / 1000.0 + } + + /** + * Prepares the BaseRelation for cdf queries on a delta sharing table. Since there's no limit + * pushdown or filter pushdown involved, it wiill firatly fetch all the files from the delta + * sharing server, prepare the local delta log, and leverage DeltaTableV2 to produce the relation. + */ + private[sharing] def prepareCDFRelation( + sqlContext: SQLContext, + options: DeltaSharingOptions, + table: DeltaSharingTable, + client: DeltaSharingClient): BaseRelation = { + val startTime = System.currentTimeMillis() + // 1. Get all files with DeltaSharingClient. + // includeHistoricalMetadata is always set to true, to get the metadata at the startingVersion + // and also any metadata changes between [startingVersion, endingVersion], to put them in the + // delta log. 
This is to allow delta library to check the metadata change and handle it + // properly -- currently it throws error for column mapping changes. + val deltaTableFiles = + client.getCDFFiles(table, options.cdfOptions, includeHistoricalMetadata = true) + logInfo( + s"Fetched ${deltaTableFiles.lines.size} lines with cdf options ${options.cdfOptions} " + + s"for table ${table} from delta sharing server, took ${getDuration(startTime)}s." + ) + + val path = options.options.getOrElse("path", throw DeltaSharingErrors.pathNotSpecifiedException) + // 2. Prepare local delta log + val queryCustomTablePath = client.getProfileProvider.getCustomTablePath(path) + val queryParamsHashId = DeltaSharingUtils.getQueryParamsHashId(options.cdfOptions) + val tablePathWithHashIdSuffix = + DeltaSharingUtils.getTablePathWithIdSuffix(queryCustomTablePath, queryParamsHashId) + val deltaLogMetadata = DeltaSharingLogFileSystem.constructLocalDeltaLogAcrossVersions( + lines = deltaTableFiles.lines, + customTablePath = tablePathWithHashIdSuffix, + startingVersionOpt = None, + endingVersionOpt = None + ) + + // 3. Register parquet file id to url mapping + CachedTableManager.INSTANCE.register( + // Using path instead of queryCustomTablePath because it will be customized within + // CachedTableManager. + tablePath = DeltaSharingUtils.getTablePathWithIdSuffix(path, queryParamsHashId), + idToUrl = deltaLogMetadata.idToUrl, + // A weak reference is needed by the CachedTableManager to decide whether the query is done + // and it's ok to clean up the id to url mapping for this table. + refs = Seq(new WeakReference(this)), + profileProvider = client.getProfileProvider, + refresher = DeltaSharingUtils.getRefresherForGetCDFFiles( + client = client, + table = table, + cdfOptions = options.cdfOptions + ), + expirationTimestamp = + if (CachedTableManager.INSTANCE + .isValidUrlExpirationTime(deltaLogMetadata.minUrlExpirationTimestamp)) { + deltaLogMetadata.minUrlExpirationTimestamp.get + } else { + System.currentTimeMillis() + CachedTableManager.INSTANCE.preSignedUrlExpirationMs + }, + refreshToken = None + ) + + // 4. return Delta + val localDeltaCdfOptions = Map( + DeltaSharingOptions.CDF_START_VERSION -> deltaLogMetadata.minVersion.toString, + DeltaSharingOptions.CDF_END_VERSION -> deltaLogMetadata.maxVersion.toString, + DeltaSharingOptions.CDF_READ_OPTION -> "true" + ) + DeltaTableV2( + spark = sqlContext.sparkSession, + path = DeltaSharingLogFileSystem.encode(tablePathWithHashIdSuffix), + options = localDeltaCdfOptions + ).toBaseRelation + } +} diff --git a/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingDataSource.scala b/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingDataSource.scala new file mode 100644 index 00000000000..8f8522cc2dc --- /dev/null +++ b/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingDataSource.scala @@ -0,0 +1,448 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
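As a usage-level sketch of the CDF path prepared above (PySpark; the profile path and version range are placeholders, and the option keys are assumed to mirror the ones used for regular Delta CDF reads):

    # Hypothetical change data feed read; readChangeFeed=true with responseFormat=delta leads
    # the data source to call DeltaSharingCDFUtils.prepareCDFRelation above.
    profile_path = "/path/to/profile.share#my_share.my_schema.my_table"  # placeholder
    changes = (spark.read.format("deltaSharing")
               .option("responseFormat", "delta")
               .option("readChangeFeed", "true")
               .option("startingVersion", "1")   # placeholder CDF range
               .load(profile_path))
    changes.select("_change_type", "_commit_version").show()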
+ */ + +package io.delta.sharing.spark + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.{ + DeltaColumnMapping, + DeltaErrors, + DeltaTableUtils => TahoeDeltaTableUtils +} +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.{DeltaDataSource, DeltaSQLConf} +import io.delta.sharing.client.{DeltaSharingClient, DeltaSharingRestClient} +import io.delta.sharing.client.model.{Table => DeltaSharingTable} +import io.delta.sharing.client.util.{ConfUtils, JsonUtils} +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkEnv +import org.apache.spark.delta.sharing.PreSignedUrlCache +import org.apache.spark.sql.{SparkSession, SQLContext} +import org.apache.spark.sql.execution.datasources.HadoopFsRelation +import org.apache.spark.sql.execution.streaming.Source +import org.apache.spark.sql.sources.{ + BaseRelation, + DataSourceRegister, + RelationProvider, + StreamSourceProvider +} +import org.apache.spark.sql.types.StructType + +/** + * A DataSource for Delta Sharing, used to support all types of queries on a delta sharing table: + * batch, cdf, streaming, time travel, filters, etc. + */ +private[sharing] class DeltaSharingDataSource + extends RelationProvider + with StreamSourceProvider + with DataSourceRegister + with DeltaLogging { + + override def sourceSchema( + sqlContext: SQLContext, + schema: Option[StructType], + providerName: String, + parameters: Map[String, String]): (String, StructType) = { + DeltaSharingDataSource.setupFileSystem(sqlContext) + if (schema.nonEmpty && schema.get.nonEmpty) { + throw DeltaErrors.specifySchemaAtReadTimeException + } + val options = new DeltaSharingOptions(parameters) + if (options.isTimeTravel) { + throw DeltaErrors.timeTravelNotSupportedException + } + val path = options.options.getOrElse("path", throw DeltaSharingErrors.pathNotSpecifiedException) + + if (options.responseFormat == DeltaSharingOptions.RESPONSE_FORMAT_PARQUET) { + logInfo(s"sourceSchema with parquet format for table path:$path, parameters:$parameters") + val deltaLog = RemoteDeltaLog( + path, + forStreaming = true, + responseFormat = options.responseFormat + ) + val schemaToUse = deltaLog.snapshot().schema + if (schemaToUse.isEmpty) { + throw DeltaSharingErrors.schemaNotSetException + } + + if (options.readChangeFeed) { + (shortName(), DeltaTableUtils.addCdcSchema(schemaToUse)) + } else { + (shortName(), schemaToUse) + } + } else if (options.responseFormat == DeltaSharingOptions.RESPONSE_FORMAT_DELTA) { + logInfo(s"sourceSchema with delta format for table path:$path, parameters:$parameters") + if (options.readChangeFeed) { + throw new UnsupportedOperationException( + s"Delta sharing cdc streaming is not supported when responseforma=delta." + ) + } + // 1. create delta sharing client + val parsedPath = DeltaSharingRestClient.parsePath(path) + val client = DeltaSharingRestClient( + profileFile = parsedPath.profileFile, + forStreaming = true, + responseFormat = options.responseFormat, + // comma separated delta reader features, used to tell delta sharing server what delta + // reader features the client is able to process. + readerFeatures = DeltaSharingUtils.STREAMING_SUPPORTED_READER_FEATURES.mkString(",") + ) + val dsTable = DeltaSharingTable( + share = parsedPath.share, + schema = parsedPath.schema, + name = parsedPath.table + ) + + // 2. getMetadata for schema to be used in the file index. 
+ val deltaSharingTableMetadata = DeltaSharingUtils.getDeltaSharingTableMetadata( + client = client, + table = dsTable + ) + val customTablePathWithUUIDSuffix = DeltaSharingUtils.getTablePathWithIdSuffix( + client.getProfileProvider.getCustomTablePath(path), + DeltaSharingUtils.getFormattedTimestampWithUUID() + ) + val deltaLogPath = + s"${DeltaSharingLogFileSystem.encode(customTablePathWithUUIDSuffix).toString}/_delta_log" + val (_, snapshotDescriptor) = DeltaSharingUtils.getDeltaLogAndSnapshotDescriptor( + sqlContext.sparkSession, + deltaSharingTableMetadata, + customTablePathWithUUIDSuffix + ) + + // This is the analyzed schema for Delta streaming + val readSchema = { + // Check if we would like to merge consecutive schema changes, this would allow customers + // to write queries based on their latest changes instead of an arbitrary schema in the + // past. + val shouldMergeConsecutiveSchemas = sqlContext.sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_STREAMING_ENABLE_SCHEMA_TRACKING_MERGE_CONSECUTIVE_CHANGES + ) + + // This method is invoked during the analysis phase and would determine the schema for the + // streaming dataframe. We only need to merge consecutive schema changes here because the + // process would create a new entry in the schema log such that when the schema log is + // looked up again in the execution phase, we would use the correct schema. + DeltaDataSource + .getMetadataTrackingLogForDeltaSource( + sqlContext.sparkSession, + snapshotDescriptor, + parameters, + mergeConsecutiveSchemaChanges = shouldMergeConsecutiveSchemas + ) + .flatMap(_.getCurrentTrackedMetadata.map(_.dataSchema)) + .getOrElse(snapshotDescriptor.schema) + } + + val schemaToUse = TahoeDeltaTableUtils.removeInternalMetadata( + sqlContext.sparkSession, + readSchema + ) + if (schemaToUse.isEmpty) { + throw DeltaErrors.schemaNotSetException + } + + DeltaSharingLogFileSystem.tryToCleanUpDeltaLog(deltaLogPath) + (shortName(), schemaToUse) + } else { + throw new UnsupportedOperationException( + s"responseformat(${options.responseFormat}) is not " + + s"supported in delta sharing." + ) + } + } + + override def createSource( + sqlContext: SQLContext, + metadataPath: String, + schema: Option[StructType], + providerName: String, + parameters: Map[String, String]): Source = { + DeltaSharingDataSource.setupFileSystem(sqlContext) + if (schema.nonEmpty && schema.get.nonEmpty) { + throw DeltaSharingErrors.specifySchemaAtReadTimeException + } + val options = new DeltaSharingOptions(parameters) + val path = options.options.getOrElse("path", throw DeltaSharingErrors.pathNotSpecifiedException) + + if (options.responseFormat == DeltaSharingOptions.RESPONSE_FORMAT_PARQUET) { + logInfo(s"createSource with parquet format for table path:$path, parameters:$parameters") + val deltaLog = RemoteDeltaLog(path, forStreaming = true, options.responseFormat) + DeltaSharingSource(SparkSession.active, deltaLog, options) + } else if (options.responseFormat == DeltaSharingOptions.RESPONSE_FORMAT_DELTA) { + logInfo(s"createSource with delta format for table path:$path, parameters:$parameters") + if (options.readChangeFeed) { + throw new UnsupportedOperationException( + s"Delta sharing cdc streaming is not supported when responseforma=delta." + ) + } + // 1. 
create delta sharing client + val parsedPath = DeltaSharingRestClient.parsePath(path) + val client = DeltaSharingRestClient( + profileFile = parsedPath.profileFile, + forStreaming = true, + responseFormat = options.responseFormat, + // comma separated delta reader features, used to tell delta sharing server what delta + // reader features the client is able to process. + readerFeatures = DeltaSharingUtils.STREAMING_SUPPORTED_READER_FEATURES.mkString(",") + ) + val dsTable = DeltaSharingTable( + share = parsedPath.share, + schema = parsedPath.schema, + name = parsedPath.table + ) + + DeltaFormatSharingSource( + spark = sqlContext.sparkSession, + client = client, + table = dsTable, + options = options, + parameters = parameters, + sqlConf = sqlContext.sparkSession.sessionState.conf, + metadataPath = metadataPath + ) + } else { + throw new UnsupportedOperationException( + s"responseformat(${options.responseFormat}) is not " + + s"supported in delta sharing." + ) + } + } + + override def createRelation( + sqlContext: SQLContext, + parameters: Map[String, String]): BaseRelation = { + DeltaSharingDataSource.setupFileSystem(sqlContext) + val options = new DeltaSharingOptions(parameters) + + val userInputResponseFormat = options.options.get(DeltaSharingOptions.RESPONSE_FORMAT) + if (userInputResponseFormat.isEmpty && !options.readChangeFeed) { + return autoResolveBaseRelationForSnapshotQuery(options) + } + + val path = options.options.getOrElse("path", throw DeltaSharingErrors.pathNotSpecifiedException) + if (options.responseFormat == DeltaSharingOptions.RESPONSE_FORMAT_PARQUET) { + // When user explicitly set responseFormat=parquet, to query shared tables without advanced + // delta features. + logInfo(s"createRelation with parquet format for table path:$path, parameters:$parameters") + val deltaLog = RemoteDeltaLog( + path, + forStreaming = false, + responseFormat = options.responseFormat + ) + deltaLog.createRelation( + options.versionAsOf, + options.timestampAsOf, + options.cdfOptions + ) + } else if (options.responseFormat == DeltaSharingOptions.RESPONSE_FORMAT_DELTA) { + // When user explicitly set responseFormat=delta, to query shared tables with advanced + // delta features. + logInfo(s"createRelation with delta format for table path:$path, parameters:$parameters") + // 1. create delta sharing client + val parsedPath = DeltaSharingRestClient.parsePath(path) + val client = DeltaSharingRestClient( + profileFile = parsedPath.profileFile, + forStreaming = false, + responseFormat = options.responseFormat, + // comma separated delta reader features, used to tell delta sharing server what delta + // reader features the client is able to process. + readerFeatures = DeltaSharingUtils.SUPPORTED_READER_FEATURES.mkString(",") + ) + val dsTable = DeltaSharingTable( + share = parsedPath.share, + schema = parsedPath.schema, + name = parsedPath.table + ) + + if (options.readChangeFeed) { + return DeltaSharingCDFUtils.prepareCDFRelation(sqlContext, options, dsTable, client) + } + // 2. getMetadata for schema to be used in the file index. + val deltaTableMetadata = DeltaSharingUtils.queryDeltaTableMetadata( + client = client, + table = dsTable, + versionAsOf = options.versionAsOf, + timestampAsOf = options.timestampAsOf + ) + val deltaSharingTableMetadata = DeltaSharingUtils.getDeltaSharingTableMetadata( + table = dsTable, + deltaTableMetadata = deltaTableMetadata + ) + + // 3. 
Prepare HadoopFsRelation + getHadoopFsRelationForDeltaSnapshotQuery( + path = path, + options = options, + dsTable = dsTable, + client = client, + deltaSharingTableMetadata = deltaSharingTableMetadata + ) + } else { + throw new UnsupportedOperationException( + s"responseformat(${options.responseFormat}) is not supported in delta sharing." + ) + } + } + + /** + * "parquet format sharing" leverages the existing set of remote classes to directly handle the + * list of presigned urls and read data. + * "delta format sharing" instead constructs a local delta log and leverages the delta library to + * read data. + * Firstly we sends a getMetadata call to the delta sharing server the suggested response format + * of the shared table by the server (based on whether there are advanced delta features in the + * shared table), and then decide the code path on the client side. + */ + private def autoResolveBaseRelationForSnapshotQuery( + options: DeltaSharingOptions): BaseRelation = { + val path = options.options.getOrElse("path", throw DeltaSharingErrors.pathNotSpecifiedException) + val parsedPath = DeltaSharingRestClient.parsePath(path) + + val client = DeltaSharingRestClient( + profileFile = parsedPath.profileFile, + forStreaming = false, + // Indicating that the client is able to process response format in both parquet and delta. + responseFormat = s"${DeltaSharingOptions.RESPONSE_FORMAT_PARQUET}," + + s"${DeltaSharingOptions.RESPONSE_FORMAT_DELTA}", + // comma separated delta reader features, used to tell delta sharing server what delta + // reader features the client is able to process. + readerFeatures = DeltaSharingUtils.SUPPORTED_READER_FEATURES.mkString(",") + ) + val dsTable = DeltaSharingTable( + name = parsedPath.table, + schema = parsedPath.schema, + share = parsedPath.share + ) + + val deltaTableMetadata = DeltaSharingUtils.queryDeltaTableMetadata( + client = client, + table = dsTable, + versionAsOf = options.versionAsOf, + timestampAsOf = options.timestampAsOf + ) + + if (deltaTableMetadata.respondedFormat == DeltaSharingOptions.RESPONSE_FORMAT_PARQUET) { + val deltaLog = RemoteDeltaLog( + path = path, + forStreaming = false, + responseFormat = DeltaSharingOptions.RESPONSE_FORMAT_PARQUET, + initDeltaTableMetadata = Some(deltaTableMetadata) + ) + deltaLog.createRelation(options.versionAsOf, options.timestampAsOf, options.cdfOptions) + } else if (deltaTableMetadata.respondedFormat == DeltaSharingOptions.RESPONSE_FORMAT_DELTA) { + val deltaSharingTableMetadata = DeltaSharingUtils.getDeltaSharingTableMetadata( + table = dsTable, + deltaTableMetadata = deltaTableMetadata + ) + val deltaOnlyClient = DeltaSharingRestClient( + profileFile = parsedPath.profileFile, + forStreaming = false, + // Indicating that the client request delta format in response. + responseFormat = DeltaSharingOptions.RESPONSE_FORMAT_DELTA, + // comma separated delta reader features, used to tell delta sharing server what delta + // reader features the client is able to process. + readerFeatures = DeltaSharingUtils.SUPPORTED_READER_FEATURES.mkString(",") + ) + getHadoopFsRelationForDeltaSnapshotQuery( + path = path, + options = options, + dsTable = dsTable, + client = deltaOnlyClient, + deltaSharingTableMetadata = deltaSharingTableMetadata + ) + } else { + throw new UnsupportedOperationException( + s"Unexpected respondedFormat for getMetadata rpc:${deltaTableMetadata.respondedFormat}." + ) + } + } + + /** + * Prepare a HadoopFsRelation for the snapshot query on a delta sharing table. 
It will contain a + * DeltaSharingFileIndex which is used to handle delta sharing rpc, and construct the local delta + * log, and then build a TahoeFileIndex on top of the delta log. + */ + private def getHadoopFsRelationForDeltaSnapshotQuery( + path: String, + options: DeltaSharingOptions, + dsTable: DeltaSharingTable, + client: DeltaSharingClient, + deltaSharingTableMetadata: DeltaSharingUtils.DeltaSharingTableMetadata): BaseRelation = { + // Prepare DeltaSharingFileIndex + val spark = SparkSession.active + val params = new DeltaSharingFileIndexParams( + new Path(path), + spark, + deltaSharingTableMetadata, + options + ) + if (ConfUtils.limitPushdownEnabled(spark.sessionState.conf)) { + DeltaFormatSharingLimitPushDown.setup(spark) + } + // limitHint is always None here and will be overridden in DeltaFormatSharingLimitPushDown. + val fileIndex = DeltaSharingFileIndex( + params = params, + table = dsTable, + client = client, + limitHint = None + ) + + // return HadoopFsRelation with the DeltaSharingFileIndex. + HadoopFsRelation( + location = fileIndex, + // This is copied from DeltaLog.buildHadoopFsRelationWithFileIndex. + // Dropping column mapping metadata because it is not relevant for partition schema. + partitionSchema = DeltaColumnMapping.dropColumnMappingMetadata(fileIndex.partitionSchema), + // This is copied from DeltaLog.buildHadoopFsRelationWithFileIndex, original comment: + // We pass all table columns as `dataSchema` so that Spark will preserve the partition + // column locations. Otherwise, for any partition columns not in `dataSchema`, Spark would + // just append them to the end of `dataSchema`. + dataSchema = DeltaColumnMapping.dropColumnMappingMetadata( + TahoeDeltaTableUtils.removeInternalMetadata( + spark, + SchemaUtils.dropNullTypeColumns(deltaSharingTableMetadata.metadata.schema) + ) + ), + bucketSpec = None, + // Handle column mapping metadata in schema. + fileFormat = fileIndex.fileFormat( + deltaSharingTableMetadata.protocol.deltaProtocol, + deltaSharingTableMetadata.metadata.deltaMetadata + ), + options = Map.empty + )(spark) + } + + override def shortName(): String = "deltaSharing" +} + +private[sharing] object DeltaSharingDataSource { + def setupFileSystem(sqlContext: SQLContext): Unit = { + sqlContext.sparkContext.hadoopConfiguration + .setIfUnset("fs.delta-sharing.impl", "io.delta.sharing.client.DeltaSharingFileSystem") + sqlContext.sparkContext.hadoopConfiguration + .setIfUnset( + "fs.delta-sharing-log.impl", + "io.delta.sharing.spark.DeltaSharingLogFileSystem" + ) + PreSignedUrlCache.registerIfNeeded(SparkEnv.get) + } +} diff --git a/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingFileIndex.scala b/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingFileIndex.scala new file mode 100644 index 00000000000..0920464495f --- /dev/null +++ b/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingFileIndex.scala @@ -0,0 +1,270 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.sharing.spark + +import java.lang.ref.WeakReference + +import org.apache.spark.sql.delta.{DeltaFileFormat, DeltaLog} +import org.apache.spark.sql.delta.files.{SupportsRowIndexFilters, TahoeLogFileIndex} +import io.delta.sharing.client.DeltaSharingClient +import io.delta.sharing.client.model.{Table => DeltaSharingTable} +import io.delta.sharing.client.util.{ConfUtils, JsonUtils} +import io.delta.sharing.filters.{AndOp, BaseOp, OpConverter} +import org.apache.hadoop.fs.Path + +import org.apache.spark.delta.sharing.CachedTableManager +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.execution.datasources.{FileIndex, PartitionDirectory} +import org.apache.spark.sql.types.StructType + +private[sharing] case class DeltaSharingFileIndexParams( + path: Path, + spark: SparkSession, + deltaSharingTableMetadata: DeltaSharingUtils.DeltaSharingTableMetadata, + options: DeltaSharingOptions) + +/** + * A file index for delta sharing batch queries, that wraps a delta sharing table and client, which + * is used to issue rpcs to delta sharing server to fetch pre-signed urls, then a local delta log is + * constructed, and a TahoeFileIndex can be built on top of it. + */ +case class DeltaSharingFileIndex( + params: DeltaSharingFileIndexParams, + table: DeltaSharingTable, + client: DeltaSharingClient, + limitHint: Option[Long]) + extends FileIndex + with SupportsRowIndexFilters + with DeltaFileFormat + with Logging { + private val queryCustomTablePath = client.getProfileProvider.getCustomTablePath( + params.path.toString + ) + + override def spark: SparkSession = params.spark + + override def refresh(): Unit = {} + + override def sizeInBytes: Long = + Option(params.deltaSharingTableMetadata.metadata.size).getOrElse { + // Throw error if metadata.size is not returned, to urge the server to respond a table size. + throw new IllegalStateException( + "size is null in the metadata returned from the delta " + + s"sharing server: ${params.deltaSharingTableMetadata.metadata}." + ) + } + + override def partitionSchema: StructType = + params.deltaSharingTableMetadata.metadata.partitionSchema + + // Returns the partition columns of the shared delta table based on the returned metadata. + def partitionColumns: Seq[String] = + params.deltaSharingTableMetadata.metadata.deltaMetadata.partitionColumns + + override def rootPaths: Seq[Path] = params.path :: Nil + + override def inputFiles: Array[String] = { + throw new UnsupportedOperationException("DeltaSharingFileIndex.inputFiles") + } + + // A map that from queriedTableQueryId that we've issued delta sharing rpc, to the deltaLog + // constructed with the response. + // It is because this function will be called twice or more in a spark query, with this set, we + // can avoid doing duplicated work of making expensive rpc and constructing the delta log. + private val queriedTableQueryIdToDeltaLog = scala.collection.mutable.Map[String, DeltaLog]() + + def fetchFilesAndConstructDeltaLog( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression], + overrideLimit: Option[Long]): DeltaLog = { + val jsonPredicateHints = convertToJsonPredicate(partitionFilters, dataFilters) + val queryParamsHashId = DeltaSharingUtils.getQueryParamsHashId( + params.options, + // Using .sql instead of toString because it doesn't include class pointer, which + // keeps the string the same for the same filters. 
+ partitionFilters.map(_.sql).mkString(";"), + dataFilters.map(_.sql).mkString(";"), + jsonPredicateHints.getOrElse(""), + params.deltaSharingTableMetadata.version + ) + val tablePathWithHashIdSuffix = DeltaSharingUtils.getTablePathWithIdSuffix( + queryCustomTablePath, + queryParamsHashId + ) + // listFiles will be called twice or more in a spark query, with this check we can avoid + // duplicated work of making expensive rpc and constructing the delta log. + queriedTableQueryIdToDeltaLog.get(tablePathWithHashIdSuffix) match { + case Some(deltaLog) => deltaLog + case None => + createDeltaLog( + jsonPredicateHints, + queryParamsHashId, + tablePathWithHashIdSuffix, + overrideLimit + ) + } + } + + private def createDeltaLog( + jsonPredicateHints: Option[String], + queryParamsHashId: String, + tablePathWithHashIdSuffix: String, + overrideLimit: Option[Long]): DeltaLog = { + // 1. Call client.getFiles. + val startTime = System.currentTimeMillis() + val deltaTableFiles = client.getFiles( + table = table, + predicates = Nil, + limit = overrideLimit.orElse(limitHint), + versionAsOf = params.options.versionAsOf, + timestampAsOf = params.options.timestampAsOf, + jsonPredicateHints = jsonPredicateHints, + refreshToken = None + ) + logInfo( + s"Fetched ${deltaTableFiles.lines.size} lines for table $table with version " + + s"${deltaTableFiles.version} from delta sharing server, took " + + s"${(System.currentTimeMillis() - startTime) / 1000.0}s." + ) + + // 2. Prepare a DeltaLog. + val deltaLogMetadata = + DeltaSharingLogFileSystem.constructLocalDeltaLogAtVersionZero( + deltaTableFiles.lines, + tablePathWithHashIdSuffix + ) + + // 3. Register parquet file id to url mapping + CachedTableManager.INSTANCE.register( + // Using params.path instead of queryCustomTablePath because it will be customized + // within CachedTableManager. + tablePath = DeltaSharingUtils.getTablePathWithIdSuffix( + params.path.toString, + queryParamsHashId + ), + idToUrl = deltaLogMetadata.idToUrl, + refs = Seq(new WeakReference(this)), + profileProvider = client.getProfileProvider, + refresher = DeltaSharingUtils.getRefresherForGetFiles( + client = client, + table = table, + predicates = Nil, + limit = overrideLimit.orElse(limitHint), + versionAsOf = params.options.versionAsOf, + timestampAsOf = params.options.timestampAsOf, + jsonPredicateHints = jsonPredicateHints, + refreshToken = deltaTableFiles.refreshToken + ), + expirationTimestamp = + if (CachedTableManager.INSTANCE + .isValidUrlExpirationTime(deltaLogMetadata.minUrlExpirationTimestamp)) { + deltaLogMetadata.minUrlExpirationTimestamp.get + } else { + System.currentTimeMillis() + CachedTableManager.INSTANCE.preSignedUrlExpirationMs + }, + refreshToken = deltaTableFiles.refreshToken + ) + + // 4. Create a local file index and call listFiles of this class. + val deltaLog = DeltaLog.forTable( + params.spark, + DeltaSharingLogFileSystem.encode(tablePathWithHashIdSuffix) + ) + + // In theory there should only be one entry in this set since each query creates its own + // FileIndex class. This is purged together with the FileIndex class when the query + // finishes. 
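+      // Illustrative note (not part of the control flow): the cache key already encodes the
+      // query parameters. Assuming the helpers defined later in this PR, the key is roughly
+      //   s"${queryCustomTablePath}_<sha256 of versionAsOf/timestampAsOf/filters/jsonPredicateHints/version>"
+      // (see DeltaSharingUtils.getQueryParamsHashId and getTablePathWithIdSuffix), so the same
+      // shared table scanned twice with different filters resolves to two distinct delta logs.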
+ queriedTableQueryIdToDeltaLog.put(tablePathWithHashIdSuffix, deltaLog) + + deltaLog + } + + def asTahoeFileIndex( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): TahoeLogFileIndex = { + val deltaLog = fetchFilesAndConstructDeltaLog(partitionFilters, dataFilters, None) + new TahoeLogFileIndex( + params.spark, + deltaLog, + deltaLog.dataPath, + deltaLog.unsafeVolatileSnapshot + ) + } + + override def listFiles( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): Seq[PartitionDirectory] = { + // NOTE: The server is not required to apply all filters, so we apply them client-side as well. + asTahoeFileIndex(partitionFilters, dataFilters).listFiles(partitionFilters, dataFilters) + } + + // Converts the specified SQL expressions to a json predicate. + // + // If jsonPredicatesV2 are enabled, converts both partition and data filters + // and combines them using an AND. + // + // If the conversion fails, returns a None, which will imply that we will + // not perform json predicate based filtering. + private def convertToJsonPredicate( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): Option[String] = { + if (!ConfUtils.jsonPredicatesEnabled(params.spark.sessionState.conf)) { + return None + } + + // Convert the partition filters. + val partitionOp = try { + OpConverter.convert(partitionFilters) + } catch { + case e: Exception => + log.error("Error while converting partition filters: " + e) + None + } + + // If V2 predicates are enabled, also convert the data filters. + val dataOp = try { + if (ConfUtils.jsonPredicatesV2Enabled(params.spark.sessionState.conf)) { + log.info("Converting data filters") + OpConverter.convert(dataFilters) + } else { + None + } + } catch { + case e: Exception => + log.error("Error while converting data filters: " + e) + None + } + + // Combine partition and data filters using an AND operation. + val combinedOp = if (partitionOp.isDefined && dataOp.isDefined) { + Some(AndOp(Seq(partitionOp.get, dataOp.get))) + } else if (partitionOp.isDefined) { + partitionOp + } else { + dataOp + } + log.info("Using combined predicate: " + combinedOp) + + if (combinedOp.isDefined) { + Some(JsonUtils.toJson[BaseOp](combinedOp.get)) + } else { + None + } + } +} diff --git a/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingLogFileSystem.scala b/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingLogFileSystem.scala new file mode 100644 index 00000000000..9010e9487d2 --- /dev/null +++ b/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingLogFileSystem.scala @@ -0,0 +1,892 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.sharing.spark + +import java.io.{ByteArrayInputStream, FileNotFoundException} +import java.net.{URI, URLDecoder, URLEncoder} + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.reflect.ClassTag + +import org.apache.spark.sql.delta.actions.{ + AddCDCFile, + AddFile, + DeletionVectorDescriptor, + RemoveFile, + SingleAction +} +import org.apache.spark.sql.delta.util.FileNames +import io.delta.sharing.client.util.JsonUtils +import io.delta.sharing.spark.DeltaSharingUtils.{ + DeltaSharingTableMetadata, + FAKE_CHECKPOINT_BYTE_ARRAY +} +import org.apache.hadoop.fs._ +import org.apache.hadoop.fs.permission.FsPermission +import org.apache.hadoop.util.Progressable + +import org.apache.spark.SparkEnv +import org.apache.spark.internal.Logging +import org.apache.spark.storage.{BlockId} + +/** Read-only file system for delta sharing log. + * This is a faked file system to serve data under path delta-sharing-log:/. The delta log will be + * prepared by DeltaSharingDataSource and its related classes, put in blockManager, and then serve + * to DeltaLog with a path pointing to this file system. + * In executor, when it tries to read data from the delta log, this file system class will return + * the data fetched from the block manager. + */ +private[sharing] class DeltaSharingLogFileSystem extends FileSystem with Logging { + import DeltaSharingLogFileSystem._ + + override def getScheme: String = SCHEME + + override def getUri(): URI = URI.create(s"$SCHEME:///") + + override def open(f: Path, bufferSize: Int): FSDataInputStream = { + if (FileNames.isCheckpointFile(f)) { + new FSDataInputStream( + new SeekableByteArrayInputStream(DeltaSharingUtils.FAKE_CHECKPOINT_BYTE_ARRAY) + ) + } else if (FileNames.isDeltaFile(f)) { + val iterator = + SparkEnv.get.blockManager.get[String](getDeltaSharingLogBlockId(f.toString)) match { + case Some(block) => block.data.asInstanceOf[Iterator[String]] + case _ => throw new FileNotFoundException(s"Cannot find block for delta log file: $f.") + } + // Explicitly call hasNext to allow the reader lock on the block to be released. + val arrayBuilder = Array.newBuilder[Byte] + while (iterator.hasNext) { + val actionJsonStr = iterator.next() + arrayBuilder ++= actionJsonStr.getBytes() + } + // We still have to load the full content of a delta log file in memory to serve them. + // This still exposes the risk of OOM. + new FSDataInputStream(new SeekableByteArrayInputStream(arrayBuilder.result())) + } else { + val content = getBlockAndReleaseLockHelper[String](f, None) + new FSDataInputStream(new SeekableByteArrayInputStream(content.getBytes())) + } + } + + override def exists(f: Path): Boolean = { + // The reason of using the variable exists is to allow us to explicitly release the reader lock + // on the blockId. + val blockId = getDeltaSharingLogBlockId(f.toString) + val exists = SparkEnv.get.blockManager.get(blockId).isDefined + if (exists) { + releaseLockHelper(blockId) + } + exists + } + + // Delta sharing log file system serves checkpoint file with a CONSTANT value so we construct the + // FileStatus when the function is being called. + // For other files, they will be constructed and put into block manager when constructing the + // delta log based on the rpc response from the server. 
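+  // For illustration (the path below is hypothetical): a status request for
+  //   delta-sharing-log:/<encoded-table-path>/_delta_log/00000000000000000000.json
+  // is answered from the block manager entry whose id is
+  //   test_delta-sharing-log:/<encoded-table-path>/_delta_log/00000000000000000000.json_status
+  // (the "test_" prefix and "_status" suffix come from getDeltaSharingLogBlockId and
+  // getBlockAndReleaseLockHelper below), while checkpoint files get a constant FileStatus.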
+ override def getFileStatus(f: Path): FileStatus = { + val status = if (FileNames.isCheckpointFile(f)) { + DeltaSharingLogFileStatus( + path = f.toString, + size = FAKE_CHECKPOINT_BYTE_ARRAY.size, + modificationTime = 0L + ) + } else { + getBlockAndReleaseLockHelper[DeltaSharingLogFileStatus](f, Some("_status")) + } + + new FileStatus( + /* length */ status.size, + /* isdir */ false, + /* block_replication */ 0, + /* blocksize */ 1, + /* modification_time */ status.modificationTime, + /* path */ new Path(status.path) + ) + } + + /** + * @param f: a Path pointing to a delta log directory of a delta sharing table, example: + * delta-sharing-log:/customized-delta-sharing-table/_delta_log + * The iterator contains a list of tuple(json_file_path, json_file_size) which are + * pre-prepared and set in the block manager by DeltaSharingDataSource and its related + * classes. + * @return the list of json files under the /_delta_log directory, if prepared. + */ + override def listStatus(f: Path): Array[FileStatus] = { + val iterator = + SparkEnv.get.blockManager + .get[DeltaSharingLogFileStatus](getDeltaSharingLogBlockId(f.toString)) match { + case Some(block) => block.data.asInstanceOf[Iterator[DeltaSharingLogFileStatus]] + case _ => throw new FileNotFoundException(s"Failed to list files for path: $f.") + } + + // Explicitly call hasNext to allow the reader lock on the block to be released. + val arrayBuilder = Array.newBuilder[FileStatus] + while (iterator.hasNext) { + val fileStatus = iterator.next() + arrayBuilder += new FileStatus( + /* length */ fileStatus.size, + /* isdir */ false, + /* block_replication */ 0, + /* blocksize */ 1, + /* modification_time */ fileStatus.modificationTime, + /* path */ new Path(fileStatus.path) + ) + } + arrayBuilder.result() + } + + override def create( + f: Path, + permission: FsPermission, + overwrite: Boolean, + bufferSize: Int, + replication: Short, + blockSize: Long, + progress: Progressable): FSDataOutputStream = { + throw new UnsupportedOperationException(s"create: $f") + } + + override def append(f: Path, bufferSize: Int, progress: Progressable): FSDataOutputStream = { + throw new UnsupportedOperationException(s"append: $f") + } + + override def rename(src: Path, dst: Path): Boolean = { + throw new UnsupportedOperationException(s"rename: src:$src, dst:$dst") + } + + override def delete(f: Path, recursive: Boolean): Boolean = { + throw new UnsupportedOperationException(s"delete: $f") + } + override def listStatusIterator(f: Path): RemoteIterator[FileStatus] = { + throw new UnsupportedOperationException(s"listStatusIterator: $f") + } + + override def setWorkingDirectory(newDir: Path): Unit = + throw new UnsupportedOperationException(s"setWorkingDirectory: $newDir") + + override def getWorkingDirectory: Path = new Path(getUri) + + override def mkdirs(f: Path, permission: FsPermission): Boolean = { + throw new UnsupportedOperationException(s"mkdirs: $f") + } + + override def close(): Unit = { + super.close() + } + + private def getBlockAndReleaseLockHelper[T: ClassTag](f: Path, suffix: Option[String]): T = { + val blockId = getDeltaSharingLogBlockId(suffix.foldLeft(f.toString)(_ + _)) + val result = SparkEnv.get.blockManager.getSingle[T](blockId).getOrElse { + throw new FileNotFoundException(f.toString) + } + releaseLockHelper(blockId) + + result + } + + private def releaseLockHelper(blockId: BlockId): Unit = { + try { + SparkEnv.get.blockManager.releaseLock(blockId) + } catch { + // releaseLock may fail when the lock is not hold by this thread, we are not 
exactly sure + // when it fails or not, but no need to fail the entire delta sharing query. + case e: Throwable => logWarning(s"Error while releasing lock for blockId:$blockId: $e.") + } + } +} + +/** + * A case class including the metadata for the constructed delta log based on the delta sharing + * rpc response. + * @param idToUrl stores the id to url mapping, used to register to CachedTableManager + * @param minUrlExpirationTimestamp used to indicate when to refresh urls in CachedTableManager + * @param numFileActionsInMinVersionOpt This is needed because DeltaSource is not advancing the + * offset to the next version automatically when scanning + * through a snapshot, so DeltaSharingSource needs to count the + * number of files in the min version and advance the offset to + * the next version when the offset is at the last index of the + * version. + * @param minVersion minVersion of all the files returned from server + * @param maxVersion maxVersion of all the files returned from server + */ +case class ConstructedDeltaLogMetadata( + idToUrl: Map[String, String], + minUrlExpirationTimestamp: Option[Long], + numFileActionsInMinVersionOpt: Option[Int], + minVersion: Long, + maxVersion: Long) + +private[sharing] object DeltaSharingLogFileSystem extends Logging { + + val SCHEME = "delta-sharing-log" + + // The constant added as prefix to all delta sharing block ids. + private val BLOCK_ID_TEST_PREFIX = "test_" + + // It starts with test_ to match the prefix of TestBlockId. + // In the meantime, we'll investigate in an option to add a general purposed BlockId subclass + // and use it in delta sharing. + val DELTA_SHARING_LOG_BLOCK_ID_PREFIX = "test_delta-sharing-log:" + + def getDeltaSharingLogBlockId(path: String): BlockId = { + BlockId(BLOCK_ID_TEST_PREFIX + path) + } + + /** + * Encode `tablePath` to a `Path` in the following format: + * + * ``` + * delta-sharing-log:/// + * ``` + * + * This format can be decoded by `DeltaSharingLogFileSystem.decode`. + * It will be used to: + * 1) construct a DeltaLog class which points to a delta sharing table. + * 2) construct a block id to look for commit files of the delta sharing table. + */ + def encode(tablePath: String): Path = { + val encodedTablePath = URLEncoder.encode(tablePath, "UTF-8") + new Path(s"$SCHEME:///$encodedTablePath") + } + + def decode(path: Path): String = { + val encodedTablePath = path.toString + .stripPrefix(s"$SCHEME:///") + .stripPrefix(s"$SCHEME:/") + URLDecoder.decode(encodedTablePath, "UTF-8") + } + + // Convert a deletion vector path to a delta sharing path. + // Only paths needs to be converted since it's pre-signed url. Inline DV should be handled + // in place. And UUID should throw error since it should be converted to pre-signed url when + // returned from the server. + private def getDeltaSharingDeletionVectorDescriptor( + fileAction: model.DeltaSharingFileAction, + customTablePath: String): DeletionVectorDescriptor = { + if (fileAction.getDeletionVectorOpt.isEmpty) { + null + } else { + val deletionVector = fileAction.getDeletionVectorOpt.get + deletionVector.storageType match { + case DeletionVectorDescriptor.PATH_DV_MARKER => + deletionVector.copy( + pathOrInlineDv = fileAction.getDeletionVectorDeltaSharingPath(customTablePath) + ) + case DeletionVectorDescriptor.INLINE_DV_MARKER => deletionVector + case storageType => + throw new IllegalStateException( + s"Unexpected DV storage type:" + + s"$storageType in the delta sharing response for ${fileAction.json}." 
+ ) + } + } + } + + // Only absolute path (which is pre-signed url) need to be put in IdToUrl mapping. + // inline DV should be processed in place, and UUID should throw error. + private def requiresIdToUrlForDV(deletionVectorOpt: Option[DeletionVectorDescriptor]): Boolean = { + deletionVectorOpt.isDefined && + deletionVectorOpt.get.storageType == DeletionVectorDescriptor.PATH_DV_MARKER + } + + /** + * Convert DeltaSharingFileAction with delta sharing file path and serialize as json to store in + * the delta log. + * + * @param fileAction The DeltaSharingFileAction to convert. + * @param customTablePath The table path used to construct action.path field. + * @return json serialization of delta action. + */ + private def getActionWithDeltaSharingPath( + fileAction: model.DeltaSharingFileAction, + customTablePath: String): String = { + val deltaSharingPath = fileAction.getDeltaSharingPath(customTablePath) + val newSingleAction = fileAction.deltaSingleAction.unwrap match { + case add: AddFile => + add.copy( + path = deltaSharingPath, + deletionVector = getDeltaSharingDeletionVectorDescriptor(fileAction, customTablePath) + ) + case cdc: AddCDCFile => + assert( + cdc.deletionVector == null, + "deletionVector not null in the AddCDCFile from delta" + + s" sharing response: ${cdc.json}" + ) + cdc.copy(path = deltaSharingPath) + case remove: RemoveFile => + remove.copy( + path = deltaSharingPath, + deletionVector = getDeltaSharingDeletionVectorDescriptor(fileAction, customTablePath) + ) + case action => + throw new IllegalStateException( + s"unexpected action in delta sharing " + + s"response: ${action.json}" + ) + } + newSingleAction.json + } + + // Sort by id to keep a stable order of the files within a version in the delta log. + private def deltaSharingFileActionIncreaseOrderFunc( + f1: model.DeltaSharingFileAction, + f2: model.DeltaSharingFileAction): Boolean = { + f1.id < f2.id + } + + /** + * Cleanup the delta log upon explicit stop of a query on a delta sharing table. + * + * @param deltaLogPath deltaLogPath is constructed per query with credential scope id as prefix + * and a uuid as suffix, which is very unique to the query and won't interfere + * with other queries. + */ + def tryToCleanUpDeltaLog(deltaLogPath: String): Unit = { + def shouldCleanUp(blockId: BlockId): Boolean = { + if (!blockId.name.startsWith(DELTA_SHARING_LOG_BLOCK_ID_PREFIX)) { + return false + } + val blockName = blockId.name + // deltaLogPath is constructed per query with credential scope id as prefix and a uuid as + // suffix, which is very unique to the query and won't interfere with other queries. + blockName.startsWith(BLOCK_ID_TEST_PREFIX + deltaLogPath) + } + + val blockManager = SparkEnv.get.blockManager + val matchingBlockIds = blockManager.getMatchingBlockIds(shouldCleanUp(_)) + logInfo( + s"Trying to clean up ${matchingBlockIds.size} blocks for $deltaLogPath." + ) + + val problematicBlockIds = Seq.newBuilder[BlockId] + matchingBlockIds.foreach { b => + try { + blockManager.removeBlock(b) + } catch { + case _: Throwable => problematicBlockIds += b + } + } + + val problematicBlockIdsSeq = problematicBlockIds.result().toSeq + if (problematicBlockIdsSeq.size > 0) { + logWarning( + s"Done cleaning up ${matchingBlockIds.size} blocks for $deltaLogPath, but " + + s"failed to remove: ${problematicBlockIdsSeq}." + ) + } else { + logInfo( + s"Done cleaning up ${matchingBlockIds.size} blocks for $deltaLogPath." + ) + } + } + + /** + * @param deltaLogPath The delta log directory to clean up. 
It is constructed per query with + * credential scope id as prefix and a uuid as suffix, which is very unique + * to the query and won't interfere with other queries. + * @param maxVersion maxVersion of any checkpoint or delta file that needs clean up, inclusive. + */ + def tryToCleanUpPreviousBlocks(deltaLogPath: String, maxVersion: Long): Unit = { + if (maxVersion < 0) { + logInfo( + s"Skipping clean up previous blocks for $deltaLogPath because maxVersion(" + + s"$maxVersion) < 0." + ) + return + } + + def shouldCleanUp(blockId: BlockId): Boolean = { + if (!blockId.name.startsWith(DELTA_SHARING_LOG_BLOCK_ID_PREFIX)) { + return false + } + val blockName = blockId.name + blockName.startsWith(BLOCK_ID_TEST_PREFIX + deltaLogPath) && FileNames + .getFileVersionOpt(new Path(blockName.stripPrefix(BLOCK_ID_TEST_PREFIX))) + .exists(_ <= maxVersion) + } + + val blockManager = SparkEnv.get.blockManager + val matchingBlockIds = blockManager.getMatchingBlockIds(shouldCleanUp(_)) + logInfo( + s"Trying to clean up ${matchingBlockIds.size} previous blocks for $deltaLogPath " + + s"before version: $maxVersion." + ) + + val problematicBlockIds = Seq.newBuilder[BlockId] + matchingBlockIds.foreach { b => + try { + blockManager.removeBlock(b) + } catch { + case _: Throwable => problematicBlockIds += b + } + } + + val problematicBlockIdsSeq = problematicBlockIds.result().toSeq + if (problematicBlockIdsSeq.size > 0) { + logWarning( + s"Done cleaning up ${matchingBlockIds.size} previous blocks for $deltaLogPath " + + s"before version: $maxVersion, but failed to remove: ${problematicBlockIdsSeq}." + ) + } else { + logInfo( + s"Done cleaning up ${matchingBlockIds.size} previous blocks for $deltaLogPath " + + s"before version: $maxVersion." + ) + } + } + + /** + * Construct local delta log based on delta log actions returned from delta sharing server. + * + * @param lines a list of delta actions, to be processed and put in the local delta log, + * each action contains a version field to indicate the version of log to + * put it in. + * @param customTablePath query customized table path, used to construct action.path field for + * DeltaSharingFileSystem + * @param startingVersionOpt If set, used to construct the delta file (.json log file) from the + * given startingVersion. This is needed by DeltaSharingSource to + * construct the delta log for the rpc no matter if there are files in + * that version or not, so DeltaSource can read delta actions from the + * starting version (instead from checkpoint). + * @param endingVersionOpt If set, used to construct the delta file (.json log file) until the + * given endingVersion. This is needed by DeltaSharingSource to construct + * the delta log for the rpc no matter if there are files in that version + * or not. + * NOTE: DeltaSource will not advance the offset if there are no files in + * a version of the delta log, but we still create the delta log file for + * that version to avoid missing delta log (json) files. + * @return ConstructedDeltaLogMetadata, which contains 3 fields: + * - idToUrl: mapping from file id to pre-signed url + * - minUrlExpirationTimestamp timestamp indicating the when to refresh pre-signed urls. + * Both are used to register to CachedTableManager. + * - maxVersion: the max version returned in the http response, used by + * DeltaSharingSource to quickly understand the progress of rpcs from the server. 
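+   *
+   * A usage sketch, for illustration only (variable names are hypothetical):
+   * {{{
+   *   val logMetadata = DeltaSharingLogFileSystem.constructLocalDeltaLogAcrossVersions(
+   *     lines = deltaTableFiles.lines,   // delta actions returned by the delta sharing server
+   *     customTablePath = tablePathWithHashIdSuffix,
+   *     startingVersionOpt = Some(2L),
+   *     endingVersionOpt = Some(4L)
+   *   )
+   *   // logMetadata.idToUrl and logMetadata.minUrlExpirationTimestamp are registered to
+   *   // CachedTableManager; logMetadata.maxVersion tells the caller how far the response reached.
+   * }}}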
+ */ + def constructLocalDeltaLogAcrossVersions( + lines: Seq[String], + customTablePath: String, + startingVersionOpt: Option[Long], + endingVersionOpt: Option[Long]): ConstructedDeltaLogMetadata = { + val startTime = System.currentTimeMillis() + assert( + startingVersionOpt.isDefined == endingVersionOpt.isDefined, + s"startingVersionOpt($startingVersionOpt) and endingVersionOpt($endingVersionOpt) should be" + + " both defined or not." + ) + if (startingVersionOpt.isDefined) { + assert( + startingVersionOpt.get <= endingVersionOpt.get, + s"startingVersionOpt($startingVersionOpt) must be smaller than " + + s"endingVersionOpt($endingVersionOpt)." + ) + } + var minVersion = Long.MaxValue + var maxVersion = 0L + var minUrlExpirationTimestamp: Option[Long] = None + val idToUrl = scala.collection.mutable.Map[String, String]() + val versionToDeltaSharingFileActions = + scala.collection.mutable.Map[Long, ArrayBuffer[model.DeltaSharingFileAction]]() + val versionToMetadata = scala.collection.mutable.Map[Long, model.DeltaSharingMetadata]() + val versionToJsonLogBuilderMap = scala.collection.mutable.Map[Long, ArrayBuffer[String]]() + val versionToJsonLogSize = scala.collection.mutable.Map[Long, Long]().withDefaultValue(0L) + var numFileActionsInMinVersion = 0 + val versionToTimestampMap = scala.collection.mutable.Map[Long, Long]() + var startingMetadataLineOpt: Option[String] = None + var startingProtocolLineOpt: Option[String] = None + + lines.foreach { line => + val action = JsonUtils.fromJson[model.DeltaSharingSingleAction](line).unwrap + action match { + case fileAction: model.DeltaSharingFileAction => + minVersion = minVersion.min(fileAction.version) + maxVersion = maxVersion.max(fileAction.version) + // Store file actions in an array to sort them based on id later. + versionToDeltaSharingFileActions.getOrElseUpdate( + fileAction.version, + ArrayBuffer[model.DeltaSharingFileAction]() + ) += fileAction + case metadata: model.DeltaSharingMetadata => + if (metadata.version != null) { + // This is to handle the cdf and streaming query result. + minVersion = minVersion.min(metadata.version) + maxVersion = maxVersion.max(metadata.version) + versionToMetadata(metadata.version) = metadata + if (metadata.version == minVersion) { + startingMetadataLineOpt = Some(metadata.deltaMetadata.json + "\n") + } + } else { + // This is to handle the snapshot query result from DeltaSharingSource. + startingMetadataLineOpt = Some(metadata.deltaMetadata.json + "\n") + } + case protocol: model.DeltaSharingProtocol => + startingProtocolLineOpt = Some(protocol.deltaProtocol.json + "\n") + case _ => // do nothing, ignore the line. + } + } + + if (startingVersionOpt.isDefined) { + minVersion = minVersion.min(startingVersionOpt.get) + } else if (minVersion == Long.MaxValue) { + // This means there are no files returned from server for this cdf request. + // A 0.json file will be prepared with metadata and protocol only. + minVersion = 0 + } + if (endingVersionOpt.isDefined) { + maxVersion = maxVersion.max(endingVersionOpt.get) + } + // Store the starting protocol and metadata in the minVersion.json. 
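+    // For illustration, the head of the <minVersion>.json log then looks roughly like this,
+    // with one JSON-serialized delta action per line (contents abbreviated):
+    //   {"metaData":{...}}
+    //   {"protocol":{...}}
+    //   {"add":{...}}   <- file actions are appended further below, sorted by id within a version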
+ val protocolAndMetadataStr = startingMetadataLineOpt.getOrElse("") + startingProtocolLineOpt + .getOrElse("") + versionToJsonLogBuilderMap.getOrElseUpdate( + minVersion, + ArrayBuffer[String]() + ) += protocolAndMetadataStr + versionToJsonLogSize(minVersion) += protocolAndMetadataStr.length + numFileActionsInMinVersion = versionToDeltaSharingFileActions + .getOrElseUpdate(minVersion, ArrayBuffer[model.DeltaSharingFileAction]()) + .size + + // Write metadata to the delta log json file. + versionToMetadata.foreach { + case (version, metadata) => + if (version != minVersion) { + val metadataStr = metadata.deltaMetadata.json + "\n" + versionToJsonLogBuilderMap.getOrElseUpdate( + version, + ArrayBuffer[String]() + ) += metadataStr + versionToJsonLogSize(version) += metadataStr.length + } + } + // Write file actions to the delta log json file. + var previousIdOpt: Option[String] = None + versionToDeltaSharingFileActions.foreach { + case (version, actions) => + previousIdOpt = None + actions.toSeq.sortWith(deltaSharingFileActionIncreaseOrderFunc).foreach { fileAction => + assert( + // Using > instead of >= because there can be a removeFile and addFile pointing to the + // same parquet file which result in the same file id, since id is a hash of file path. + // This is ok because eventually it can read data out of the correct parquet file. + !previousIdOpt.exists(_ > fileAction.id), + s"fileActions must be in increasing order by id: ${previousIdOpt} is not smaller than" + + s" ${fileAction.id}, in version:$version." + ) + previousIdOpt = Some(fileAction.id) + + // 1. build it to url mapping + idToUrl(fileAction.id) = fileAction.path + if (requiresIdToUrlForDV(fileAction.getDeletionVectorOpt)) { + idToUrl(fileAction.deletionVectorFileId) = + fileAction.getDeletionVectorOpt.get.pathOrInlineDv + } + + // 2. prepare json log content. + versionToTimestampMap.getOrElseUpdate(version, fileAction.timestamp) + val actionJsonStr = getActionWithDeltaSharingPath(fileAction, customTablePath) + "\n" + versionToJsonLogBuilderMap.getOrElseUpdate( + version, + ArrayBuffer[String]() + ) += actionJsonStr + versionToJsonLogSize(version) += actionJsonStr.length + + // 3. process expiration timestamp + if (fileAction.expirationTimestamp != null) { + minUrlExpirationTimestamp = minUrlExpirationTimestamp + .filter(_ < fileAction.expirationTimestamp) + .orElse(Some(fileAction.expirationTimestamp)) + } + } + } + + val encodedTablePath = DeltaSharingLogFileSystem.encode(customTablePath) + val deltaLogPath = s"${encodedTablePath.toString}/_delta_log" + val fileSizeTsSeq = Seq.newBuilder[DeltaSharingLogFileStatus] + + if (minVersion > 0) { + // If the minVersion is not 0 in the response, then prepare checkpoint at minVersion - 1: + // need to prepare two files: 1) (minVersion-1).checkpoint.parquet 2) _last_checkpoint + val checkpointVersion = minVersion - 1 + + // 1) store the checkpoint byte array in BlockManager for future read. 
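+      // Note: only the checkpoint file status is registered here; the checkpoint content itself
+      // is served by DeltaSharingLogFileSystem.open, which returns the constant
+      // FAKE_CHECKPOINT_BYTE_ARRAY for any checkpoint path.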
+ val checkpointParquetFileName = + FileNames.checkpointFileSingular(new Path(deltaLogPath), checkpointVersion).toString + fileSizeTsSeq += DeltaSharingLogFileStatus( + path = checkpointParquetFileName, + size = FAKE_CHECKPOINT_BYTE_ARRAY.size, + modificationTime = 0L + ) + + // 2) Prepare the content for _last_checkpoint + val lastCheckpointContent = + s"""{"version":${checkpointVersion},"size":${FAKE_CHECKPOINT_BYTE_ARRAY.size}}""" + val lastCheckpointPath = new Path(deltaLogPath, "_last_checkpoint").toString + fileSizeTsSeq += DeltaSharingLogFileStatus( + path = lastCheckpointPath, + size = lastCheckpointContent.length, + modificationTime = 0L + ) + DeltaSharingUtils.overrideSingleBlock[String]( + blockId = getDeltaSharingLogBlockId(lastCheckpointPath), + value = lastCheckpointContent + ) + } + + for (version <- minVersion to maxVersion) { + val jsonFilePath = FileNames.deltaFile(new Path(deltaLogPath), version).toString + DeltaSharingUtils.overrideIteratorBlock[String]( + getDeltaSharingLogBlockId(jsonFilePath), + versionToJsonLogBuilderMap.getOrElse(version, Seq.empty).toIterator + ) + fileSizeTsSeq += DeltaSharingLogFileStatus( + path = jsonFilePath, + size = versionToJsonLogSize.getOrElse(version, 0), + modificationTime = versionToTimestampMap.get(version).getOrElse(0L) + ) + } + + DeltaSharingUtils.overrideIteratorBlock[DeltaSharingLogFileStatus]( + getDeltaSharingLogBlockId(deltaLogPath), + fileSizeTsSeq.result().toIterator + ) + logInfo( + s"It takes ${(System.currentTimeMillis() - startTime) / 1000.0}s to construct delta log" + + s"for $customTablePath from $minVersion to $maxVersion, with ${idToUrl.toMap.size} urls." + ) + ConstructedDeltaLogMetadata( + idToUrl = idToUrl.toMap, + minUrlExpirationTimestamp = minUrlExpirationTimestamp, + numFileActionsInMinVersionOpt = Some(numFileActionsInMinVersion), + minVersion = minVersion, + maxVersion = maxVersion + ) + } + + /** Set the modificationTime to zero, this is to align with the time returned from + * DeltaSharingFileSystem.getFileStatus + */ + private def setModificationTimestampToZero(deltaSingleAction: SingleAction): SingleAction = { + deltaSingleAction.unwrap match { + case a: AddFile => a.copy(modificationTime = 0).wrap + case _ => deltaSingleAction + } + } + + /** + * Construct local delta log at version zero based on lines returned from delta sharing server, + * to support latest snapshot or time travel queries. Storing both protocol/metadata and + * the actual data actions in version 0 will simplify both the log construction and log reply. + * + * @param lines a list of delta actions, to be processed and put in the local delta log, + * each action contains a version field to indicate the version of log to + * put it in. + * @param customTablePath query customized table path, used to construct action.path field for + * DeltaSharingFileSystem + * @return ConstructedDeltaLogMetadata, which contains 3 fields: + * - idToUrl: mapping from file id to pre-signed url + * - minUrlExpirationTimestamp timestamp indicating the when to refresh pre-signed urls. + * Both are used to register to CachedTableManager. + * - maxVersion: to be 0. 
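+   *
+   * Illustrative usage (variable names are hypothetical), mirroring how DeltaSharingFileIndex
+   * prepares a snapshot query:
+   * {{{
+   *   val logMetadata = DeltaSharingLogFileSystem.constructLocalDeltaLogAtVersionZero(
+   *     lines = deltaTableFiles.lines,
+   *     customTablePath = tablePathWithHashIdSuffix
+   *   )
+   *   // For snapshot queries both logMetadata.minVersion and logMetadata.maxVersion are 0.
+   * }}}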
+ */ + def constructLocalDeltaLogAtVersionZero( + lines: Seq[String], + customTablePath: String): ConstructedDeltaLogMetadata = { + val startTime = System.currentTimeMillis() + val jsonLogSeq = Seq.newBuilder[String] + var jsonLogSize = 0 + var minUrlExpirationTimestamp: Option[Long] = None + val fileActionsSeq = ArrayBuffer[model.DeltaSharingFileAction]() + val idToUrl = scala.collection.mutable.Map[String, String]() + lines.foreach { line => + val action = JsonUtils.fromJson[model.DeltaSharingSingleAction](line).unwrap + action match { + case fileAction: model.DeltaSharingFileAction => + // Store file actions in an array to sort them based on id later. + fileActionsSeq += fileAction.copy( + deltaSingleAction = setModificationTimestampToZero(fileAction.deltaSingleAction) + ) + case protocol: model.DeltaSharingProtocol => + val protocolJsonStr = protocol.deltaProtocol.json + "\n" + jsonLogSize += protocolJsonStr.length + jsonLogSeq += protocolJsonStr + case metadata: model.DeltaSharingMetadata => + val metadataJsonStr = metadata.deltaMetadata.json + "\n" + jsonLogSize += metadataJsonStr.length + jsonLogSeq += metadataJsonStr + case _ => + throw new IllegalStateException( + s"unknown action in the delta sharing " + + s"response: $line" + ) + } + } + var previousIdOpt: Option[String] = None + fileActionsSeq.toSeq.sortWith(deltaSharingFileActionIncreaseOrderFunc).foreach { fileAction => + assert( + // Using > instead of >= because there can be a removeFile and addFile pointing to the same + // parquet file which result in the same file id, since id is a hash of file path. + // This is ok because eventually it can read data out of the correct parquet file. + !previousIdOpt.exists(_ > fileAction.id), + s"fileActions must be in increasing order by id: ${previousIdOpt} is not smaller than" + + s" ${fileAction.id}." + ) + previousIdOpt = Some(fileAction.id) + + // 1. build id to url mapping + idToUrl(fileAction.id) = fileAction.path + if (requiresIdToUrlForDV(fileAction.getDeletionVectorOpt)) { + idToUrl(fileAction.deletionVectorFileId) = + fileAction.getDeletionVectorOpt.get.pathOrInlineDv + } + + // 2. prepare json log content. + val actionJsonStr = getActionWithDeltaSharingPath(fileAction, customTablePath) + "\n" + jsonLogSize += actionJsonStr.length + jsonLogSeq += actionJsonStr + + // 3. process expiration timestamp + if (fileAction.expirationTimestamp != null) { + minUrlExpirationTimestamp = + if (minUrlExpirationTimestamp.isDefined && + minUrlExpirationTimestamp.get < fileAction.expirationTimestamp) { + minUrlExpirationTimestamp + } else { + Some(fileAction.expirationTimestamp) + } + } + } + + val encodedTablePath = DeltaSharingLogFileSystem.encode(customTablePath) + + // Always use 0.json for snapshot queries. + val deltaLogPath = s"${encodedTablePath.toString}/_delta_log" + val jsonFilePath = FileNames.deltaFile(new Path(deltaLogPath), 0).toString + DeltaSharingUtils.overrideIteratorBlock[String]( + getDeltaSharingLogBlockId(jsonFilePath), + jsonLogSeq.result().toIterator + ) + + val fileStatusSeq = Seq( + DeltaSharingLogFileStatus(path = jsonFilePath, size = jsonLogSize, modificationTime = 0L) + ) + DeltaSharingUtils.overrideIteratorBlock[DeltaSharingLogFileStatus]( + getDeltaSharingLogBlockId(deltaLogPath), + fileStatusSeq.toIterator + ) + logInfo( + s"It takes ${(System.currentTimeMillis() - startTime) / 1000.0}s to construct delta" + + s" log for $customTablePath with ${idToUrl.toMap.size} urls." 
+ ) + ConstructedDeltaLogMetadata( + idToUrl = idToUrl.toMap, + minUrlExpirationTimestamp = minUrlExpirationTimestamp, + numFileActionsInMinVersionOpt = None, + minVersion = 0, + maxVersion = 0 + ) + } + + // Create a delta log directory with protocol and metadata at version 0. + // Used by DeltaSharingSource to initialize a DeltaLog class, which is then used to initialize + // a DeltaSource class, also the metadata id will be used for schemaTrackingLocation. + // There are no data files in the delta log because the DeltaSource class is initialized before + // any rpcs to the delta sharing server, so no data files are available yet. + def constructDeltaLogWithMetadataAtVersionZero( + customTablePath: String, + deltaSharingTableMetadata: DeltaSharingTableMetadata): Unit = { + val encodedTablePath = DeltaSharingLogFileSystem.encode(customTablePath) + val deltaLogPath = s"${encodedTablePath.toString}/_delta_log" + + // Always use 0.json for snapshot queries. + val jsonLogStr = deltaSharingTableMetadata.protocol.deltaProtocol.json + "\n" + + deltaSharingTableMetadata.metadata.deltaMetadata.json + "\n" + + val jsonFilePath = FileNames.deltaFile(new Path(deltaLogPath), 0).toString + DeltaSharingUtils.overrideIteratorBlock[String]( + getDeltaSharingLogBlockId(jsonFilePath), + Seq(jsonLogStr).toIterator + ) + + val fileStatusSeq = Seq( + DeltaSharingLogFileStatus( + path = jsonFilePath, + size = jsonLogStr.length, + modificationTime = 0L + ) + ) + DeltaSharingUtils.overrideIteratorBlock[DeltaSharingLogFileStatus]( + getDeltaSharingLogBlockId(deltaLogPath), + fileStatusSeq.toIterator + ) + } +} + +/** + * A ByteArrayInputStream that implements interfaces required by FSDataInputStream, which is the + * return type of DeltaSharingLogFileSystem.open. It will convert the string content as array of + * bytes and allow caller to read data out of it. + * The string content are list of json serializations of delta actions in a json delta log file. 
+ */ +private[sharing] class SeekableByteArrayInputStream(bytes: Array[Byte]) + extends ByteArrayInputStream(bytes) + with Seekable + with PositionedReadable { + assert(available == bytes.length) + + override def seek(pos: Long): Unit = { + if (mark != 0) { + throw new IllegalStateException("Cannot seek if mark is set") + } + reset() + skip(pos) + } + + override def seekToNewSource(pos: Long): Boolean = { + false // there aren't multiple sources available + } + + override def getPos(): Long = { + bytes.length - available + } + + override def read(buffer: Array[Byte], offset: Int, length: Int): Int = { + super.read(buffer, offset, length) + } + + override def read(pos: Long, buffer: Array[Byte], offset: Int, length: Int): Int = { + if (pos >= bytes.length) { + return -1 + } + val readSize = math.min(length, bytes.length - pos).toInt + System.arraycopy(bytes, pos.toInt, buffer, offset, readSize) + readSize + } + + override def readFully(pos: Long, buffer: Array[Byte], offset: Int, length: Int): Unit = { + System.arraycopy(bytes, pos.toInt, buffer, offset, length) + } + + override def readFully(pos: Long, buffer: Array[Byte]): Unit = { + System.arraycopy(bytes, pos.toInt, buffer, 0, buffer.length) + } +} + +case class DeltaSharingLogFileStatus(path: String, size: Long, modificationTime: Long) diff --git a/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingUtils.scala b/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingUtils.scala new file mode 100644 index 00000000000..401188b7552 --- /dev/null +++ b/sharing/src/main/scala/io/delta/sharing/spark/DeltaSharingUtils.scala @@ -0,0 +1,521 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.sharing.spark + +import java.nio.charset.StandardCharsets.UTF_8 +import java.text.SimpleDateFormat +import java.util.{TimeZone, UUID} + +import scala.reflect.ClassTag + +import org.apache.spark.sql.delta.{ + ColumnMappingTableFeature, + DeletionVectorsTableFeature, + DeltaLog, + DeltaParquetFileFormat, + SnapshotDescriptor +} +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} +import com.google.common.hash.Hashing +import io.delta.sharing.client.{DeltaSharingClient, DeltaSharingRestClient} +import io.delta.sharing.client.model.{DeltaTableFiles, DeltaTableMetadata, Table} +import io.delta.sharing.client.util.JsonUtils + +import org.apache.spark.SparkEnv +import org.apache.spark.delta.sharing.TableRefreshResult +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.datasources.FileFormat +import org.apache.spark.storage.{BlockId, StorageLevel} + +object DeltaSharingUtils extends Logging { + + val STREAMING_SUPPORTED_READER_FEATURES: Seq[String] = + Seq(DeletionVectorsTableFeature.name, ColumnMappingTableFeature.name) + val SUPPORTED_READER_FEATURES: Seq[String] = + Seq(DeletionVectorsTableFeature.name, ColumnMappingTableFeature.name) + + // The prefix will be used for block ids of all blocks that store the delta log in BlockManager. + // It's used to ensure delta sharing queries don't mess up with blocks with other applications. + val DELTA_SHARING_BLOCK_ID_PREFIX = "test_delta-sharing" + + // Refresher function for CachedTableManager to use. + // It takes refreshToken: Option[String] as a parameter and return TableRefreshResult. + type RefresherFunction = Option[String] => TableRefreshResult + + case class DeltaSharingTableMetadata( + version: Long, + protocol: model.DeltaSharingProtocol, + metadata: model.DeltaSharingMetadata + ) + + // A wrapper function for streaming query to get the latest version/protocol/metadata of the + // shared table. + def getDeltaSharingTableMetadata( + client: DeltaSharingClient, + table: Table): DeltaSharingTableMetadata = { + val deltaTableMetadata = client.getMetadata(table) + getDeltaSharingTableMetadata(table, deltaTableMetadata) + } + + def queryDeltaTableMetadata( + client: DeltaSharingClient, + table: Table, + versionAsOf: Option[Long] = None, + timestampAsOf: Option[String] = None): DeltaTableMetadata = { + val deltaTableMetadata = client.getMetadata(table, versionAsOf, timestampAsOf) + logInfo( + s"getMetadata returned in ${deltaTableMetadata.respondedFormat} format for table " + + s"$table with v_${versionAsOf.map(_.toString).getOrElse("None")} " + + s"t_${timestampAsOf.getOrElse("None")} from delta sharing server." + ) + deltaTableMetadata + } + + /** + * parse the protocol and metadata from rpc response for getMetadata. 
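+   *
+   * For illustration, a typical call site pairs it with queryDeltaTableMetadata above
+   * (variable names are hypothetical):
+   * {{{
+   *   val deltaTableMetadata = DeltaSharingUtils.queryDeltaTableMetadata(client, dsTable)
+   *   val sharingMetadata =
+   *     DeltaSharingUtils.getDeltaSharingTableMetadata(dsTable, deltaTableMetadata)
+   *   // sharingMetadata.version / .protocol / .metadata are used to build the local delta log.
+   * }}}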
+ */ + def getDeltaSharingTableMetadata( + table: Table, + deltaTableMetadata: DeltaTableMetadata): DeltaSharingTableMetadata = { + + var metadataOption: Option[model.DeltaSharingMetadata] = None + var protocolOption: Option[model.DeltaSharingProtocol] = None + + deltaTableMetadata.lines + .map( + JsonUtils.fromJson[model.DeltaSharingSingleAction](_).unwrap + ) + .foreach { + case m: model.DeltaSharingMetadata => metadataOption = Some(m) + case p: model.DeltaSharingProtocol => protocolOption = Some(p) + case _ => // ignore other lines + } + + DeltaSharingTableMetadata( + version = deltaTableMetadata.version, + protocol = protocolOption.getOrElse { + throw new IllegalStateException( + s"Failed to get Protocol for ${table.toString}, " + + s"response from server:${deltaTableMetadata.lines}." + ) + }, + metadata = metadataOption.getOrElse { + throw new IllegalStateException( + s"Failed to get Metadata for ${table.toString}, " + + s"response from server:${deltaTableMetadata.lines}." + ) + } + ) + } + + private def getTableRefreshResult(tableFiles: DeltaTableFiles): TableRefreshResult = { + var minUrlExpiration: Option[Long] = None + val idToUrl = tableFiles.lines + .map( + JsonUtils.fromJson[model.DeltaSharingSingleAction](_).unwrap + ) + .collect { + case fileAction: model.DeltaSharingFileAction => + if (fileAction.expirationTimestamp != null) { + minUrlExpiration = minUrlExpiration + .filter(_ < fileAction.expirationTimestamp) + .orElse(Some(fileAction.expirationTimestamp)) + } + fileAction.id -> fileAction.path + } + .toMap + + TableRefreshResult(idToUrl, minUrlExpiration, tableFiles.refreshToken) + } + + /** + * Get the refresher function for a delta sharing table who calls client.getFiles with the + * provided parameters. + * + * @return A refresher function used by the CachedTableManager to refresh urls. + */ + def getRefresherForGetFiles( + client: DeltaSharingClient, + table: Table, + predicates: Seq[String], + limit: Option[Long], + versionAsOf: Option[Long], + timestampAsOf: Option[String], + jsonPredicateHints: Option[String], + refreshToken: Option[String]): RefresherFunction = { (_: Option[String]) => + { + val tableFiles = client + .getFiles( + table = table, + predicates = predicates, + limit = limit, + versionAsOf = versionAsOf, + timestampAsOf = timestampAsOf, + jsonPredicateHints = jsonPredicateHints, + refreshToken = refreshToken + ) + getTableRefreshResult(tableFiles) + } + } + + /** + * Get the refresher function for a delta sharing table who calls client.getCDFFiles with the + * provided parameters. + * + * @return A refresher function used by the CachedTableManager to refresh urls. + */ + def getRefresherForGetCDFFiles( + client: DeltaSharingClient, + table: Table, + cdfOptions: Map[String, String]): RefresherFunction = { (_: Option[String]) => + { + val tableFiles = client.getCDFFiles( + table = table, + cdfOptions = cdfOptions, + includeHistoricalMetadata = true + ) + getTableRefreshResult(tableFiles) + } + } + + /** + * Get the refresher function for a delta sharing table who calls client.getFiles with the + * provided parameters. + * + * @return A refresher function used by the CachedTableManager to refresh urls. 
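+   *
+   * A rough sketch of how a refresher is invoked, for illustration (normally the refresher is
+   * handed to CachedTableManager.register rather than called directly):
+   * {{{
+   *   val refresher = DeltaSharingUtils.getRefresherForGetFilesWithStartingVersion(
+   *     client, table, startingVersion = 0L, endingVersion = None)
+   *   val result = refresher(None)   // RefresherFunction: Option[String] => TableRefreshResult
+   *   // result carries the refreshed id-to-url mapping and the minimum url expiration time.
+   * }}}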
+ */ + def getRefresherForGetFilesWithStartingVersion( + client: DeltaSharingClient, + table: Table, + startingVersion: Long, + endingVersion: Option[Long]): RefresherFunction = { (_: Option[String]) => + { + val tableFiles = client + .getFiles(table = table, startingVersion = startingVersion, endingVersion = endingVersion) + getTableRefreshResult(tableFiles) + } + } + + def overrideSingleBlock[T: ClassTag](blockId: BlockId, value: T): Unit = { + assert( + blockId.name.startsWith(DELTA_SHARING_BLOCK_ID_PREFIX), + s"invalid delta sharing log block id: $blockId" + ) + removeBlockForJsonLogIfExists(blockId) + SparkEnv.get.blockManager.putSingle[T]( + blockId = blockId, + value = value, + level = StorageLevel.MEMORY_AND_DISK_SER, + tellMaster = true + ) + } + + def overrideIteratorBlock[T: ClassTag](blockId: BlockId, values: Iterator[T]): Unit = { + assert( + blockId.name.startsWith(DELTA_SHARING_BLOCK_ID_PREFIX), + s"invalid delta sharing log block id: $blockId" + ) + removeBlockForJsonLogIfExists(blockId) + SparkEnv.get.blockManager.putIterator[T]( + blockId = blockId, + values = values, + level = StorageLevel.MEMORY_AND_DISK_SER, + tellMaster = true + ) + } + + // A helper function used by DeltaSharingSource and DeltaSharingDataSource to get + // SnapshotDescriptor used for delta sharing streaming. + def getDeltaLogAndSnapshotDescriptor( + spark: SparkSession, + deltaSharingTableMetadata: DeltaSharingTableMetadata, + customTablePathWithUUIDSuffix: String): (DeltaLog, SnapshotDescriptor) = { + // Create a delta log with metadata at version 0. + // Used by DeltaSharingSource to initialize a DeltaLog class, which is then used to initialize + // a DeltaSource class, also the metadata id will be used for schemaTrackingLocation. + DeltaSharingLogFileSystem.constructDeltaLogWithMetadataAtVersionZero( + customTablePathWithUUIDSuffix, + deltaSharingTableMetadata + ) + val tablePath = DeltaSharingLogFileSystem.encode(customTablePathWithUUIDSuffix).toString + val localDeltaLog = DeltaLog.forTable(spark, tablePath) + ( + localDeltaLog, + new SnapshotDescriptor { + val deltaLog: DeltaLog = localDeltaLog + val metadata: Metadata = deltaSharingTableMetadata.metadata.deltaMetadata + val protocol: Protocol = deltaSharingTableMetadata.protocol.deltaProtocol + val version = deltaSharingTableMetadata.version + val numOfFilesIfKnown = None + val sizeInBytesIfKnown = None + } + ) + } + + // Get a query hash id based on the query parameters: time travel options and filters. + // The id concatenated with table name and used in local DeltaLog and CachedTableManager. + // This is to uniquely identify the delta sharing table used twice in the same query but with + // different query parameters, so we can differentiate their delta log and entries in the + // CachedTableManager. + private[sharing] def getQueryParamsHashId( + options: DeltaSharingOptions, + partitionFiltersString: String, + dataFiltersString: String, + jsonPredicateHints: String, + version: Long): String = { + val fullQueryString = s"${options.versionAsOf}_${options.timestampAsOf}_" + + s"${partitionFiltersString}_${dataFiltersString}_${jsonPredicateHints}_${version}" + Hashing.sha256().hashString(fullQueryString, UTF_8).toString + } + + // Get a query hash id based on the query parameters: cdfOptions. + // The id concatenated with table name and used in local DeltaLoc and CachedTableManager. 
+  // This is to uniquely identify the delta sharing table used twice in the same query but with
+  // different query parameters, so we can differentiate their delta log and entries in the
+  // CachedTableManager.
+  private[sharing] def getQueryParamsHashId(cdfOptions: Map[String, String]): String = {
+    Hashing.sha256().hashString(cdfOptions.toString, UTF_8).toString
+  }
+
+  // Concatenate table path with an id as a suffix, to uniquely identify a delta sharing table and
+  // its corresponding delta log in a query.
+  private[sharing] def getTablePathWithIdSuffix(customTablePath: String, id: String): String = {
+    s"${customTablePath}_${id}"
+  }
+
+  // Get a unique string composed of a formatted timestamp and a UUID.
+  // Used as a suffix for the name and delta log path of a delta sharing table in a streaming job,
+  // to avoid overwriting the delta log when the same delta sharing table is referenced multiple
+  // times in one streaming job.
+  private[sharing] def getFormattedTimestampWithUUID(): String = {
+    val dateFormat = new SimpleDateFormat("yyyyMMdd_HHmmss")
+    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"))
+    val formattedDateTime = dateFormat.format(System.currentTimeMillis())
+    val uuid = UUID.randomUUID().toString().split('-').head
+    s"${formattedDateTime}_${uuid}"
+  }
+
+  private def removeBlockForJsonLogIfExists(blockId: BlockId): Unit = {
+    val blockManager = SparkEnv.get.blockManager
+    blockManager.getMatchingBlockIds(_.name == blockId.name).foreach { b =>
+      logWarning(s"Found and removing existing block for $blockId.")
+      blockManager.removeBlock(b)
+    }
+  }
+
+  // This is a base64-encoded string of the content of an empty delta checkpoint file.
+  // It is used to fake a checkpoint file in the locally constructed delta log for cdf and
+  // streaming queries.
+ val FAKE_CHECKPOINT_FILE_BASE64_ENCODED_STRING = + """ +UEFSMRUAFQ4VEhX63MfpAxwVBBUAFQYVCAAABxgDAAAAAwAAFQAVDhUSFfrcx+kDHBUEFQAVBhUIAAAHGAMAAAADAAAVABUOFRIV ++tzH6QMcFQQVABUGFQgAAAcYAwAAAAMAABUAFQ4VEhX63MfpAxwVBBUAFQYVCAAABxgDAAAAAwAAFQAVGhUeFdWf39gCHBUEFQAV +BhUGAAANMAIAAAADAAMAAAADAAAVABUcFSAV7J+l5AIcFQQVABUGFQYAAA40AgAAAAMABAAAAAMAAAAVABUOFRIV+tzH6QMcFQQV +ABUGFQgAAAcYAwAAAAMAABUAFQ4VEhX63MfpAxwVBBUAFQYVCAAABxgDAAAAAwAAFQAVDhUSFfrcx+kDHBUEFQAVBhUIAAAHGAMA +AAADAAAVABUaFR4V1Z/f2AIcFQQVABUGFQYAAA0wAgAAAAMAAwAAAAMAABUAFRwVIBXsn6XkAhwVBBUAFQYVBgAADjQCAAAAAwAE +AAAAAwAAABUAFQ4VEhX63MfpAxwVBBUAFQYVCAAABxgDAAAAAwAAFQAVDhUSFfrcx+kDHBUEFQAVBhUIAAAHGAMAAAADAAAVABUO +FRIV+tzH6QMcFQQVABUGFQgAAAcYAwAAAAMAABUAFQ4VEhX63MfpAxwVBBUAFQYVCAAABxgDAAAAAwAAFQAVDhUSFfrcx+kDHBUE +FQAVBhUIAAAHGAMAAAADAAAVABUOFRIV+tzH6QMcFQQVABUGFQgAAAcYAwAAAAMAABUAFQ4VEhX63MfpAxwVBBUAFQYVCAAABxgD +AAAAAwAAFQAVDhUSFfrcx+kDHBUEFQAVBhUIAAAHGAMAAAADAAAVABUOFRIV+tzH6QMcFQQVABUGFQgAAAcYAwAAAAMAABUAFQ4V +EhX63MfpAxwVBBUAFQYVCAAABxgDAAAAAwAAFQAVDhUSFfrcx+kDHBUEFQAVBhUIAAAHGAMAAAADAAAVABUOFRIV+tzH6QMcFQQV +ABUGFQgAAAcYAwAAAAMAABUAFQ4VEhX63MfpAxwVBBUAFQYVCAAABxgDAAAAAwAAFQAVDhUSFfrcx+kDHBUEFQAVBhUIAAAHGAMA +AAADAAAVABUaFR4V1Z/f2AIcFQQVABUGFQYAAA0wAgAAAAMAAwAAAAMAABUAFRwVIBXsn6XkAhwVBBUAFQYVBgAADjQCAAAAAwAE +AAAAAwAAABUAFQ4VEhX63MfpAxwVBBUAFQYVCAAABxgDAAAAAwAAFQAVDhUSFfrcx+kDHBUEFQAVBhUIAAAHGAMAAAADAAAVABUO +FRIV+tzH6QMcFQQVABUGFQgAAAcYAwAAAAMAABUAFQ4VEhX63MfpAxwVBBUAFQYVCAAABxgDAAAAAwAAFQAVDhUSFfrcx+kDHBUE +FQAVBhUIAAAHGAMAAAADAAAVABUOFRIV+tzH6QMcFQQVABUGFQgAAAcYAwAAAAMAABUAFQ4VEhX63MfpAxwVBBUAFQYVCAAABxgD +AAAAAwAAFQAVDhUSFfrcx+kDHBUEFQAVBhUIAAAHGAMAAAADAAAVABUOFRIV+tzH6QMcFQQVABUGFQgAAAcYAwAAAAMAABUAFSIV +JhWRlf/uBxwVBBUAFQYVCAAAEUADAAAAAwgABgAAAHRlc3RJZBUAFQ4VEhXyyKGvDxwVBBUAFQYVCAAABxgDAAAAAwQAFQAVDhUS +FfLIoa8PHBUEFQAVBhUIAAAHGAMAAAADBAAVABUkFSgV3dDvmgccFQQVABUGFQgAABJEAwAAAAMMAAcAAABwYXJxdWV0FQAVHBUg +FfzUikccFQQVABUGFQYAAA40AgAAAAMABAAAAAMYAAAVABUcFSAV/NSKRxwVBBUAFQYVBgAADjQCAAAAAwAEAAAAAxgAABUAFQ4V +EhXyyKGvDxwVBBUAFQYVCAAABxgDAAAAAwQAFQAVHBUgFYySkKYBHBUEFQAVBhUGAAAONAIAAAADAAQAAAADEAAAFQAVGhUeFbrI +7KoEHBUEFQAVBhUGAAANMAIAAAADAAMAAAADCAAVABUcFSAVjJKQpgEcFQQVABUGFQYAAA40AgAAAAMABAAAAAMQAAAVABUOFRIV +8sihrw8cFQQVABUGFQgAAAcYAwAAAAMEABUAFRYVGhXVxIjAChwVBBUAFQYVCAAACygDAAAAAwIAAQAAABUAFRYVGhWJ+6XrCBwV +BBUAFQYVCAAACygDAAAAAwIAAgAAABUAFRwVIBWCt7b4AhwVBBUAFQYVBgAADjQCAAAAAwAEAAAAAwEAABUAFRwVIBWCt7b4AhwV +BBUAFQYVBgAADjQCAAAAAwAEAAAAAwEAABUAFQ4VEhX63MfpAxwVBBUAFQYVCAAABxgDAAAAAwAAFQAVDhUSFfrcx+kDHBUEFQAV +BhUIAAAHGAMAAAADAAAVABUOFRIV+tzH6QMcFQQVABUGFQgAAAcYAwAAAAMAABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYE +ABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYE +ABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYE +ABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYE +ABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYE +ABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYE +ABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYE +ABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRAhkYBnRlc3RJZBkY +BnRlc3RJZBUCGRYCABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRAhkYB3BhcnF1ZXQZGAdwYXJxdWV0FQIZFgIAGREB +GRgAGRgAFQIZFgQAGREBGRgAGRgAFQIZFgQAGREBGRgAGRgAFQIZFgQAGREBGRgAGRgAFQIZFgQAGREBGRgAGRgAFQIZFgQAGREB 
+GRgAGRgAFQIZFgQAGREBGRgAGRgAFQIZFgQAGRECGRgEAQAAABkYBAEAAAAVAhkWAgAZEQIZGAQCAAAAGRgEAgAAABUCGRYCABkR +ARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkRARkYABkYABUCGRYEABkc +FggVQBYAAAAZHBZIFUAWAAAAGRwWiAEVQBYAAAAZHBbIARVAFgAAABkcFogCFUwWAAAAGRwW1AIVThYAAAAZHBaiAxVAFgAAABkc +FuIDFUAWAAAAGRwWogQVQBYAAAAZHBbiBBVMFgAAABkcFq4FFU4WAAAAGRwW/AUVQBYAAAAZHBa8BhVAFgAAABkcFvwGFUAWAAAA +GRwWvAcVQBYAAAAZHBb8BxVAFgAAABkcFrwIFUAWAAAAGRwW/AgVQBYAAAAZHBa8CRVAFgAAABkcFvwJFUAWAAAAGRwWvAoVQBYA +AAAZHBb8ChVAFgAAABkcFrwLFUAWAAAAGRwW/AsVQBYAAAAZHBa8DBVAFgAAABkcFvwMFUwWAAAAGRwWyA0VThYAAAAZHBaWDhVA +FgAAABkcFtYOFUAWAAAAGRwWlg8VQBYAAAAZHBbWDxVAFgAAABkcFpYQFUAWAAAAGRwW1hAVQBYAAAAZHBaWERVAFgAAABkcFtYR +FUAWAAAAGRwWlhIVQBYAAAAZHBbWEhVUFgAAABkcFqoTFUAWAAAAGRwW6hMVQBYAAAAZHBaqFBVWFgAAABkcFoAVFUwWAAAAGRwW +zBUVTBYAAAAZHBaYFhVAFgAAABkcFtgWFU4WAAAAGRwWphcVTBYAAAAZHBbyFxVOFgAAABkcFsAYFUAWAAAAGRwWgBkVSBYAAAAZ +HBbIGRVIFgAAABkcFpAaFU4WAAAAGRwW3hoVThYAAAAZHBasGxVAFgAAABkcFuwbFUAWAAAAGRwWrBwVQBYAAAAVAhn8UUgMc3Bh +cmtfc2NoZW1hFQwANQIYA3R4bhUGABUMJQIYBWFwcElkJQBMHAAAABUEJQIYB3ZlcnNpb24AFQQlAhgLbGFzdFVwZGF0ZWQANQIY +A2FkZBUWABUMJQIYBHBhdGglAEwcAAAANQIYD3BhcnRpdGlvblZhbHVlcxUCFQJMLAAAADUEGAlrZXlfdmFsdWUVBAAVDCUAGANr +ZXklAEwcAAAAFQwlAhgFdmFsdWUlAEwcAAAAFQQlAhgEc2l6ZQAVBCUCGBBtb2RpZmljYXRpb25UaW1lABUAJQIYCmRhdGFDaGFu +Z2UANQIYBHRhZ3MVAhUCTCwAAAA1BBgJa2V5X3ZhbHVlFQQAFQwlABgDa2V5JQBMHAAAABUMJQIYBXZhbHVlJQBMHAAAADUCGA5k +ZWxldGlvblZlY3RvchUMABUMJQIYC3N0b3JhZ2VUeXBlJQBMHAAAABUMJQIYDnBhdGhPcklubGluZUR2JQBMHAAAABUCJQIYBm9m +ZnNldAAVAiUCGAtzaXplSW5CeXRlcwAVBCUCGAtjYXJkaW5hbGl0eQAVBCUCGAttYXhSb3dJbmRleAAVBCUCGAliYXNlUm93SWQA +FQQlAhgXZGVmYXVsdFJvd0NvbW1pdFZlcnNpb24AFQwlAhgFc3RhdHMlAEwcAAAANQIYDHN0YXRzX3BhcnNlZBUCABUEJQIYCm51 +bVJlY29yZHMANQIYBnJlbW92ZRUSABUMJQIYBHBhdGglAEwcAAAAFQQlAhgRZGVsZXRpb25UaW1lc3RhbXAAFQAlAhgKZGF0YUNo +YW5nZQAVACUCGBRleHRlbmRlZEZpbGVNZXRhZGF0YQA1AhgPcGFydGl0aW9uVmFsdWVzFQIVAkwsAAAANQQYCWtleV92YWx1ZRUE +ABUMJQAYA2tleSUATBwAAAAVDCUCGAV2YWx1ZSUATBwAAAAVBCUCGARzaXplADUCGA5kZWxldGlvblZlY3RvchUMABUMJQIYC3N0 +b3JhZ2VUeXBlJQBMHAAAABUMJQIYDnBhdGhPcklubGluZUR2JQBMHAAAABUCJQIYBm9mZnNldAAVAiUCGAtzaXplSW5CeXRlcwAV +BCUCGAtjYXJkaW5hbGl0eQAVBCUCGAttYXhSb3dJbmRleAAVBCUCGAliYXNlUm93SWQAFQQlAhgXZGVmYXVsdFJvd0NvbW1pdFZl +cnNpb24ANQIYCG1ldGFEYXRhFRAAFQwlAhgCaWQlAEwcAAAAFQwlAhgEbmFtZSUATBwAAAAVDCUCGAtkZXNjcmlwdGlvbiUATBwA +AAA1AhgGZm9ybWF0FQQAFQwlAhgIcHJvdmlkZXIlAEwcAAAANQIYB29wdGlvbnMVAhUCTCwAAAA1BBgJa2V5X3ZhbHVlFQQAFQwl +ABgDa2V5JQBMHAAAABUMJQIYBXZhbHVlJQBMHAAAABUMJQIYDHNjaGVtYVN0cmluZyUATBwAAAA1AhgQcGFydGl0aW9uQ29sdW1u +cxUCFQZMPAAAADUEGARsaXN0FQIAFQwlAhgHZWxlbWVudCUATBwAAAA1AhgNY29uZmlndXJhdGlvbhUCFQJMLAAAADUEGAlrZXlf +dmFsdWUVBAAVDCUAGANrZXklAEwcAAAAFQwlAhgFdmFsdWUlAEwcAAAAFQQlAhgLY3JlYXRlZFRpbWUANQIYCHByb3RvY29sFQgA +FQIlAhgQbWluUmVhZGVyVmVyc2lvbgAVAiUCGBBtaW5Xcml0ZXJWZXJzaW9uADUCGA5yZWFkZXJGZWF0dXJlcxUCFQZMPAAAADUE +GARsaXN0FQIAFQwlAhgHZWxlbWVudCUATBwAAAA1AhgOd3JpdGVyRmVhdHVyZXMVAhUGTDwAAAA1BBgEbGlzdBUCABUMJQIYB2Vs +ZW1lbnQlAEwcAAAANQIYDmRvbWFpbk1ldGFkYXRhFQYAFQwlAhgGZG9tYWluJQBMHAAAABUMJQIYDWNvbmZpZ3VyYXRpb24lAEwc +AAAAFQAlAhgHcmVtb3ZlZAAWBBkcGfw2JggcFQwZNQAGCBkoA3R4bgVhcHBJZBUCFgQWPBZAJgg8NgQAGRwVABUAFQIAABaUKhUU +FuwcFR4AJkgcFQQZNQAGCBkoA3R4bgd2ZXJzaW9uFQIWBBY8FkAmSDw2BAAZHBUAFQAVAgAAFqgqFRQWih0VHgAmiAEcFQQZNQAG +CBkoA3R4bgtsYXN0VXBkYXRlZBUCFgQWPBZAJogBPDYEABkcFQAVABUCAAAWvCoVFhaoHRUeACbIARwVDBk1AAYIGSgDYWRkBHBh +dGgVAhYEFjwWQCbIATw2BAAZHBUAFQAVAgAAFtIqFRYWxh0VHgAmiAIcFQwZJQAGGUgDYWRkD3BhcnRpdGlvblZhbHVlcwlrZXlf 
+dmFsdWUDa2V5FQIWBBZIFkwmiAI8NgQAGRwVABUAFQIAABboKhUWFuQdFR4AJtQCHBUMGSUABhlIA2FkZA9wYXJ0aXRpb25WYWx1 +ZXMJa2V5X3ZhbHVlBXZhbHVlFQIWBBZKFk4m1AI8NgQAGRwVABUAFQIAABb+KhUWFoIeFR4AJqIDHBUEGTUABggZKANhZGQEc2l6 +ZRUCFgQWPBZAJqIDPDYEABkcFQAVABUCAAAWlCsVFhagHhUeACbiAxwVBBk1AAYIGSgDYWRkEG1vZGlmaWNhdGlvblRpbWUVAhYE +FjwWQCbiAzw2BAAZHBUAFQAVAgAAFqorFRYWvh4VHgAmogQcFQAZNQAGCBkoA2FkZApkYXRhQ2hhbmdlFQIWBBY8FkAmogQ8NgQA +GRwVABUAFQIAABbAKxUWFtweFR4AJuIEHBUMGSUABhlIA2FkZAR0YWdzCWtleV92YWx1ZQNrZXkVAhYEFkgWTCbiBDw2BAAZHBUA +FQAVAgAAFtYrFRYW+h4VHgAmrgUcFQwZJQAGGUgDYWRkBHRhZ3MJa2V5X3ZhbHVlBXZhbHVlFQIWBBZKFk4mrgU8NgQAGRwVABUA +FQIAABbsKxUWFpgfFR4AJvwFHBUMGTUABggZOANhZGQOZGVsZXRpb25WZWN0b3ILc3RvcmFnZVR5cGUVAhYEFjwWQCb8BTw2BAAZ +HBUAFQAVAgAAFoIsFRYWth8VHgAmvAYcFQwZNQAGCBk4A2FkZA5kZWxldGlvblZlY3Rvcg5wYXRoT3JJbmxpbmVEdhUCFgQWPBZA +JrwGPDYEABkcFQAVABUCAAAWmCwVFhbUHxUeACb8BhwVAhk1AAYIGTgDYWRkDmRlbGV0aW9uVmVjdG9yBm9mZnNldBUCFgQWPBZA +JvwGPDYEABkcFQAVABUCAAAWriwVFhbyHxUeACa8BxwVAhk1AAYIGTgDYWRkDmRlbGV0aW9uVmVjdG9yC3NpemVJbkJ5dGVzFQIW +BBY8FkAmvAc8NgQAGRwVABUAFQIAABbELBUWFpAgFR4AJvwHHBUEGTUABggZOANhZGQOZGVsZXRpb25WZWN0b3ILY2FyZGluYWxp +dHkVAhYEFjwWQCb8Bzw2BAAZHBUAFQAVAgAAFtosFRYWriAVHgAmvAgcFQQZNQAGCBk4A2FkZA5kZWxldGlvblZlY3RvcgttYXhS +b3dJbmRleBUCFgQWPBZAJrwIPDYEABkcFQAVABUCAAAW8CwVFhbMIBUeACb8CBwVBBk1AAYIGSgDYWRkCWJhc2VSb3dJZBUCFgQW +PBZAJvwIPDYEABkcFQAVABUCAAAWhi0VFhbqIBUeACa8CRwVBBk1AAYIGSgDYWRkF2RlZmF1bHRSb3dDb21taXRWZXJzaW9uFQIW +BBY8FkAmvAk8NgQAGRwVABUAFQIAABacLRUWFoghFR4AJvwJHBUMGTUABggZKANhZGQFc3RhdHMVAhYEFjwWQCb8CTw2BAAZHBUA +FQAVAgAAFrItFRYWpiEVHgAmvAocFQQZNQAGCBk4A2FkZAxzdGF0c19wYXJzZWQKbnVtUmVjb3JkcxUCFgQWPBZAJrwKPDYEABkc +FQAVABUCAAAWyC0VFhbEIRUeACb8ChwVDBk1AAYIGSgGcmVtb3ZlBHBhdGgVAhYEFjwWQCb8Cjw2BAAZHBUAFQAVAgAAFt4tFRYW +4iEVHgAmvAscFQQZNQAGCBkoBnJlbW92ZRFkZWxldGlvblRpbWVzdGFtcBUCFgQWPBZAJrwLPDYEABkcFQAVABUCAAAW9C0VFhaA +IhUeACb8CxwVABk1AAYIGSgGcmVtb3ZlCmRhdGFDaGFuZ2UVAhYEFjwWQCb8Czw2BAAZHBUAFQAVAgAAFoouFRYWniIVHgAmvAwc +FQAZNQAGCBkoBnJlbW92ZRRleHRlbmRlZEZpbGVNZXRhZGF0YRUCFgQWPBZAJrwMPDYEABkcFQAVABUCAAAWoC4VFha8IhUeACb8 +DBwVDBklAAYZSAZyZW1vdmUPcGFydGl0aW9uVmFsdWVzCWtleV92YWx1ZQNrZXkVAhYEFkgWTCb8DDw2BAAZHBUAFQAVAgAAFrYu +FRYW2iIVHgAmyA0cFQwZJQAGGUgGcmVtb3ZlD3BhcnRpdGlvblZhbHVlcwlrZXlfdmFsdWUFdmFsdWUVAhYEFkoWTibIDTw2BAAZ +HBUAFQAVAgAAFswuFRYW+CIVHgAmlg4cFQQZNQAGCBkoBnJlbW92ZQRzaXplFQIWBBY8FkAmlg48NgQAGRwVABUAFQIAABbiLhUW +FpYjFR4AJtYOHBUMGTUABggZOAZyZW1vdmUOZGVsZXRpb25WZWN0b3ILc3RvcmFnZVR5cGUVAhYEFjwWQCbWDjw2BAAZHBUAFQAV +AgAAFvguFRYWtCMVHgAmlg8cFQwZNQAGCBk4BnJlbW92ZQ5kZWxldGlvblZlY3Rvcg5wYXRoT3JJbmxpbmVEdhUCFgQWPBZAJpYP +PDYEABkcFQAVABUCAAAWji8VFhbSIxUeACbWDxwVAhk1AAYIGTgGcmVtb3ZlDmRlbGV0aW9uVmVjdG9yBm9mZnNldBUCFgQWPBZA +JtYPPDYEABkcFQAVABUCAAAWpC8VFhbwIxUeACaWEBwVAhk1AAYIGTgGcmVtb3ZlDmRlbGV0aW9uVmVjdG9yC3NpemVJbkJ5dGVz +FQIWBBY8FkAmlhA8NgQAGRwVABUAFQIAABa6LxUWFo4kFR4AJtYQHBUEGTUABggZOAZyZW1vdmUOZGVsZXRpb25WZWN0b3ILY2Fy +ZGluYWxpdHkVAhYEFjwWQCbWEDw2BAAZHBUAFQAVAgAAFtAvFRYWrCQVHgAmlhEcFQQZNQAGCBk4BnJlbW92ZQ5kZWxldGlvblZl +Y3RvcgttYXhSb3dJbmRleBUCFgQWPBZAJpYRPDYEABkcFQAVABUCAAAW5i8VFhbKJBUeACbWERwVBBk1AAYIGSgGcmVtb3ZlCWJh +c2VSb3dJZBUCFgQWPBZAJtYRPDYEABkcFQAVABUCAAAW/C8VFhboJBUeACaWEhwVBBk1AAYIGSgGcmVtb3ZlF2RlZmF1bHRSb3dD +b21taXRWZXJzaW9uFQIWBBY8FkAmlhI8NgQAGRwVABUAFQIAABaSMBUWFoYlFR4AJtYSHBUMGTUABggZKAhtZXRhRGF0YQJpZBUC +FgQWUBZUJtYSPBgGdGVzdElkGAZ0ZXN0SWQWAigGdGVzdElkGAZ0ZXN0SWQAGRwVABUAFQIAABaoMBUWFqQlFTYAJqoTHBUMGTUA +BggZKAhtZXRhRGF0YQRuYW1lFQIWBBY8FkAmqhM8NgQAGRwVABUAFQIAABa+MBUWFtolFR4AJuoTHBUMGTUABggZKAhtZXRhRGF0 
+YQtkZXNjcmlwdGlvbhUCFgQWPBZAJuoTPDYEABkcFQAVABUCAAAW1DAVFhb4JRUeACaqFBwVDBk1AAYIGTgIbWV0YURhdGEGZm9y +bWF0CHByb3ZpZGVyFQIWBBZSFlYmqhQ8GAdwYXJxdWV0GAdwYXJxdWV0FgIoB3BhcnF1ZXQYB3BhcnF1ZXQAGRwVABUAFQIAABbq +MBUWFpYmFToAJoAVHBUMGSUABhlYCG1ldGFEYXRhBmZvcm1hdAdvcHRpb25zCWtleV92YWx1ZQNrZXkVAhYEFkgWTCaAFTw2BAAZ +HBUAFQAVAgAAFoAxFRYW0CYVHgAmzBUcFQwZJQAGGVgIbWV0YURhdGEGZm9ybWF0B29wdGlvbnMJa2V5X3ZhbHVlBXZhbHVlFQIW +BBZIFkwmzBU8NgQAGRwVABUAFQIAABaWMRUWFu4mFR4AJpgWHBUMGTUABggZKAhtZXRhRGF0YQxzY2hlbWFTdHJpbmcVAhYEFjwW +QCaYFjw2BAAZHBUAFQAVAgAAFqwxFRYWjCcVHgAm2BYcFQwZJQAGGUgIbWV0YURhdGEQcGFydGl0aW9uQ29sdW1ucwRsaXN0B2Vs +ZW1lbnQVAhYEFkoWTibYFjw2BAAZHBUAFQAVAgAAFsIxFRYWqicVHgAmphccFQwZJQAGGUgIbWV0YURhdGENY29uZmlndXJhdGlv +bglrZXlfdmFsdWUDa2V5FQIWBBZIFkwmphc8NgQAGRwVABUAFQIAABbYMRUWFsgnFR4AJvIXHBUMGSUABhlICG1ldGFEYXRhDWNv +bmZpZ3VyYXRpb24Ja2V5X3ZhbHVlBXZhbHVlFQIWBBZKFk4m8hc8NgQAGRwVABUAFQIAABbuMRUWFuYnFR4AJsAYHBUEGTUABggZ +KAhtZXRhRGF0YQtjcmVhdGVkVGltZRUCFgQWPBZAJsAYPDYEABkcFQAVABUCAAAWhDIVFhaEKBUeACaAGRwVAhk1AAYIGSgIcHJv +dG9jb2wQbWluUmVhZGVyVmVyc2lvbhUCFgQWRBZIJoAZPBgEAQAAABgEAQAAABYCKAQBAAAAGAQBAAAAABkcFQAVABUCAAAWmjIV +FhaiKBUuACbIGRwVAhk1AAYIGSgIcHJvdG9jb2wQbWluV3JpdGVyVmVyc2lvbhUCFgQWRBZIJsgZPBgEAgAAABgEAgAAABYCKAQC +AAAAGAQCAAAAABkcFQAVABUCAAAWsDIVFhbQKBUuACaQGhwVDBklAAYZSAhwcm90b2NvbA5yZWFkZXJGZWF0dXJlcwRsaXN0B2Vs +ZW1lbnQVAhYEFkoWTiaQGjw2BAAZHBUAFQAVAgAAFsYyFRYW/igVHgAm3hocFQwZJQAGGUgIcHJvdG9jb2wOd3JpdGVyRmVhdHVy +ZXMEbGlzdAdlbGVtZW50FQIWBBZKFk4m3ho8NgQAGRwVABUAFQIAABbcMhUWFpwpFR4AJqwbHBUMGTUABggZKA5kb21haW5NZXRh +ZGF0YQZkb21haW4VAhYEFjwWQCasGzw2BAAZHBUAFQAVAgAAFvIyFRYWuikVHgAm7BscFQwZNQAGCBkoDmRvbWFpbk1ldGFkYXRh +DWNvbmZpZ3VyYXRpb24VAhYEFjwWQCbsGzw2BAAZHBUAFQAVAgAAFogzFRYW2CkVHgAmrBwcFQAZNQAGCBkoDmRvbWFpbk1ldGFk +YXRhB3JlbW92ZWQVAhYEFjwWQCasHDw2BAAZHBUAFQAVAgAAFp4zFRYW9ikVHgAWjBsWBCYIFuQcFAAAGVwYGW9yZy5hcGFjaGUu +c3BhcmsudGltZVpvbmUYE0FtZXJpY2EvTG9zX0FuZ2VsZXMAGBxvcmcuYXBhY2hlLnNwYXJrLmxlZ2FjeUlOVDk2GAAAGBhvcmcu +YXBhY2hlLnNwYXJrLnZlcnNpb24YBTQuMC4wABgpb3JnLmFwYWNoZS5zcGFyay5zcWwucGFycXVldC5yb3cubWV0YWRhdGEYiyV7 +InR5cGUiOiJzdHJ1Y3QiLCJmaWVsZHMiOlt7Im5hbWUiOiJ0eG4iLCJ0eXBlIjp7InR5cGUiOiJzdHJ1Y3QiLCJmaWVsZHMiOlt7 +Im5hbWUiOiJhcHBJZCIsInR5cGUiOiJzdHJpbmciLCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJ2ZXJz +aW9uIiwidHlwZSI6ImxvbmciLCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJsYXN0VXBkYXRlZCIsInR5 +cGUiOiJsb25nIiwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX1dfSwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX0s +eyJuYW1lIjoiYWRkIiwidHlwZSI6eyJ0eXBlIjoic3RydWN0IiwiZmllbGRzIjpbeyJuYW1lIjoicGF0aCIsInR5cGUiOiJzdHJp +bmciLCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJwYXJ0aXRpb25WYWx1ZXMiLCJ0eXBlIjp7InR5cGUi +OiJtYXAiLCJrZXlUeXBlIjoic3RyaW5nIiwidmFsdWVUeXBlIjoic3RyaW5nIiwidmFsdWVDb250YWluc051bGwiOnRydWV9LCJu +dWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJzaXplIiwidHlwZSI6ImxvbmciLCJudWxsYWJsZSI6dHJ1ZSwi +bWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJtb2RpZmljYXRpb25UaW1lIiwidHlwZSI6ImxvbmciLCJudWxsYWJsZSI6dHJ1ZSwibWV0 +YWRhdGEiOnt9fSx7Im5hbWUiOiJkYXRhQ2hhbmdlIiwidHlwZSI6ImJvb2xlYW4iLCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEi +Ont9fSx7Im5hbWUiOiJ0YWdzIiwidHlwZSI6eyJ0eXBlIjoibWFwIiwia2V5VHlwZSI6InN0cmluZyIsInZhbHVlVHlwZSI6InN0 +cmluZyIsInZhbHVlQ29udGFpbnNOdWxsIjp0cnVlfSwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX0seyJuYW1lIjoiZGVs +ZXRpb25WZWN0b3IiLCJ0eXBlIjp7InR5cGUiOiJzdHJ1Y3QiLCJmaWVsZHMiOlt7Im5hbWUiOiJzdG9yYWdlVHlwZSIsInR5cGUi +OiJzdHJpbmciLCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJwYXRoT3JJbmxpbmVEdiIsInR5cGUiOiJz 
+dHJpbmciLCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJvZmZzZXQiLCJ0eXBlIjoiaW50ZWdlciIsIm51 +bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6InNpemVJbkJ5dGVzIiwidHlwZSI6ImludGVnZXIiLCJudWxsYWJs +ZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJjYXJkaW5hbGl0eSIsInR5cGUiOiJsb25nIiwibnVsbGFibGUiOnRydWUs +Im1ldGFkYXRhIjp7fX0seyJuYW1lIjoibWF4Um93SW5kZXgiLCJ0eXBlIjoibG9uZyIsIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0 +YSI6e319XX0sIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6ImJhc2VSb3dJZCIsInR5cGUiOiJsb25nIiwi +bnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX0seyJuYW1lIjoiZGVmYXVsdFJvd0NvbW1pdFZlcnNpb24iLCJ0eXBlIjoibG9u +ZyIsIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6InN0YXRzIiwidHlwZSI6InN0cmluZyIsIm51bGxhYmxl +Ijp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6InN0YXRzX3BhcnNlZCIsInR5cGUiOnsidHlwZSI6InN0cnVjdCIsImZpZWxk +cyI6W3sibmFtZSI6Im51bVJlY29yZHMiLCJ0eXBlIjoibG9uZyIsIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319XX0sIm51 +bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319XX0sIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6InJlbW92 +ZSIsInR5cGUiOnsidHlwZSI6InN0cnVjdCIsImZpZWxkcyI6W3sibmFtZSI6InBhdGgiLCJ0eXBlIjoic3RyaW5nIiwibnVsbGFi +bGUiOnRydWUsIm1ldGFkYXRhIjp7fX0seyJuYW1lIjoiZGVsZXRpb25UaW1lc3RhbXAiLCJ0eXBlIjoibG9uZyIsIm51bGxhYmxl +Ijp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6ImRhdGFDaGFuZ2UiLCJ0eXBlIjoiYm9vbGVhbiIsIm51bGxhYmxlIjp0cnVl +LCJtZXRhZGF0YSI6e319LHsibmFtZSI6ImV4dGVuZGVkRmlsZU1ldGFkYXRhIiwidHlwZSI6ImJvb2xlYW4iLCJudWxsYWJsZSI6 +dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJwYXJ0aXRpb25WYWx1ZXMiLCJ0eXBlIjp7InR5cGUiOiJtYXAiLCJrZXlUeXBl +Ijoic3RyaW5nIiwidmFsdWVUeXBlIjoic3RyaW5nIiwidmFsdWVDb250YWluc051bGwiOnRydWV9LCJudWxsYWJsZSI6dHJ1ZSwi +bWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJzaXplIiwidHlwZSI6ImxvbmciLCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7 +Im5hbWUiOiJkZWxldGlvblZlY3RvciIsInR5cGUiOnsidHlwZSI6InN0cnVjdCIsImZpZWxkcyI6W3sibmFtZSI6InN0b3JhZ2VU +eXBlIiwidHlwZSI6InN0cmluZyIsIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6InBhdGhPcklubGluZUR2 +IiwidHlwZSI6InN0cmluZyIsIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6Im9mZnNldCIsInR5cGUiOiJp +bnRlZ2VyIiwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX0seyJuYW1lIjoic2l6ZUluQnl0ZXMiLCJ0eXBlIjoiaW50ZWdl +ciIsIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6ImNhcmRpbmFsaXR5IiwidHlwZSI6ImxvbmciLCJudWxs +YWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJtYXhSb3dJbmRleCIsInR5cGUiOiJsb25nIiwibnVsbGFibGUiOnRy +dWUsIm1ldGFkYXRhIjp7fX1dfSwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX0seyJuYW1lIjoiYmFzZVJvd0lkIiwidHlw +ZSI6ImxvbmciLCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJkZWZhdWx0Um93Q29tbWl0VmVyc2lvbiIs +InR5cGUiOiJsb25nIiwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX1dfSwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7 +fX0seyJuYW1lIjoibWV0YURhdGEiLCJ0eXBlIjp7InR5cGUiOiJzdHJ1Y3QiLCJmaWVsZHMiOlt7Im5hbWUiOiJpZCIsInR5cGUi +OiJzdHJpbmciLCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJuYW1lIiwidHlwZSI6InN0cmluZyIsIm51 +bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6ImRlc2NyaXB0aW9uIiwidHlwZSI6InN0cmluZyIsIm51bGxhYmxl +Ijp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6ImZvcm1hdCIsInR5cGUiOnsidHlwZSI6InN0cnVjdCIsImZpZWxkcyI6W3si +bmFtZSI6InByb3ZpZGVyIiwidHlwZSI6InN0cmluZyIsIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6Im9w +dGlvbnMiLCJ0eXBlIjp7InR5cGUiOiJtYXAiLCJrZXlUeXBlIjoic3RyaW5nIiwidmFsdWVUeXBlIjoic3RyaW5nIiwidmFsdWVD +b250YWluc051bGwiOnRydWV9LCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fV19LCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRh +dGEiOnt9fSx7Im5hbWUiOiJzY2hlbWFTdHJpbmciLCJ0eXBlIjoic3RyaW5nIiwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7 
+fX0seyJuYW1lIjoicGFydGl0aW9uQ29sdW1ucyIsInR5cGUiOnsidHlwZSI6ImFycmF5IiwiZWxlbWVudFR5cGUiOiJzdHJpbmci +LCJjb250YWluc051bGwiOnRydWV9LCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5hbWUiOiJjb25maWd1cmF0aW9u +IiwidHlwZSI6eyJ0eXBlIjoibWFwIiwia2V5VHlwZSI6InN0cmluZyIsInZhbHVlVHlwZSI6InN0cmluZyIsInZhbHVlQ29udGFp +bnNOdWxsIjp0cnVlfSwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX0seyJuYW1lIjoiY3JlYXRlZFRpbWUiLCJ0eXBlIjoi +bG9uZyIsIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319XX0sIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFt +ZSI6InByb3RvY29sIiwidHlwZSI6eyJ0eXBlIjoic3RydWN0IiwiZmllbGRzIjpbeyJuYW1lIjoibWluUmVhZGVyVmVyc2lvbiIs +InR5cGUiOiJpbnRlZ2VyIiwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX0seyJuYW1lIjoibWluV3JpdGVyVmVyc2lvbiIs +InR5cGUiOiJpbnRlZ2VyIiwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX0seyJuYW1lIjoicmVhZGVyRmVhdHVyZXMiLCJ0 +eXBlIjp7InR5cGUiOiJhcnJheSIsImVsZW1lbnRUeXBlIjoic3RyaW5nIiwiY29udGFpbnNOdWxsIjp0cnVlfSwibnVsbGFibGUi +OnRydWUsIm1ldGFkYXRhIjp7fX0seyJuYW1lIjoid3JpdGVyRmVhdHVyZXMiLCJ0eXBlIjp7InR5cGUiOiJhcnJheSIsImVsZW1l +bnRUeXBlIjoic3RyaW5nIiwiY29udGFpbnNOdWxsIjp0cnVlfSwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX1dfSwibnVs +bGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX0seyJuYW1lIjoiZG9tYWluTWV0YWRhdGEiLCJ0eXBlIjp7InR5cGUiOiJzdHJ1Y3Qi +LCJmaWVsZHMiOlt7Im5hbWUiOiJkb21haW4iLCJ0eXBlIjoic3RyaW5nIiwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX0s +eyJuYW1lIjoiY29uZmlndXJhdGlvbiIsInR5cGUiOiJzdHJpbmciLCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fSx7Im5h +bWUiOiJyZW1vdmVkIiwidHlwZSI6ImJvb2xlYW4iLCJudWxsYWJsZSI6dHJ1ZSwibWV0YWRhdGEiOnt9fV19LCJudWxsYWJsZSI6 +dHJ1ZSwibWV0YWRhdGEiOnt9fV19ABgfb3JnLmFwYWNoZS5zcGFyay5sZWdhY3lEYXRlVGltZRgAABhacGFycXVldC1tciB2ZXJz +aW9uIDEuMTIuMy1kYXRhYnJpY2tzLTAwMDIgKGJ1aWxkIDI0ODRhOTVkYmUxNmEwMDIzZTNlYjI5YzIwMWY5OWZmOWVhNzcxZWUp +Gfw2HAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAA +HAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAA +HAAAHAAAHAAAHAAAHAAAAJUqAABQQVIx""".stripMargin.replaceAll("\n", "") + + // Pre-prepare the byte array for (minVersion-1).checkpoint.parquet. + val FAKE_CHECKPOINT_BYTE_ARRAY = { + java.util.Base64.getDecoder.decode(FAKE_CHECKPOINT_FILE_BASE64_ENCODED_STRING) + } +} diff --git a/sharing/src/main/scala/io/delta/sharing/spark/PrepareDeltaSharingScan.scala b/sharing/src/main/scala/io/delta/sharing/spark/PrepareDeltaSharingScan.scala new file mode 100644 index 00000000000..6c828edc308 --- /dev/null +++ b/sharing/src/main/scala/io/delta/sharing/spark/PrepareDeltaSharingScan.scala @@ -0,0 +1,105 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.sharing.spark + +import org.apache.spark.sql.delta.{DeltaTableUtils => SqlDeltaTableUtils} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.{PreparedDeltaFileIndex, PrepareDeltaScan} +import io.delta.sharing.client.util.ConfUtils +import io.delta.sharing.spark.DeltaSharingFileIndex + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical._ + +/** + * Before query planning, we prepare any scans over delta sharing tables by pushing + * any filters or limits to delta sharing server through RPC, allowing us to return only needed + * files and gather more accurate statistics for CBO and metering. + */ +class PrepareDeltaSharingScan(override val spark: SparkSession) extends PrepareDeltaScan(spark) { + + /** + * Prepares delta sharing scans sequentially. + */ + override protected def prepareDeltaScan(plan: LogicalPlan): LogicalPlan = { + transformWithSubqueries(plan) { + case scan @ DeltaSharingTableScan(_, filters, dsFileIndex, limit, _) => + val partitionCols = dsFileIndex.partitionColumns + val (partitionFilters, dataFilters) = filters.partition { e => + SqlDeltaTableUtils.isPredicatePartitionColumnsOnly(e, partitionCols, spark) + } + logInfo(s"Classified filters: partition: $partitionFilters, data: $dataFilters") + val deltaLog = dsFileIndex.fetchFilesAndConstructDeltaLog( + partitionFilters, + dataFilters, + limit.map(_.toLong) + ) + val snapshot = deltaLog.snapshot + val deltaScan = limit match { + case Some(limit) => snapshot.filesForScan(limit, filters) + case _ => snapshot.filesForScan(filters) + } + val preparedIndex = PreparedDeltaFileIndex( + spark, + deltaLog, + deltaLog.dataPath, + preparedScan = deltaScan, + versionScanned = Some(snapshot.version) + ) + SqlDeltaTableUtils.replaceFileIndex(scan, preparedIndex) + } + } + + // Just return the plan if statistics based skipping is off. + // It will fall back to just partition pruning at planning time. + // When data skipping is disabled, just convert Delta sharing scans to normal tahoe scans. + // NOTE: File skipping is only disabled on the client, so we still pass filters to the server. + override protected def prepareDeltaScanWithoutFileSkipping(plan: LogicalPlan): LogicalPlan = { + plan.transformDown { + case scan@DeltaSharingTableScan(_, filters, sharingIndex, _, _) => + val partitionCols = sharingIndex.partitionColumns + val (partitionFilters, dataFilters) = filters.partition { e => + SqlDeltaTableUtils.isPredicatePartitionColumnsOnly(e, partitionCols, spark) + } + logInfo(s"Classified filters: partition: $partitionFilters, data: $dataFilters") + val fileIndex = sharingIndex.asTahoeFileIndex(partitionFilters, dataFilters) + SqlDeltaTableUtils.replaceFileIndex(scan, fileIndex) + } + } + + // TODO: Support metadata-only query optimization! + override def optimizeQueryWithMetadata(plan: LogicalPlan): LogicalPlan = plan + + /** + * This is an extractor object. See https://docs.scala-lang.org/tour/extractor-objects.html. + */ + object DeltaSharingTableScan extends DeltaTableScan[DeltaSharingFileIndex] { + // Since delta library is used to read the data on constructed delta log, this should also + // consider the spark config for delta limit pushdown. 
+ override def limitPushdownEnabled(plan: LogicalPlan): Boolean = + ConfUtils.limitPushdownEnabled(plan.conf) && + (spark.conf.get(DeltaSQLConf.DELTA_LIMIT_PUSHDOWN_ENABLED.key) == "true") + + override def getPartitionColumns(fileIndex: DeltaSharingFileIndex): Seq[String] = + fileIndex.partitionColumns + + override def getPartitionFilters(fileIndex: DeltaSharingFileIndex): Seq[Expression] = + Seq.empty[Expression] + + } +} diff --git a/sharing/src/main/scala/io/delta/sharing/spark/model.scala b/sharing/src/main/scala/io/delta/sharing/spark/model.scala new file mode 100644 index 00000000000..103bbab8266 --- /dev/null +++ b/sharing/src/main/scala/io/delta/sharing/spark/model.scala @@ -0,0 +1,209 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sharing.spark.model + +import java.net.URLEncoder + +import org.apache.spark.sql.delta.actions.{ + AddCDCFile, + AddFile, + DeletionVectorDescriptor, + FileAction, + Metadata, + Protocol, + RemoveFile, + SingleAction +} +import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore +import org.apache.spark.sql.delta.util.JsonUtils +import com.fasterxml.jackson.annotation._ +import com.fasterxml.jackson.annotation.JsonInclude.Include +import io.delta.sharing.client.DeltaSharingFileSystem + +import org.apache.spark.sql.types.{DataType, StructType} + +// Represents a single action in the response of a Delta Sharing rpc. +sealed trait DeltaSharingAction { + def wrap: DeltaSharingSingleAction + def json: String = JsonUtils.toJson(wrap) +} + +/** A serialization helper to create a common action envelope, for delta sharing actions in the + * response of a rpc. + */ +case class DeltaSharingSingleAction( + protocol: DeltaSharingProtocol = null, + metaData: DeltaSharingMetadata = null, + file: DeltaSharingFileAction = null) { + def unwrap: DeltaSharingAction = { + if (file != null) { + file + } else if (metaData != null) { + metaData + } else if (protocol != null) { + protocol + } else { + null + } + } +} + +/** + * The delta sharing protocol from the response of a rpc. It only wraps a delta protocol now, but + * can be extended with additional delta sharing fields if needed later. + */ +case class DeltaSharingProtocol(deltaProtocol: Protocol) extends DeltaSharingAction { + + override def wrap: DeltaSharingSingleAction = DeltaSharingSingleAction(protocol = this) +} + +/** + * The delta sharing metadata from the response of a rpc. + * It wraps a delta metadata, and adds three delta sharing fields: + * - version: the version of the metadata, used to generate faked delta log file on the client + * side. + * - size: the estimated size of the table at the version, used to estimate query size. + * - numFiles: the number of files of the table at the version, used to estimate query size. 
+ */ +case class DeltaSharingMetadata( + version: java.lang.Long = null, + size: java.lang.Long = null, + numFiles: java.lang.Long = null, + deltaMetadata: Metadata) + extends DeltaSharingAction { + + /** Returns the schema as a [[StructType]] */ + @JsonIgnore + lazy val schema: StructType = deltaMetadata.schema + + /** Returns the partitionSchema as a [[StructType]] */ + @JsonIgnore + lazy val partitionSchema: StructType = deltaMetadata.partitionSchema + + override def wrap: DeltaSharingSingleAction = DeltaSharingSingleAction(metaData = this) +} + +/** + * DeltaResponseFileAction used in delta sharing protocol. It wraps a delta single action, + * and adds 4 delta sharing related fields: id/version/timestamp/expirationTimestamp. + * - id: used to uniquely identify a file, and in idToUrl mapping for executor to get + * presigned url. + * - version/timestamp: the version and timestamp of the commit, used to generate faked delta + * log file on the client side. + * - expirationTimestamp: indicate when the presigned url is going to expire and need a + * refresh. + * The server is responsible to redact sensitive fields such as "tags" before returning. + */ +case class DeltaSharingFileAction( + id: String, + version: java.lang.Long = null, + timestamp: java.lang.Long = null, + expirationTimestamp: java.lang.Long = null, + deletionVectorFileId: String = null, + deltaSingleAction: SingleAction) + extends DeltaSharingAction { + + lazy val path: String = { + deltaSingleAction.unwrap match { + case file: FileAction => file.path + case action => + throw new IllegalStateException( + s"unexpected action in delta sharing " + + s"response: ${action.json}" + ) + } + } + + lazy val size: Long = { + deltaSingleAction.unwrap match { + case add: AddFile => add.size + case cdc: AddCDCFile => cdc.size + case remove: RemoveFile => + remove.size.getOrElse { + throw new IllegalStateException( + "size is missing for the remove file returned from server" + + s", which is required by delta sharing client, response:${remove.json}." + ) + } + case action => + throw new IllegalStateException( + s"unexpected action in delta sharing " + + s"response: ${action.json}" + ) + } + } + + def getDeletionVectorOpt: Option[DeletionVectorDescriptor] = { + deltaSingleAction.unwrap match { + case file: FileAction => Option.apply(file.deletionVector) + case _ => None + } + } + + def getDeletionVectorDeltaSharingPath(tablePath: String): String = { + getDeletionVectorOpt.map { deletionVector => + // Adding offset to dvFileSize so it can load all needed bytes in memory, + // starting from the beginning of the file instead of the `offset`. + // There could be other DVs beyond this length in the file, but not needed by this DV. + val dvFileSize = DeletionVectorStore.getTotalSizeOfDVFieldsInFile( + deletionVector.sizeInBytes + ) + deletionVector.offset.getOrElse(0) + // This path is going to be put in the delta log file and processed by delta code, where + // absolutePath() is applied to the path in all places, such as TahoeFileIndex and + // DeletionVectorDescriptor, and in absolutePath, URI will apply a decode of the path. + // Additional encoding on the tablePath and table id to allow the path still able to be + // processed by DeltaSharingFileSystem after URI decodes it. 
+      DeltaSharingFileSystem
+        .DeltaSharingPath(
+          URLEncoder.encode(tablePath, "UTF-8"),
+          URLEncoder.encode(deletionVectorFileId, "UTF-8"),
+          dvFileSize
+        )
+        .toPath
+        .toString
+    }.orNull
+  }
+
+  /**
+   * A helper function to get the delta sharing path for this file action to put in the delta log,
+   * in the format below:
+   * ```
+   * delta-sharing:///<url encoded table path>/<url encoded file id>/<size>
+   * ```
+   *
+   * This makes a unique and stable path for each file action, which will be mapped to a
+   * pre-signed url by DeltaSharingFileSystem.open(). The size is needed to know how many bytes to
+   * read from the FSDataInputStream.
+   */
+  def getDeltaSharingPath(tablePath: String): String = {
+    // This path is going to be put in the delta log file and processed by delta code, where
+    // absolutePath() is applied to the path in all places, such as TahoeFileIndex and
+    // DeletionVectorDescriptor, and in absolutePath, URI will apply a decode of the path.
+    // The tablePath and file id are additionally encoded so that the path can still be processed
+    // by DeltaSharingFileSystem after the URI decode.
+    DeltaSharingFileSystem
+      .DeltaSharingPath(
+        URLEncoder.encode(tablePath, "UTF-8"),
+        URLEncoder.encode(id, "UTF-8"),
+        size
+      )
+      .toPath
+      .toString
+  }
+
+  override def wrap: DeltaSharingSingleAction = DeltaSharingSingleAction(file = this)
+}
diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaFormatSharingSourceSuite.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaFormatSharingSourceSuite.scala
new file mode 100644
index 00000000000..16f37f9ae89
--- /dev/null
+++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaFormatSharingSourceSuite.scala
@@ -0,0 +1,901 @@
+/*
+ * Copyright (2021) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package io.delta.sharing.spark + +import org.apache.spark.sql.delta.DeltaLog +import org.apache.spark.sql.delta.DeltaOptions.{ + IGNORE_CHANGES_OPTION, + IGNORE_DELETES_OPTION, + SKIP_CHANGE_COMMITS_OPTION +} +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import io.delta.sharing.client.DeltaSharingRestClient +import io.delta.sharing.client.model.{Table => DeltaSharingTable} +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkEnv +import org.apache.spark.sql.Row +import org.apache.spark.sql.delta.sharing.DeltaSharingTestSparkUtils +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.streaming.StreamTest +import org.apache.spark.sql.types.{ + DateType, + IntegerType, + LongType, + StringType, + StructType, + TimestampType +} + +class DeltaFormatSharingSourceSuite + extends StreamTest + with DeltaSQLCommandTest + with DeltaSharingTestSparkUtils + with DeltaSharingDataSourceDeltaTestUtils { + + import testImplicits._ + + private def getSource(parameters: Map[String, String]): DeltaFormatSharingSource = { + val options = new DeltaSharingOptions(parameters) + val path = options.options.getOrElse( + "path", + throw DeltaSharingErrors.pathNotSpecifiedException + ) + val parsedPath = DeltaSharingRestClient.parsePath(path) + val client = DeltaSharingRestClient( + profileFile = parsedPath.profileFile, + forStreaming = true, + responseFormat = "delta", + readerFeatures = DeltaSharingUtils.STREAMING_SUPPORTED_READER_FEATURES.mkString(",") + ) + val dsTable = DeltaSharingTable( + share = parsedPath.share, + schema = parsedPath.schema, + name = parsedPath.table + ) + DeltaFormatSharingSource( + spark = spark, + client = client, + table = dsTable, + options = options, + parameters = parameters, + sqlConf = sqlContext.sparkSession.sessionState.conf, + metadataPath = "" + ) + } + + private def assertBlocksAreCleanedUp(): Unit = { + val blockManager = SparkEnv.get.blockManager + val matchingBlockIds = blockManager.getMatchingBlockIds( + _.name.startsWith(DeltaSharingLogFileSystem.DELTA_SHARING_LOG_BLOCK_ID_PREFIX) + ) + assert(matchingBlockIds.isEmpty, "delta sharing blocks are not cleaned up.") + } + + test("DeltaFormatSharingSource able to get schema") { + withTempDir { tempDir => + val deltaTableName = "delta_table_schema" + withTable(deltaTableName) { + createTable(deltaTableName) + val sharedTableName = "shared_table_schema" + prepareMockedClientMetadata(deltaTableName, sharedTableName) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + + val profileFile = prepareProfileFile(tempDir) + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val deltaSharingSource = getSource( + Map("path" -> s"${profileFile.getCanonicalPath}#share1.default.$sharedTableName") + ) + val expectedSchema: StructType = new StructType() + .add("c1", IntegerType) + .add("c2", StringType) + .add("c3", DateType) + .add("c4", TimestampType) + assert(deltaSharingSource.schema == expectedSchema) + + // CDF schema + val cdfDeltaSharingSource = getSource( + Map( + "path" -> s"${profileFile.getCanonicalPath}#share1.default.$sharedTableName", + "readChangeFeed" -> "true" + ) + ) + val expectedCdfSchema: StructType = expectedSchema + .copy() + .add("_change_type", StringType) + .add("_commit_version", LongType) + .add("_commit_timestamp", TimestampType) + assert(cdfDeltaSharingSource.schema == expectedCdfSchema) + } + } + } + } + + test("DeltaFormatSharingSource do not support cdc") { + withTempDir { tempDir => + val sharedTableName = 
"shared_streaming_table_nocdc" + val profileFile = prepareProfileFile(tempDir) + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + val e = intercept[Exception] { + val df = spark.readStream + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .load(tablePath) + testStream(df)( + AssertOnQuery { q => + q.processAllAvailable(); true + } + ) + } + assert(e.getMessage.contains("Delta sharing cdc streaming is not supported")) + } + } + } + + test("DeltaFormatSharingSource getTableVersion error") { + withTempDir { tempDir => + val deltaTableName = "delta_table_version_error" + withTable(deltaTableName) { + sql( + s""" + |CREATE TABLE $deltaTableName (value STRING) + |USING DELTA + |""".stripMargin) + val sharedTableName = "shared_streaming_table_version_error" + val profileFile = prepareProfileFile(tempDir) + prepareMockedClientMetadata(deltaTableName, sharedTableName) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName, Some(-1L)) + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + val e = intercept[Exception] { + val df = spark.readStream + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + testStream(df)( + AssertOnQuery { q => + q.processAllAvailable(); true + } + ) + } + assert( + e.getMessage.contains("Delta Sharing Server returning negative table version: -1.") + ) + } + } + } + } + + test("DeltaFormatSharingSource simple query works") { + withTempDir { tempDir => + val deltaTableName = "delta_table_simple" + withTable(deltaTableName) { + sql(s""" + |CREATE TABLE $deltaTableName (value STRING) + |USING DELTA + |""".stripMargin) + + val sharedTableName = "shared_streaming_table_simple" + prepareMockedClientMetadata(deltaTableName, sharedTableName) + + val profileFile = prepareProfileFile(tempDir) + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + def InsertToDeltaTable(values: String): Unit = { + sql(s"INSERT INTO $deltaTableName VALUES $values") + } + + InsertToDeltaTable("""("keep1"), ("keep2"), ("drop3")""") + prepareMockedClientAndFileSystemResult(deltaTableName, sharedTableName, Some(1L)) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + + val df = spark.readStream + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .filter($"value" contains "keep") + + testStream(df)( + AssertOnQuery { q => + q.processAllAvailable(); true + }, + CheckAnswer("keep1", "keep2"), + StopStream + ) + } + } + } + } + + test("restart works sharing") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaTableName = "delta_table_restart" + withTable(deltaTableName) { + createTableForStreaming(deltaTableName) + val sharedTableName = "shared_streaming_table_restart" + prepareMockedClientMetadata(deltaTableName, sharedTableName) + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + def InsertToDeltaTable(values: String): Unit = { + sql(s"INSERT INTO $deltaTableName VALUES $values") + } + + // TODO: check testStream() function helper + def processAllAvailableInStream(): Unit = { + val q = spark.readStream + .format("deltaSharing") + 
.option("responseFormat", "delta") + .load(tablePath) + .filter($"value" contains "keep") + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.toString) + .start(outputDir.toString) + + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + + // Able to stream snapshot at version 1. + InsertToDeltaTable("""("keep1"), ("keep2"), ("drop1")""") + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableName, + versionAsOf = Some(1L) + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2").toDF() + ) + + // No new data, so restart will not process any new data. + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2").toDF() + ) + + // Able to stream new data at version 2. + InsertToDeltaTable("""("keep3"), ("keep4"), ("drop2")""") + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 2, + 2 + ) + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2", "keep3", "keep4").toDF() + ) + + sql(s"""OPTIMIZE $deltaTableName""") + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 2, + 3 + ) + // Optimize doesn't produce new data, so restart will not process any new data. + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2", "keep3", "keep4").toDF() + ) + + // Able to stream new data at version 3. + InsertToDeltaTable("""("keep5"), ("keep6"), ("drop3")""") + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 3, + 4 + ) + + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2", "keep3", "keep4", "keep5", "keep6").toDF() + ) + assertBlocksAreCleanedUp() + } + } + } + } + + test("streaming works with deletes on basic table") { + withTempDir { inputDir => + val deltaTableName = "delta_table_deletes" + withTable(deltaTableName) { + createTableForStreaming(deltaTableName) + val sharedTableName = "shared_streaming_table_deletes" + prepareMockedClientMetadata(deltaTableName, sharedTableName) + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + def InsertToDeltaTable(values: String): Unit = { + sql(s"INSERT INTO $deltaTableName VALUES $values") + } + + def processAllAvailableInStream( + sourceOptions: Map[String, String], + expectations: StreamAction*): Unit = { + val df = spark.readStream + .format("deltaSharing") + .options(sourceOptions) + .load(tablePath) + + val base = Seq(StartStream(), ProcessAllAvailable()) + testStream(df)((base ++ expectations): _*) + } + + // Insert at version 1 and 2. + InsertToDeltaTable("""("keep1")""") + InsertToDeltaTable("""("keep2")""") + // delete at version 3. + sql(s"""DELETE FROM $deltaTableName WHERE value = "keep1" """) + // update at version 4. 
+ sql(s"""UPDATE $deltaTableName SET value = "keep3" WHERE value = "keep2" """) + + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableName, + versionAsOf = Some(4L) + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + processAllAvailableInStream( + Map("responseFormat" -> "delta"), + CheckAnswer("keep3") + ) + + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 0, + 4 + ) + + // The streaming query will fail because changes detected in version 4. + // This is the original delta behavior. + val e = intercept[Exception] { + processAllAvailableInStream( + Map("responseFormat" -> "delta", "startingVersion" -> "0") + ) + } + for (msg <- Seq( + "Detected", + "not supported", + "true" + )) { + assert(e.getMessage.contains(msg)) + } + + // The streaming query will fail because changes detected in version 4. + // This is the original delta behavior. + val e2 = intercept[Exception] { + processAllAvailableInStream( + Map( + "responseFormat" -> "delta", + "startingVersion" -> "0", + IGNORE_DELETES_OPTION -> "true" + ) + ) + } + for (msg <- Seq( + "Detected", + "not supported", + "true" + )) { + assert(e2.getMessage.contains(msg)) + } + + // The streaming query will succeed because ignoreChanges helps to ignore the updates, but + // added updated data "keep3". + processAllAvailableInStream( + Map( + "responseFormat" -> "delta", + "startingVersion" -> "0", + IGNORE_CHANGES_OPTION -> "true" + ), + CheckAnswer("keep1", "keep2", "keep3") + ) + + // The streaming query will succeed because skipChangeCommits helps to ignore the whole + // commit with data update, so updated data is not produced either. + processAllAvailableInStream( + Map( + "responseFormat" -> "delta", + "startingVersion" -> "0", + SKIP_CHANGE_COMMITS_OPTION -> "true" + ), + CheckAnswer("keep1", "keep2") + ) + assertBlocksAreCleanedUp() + } + } + } + } + + test("streaming works with DV") { + withTempDir { inputDir => + val deltaTableName = "delta_table_dv" + withTable(deltaTableName) { + createSimpleTable(deltaTableName, enableCdf = false) + spark.sql( + s"ALTER TABLE $deltaTableName SET TBLPROPERTIES('delta.enableDeletionVectors' = true)" + ) + val sharedTableName = "shared_streaming_table_dv" + prepareMockedClientMetadata(deltaTableName, sharedTableName) + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + def InsertToDeltaTable(values: String): Unit = { + sql(s"INSERT INTO $deltaTableName VALUES $values") + } + + def processAllAvailableInStream( + sourceOptions: Map[String, String], + expectations: StreamAction*): Unit = { + val df = spark.readStream + .format("deltaSharing") + .options(sourceOptions) + .load(tablePath) + .filter($"c2" contains "keep") + .select("c1") + + val base = Seq(StartStream(), ProcessAllAvailable()) + testStream(df)((base ++ expectations): _*) + } + + // Insert at version 2. + InsertToDeltaTable("""(1, "keep1"),(2, "keep1"),(3, "keep1"),(1,"drop1")""") + // delete at version 3. 
+ sql(s"""DELETE FROM $deltaTableName WHERE c1 >= 2 """) + + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableName, + versionAsOf = Some(3L) + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + processAllAvailableInStream( + Map("responseFormat" -> "delta"), + CheckAnswer(1) + ) + + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + startingVersion = 0, + endingVersion = 3, + assertDVExists = true + ) + + // The streaming query will fail because deletes detected in version 3. And there are no + // options provided to ignore the deletion. + val e = intercept[Exception] { + processAllAvailableInStream( + Map("responseFormat" -> "delta", "startingVersion" -> "0") + ) + } + for (msg <- Seq( + "Detected a data update", + "not supported", + SKIP_CHANGE_COMMITS_OPTION, + "true" + )) { + assert(e.getMessage.contains(msg)) + } + + // The streaming query will fail because deletes detected in version 3, and it's + // recognized as updates and ignoreDeletes doesn't help. This is the original delta + // behavior. + val e2 = intercept[Exception] { + processAllAvailableInStream( + Map( + "responseFormat" -> "delta", + "startingVersion" -> "0", + IGNORE_DELETES_OPTION -> "true" + ) + ) + } + for (msg <- Seq( + "Detected a data update", + "not supported", + SKIP_CHANGE_COMMITS_OPTION, + "true" + )) { + assert(e2.getMessage.contains(msg)) + } + + // The streaming query will succeed because ignoreChanges helps to ignore the delete, but + // added duplicated data 1. + processAllAvailableInStream( + Map( + "responseFormat" -> "delta", + "startingVersion" -> "0", + IGNORE_CHANGES_OPTION -> "true" + ), + CheckAnswer(1, 2, 3, 1) + ) + + // The streaming query will succeed because skipChangeCommits helps to ignore the whole + // commit with data update, so no duplicated data is produced either. + processAllAvailableInStream( + Map( + "responseFormat" -> "delta", + "startingVersion" -> "0", + SKIP_CHANGE_COMMITS_OPTION -> "true" + ), + CheckAnswer(1, 2, 3) + ) + assertBlocksAreCleanedUp() + } + } + } + } + + test("startingVersion works") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaTableName = "delta_table_startVersion" + withTable(deltaTableName) { + createTableForStreaming(deltaTableName) + val sharedTableName = "shared_streaming_table_startVersion" + prepareMockedClientMetadata(deltaTableName, sharedTableName) + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + def InsertToDeltaTable(values: String): Unit = { + sql(s"INSERT INTO $deltaTableName VALUES $values") + } + + def processAllAvailableInStream(): Unit = { + val q = spark.readStream + .format("deltaSharing") + .option("responseFormat", "delta") + .option("startingVersion", 0) + .load(tablePath) + .filter($"value" contains "keep") + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.toString) + .start(outputDir.toString) + + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + + // Able to stream snapshot at version 1. 
+ InsertToDeltaTable("""("keep1"), ("keep2"), ("drop1")""") + prepareMockedClientAndFileSystemResultForStreaming( + deltaTable = deltaTableName, + sharedTable = sharedTableName, + startingVersion = 0L, + endingVersion = 1L + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2").toDF() + ) + + // No new data, so restart will not process any new data. + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2").toDF() + ) + + // Able to stream new data at version 2. + InsertToDeltaTable("""("keep3"), ("keep4"), ("drop2")""") + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 0, + 2 + ) + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2", "keep3", "keep4").toDF() + ) + + sql(s"""OPTIMIZE $deltaTableName""") + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 2, + 3 + ) + // Optimize doesn't produce new data, so restart will not process any new data. + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2", "keep3", "keep4").toDF() + ) + + // No new data, so restart will not process any new data. It will ask for the last commit + // so that it can figure out that there's nothing to do. + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 3, + 3 + ) + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2", "keep3", "keep4").toDF() + ) + + // Able to stream new data at version 3. + InsertToDeltaTable("""("keep5"), ("keep6"), ("drop3")""") + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 3, + 4 + ) + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2", "keep3", "keep4", "keep5", "keep6").toDF() + ) + + // No new data, so restart will not process any new data. It will ask for the last commit + // so that it can figure out that there's nothing to do. + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 4, + 4 + ) + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq("keep1", "keep2", "keep3", "keep4", "keep5", "keep6").toDF() + ) + assertBlocksAreCleanedUp() + } + } + } + } + + test("files are in a stable order for streaming") { + // This test function is to check that DeltaSharingLogFileSystem puts the files in the delta log + // in a stable order for each commit, regardless of the returning order from the server, so that + // the DeltaSource can produce a stable file index. + // We are using maxBytesPerTrigger which causes the streaming to stop in the middle of a commit + // to be able to test this behavior. 
+ withTempDirs { (inputDir, outputDir, checkpointDir) => + withTempDirs { (_, outputDir2, checkpointDir2) => + val deltaTableName = "delta_table_order" + withTable(deltaTableName) { + createSimpleTable(deltaTableName, enableCdf = false) + val sharedTableName = "shared_streaming_table_order" + prepareMockedClientMetadata(deltaTableName, sharedTableName) + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + def InsertToDeltaTable(values: String): Unit = { + sql(s"INSERT INTO $deltaTableName VALUES $values") + } + + // Able to stream snapshot at version 1. + InsertToDeltaTable("""(1, "one"), (2, "two"), (3, "three")""") + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + def processAllAvailableInStream( + outputDirStr: String, + checkpointDirStr: String): Unit = { + val q = spark.readStream + .format("deltaSharing") + .option("responseFormat", "delta") + .option("maxBytesPerTrigger", "1b") + .load(tablePath) + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDirStr) + .start(outputDirStr) + + try { + q.processAllAvailable() + val progress = q.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 3) + progress.foreach { p => + assert(p.numInputRows === 1) + } + } finally { + q.stop() + } + } + + // First output, without reverseFileOrder + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableName, + versionAsOf = Some(1L) + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + processAllAvailableInStream(outputDir.toString, checkpointDir.toString) + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq((1, "one"), (2, "two"), (3, "three")).toDF() + ) + + // Second output, with reverseFileOrder = true + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableName, + versionAsOf = Some(1L), + reverseFileOrder = true + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + processAllAvailableInStream(outputDir2.toString, checkpointDir2.toString) + checkAnswer( + spark.read.format("delta").load(outputDir2.getCanonicalPath), + Seq((1, "one"), (2, "two"), (3, "three")).toDF() + ) + + // Check each version of the two output are the same, which means the files are sorted + // by DeltaSharingLogFileSystem, and are processed in a deterministic order by the + // DeltaSource. 
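+ // Each of the three micro-batches above commits once to the output table, so the last three
+ // output versions should contain 3, 2 and 1 rows respectively; if both runs match at every one
+ // of those versions, the files were fed to the sink in a deterministic order.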
+ val deltaLog = DeltaLog.forTable(spark, new Path(outputDir.toString)) + Seq(0, 1, 2).foreach { v => + val version = deltaLog.snapshot.version - v + val df1 = spark.read + .format("delta") + .option("versionAsOf", version) + .load(outputDir.getCanonicalPath) + val df2 = spark.read + .format("delta") + .option("versionAsOf", version) + .load(outputDir2.getCanonicalPath) + checkAnswer(df1, df2) + assert(df1.count() == (3 - v)) + } + assertBlocksAreCleanedUp() + } + } + } + } + } + + test("DeltaFormatSharingSource query with two delta sharing tables works") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaTableName = "delta_table_two" + + def InsertToDeltaTable(values: String): Unit = { + sql(s"INSERT INTO $deltaTableName VALUES $values") + } + + withTable(deltaTableName) { + createSimpleTable(deltaTableName, enableCdf = false) + val sharedTableName = "shared_streaming_table_two" + prepareMockedClientMetadata(deltaTableName, sharedTableName) + + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + InsertToDeltaTable("""(1, "one"), (2, "one")""") + InsertToDeltaTable("""(1, "two"), (2, "two")""") + InsertToDeltaTable("""(1, "three"), (2, "three")""") + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResult( + deltaTableName, + sharedTableName, + Some(3L) + ) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + startingVersion = 1, + endingVersion = 3 + ) + + def processAllAvailableInStream(): Unit = { + val dfLatest = spark.readStream + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + val dfV1 = spark.readStream + .format("deltaSharing") + .option("responseFormat", "delta") + .option("startingVersion", 1) + .load(tablePath) + .select(col("c2"), col("c1").as("v1c1")) + .filter(col("v1c1") === 1) + + val q = dfLatest + .join(dfV1, "c2") + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.toString) + .start(outputDir.toString) + + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + + // c1 from dfLatest, c2 from dfLatest, c1 from dfV1 + var expected = Seq( + Row("one", 1, 1), + Row("one", 2, 1), + Row("two", 1, 1), + Row("two", 2, 1), + Row("three", 1, 1), + Row("three", 2, 1) + ) + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + expected + ) + + InsertToDeltaTable("""(1, "four"), (2, "four")""") + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + startingVersion = 4, + endingVersion = 4 + ) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + startingVersion = 1, + endingVersion = 4 + ) + + expected = expected ++ Seq(Row("four", 1, 1), Row("four", 2, 1)) + processAllAvailableInStream() + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + expected + ) + assertBlocksAreCleanedUp() + } + } + } + } +} diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingCDFUtilsSuite.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingCDFUtilsSuite.scala new file mode 100644 index 00000000000..687d9d10e94 --- /dev/null +++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingCDFUtilsSuite.scala @@ -0,0 +1,243 @@ +/* + 
* Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sharing.spark + +import java.io.File + +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import io.delta.sharing.client.{ + DeltaSharingClient, + DeltaSharingProfileProvider, + DeltaSharingRestClient +} +import io.delta.sharing.client.model.{DeltaTableFiles, DeltaTableMetadata, Table} +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.{SparkConf, SparkEnv} +import org.apache.spark.delta.sharing.{PreSignedUrlCache, PreSignedUrlFetcher} +import org.apache.spark.sql.{QueryTest, SparkSession} +import org.apache.spark.sql.delta.sharing.DeltaSharingTestSparkUtils +import org.apache.spark.sql.test.{SharedSparkSession} + +private object CDFTesTUtils { + val paths = Seq("http://path1", "http://path2") + + val SparkConfForReturnExpTime = "spark.delta.sharing.fileindexsuite.returnexptime" + + // 10 seconds + val expirationTimeMs = 10000 + + def getExpirationTimestampStr(returnExpTime: Boolean): String = { + if (returnExpTime) { + s""""expirationTimestamp":${System.currentTimeMillis() + expirationTimeMs},""" + } else { + "" + } + } + + // scalastyle:off line.size.limit + val fileStr1Id = "11d9b72771a72f178a6f2839f7f08528" + val metaDataStr = + """{"metaData":{"size":809,"deltaMetadata":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c2"],"configuration":{"delta.enableChangeDataFeed":"true"},"createdTime":1691734718560}}}""" + def getAddFileStr1(path: String, returnExpTime: Boolean = false): String = { + s"""{"file":{"id":"11d9b72771a72f178a6f2839f7f08528",${getExpirationTimestampStr( + returnExpTime + )}"deltaSingleAction":{"add":{"path":"${path}",""" + """"partitionValues":{"c2":"one"},"size":809,"modificationTime":1691734726073,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"c1\":1,\"c2\":\"one\"},\"maxValues\":{\"c1\":2,\"c2\":\"one\"},\"nullCount\":{\"c1\":0,\"c2\":0}}","tags":{"INSERTION_TIME":"1691734726073000","MIN_INSERTION_TIME":"1691734726073000","MAX_INSERTION_TIME":"1691734726073000","OPTIMIZE_TARGET_SIZE":"268435456"}}}}}""" + } + def getAddFileStr2(returnExpTime: Boolean = false): String = { + s"""{"file":{"id":"22d9b72771a72f178a6f2839f7f08529",${getExpirationTimestampStr( + returnExpTime + )}""" + """"deltaSingleAction":{"add":{"path":"http://path2","partitionValues":{"c2":"two"},"size":809,"modificationTime":1691734726073,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"c1\":1,\"c2\":\"two\"},\"maxValues\":{\"c1\":2,\"c2\":\"two\"},\"nullCount\":{\"c1\":0,\"c2\":0}}","tags":{"INSERTION_TIME":"1691734726073000","MIN_INSERTION_TIME":"1691734726073000","MAX_INSERTION_TIME":"1691734726073000","OPTIMIZE_TARGET_SIZE":"268435456"}}}}}""" + } + // 
scalastyle:on line.size.limit +} + +/** + * A mocked delta sharing client for unit tests. + */ +class TestDeltaSharingClientForCDFUtils( + profileProvider: DeltaSharingProfileProvider, + timeoutInSeconds: Int = 120, + numRetries: Int = 10, + maxRetryDuration: Long = Long.MaxValue, + sslTrustAll: Boolean = false, + forStreaming: Boolean = false, + responseFormat: String = DeltaSharingRestClient.RESPONSE_FORMAT_DELTA, + readerFeatures: String = "", + queryTablePaginationEnabled: Boolean = false, + maxFilesPerReq: Int = 100000) + extends DeltaSharingClient { + + import CDFTesTUtils._ + + private lazy val returnExpirationTimestamp = SparkSession.active.sessionState.conf + .getConfString( + SparkConfForReturnExpTime + ) + .toBoolean + + var numGetFileCalls: Int = -1 + + override def listAllTables(): Seq[Table] = throw new UnsupportedOperationException("not needed") + + override def getMetadata( + table: Table, + versionAsOf: Option[Long], + timestampAsOf: Option[String]): DeltaTableMetadata = { + throw new UnsupportedOperationException("getMetadata is not supported now.") + } + + override def getTableVersion(table: Table, startingTimestamp: Option[String] = None): Long = { + throw new UnsupportedOperationException("getTableVersion is not supported now.") + } + + override def getFiles( + table: Table, + predicates: Seq[String], + limit: Option[Long], + versionAsOf: Option[Long], + timestampAsOf: Option[String], + jsonPredicateHints: Option[String], + refreshToken: Option[String] + ): DeltaTableFiles = { + throw new UnsupportedOperationException("getFiles is not supported now.") + } + + override def getFiles( + table: Table, + startingVersion: Long, + endingVersion: Option[Long] + ): DeltaTableFiles = { + throw new UnsupportedOperationException(s"getFiles with startingVersion($startingVersion)") + } + + override def getCDFFiles( + table: Table, + cdfOptions: Map[String, String], + includeHistoricalMetadata: Boolean + ): DeltaTableFiles = { + numGetFileCalls += 1 + DeltaTableFiles( + version = 0, + lines = Seq[String]( + """{"protocol":{"deltaProtocol":{"minReaderVersion": 1, "minWriterVersion": 1}}}""", + metaDataStr, + getAddFileStr1(paths(numGetFileCalls.min(1)), returnExpirationTimestamp), + getAddFileStr2(returnExpirationTimestamp) + ), + respondedFormat = DeltaSharingRestClient.RESPONSE_FORMAT_DELTA + ) + } + + override def getForStreaming(): Boolean = forStreaming + + override def getProfileProvider: DeltaSharingProfileProvider = profileProvider +} + +class DeltaSharingCDFUtilsSuite + extends QueryTest + with DeltaSQLCommandTest + with SharedSparkSession + with DeltaSharingTestSparkUtils { + + import CDFTesTUtils._ + + private val shareName = "share" + private val schemaName = "default" + private val sharedTableName = "table" + + override protected def sparkConf: SparkConf = { + super.sparkConf + .set("spark.delta.sharing.preSignedUrl.expirationMs", expirationTimeMs.toString) + .set("spark.delta.sharing.driver.refreshCheckIntervalMs", "1000") + .set("spark.delta.sharing.driver.refreshThresholdMs", "2000") + .set("spark.delta.sharing.driver.accessThresholdToExpireMs", "60000") + } + + test("refresh works") { + PreSignedUrlCache.registerIfNeeded(SparkEnv.get) + + withTempDir { tempDir => + val profileFile = new File(tempDir, "foo.share") + FileUtils.writeStringToFile( + profileFile, + s"""{ + | "shareCredentialsVersion": 1, + | "endpoint": "https://localhost:12345/not-used-endpoint", + | "bearerToken": "mock" + |}""".stripMargin, + "utf-8" + ) + + def test(): Unit = { + val profilePath = 
profileFile.getCanonicalPath + val tablePath = new Path(s"$profilePath#$shareName.$schemaName.$sharedTableName") + val client = DeltaSharingRestClient(profilePath, false, "delta") + val dsTable = Table(share = shareName, schema = schemaName, name = sharedTableName) + + val options = new DeltaSharingOptions(Map("path" -> tablePath.toString)) + DeltaSharingCDFUtils.prepareCDFRelation( + SparkSession.active.sqlContext, + options, + dsTable, + client + ) + + val preSignedUrlCacheRef = PreSignedUrlCache.getEndpointRefInExecutor(SparkEnv.get) + val path = options.options.getOrElse( + "path", + throw DeltaSharingErrors.pathNotSpecifiedException + ) + val fetcher = new PreSignedUrlFetcher( + preSignedUrlCacheRef, + DeltaSharingUtils.getTablePathWithIdSuffix( + path, + DeltaSharingUtils.getQueryParamsHashId(options.cdfOptions) + ), + fileStr1Id, + 1000 + ) + // sleep for 25000ms to ensure that the urls are refreshed. + Thread.sleep(25000) + + // Verify that the url is refreshed as paths(1), not paths(0) anymore. + assert(fetcher.getUrl == paths(1)) + } + + withSQLConf( + "spark.delta.sharing.client.class" -> classOf[TestDeltaSharingClientForCDFUtils].getName, + "fs.delta-sharing-log.impl" -> classOf[DeltaSharingLogFileSystem].getName, + "spark.delta.sharing.profile.provider.class" -> + "io.delta.sharing.client.DeltaSharingFileProfileProvider", + SparkConfForReturnExpTime -> "true" + ) { + test() + } + + withSQLConf( + "spark.delta.sharing.client.class" -> classOf[TestDeltaSharingClientForCDFUtils].getName, + "fs.delta-sharing-log.impl" -> classOf[DeltaSharingLogFileSystem].getName, + "spark.delta.sharing.profile.provider.class" -> + "io.delta.sharing.client.DeltaSharingFileProfileProvider", + SparkConfForReturnExpTime -> "false" + ) { + test() + } + } + } +} diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceCMSuite.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceCMSuite.scala new file mode 100644 index 00000000000..ee4cc01454f --- /dev/null +++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceCMSuite.scala @@ -0,0 +1,984 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sharing.spark + +import java.io.File + +import org.apache.spark.sql.delta.{ + BatchCDFSchemaEndVersion, + BatchCDFSchemaLatest, + BatchCDFSchemaLegacy, + DeltaUnsupportedOperationException +} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.delta.sharing.DeltaSharingTestSparkUtils +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.streaming.{StreamingQueryException, StreamTest, Trigger} +import org.apache.spark.sql.types.{IntegerType, StringType, StructType} + +// Unit tests to verify that delta format sharing support column mapping (CM). 
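+// Column mapping is enabled on the source Delta tables either via
+// ALTER TABLE ... SET TBLPROPERTIES ('delta.columnMapping.mode' = 'name') or via the
+// createCMIdTableWithCdf helper for id mode; RENAME COLUMN and DROP COLUMN statements then
+// exercise the schema evolution handling of the deltaSharing source for both batch and
+// streaming reads.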
+class DeltaSharingDataSourceCMSuite + extends StreamTest + with DeltaSQLCommandTest + with DeltaSharingTestSparkUtils + with DeltaSharingDataSourceDeltaTestUtils { + + import testImplicits._ + + override def beforeEach(): Unit = { + super.beforeEach() + spark.conf.set("spark.databricks.delta.streaming.allowSourceColumnRenameAndDrop", "false") + } + + + private def testReadCMTable( + deltaTableName: String, + sharedTablePath: String, + dropC1: Boolean = false): Unit = { + val expectedSchema: StructType = if (deltaTableName == "cm_id_table") { + spark.read.format("delta").table(deltaTableName).schema + } else { + if (dropC1) { + new StructType() + .add("c2rename", StringType) + } else { + new StructType() + .add("c1", IntegerType) + .add("c2rename", StringType) + } + } + assert( + expectedSchema == spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(sharedTablePath) + .schema + ) + + val sharingDf = + spark.read.format("deltaSharing").option("responseFormat", "delta").load(sharedTablePath) + val deltaDf = spark.read.format("delta").table(deltaTableName) + checkAnswer(sharingDf, deltaDf) + assert(sharingDf.count() > 0) + + val filteredSharingDf = + spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(sharedTablePath) + .filter(col("c2rename") === "one") + val filteredDeltaDf = + spark.read + .format("delta") + .table(deltaTableName) + .filter(col("c2rename") === "one") + checkAnswer(filteredSharingDf, filteredDeltaDf) + assert(filteredSharingDf.count() > 0) + } + + private def testReadCMCdf( + deltaTableName: String, + sharedTablePath: String, + startingVersion: Int): Unit = { + val schema = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .load(sharedTablePath) + .schema + val expectedSchema = spark.read + .format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .table(deltaTableName) + .schema + assert(expectedSchema == schema) + + val deltaDf = spark.read + .format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .table(deltaTableName) + val sharingDf = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .load(sharedTablePath) + if (startingVersion <= 2) { + Seq(BatchCDFSchemaEndVersion, BatchCDFSchemaLatest, BatchCDFSchemaLegacy).foreach { m => + withSQLConf( + DeltaSQLConf.DELTA_CDF_DEFAULT_SCHEMA_MODE_FOR_COLUMN_MAPPING_TABLE.key -> + m.name + ) { + val deltaException = intercept[DeltaUnsupportedOperationException] { + deltaDf.collect() + } + assert( + deltaException.getMessage.contains("Retrieving table changes between") && + deltaException.getMessage.contains("failed because of an incompatible") + ) + val sharingException = intercept[DeltaUnsupportedOperationException] { + sharingDf.collect() + } + assert( + sharingException.getMessage.contains("Retrieving table changes between") && + sharingException.getMessage.contains("failed because of an incompatible") + ) + } + } + } else { + checkAnswer(sharingDf, deltaDf) + assert(sharingDf.count() > 0) + } + } + + private def testReadingSharedCMTable( + tempDir: File, + deltaTableName: String, + sharedTableNameBase: String): Unit = { + val sharedTableNameBasic = sharedTableNameBase + "_one" + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = 
sharedTableNameBasic + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableNameBasic) + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testReadCMTable( + deltaTableName = deltaTableName, + sharedTablePath = s"${profileFile.getCanonicalPath}#share1.default.$sharedTableNameBasic" + ) + } + + val sharedTableNameCdf = sharedTableNameBase + "_cdf" + // Test CM and CDF + // Error when reading cdf with startingVersion <= 2, matches delta behavior. + prepareMockedClientGetTableVersion(deltaTableName, sharedTableNameCdf) + prepareMockedClientAndFileSystemResultForCdf( + deltaTableName, + sharedTableNameCdf, + startingVersion = 0 + ) + prepareMockedClientAndFileSystemResultForCdf( + deltaTableName, + sharedTableNameCdf, + startingVersion = 2 + ) + prepareMockedClientAndFileSystemResultForCdf( + deltaTableName, + sharedTableNameCdf, + startingVersion = 3 + ) + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testReadCMCdf( + deltaTableName, + s"${profileFile.getCanonicalPath}#share1.default.$sharedTableNameCdf", + 0 + ) + testReadCMCdf( + deltaTableName, + s"${profileFile.getCanonicalPath}#share1.default.$sharedTableNameCdf", + 2 + ) + testReadCMCdf( + deltaTableName, + s"${profileFile.getCanonicalPath}#share1.default.$sharedTableNameCdf", + 3 + ) + } + + val sharedTableNameDrop = sharedTableNameBase + "_drop" + // DROP COLUMN + sql(s"ALTER TABLE $deltaTableName DROP COLUMN c1") + prepareMockedClientGetTableVersion(deltaTableName, sharedTableNameDrop) + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableNameDrop + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableNameDrop) + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testReadCMTable( + deltaTableName = deltaTableName, + sharedTablePath = s"${profileFile.getCanonicalPath}#share1.default.$sharedTableNameDrop", + dropC1 = true + ) + } + } + + /** + * column mapping tests + */ + test( + "DeltaSharingDataSource able to read data for cm name mode" + ) { + withTempDir { tempDir => + val deltaTableName = "delta_table_cm_name" + withTable(deltaTableName) { + createSimpleTable(deltaTableName, enableCdf = true) + sql(s"""INSERT INTO $deltaTableName VALUES (1, "one"), (2, "one")""") + spark.sql( + s"""ALTER TABLE $deltaTableName SET TBLPROPERTIES('delta.minReaderVersion' = '2', + |'delta.minWriterVersion' = '5', + |'delta.columnMapping.mode' = 'name')""".stripMargin + ) + sql(s"""ALTER TABLE $deltaTableName RENAME COLUMN c2 TO c2rename""") + sql(s"""INSERT INTO $deltaTableName VALUES (1, "two"), (2, "two")""") + + sql(s"""DELETE FROM $deltaTableName where c1=1""") + sql(s"""UPDATE $deltaTableName set c1="3" where c2rename="one"""") + + val sharedTableName = "shared_table_cm_name" + testReadingSharedCMTable(tempDir, deltaTableName, sharedTableName) + } + } + } + + test("DeltaSharingDataSource able to read data for cm id mode") { + withTempDir { tempDir => + val deltaTableName = "delta_table_cm_id" + withTable(deltaTableName) { + createCMIdTableWithCdf(deltaTableName) + sql(s"""INSERT INTO $deltaTableName VALUES (1, "one"), (2, "one")""") + sql(s"""INSERT INTO $deltaTableName VALUES (1, "two"), (2, "two")""") + + sql(s"""ALTER TABLE $deltaTableName RENAME COLUMN c2 TO c2rename""") + sql(s"""INSERT INTO $deltaTableName VALUES (1, "two"), (2, "two")""") + + sql(s"""DELETE FROM $deltaTableName where c1=1""") + 
sql(s"""UPDATE $deltaTableName set c1="3" where c2rename="one"""") + + val sharedTableName = "shared_table_cm_id" + testReadingSharedCMTable(tempDir, deltaTableName, sharedTableName) + } + } + } + + /** + * Streaming Test + */ + private def InsertToDeltaTable(tableName: String, values: String): Unit = { + sql(s"INSERT INTO $tableName VALUES $values") + } + + private def processAllAvailableInStream( + tablePath: String, + checkpointDirStr: String, + outputDirStr: String): Unit = { + val q = spark.readStream + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDirStr) + .option("mergeSchema", "true") + .start(outputDirStr) + + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + + private def processStreamWithSchemaTracking( + tablePath: String, + checkpointDirStr: String, + outputDirStr: String, + trigger: Option[Trigger] = None, + maxFilesPerTrigger: Option[Int] = None): Unit = { + var dataStreamReader = spark.readStream + .format("deltaSharing") + .option("schemaTrackingLocation", checkpointDirStr) + .option("responseFormat", "delta") + if (maxFilesPerTrigger.isDefined || trigger.isDefined) { + // When trigger.Once is defined, maxFilesPerTrigger is ignored -- this is the + // behavior of the streaming engine. And AvailableNow is converted as Once for delta sharing. + dataStreamReader = + dataStreamReader.option("maxFilesPerTrigger", maxFilesPerTrigger.getOrElse(1)) + } + var dataStreamWriter = dataStreamReader + .load(tablePath) + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDirStr) + .option("mergeSchema", "true") + if (trigger.isDefined) { + dataStreamWriter = dataStreamWriter.trigger(trigger.get) + } + + val q = dataStreamWriter.start(outputDirStr) + + try { + q.processAllAvailable() + if (maxFilesPerTrigger.isDefined && trigger.isEmpty) { + val progress = q.recentProgress.filter(_.numInputRows != 0) + // 2 batches -- 2 files are processed, this is how the delta table is constructed. + assert(progress.length === 2) + progress.foreach { p => + assert(p.numInputRows === 2) // 2 rows per batch -- 2 rows in each file. 
+ } + } + } finally { + q.stop() + } + } + + private def prepareProcessAndCheckInitSnapshot( + deltaTableName: String, + sharedTableName: String, + sharedTablePath: String, + checkpointDirStr: String, + outputDir: File, + useSchemaTracking: Boolean, + trigger: Option[Trigger] = None + ): Unit = { + InsertToDeltaTable(deltaTableName, """(1, "one"), (2, "one"), (1, "two")""") + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableName, + versionAsOf = Some(1L) + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientMetadata(deltaTableName, sharedTableName) + if (useSchemaTracking) { + processStreamWithSchemaTracking( + sharedTablePath, + checkpointDirStr, + outputDir.toString, + trigger + ) + } else { + processAllAvailableInStream( + sharedTablePath, + checkpointDirStr, + outputDir.toString + ) + } + + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq((1, "one"), (2, "one"), (1, "two")).toDF() + ) + } + + def prepareNewInsert( + deltaTableName: String, + sharedTableName: String, + values: String, + startingVersion: Long, + endingVersion: Long): Unit = { + InsertToDeltaTable(deltaTableName, values) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + startingVersion, + endingVersion + ) + } + + private def renameColumnAndPrepareRpcResponse( + deltaTableName: String, + sharedTableName: String, + startingVersion: Long, + endingVersion: Long, + insertAfterRename: Boolean): Unit = { + // Rename on the original delta table. + sql(s"""ALTER TABLE $deltaTableName RENAME COLUMN c2 TO c2rename""") + if (insertAfterRename) { + InsertToDeltaTable(deltaTableName, """(1, "three")""") + InsertToDeltaTable(deltaTableName, """(2, "three")""") + } + // Prepare all the delta sharing rpcs. 
+ prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientMetadata(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + startingVersion, + endingVersion + ) + } + + private def expectUseSchemaLogException( + tablePath: String, + checkpointDirStr: String, + outputDirStr: String): Unit = { + val error = intercept[StreamingQueryException] { + processAllAvailableInStream( + tablePath, + checkpointDirStr, + outputDirStr + ) + }.toString + assert(error.contains("DELTA_STREAMING_INCOMPATIBLE_SCHEMA_CHANGE_USE_SCHEMA_LOG")) + assert(error.contains("Please provide a 'schemaTrackingLocation'")) + } + + private def expectMetadataEvolutionException( + tablePath: String, + checkpointDirStr: String, + outputDirStr: String, + trigger: Option[Trigger] = None, + maxFilesPerTrigger: Option[Int] = None): Unit = { + val error = intercept[StreamingQueryException] { + processStreamWithSchemaTracking( + tablePath, + checkpointDirStr, + outputDirStr, + trigger, + maxFilesPerTrigger + ) + }.toString + assert(error.contains("DELTA_STREAMING_METADATA_EVOLUTION")) + assert(error.contains("Please restart the stream to continue")) + } + + private def expectSqlConfException( + tablePath: String, + checkpointDirStr: String, + outputDirStr: String, + trigger: Option[Trigger] = None, + maxFilesPerTrigger: Option[Int] = None): Unit = { + val error = intercept[StreamingQueryException] { + processStreamWithSchemaTracking( + tablePath, + checkpointDirStr, + outputDirStr, + trigger, + maxFilesPerTrigger + ) + }.toString + assert(error.contains("DELTA_STREAMING_CANNOT_CONTINUE_PROCESSING_POST_SCHEMA_EVOLUTION")) + assert(error.contains("delta.streaming.allowSourceColumnRenameAndDrop")) + } + + private def processWithSqlConf( + tablePath: String, + checkpointDirStr: String, + outputDirStr: String, + trigger: Option[Trigger] = None, + maxFilesPerTrigger: Option[Int] = None): Unit = { + // Using allowSourceColumnRenameAndDrop instead of + // allowSourceColumnRenameAndDrop.[checkpoint_hash] because the checkpointDir changes + // every test. + spark.conf + .set("spark.databricks.delta.streaming.allowSourceColumnRenameAndDrop", "always") + processStreamWithSchemaTracking( + tablePath, + checkpointDirStr, + outputDirStr, + trigger, + maxFilesPerTrigger + ) + } + + private def testRestartStreamingFourTimes( + tablePath: String, + checkpointDir: java.io.File, + outputDirStr: String): Unit = { + val checkpointDirStr = checkpointDir.toString + + // 1. Followed the previous error message to use schemaTrackingLocation, but received + // error suggesting restart. + expectMetadataEvolutionException(tablePath, checkpointDirStr, outputDirStr) + + // 2. Followed the previous error message to restart, but need to restart again for + // DeltaSource to handle offset movement, this is the SAME behavior as stream reading from + // the delta table directly. + expectMetadataEvolutionException(tablePath, checkpointDirStr, outputDirStr) + + // 3. Followed the previous error message to restart, but cannot write to the dest table. + expectSqlConfException(tablePath, checkpointDirStr, outputDirStr) + + // 4. Restart with new sqlConf, able to process new data and writing to a new column. + // Not using allowSourceColumnRenameAndDrop.[checkpoint_hash] because the checkpointDir + // changes every test, using allowSourceColumnRenameAndDrop=always instead. 
+ processWithSqlConf(tablePath, checkpointDirStr, outputDirStr) + } + + test("cm streaming works with newly added schemaTrackingLocation") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaTableName = "delta_table_cm_streaming_basic" + withTable(deltaTableName) { + createCMIdTableWithCdf(deltaTableName) + val sharedTableName = "shared_table_cm_streaming_basic" + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + // 1. Able to stream snapshot at version 1. + // The streaming is started without schemaTrackingLocation. + prepareProcessAndCheckInitSnapshot( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + sharedTablePath = tablePath, + checkpointDirStr = checkpointDir.toString, + outputDir = outputDir, + useSchemaTracking = false + ) + + // 2. Able to stream new data at version 2. + // The streaming is continued without schemaTrackingLocation. + prepareNewInsert( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + values = """(2, "two")""", + startingVersion = 2, + endingVersion = 2 + ) + processAllAvailableInStream( + tablePath, + checkpointDir.toString, + outputDir.toString + ) + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq((1, "one"), (2, "one"), (1, "two"), (2, "two")).toDF() + ) + + // 3. column renaming at version 3, and expect exception. + renameColumnAndPrepareRpcResponse( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + startingVersion = 2, + endingVersion = 3, + insertAfterRename = false + ) + expectUseSchemaLogException(tablePath, checkpointDir.toString, outputDir.toString) + + // 4. insert new data at version 4. + prepareNewInsert( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + values = """(1, "three"), (2, "three")""", + startingVersion = 2, + endingVersion = 4 + ) + // Additional preparation for rpc because deltaSource moved the offset to (3, -20) and + // (3, -19) after restart. + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 3, + 4 + ) + + // 5. with 4 restarts, able to continue the streaming + // The streaming is re-started WITH schemaTrackingLocation, and it's able to capture the + // schema used in previous version, based on the initial call of getBatch for the latest + // offset, which pulls the metadata from the server. + testRestartStreamingFourTimes(tablePath, checkpointDir, outputDir.toString) + + // An additional column is added to the output table. + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq( + (1, "one", null), + (2, "one", null), + (1, "two", null), + (2, "two", null), + (1, null, "three"), + (2, null, "three") + ).toDF() + ) + } + } + } + } + + test("cm streaming works with restart on snapshot query") { + // The main difference in this test is the rename happens after processing the initial snapshot, + // (instead of after making continuous progress), to test that the restart could fetch the + // latest metadata and the metadata from lastest offset. 
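+ // For reference, the restarts in this suite go through processStreamWithSchemaTracking, i.e.
+ // roughly spark.readStream.format("deltaSharing").option("responseFormat", "delta")
+ //   .option("schemaTrackingLocation", checkpointDir.toString).load(tablePath),
+ // where the checkpoint directory doubles as the schema tracking location.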
+ withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaTableName = "delta_table_streaming_restart" + withTable(deltaTableName) { + createCMIdTableWithCdf(deltaTableName) + val sharedTableName = "shared_table_streaming_restart" + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + // 1. Able to stream snapshot at version 1. + prepareProcessAndCheckInitSnapshot( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + sharedTablePath = tablePath, + checkpointDirStr = checkpointDir.toString, + outputDir = outputDir, + useSchemaTracking = false + ) + + // 2. column renaming at version 2, and expect exception. + renameColumnAndPrepareRpcResponse( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + startingVersion = 2, + endingVersion = 2, + insertAfterRename = false + ) + expectUseSchemaLogException(tablePath, checkpointDir.toString, outputDir.toString) + + // 3. insert new data at version 3. + prepareNewInsert( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + values = """(1, "three"), (2, "three")""", + startingVersion = 2, + endingVersion = 3 + ) + + // 4. with 4 restarts, able to continue the streaming + testRestartStreamingFourTimes(tablePath, checkpointDir, outputDir.toString) + + // An additional column is added to the output table. + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq( + (1, "one", null), + (2, "one", null), + (1, "two", null), + (1, null, "three"), + (2, null, "three") + ).toDF() + ) + } + } + } + } + + test("cm streaming works with schemaTracking used at start") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaTableName = "delta_table_streaming_schematracking" + withTable(deltaTableName) { + createCMIdTableWithCdf(deltaTableName) + val sharedTableName = "shared_table_streaming_schematracking" + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + // 1. Able to stream snapshot at version 1. + prepareProcessAndCheckInitSnapshot( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + sharedTablePath = tablePath, + checkpointDirStr = checkpointDir.toString, + outputDir = outputDir, + useSchemaTracking = true + ) + + // 2. Able to stream new data at version 2. + prepareNewInsert( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + values = """(2, "two")""", + startingVersion = 2, + endingVersion = 2 + ) + processStreamWithSchemaTracking( + tablePath, + checkpointDir.toString, + outputDir.toString + ) + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq((1, "one"), (2, "one"), (1, "two"), (2, "two")).toDF() + ) + + // 3. column renaming at version 3, and expect exception. + renameColumnAndPrepareRpcResponse( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + startingVersion = 2, + endingVersion = 3, + insertAfterRename = false + ) + expectMetadataEvolutionException(tablePath, checkpointDir.toString, outputDir.toString) + + // 4. First see exception, then with sql conf, able to stream new data at version 4. 
+ prepareNewInsert( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + values = """(1, "three"), (2, "three")""", + startingVersion = 3, + endingVersion = 4 + ) + expectSqlConfException(tablePath, checkpointDir.toString, outputDir.toString) + processWithSqlConf(tablePath, checkpointDir.toString, outputDir.toString) + + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq( + (1, "one", null), + (2, "one", null), + (1, "two", null), + (2, "two", null), + (1, null, "three"), + (2, null, "three") + ).toDF() + ) + } + } + } + } + + test("cm streaming works with restart with accumulated inserts after rename") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaTableName = "delta_table_streaming_accumulate" + withTable(deltaTableName) { + createCMIdTableWithCdf(deltaTableName) + val sharedTableName = "shared_table_streaming_accumulate" + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + // 1. Able to stream snapshot at version 1. + prepareProcessAndCheckInitSnapshot( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + sharedTablePath = tablePath, + checkpointDirStr = checkpointDir.toString, + outputDir = outputDir, + useSchemaTracking = false + ) + + // 2. column renaming at version 2, and expect exception. + renameColumnAndPrepareRpcResponse( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + startingVersion = 2, + endingVersion = 4, + insertAfterRename = true + ) + expectUseSchemaLogException(tablePath, checkpointDir.toString, outputDir.toString) + + // 4. with 4 restarts, able to continue the streaming + testRestartStreamingFourTimes(tablePath, checkpointDir, outputDir.toString) + + // An additional column is added to the output table. + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq( + (1, "one", null), + (2, "one", null), + (1, "two", null), + (1, null, "three"), + (2, null, "three") + ).toDF() + ) + } + } + } + } + + test("cm streaming works with column drop and add") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaTableName = "delta_table_column_drop" + withTable(deltaTableName) { + createCMIdTableWithCdf(deltaTableName) + val sharedTableName = "shared_table_column_drop" + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + // 1. Able to stream snapshot at version 1. + prepareProcessAndCheckInitSnapshot( + deltaTableName = deltaTableName, + sharedTableName = sharedTableName, + sharedTablePath = tablePath, + checkpointDirStr = checkpointDir.toString, + outputDir = outputDir, + useSchemaTracking = true + ) + + // 2. drop column c1 at version 2 + sql(s"ALTER TABLE $deltaTableName DROP COLUMN c1") + // 3. add column c3 at version 3 + sql(s"ALTER TABLE $deltaTableName ADD COLUMN (c3 int)") + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientMetadata(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 2, + 3 + ) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 3, + 3 + ) + + // Needs a 3 restarts for deltaSource to catch up. 
+ expectMetadataEvolutionException(tablePath, checkpointDir.toString, outputDir.toString) + expectSqlConfException(tablePath, checkpointDir.toString, outputDir.toString) + spark.conf + .set("spark.databricks.delta.streaming.allowSourceColumnRenameAndDrop", "always") + expectMetadataEvolutionException(tablePath, checkpointDir.toString, outputDir.toString) + processWithSqlConf(tablePath, checkpointDir.toString, outputDir.toString) + + // 4. insert at version 4 + InsertToDeltaTable(deltaTableName, """("four", 4)""") + // 5. insert at version 5 + InsertToDeltaTable(deltaTableName, """("five", 5)""") + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 3, + 5 + ) + + processStreamWithSchemaTracking( + tablePath, + checkpointDir.toString, + outputDir.toString + ) + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq[(java.lang.Integer, String, java.lang.Integer)]( + (1, "one", null), + (2, "one", null), + (1, "two", null), + (null, "four", 4), + (null, "five", 5) + ).toDF() + ) + } + } + } + } + + + test("cm streaming works with MaxFilesPerTrigger") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaTableName = "delta_table_maxfiles" + withTable(deltaTableName) { + createCMIdTableWithCdf(deltaTableName) + val sharedTableName = "shared_table_maxfiles" + val profileFile = prepareProfileFile(inputDir) + val tablePath = profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + // 1. Able to stream snapshot at version 1. + InsertToDeltaTable(deltaTableName, """(1, "one"), (2, "one"), (1, "two"), (2, "two")""") + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableName, + versionAsOf = Some(1L) + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientMetadata(deltaTableName, sharedTableName) + + // process with maxFilesPerTrigger. + processStreamWithSchemaTracking( + tablePath, + checkpointDir.toString, + outputDir.toString, + trigger = None, + maxFilesPerTrigger = Some(1) + ) + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq((1, "one"), (2, "one"), (1, "two"), (2, "two")).toDF() + ) + + // 2. column renaming at version 2, no exception because of Trigger.Once. + sql(s"""ALTER TABLE $deltaTableName RENAME COLUMN c2 TO c2rename""") + + // Prepare all the delta sharing rpcs. + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientMetadata(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + startingVersion = 1, + endingVersion = 2 + ) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + startingVersion = 2, + endingVersion = 2 + ) + + // maxFilesPerTrigger doesn't change whether exception is thrown or not. + expectMetadataEvolutionException( + tablePath, + checkpointDir.toString, + outputDir.toString, + trigger = None, + maxFilesPerTrigger = Some(1) + ) + + // 4. First see exception, then with sql conf, able to stream new data at version 4 and 5. 
+ InsertToDeltaTable( + deltaTableName, + """(1, "three"), (2, "three"), (1, "four"), (2, "four")""" + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForStreaming( + deltaTableName, + sharedTableName, + 2, + 3 + ) + + expectSqlConfException( + tablePath, + checkpointDir.toString, + outputDir.toString, + trigger = None, + maxFilesPerTrigger = Some(1) + ) + processWithSqlConf( + tablePath, + checkpointDir.toString, + outputDir.toString, + trigger = None, + maxFilesPerTrigger = Some(1) + ) + + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq( + (1, "one", null), + (2, "one", null), + (1, "two", null), + (2, "two", null), + (1, null, "three"), + (2, null, "three"), + (1, null, "four"), + (2, null, "four") + ).toDF() + ) + } + } + } + } +} diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaSuite.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaSuite.scala new file mode 100644 index 00000000000..44eb0cea681 --- /dev/null +++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaSuite.scala @@ -0,0 +1,1272 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.sharing.spark + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.delta.sharing.DeltaSharingTestSparkUtils +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.types.{ + DateType, + IntegerType, + LongType, + StringType, + StructType, + TimestampType +} + +trait DeltaSharingDataSourceDeltaSuiteBase + extends QueryTest + with DeltaSQLCommandTest + with DeltaSharingTestSparkUtils + with DeltaSharingDataSourceDeltaTestUtils { + + override def beforeEach(): Unit = { + spark.sessionState.conf.setConfString( + "spark.delta.sharing.jsonPredicateV2Hints.enabled", + "false" + ) + } + + /** + * metadata tests + */ + test("failed to getMetadata") { + withTempDir { tempDir => + val sharedTableName = "shared_table_broken_json" + + def test(tablePath: String, tableFullName: String): Unit = { + DeltaSharingUtils.overrideIteratorBlock[String]( + blockId = TestClientForDeltaFormatSharing.getBlockId(sharedTableName, "getMetadata"), + values = Seq("bad protocol string", "bad metadata string").toIterator + ) + DeltaSharingUtils.overrideSingleBlock[Long]( + blockId = TestClientForDeltaFormatSharing.getBlockId(sharedTableName, "getTableVersion"), + value = 1 + ) + // JsonParseException on "bad protocol string" + val exception = intercept[com.fasterxml.jackson.core.JsonParseException] { + spark.read.format("deltaSharing").option("responseFormat", "delta").load(tablePath).schema + } + assert(exception.getMessage.contains("Unrecognized token 'bad'")) + + // table_with_broken_protocol + // able to parse as a DeltaSharingSingleAction, but it's an addFile, so no protocol is found. + DeltaSharingUtils.overrideIteratorBlock[String]( + blockId = TestClientForDeltaFormatSharing.getBlockId(sharedTableName, "getMetadata"), + // scalastyle:off line.size.limit + values = Seq( + """{"add": {"path":"random","id":"random","partitionValues":{},"size":1,"motificationTime":1,"dataChange":false}}""" + ).toIterator + ) + val exception2 = intercept[IllegalStateException] { + spark.read.format("deltaSharing").option("responseFormat", "delta").load(tablePath).schema + } + assert( + exception2.getMessage + .contains(s"Failed to get Protocol for $tableFullName") + ) + + // table_with_broken_metadata + // able to parse as a DeltaSharingSingleAction, but it's only a protocol, so no metadata is found.
+ DeltaSharingUtils.overrideIteratorBlock[String]( + blockId = TestClientForDeltaFormatSharing.getBlockId(sharedTableName, "getMetadata"), + values = Seq( + """{"protocol":{"minReaderVersion":1}}""" + ).toIterator + ) + val exception3 = intercept[IllegalStateException] { + spark.read.format("deltaSharing").option("responseFormat", "delta").load(tablePath).schema + } + assert( + exception3.getMessage + .contains(s"Failed to get Metadata for $tableFullName") + ) + } + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + val tableFullName = s"share1.default.$sharedTableName" + test(s"${profileFile.getCanonicalPath}#$tableFullName", tableFullName) + } + } + } + + def assertLimit(tableName: String, expectedLimit: Seq[Long]): Unit = { + assert(expectedLimit == + TestClientForDeltaFormatSharing.limits.filter(_._1.contains(tableName)).map(_._2)) + } + + def assertRequestedFormat(tableName: String, expectedFormat: Seq[String]): Unit = { + assert(expectedFormat == + TestClientForDeltaFormatSharing.requestedFormat.filter(_._1.contains(tableName)).map(_._2)) + } + + def assertJsonPredicateHints(tableName: String, expectedHints: Seq[String]): Unit = { + assert(expectedHints == + TestClientForDeltaFormatSharing.jsonPredicateHints.filter(_._1.contains(tableName)).map(_._2) + ) + } + /** + * snapshot queries + */ + test("DeltaSharingDataSource able to read simple data") { + withTempDir { tempDir => + val deltaTableName = "delta_table_simple" + withTable(deltaTableName) { + createTable(deltaTableName) + sql( + s"INSERT INTO $deltaTableName" + + """ VALUES (1, "one", "2023-01-01", "2023-01-01 00:00:00"), + |(2, "two", "2023-02-02", "2023-02-02 00:00:00")""".stripMargin + ) + + val sharedTableName = "shared_table_simple" + prepareMockedClientAndFileSystemResult(deltaTableName, sharedTableName) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + + val expectedSchema: StructType = new StructType() + .add("c1", IntegerType) + .add("c2", StringType) + .add("c3", DateType) + .add("c4", TimestampType) + val expected = Seq( + Row(1, "one", sqlDate("2023-01-01"), sqlTimestamp("2023-01-01 00:00:00")), + Row(2, "two", sqlDate("2023-02-02"), sqlTimestamp("2023-02-02 00:00:00")) + ) + + Seq(true, false).foreach { skippingEnabled => + Seq(true, false).foreach { sharingConfig => + Seq(true, false).foreach { deltaConfig => + val sharedTableName = s"shared_table_simple_" + + s"${skippingEnabled}_${sharingConfig}_$deltaConfig" + prepareMockedClientAndFileSystemResult(deltaTableName, sharedTableName) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + + def test(tablePath: String, tableName: String): Unit = { + assert( + expectedSchema == spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .schema + ) + val df = + spark.read.format("deltaSharing").option("responseFormat", "delta").load(tablePath) + checkAnswer(df, expected) + assert(df.count() > 0) + assertLimit(tableName, Seq.empty[Long]) + val limitDf = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .limit(1) + assert(limitDf.collect().size == 1) + assertLimit(tableName, Some(1L).filter(_ => skippingEnabled && sharingConfig && deltaConfig).toSeq) + } + + val limitPushdownConfigs = Map( + "spark.delta.sharing.limitPushdown.enabled" -> sharingConfig.toString, + DeltaSQLConf.DELTA_LIMIT_PUSHDOWN_ENABLED.key -> deltaConfig.toString, + DeltaSQLConf.DELTA_STATS_SKIPPING.key -> 
skippingEnabled.toString + ) + withSQLConf((limitPushdownConfigs ++ getDeltaSharingClassesSQLConf).toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + val tableName = s"share1.default.$sharedTableName" + test(s"${profileFile.getCanonicalPath}#$tableName", tableName) + } + } + } + } + } + } + } + + test("DeltaSharingDataSource able to read data with changes") { + withTempDir { tempDir => + val deltaTableName = "delta_table_change" + + def test(tablePath: String, expectedCount: Int, expectedSchema: StructType): Unit = { + assert( + expectedSchema == spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .schema + ) + + val deltaDf = spark.read.format("delta").table(deltaTableName) + val sharingDf = + spark.read.format("deltaSharing").option("responseFormat", "delta").load(tablePath) + checkAnswer(deltaDf, sharingDf) + assert(sharingDf.count() == expectedCount) + } + + withTable(deltaTableName) { + val sharedTableName = "shared_table_change" + createTable(deltaTableName) + + // test 1: insert 2 rows + sql( + s"INSERT INTO $deltaTableName" + + """ VALUES (1, "one", "2023-01-01", "2023-01-01 00:00:00"), + |(2, "two", "2023-02-02", "2023-02-02 00:00:00")""".stripMargin + ) + prepareMockedClientAndFileSystemResult(deltaTableName, sharedTableName) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + val expectedSchema: StructType = new StructType() + .add("c1", IntegerType) + .add("c2", StringType) + .add("c3", DateType) + .add("c4", TimestampType) + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + val tableName = s"share1.default.$sharedTableName" + test(s"${profileFile.getCanonicalPath}#$tableName", 2, expectedSchema) + } + + // test 2: insert 2 more rows, and rename a column + spark.sql( + s"""ALTER TABLE $deltaTableName SET TBLPROPERTIES('delta.minReaderVersion' = '2', + |'delta.minWriterVersion' = '5', + |'delta.columnMapping.mode' = 'name', 'delta.enableDeletionVectors' = true)""".stripMargin + ) + sql( + s"INSERT INTO $deltaTableName" + + """ VALUES (3, "three", "2023-03-03", "2023-03-03 00:00:00"), + |(4, "four", "2023-04-04", "2023-04-04 00:00:00")""".stripMargin + ) + sql(s"""ALTER TABLE $deltaTableName RENAME COLUMN c3 TO c3rename""") + prepareMockedClientAndFileSystemResult(deltaTableName, sharedTableName) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + val expectedNewSchema: StructType = new StructType() + .add("c1", IntegerType) + .add("c2", StringType) + .add("c3rename", DateType) + .add("c4", TimestampType) + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + val tableName = s"share1.default.$sharedTableName" + test(s"${profileFile.getCanonicalPath}#$tableName", 4, expectedNewSchema) + } + + // test 3: delete 1 row + sql(s"DELETE FROM $deltaTableName WHERE c1 = 2") + prepareMockedClientAndFileSystemResult(deltaTableName, sharedTableName) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + val tableName = s"share1.default.$sharedTableName" + test(s"${profileFile.getCanonicalPath}#$tableName", 3, expectedNewSchema) + } + } + } + } + + test("DeltaSharingDataSource able to auto resolve responseFormat") { + withTempDir { tempDir => + val deltaTableName = "delta_table_auto" + withTable(deltaTableName) { + createSimpleTable(deltaTableName, enableCdf = false) + sql( 
+ s"""INSERT INTO $deltaTableName VALUES (1, "one"), (2, "one")""".stripMargin + ) + sql( + s"""INSERT INTO $deltaTableName VALUES (1, "two"), (2, "two")""".stripMargin + ) + + val expectedSchema: StructType = new StructType() + .add("c1", IntegerType) + .add("c2", StringType) + + def testAutoResolve(tablePath: String, tableName: String, expectedFormat: String): Unit = { + assert( + expectedSchema == spark.read + .format("deltaSharing") + .load(tablePath) + .schema + ) + + val deltaDf = spark.read.format("delta").table(deltaTableName) + val sharingDf = spark.read.format("deltaSharing").load(tablePath) + checkAnswer(deltaDf, sharingDf) + assert(sharingDf.count() > 0) + assertLimit(tableName, Seq.empty[Long]) + assertRequestedFormat(tableName, Seq(expectedFormat)) + + val limitDf = spark.read + .format("deltaSharing") + .load(tablePath) + .limit(1) + assert(limitDf.collect().size == 1) + assertLimit(tableName, Seq(1L)) + + val deltaDfV1 = spark.read.format("delta").option("versionAsOf", 1).table(deltaTableName) + val sharingDfV1 = + spark.read.format("deltaSharing").option("versionAsOf", 1).load(tablePath) + checkAnswer(deltaDfV1, sharingDfV1) + assert(sharingDfV1.count() > 0) + assertRequestedFormat(tableName, Seq(expectedFormat)) + } + + // Test for delta format response + val sharedDeltaTable = "shared_delta_table" + prepareMockedClientAndFileSystemResult(deltaTableName, sharedDeltaTable) + prepareMockedClientAndFileSystemResult( + deltaTableName, + sharedDeltaTable, + versionAsOf = Some(1) + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedDeltaTable) + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testAutoResolve( + s"${profileFile.getCanonicalPath}#share1.default.$sharedDeltaTable", + s"share1.default.$sharedDeltaTable", + "delta" + ) + } + + // Test for parquet format response + val sharedParquetTable = "shared_parquet_table" + prepareMockedClientAndFileSystemResultForParquet( + deltaTableName, + sharedParquetTable + ) + prepareMockedClientAndFileSystemResultForParquet( + deltaTableName, + sharedParquetTable, + versionAsOf = Some(1) + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedParquetTable) + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testAutoResolve( + s"${profileFile.getCanonicalPath}#share1.default.$sharedParquetTable", + s"share1.default.$sharedParquetTable", + "parquet" + ) + } + } + } + } + + test("DeltaSharingDataSource able to read data with filters and select") { + withTempDir { tempDir => + val deltaTableName = "delta_table_filters" + withTable(deltaTableName) { + createSimpleTable(deltaTableName, enableCdf = false) + sql(s"""INSERT INTO $deltaTableName VALUES (1, "first"), (2, "first")""") + sql(s"""INSERT INTO $deltaTableName VALUES (1, "second"), (2, "second")""") + sql(s"""INSERT INTO $deltaTableName VALUES (1, "third"), (2, "third")""") + + Seq("c1", "c2", "c1c2").foreach { filterColumn => + val sharedTableName = s"shared_table_filters_$filterColumn" + prepareMockedClientAndFileSystemResult(deltaTableName, sharedTableName) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + + spark.sessionState.conf.setConfString( + "spark.delta.sharing.jsonPredicateV2Hints.enabled", + "true" + ) + + // The files returned from delta sharing client are the same for these queries. + // This is to test the filters are passed correctly to TahoeLogFileIndex for the local delta + // log. 
+ def testFiltersAndSelect(tablePath: String, tableName: String): Unit = { + // select + var expected = Seq(Row(1), Row(1), Row(1), Row(2), Row(2), Row(2)) + var df = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .select("c1") + checkAnswer(df, expected) + assertJsonPredicateHints(tableName, Seq.empty[String]) + + expected = Seq( + Row("first"), + Row("first"), + Row("second"), + Row("second"), + Row("third"), + Row("third") + ) + df = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .select("c2") + checkAnswer(df, expected) + assertJsonPredicateHints(tableName, Seq.empty[String]) + + // filter + var expectedJson = "" + if (filterColumn == "c1c2") { + expected = Seq(Row(1, "first"), Row(1, "second"), Row(1, "third"), Row(2, "second")) + df = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .filter(col("c1") === 1 || col("c2") === "second") + checkAnswer(df, expected) + expectedJson = + """{"op":"or","children":[ + | {"op":"equal","children":[ + | {"op":"column","name":"c1","valueType":"int"}, + | {"op":"literal","value":"1","valueType":"int"}]}, + | {"op":"equal","children":[ + | {"op":"column","name":"c2","valueType":"string"}, + | {"op":"literal","value":"second","valueType":"string"}]} + |]}""".stripMargin.replaceAll("\n", "").replaceAll(" ", "") + assertJsonPredicateHints(tableName, Seq(expectedJson)) + } else if (filterColumn == "c1") { + expected = Seq(Row(1, "first"), Row(1, "second"), Row(1, "third")) + df = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .filter(col("c1") === 1) + checkAnswer(df, expected) + expectedJson = + """{"op":"and","children":[ + | {"op":"not","children":[ + | {"op":"isNull","children":[ + | {"op":"column","name":"c1","valueType":"int"}]}]}, + | {"op":"equal","children":[ + | {"op":"column","name":"c1","valueType":"int"}, + | {"op":"literal","value":"1","valueType":"int"}]} + |]}""".stripMargin.replaceAll("\n", "").replaceAll(" ", "") + assertJsonPredicateHints(tableName, Seq(expectedJson)) + } else { + assert(filterColumn == "c2") + expected = Seq(Row(1, "second"), Row(2, "second")) + expectedJson = + """{"op":"and","children":[ + | {"op":"not","children":[ + | {"op":"isNull","children":[ + | {"op":"column","name":"c2","valueType":"string"}]}]}, + | {"op":"equal","children":[ + | {"op":"column","name":"c2","valueType":"string"}, + | {"op":"literal","value":"second","valueType":"string"}]} + |]}""".stripMargin.replaceAll("\n", "").replaceAll(" ", "") + df = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .filter(col("c2") === "second") + checkAnswer(df, expected) + assertJsonPredicateHints(tableName, Seq(expectedJson)) + + // filters + select as well + expected = Seq(Row(1), Row(2)) + df = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .filter(col("c2") === "second") + .select("c1") + checkAnswer(df, expected) + assertJsonPredicateHints(tableName, Seq(expectedJson)) + } + } + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testFiltersAndSelect( + s"${profileFile.getCanonicalPath}#share1.default.$sharedTableName", + s"share1.default.$sharedTableName" + ) + } + } + } + } + } + + test("DeltaSharingDataSource able to read data for time travel queries") { + withTempDir { tempDir => + val deltaTableName = 
"delta_table_time_travel" + withTable(deltaTableName) { + createTable(deltaTableName) + + sql( + s"INSERT INTO $deltaTableName" + + """ VALUES (1, "one", "2023-01-01", "2023-01-01 00:00:00")""".stripMargin + ) + sql( + s"INSERT INTO $deltaTableName" + + """ VALUES (2, "two", "2023-02-02", "2023-02-02 00:00:00")""".stripMargin + ) + sql( + s"INSERT INTO $deltaTableName" + + """ VALUES (3, "three", "2023-03-03", "2023-03-03 00:00:00")""".stripMargin + ) + + val sharedTableNameV1 = "shared_table_v1" + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableNameV1, + versionAsOf = Some(1L) + ) + + def testVersionAsOf1(tablePath: String): Unit = { + val dfV1 = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("versionAsOf", 1) + .load(tablePath) + val expectedV1 = Seq( + Row(1, "one", sqlDate("2023-01-01"), sqlTimestamp("2023-01-01 00:00:00")) + ) + checkAnswer(dfV1, expectedV1) + } + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testVersionAsOf1(s"${profileFile.getCanonicalPath}#share1.default.$sharedTableNameV1") + } + + // using different table name because spark caches the content read from a file, i.e., + // the delta log from 0.json. + // TODO: figure out how to get a per query id and use it in getCustomTablePath to + // differentiate the same table used in different queries. + // TODO: Also check if it's possible to disable the file cache. + val sharedTableNameV3 = "shared_table_v3" + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableNameV3, + versionAsOf = Some(3L) + ) + + def testVersionAsOf3(tablePath: String): Unit = { + val dfV3 = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("versionAsOf", 3) + .load(tablePath) + val expectedV3 = Seq( + Row(1, "one", sqlDate("2023-01-01"), sqlTimestamp("2023-01-01 00:00:00")), + Row(2, "two", sqlDate("2023-02-02"), sqlTimestamp("2023-02-02 00:00:00")), + Row(3, "three", sqlDate("2023-03-03"), sqlTimestamp("2023-03-03 00:00:00")) + ) + checkAnswer(dfV3, expectedV3) + } + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testVersionAsOf3(s"${profileFile.getCanonicalPath}#share1.default.$sharedTableNameV3") + } + + val sharedTableNameTs = "shared_table_ts" + // Given the result of delta sharing rpc is mocked, the actual value of the timestampStr + // can be any thing that's valid for DeltaSharingOptions, and formattedTimestamp is the + // parsed result and will be sent in the delta sharing rpc. 
+ val timestampStr = "2023-01-01 00:00:00" + val formattedTimestamp = "2023-01-01T08:00:00Z" + + prepareMockedClientGetTableVersion(deltaTableName, sharedTableNameTs) + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableNameTs, + versionAsOf = None, + timestampAsOf = Some(formattedTimestamp) + ) + + def testTimestampQuery(tablePath: String): Unit = { + val dfTs = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("timestampAsOf", timestampStr) + .load(tablePath) + val expectedTs = Seq( + Row(1, "one", sqlDate("2023-01-01"), sqlTimestamp("2023-01-01 00:00:00")), + Row(2, "two", sqlDate("2023-02-02"), sqlTimestamp("2023-02-02 00:00:00")), + Row(3, "three", sqlDate("2023-03-03"), sqlTimestamp("2023-03-03 00:00:00")) + ) + checkAnswer(dfTs, expectedTs) + } + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testTimestampQuery(s"${profileFile.getCanonicalPath}#share1.default.$sharedTableNameTs") + } + } + } + } + + test("DeltaSharingDataSource able to read data with more entries") { + withTempDir { tempDir => + val deltaTableName = "delta_table_more" + withTable(deltaTableName) { + createSimpleTable(deltaTableName, enableCdf = false) + // The table operations take about 6~10 seconds. + for (i <- 0 to 9) { + val iteration = s"iteration $i" + val valuesBuilder = Seq.newBuilder[String] + for (j <- 0 to 49) { + valuesBuilder += s"""(${i * 10 + j}, "$iteration")""" + } + sql(s"INSERT INTO $deltaTableName VALUES ${valuesBuilder.result().mkString(",")}") + } + + val sharedTableName = "shared_table_more" + prepareMockedClientAndFileSystemResult(deltaTableName, sharedTableName) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + + val expectedSchema: StructType = new StructType() + .add("c1", IntegerType) + .add("c2", StringType) + val expected = spark.read.format("delta").table(deltaTableName) + + def test(tablePath: String): Unit = { + assert( + expectedSchema == spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .schema + ) + val df = + spark.read.format("deltaSharing").option("responseFormat", "delta").load(tablePath) + checkAnswer(df, expected) + } + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + test(s"${profileFile.getCanonicalPath}#share1.default.$sharedTableName") + } + } + } + } + + test("DeltaSharingDataSource able to read data with join on the same table") { + withTempDir { tempDir => + val deltaTableName = "delta_table_join" + withTable(deltaTableName) { + createSimpleTable(deltaTableName, enableCdf = false) + sql(s"""INSERT INTO $deltaTableName VALUES (1, "first"), (2, "first")""") + sql(s"""INSERT INTO $deltaTableName VALUES (1, "second"), (2, "second")""") + sql(s"""INSERT INTO $deltaTableName VALUES (1, "third"), (2, "third")""") + + val sharedTableName = "shared_table_join" + prepareMockedClientAndFileSystemResult(deltaTableName, sharedTableName) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResult( + deltaTableName, + sharedTableName, + versionAsOf = Some(1L) + ) + + def testJoin(tablePath: String): Unit = { + // Query the same latest version + val deltaDfLatest = spark.read.format("delta").table(deltaTableName) + val deltaDfV1 = spark.read.format("delta").option("versionAsOf", 1).table(deltaTableName) + val sharingDfLatest = + 
spark.read.format("deltaSharing").option("responseFormat", "delta").load(tablePath) + val sharingDfV1 = + spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("versionAsOf", 1) + .load(tablePath) + + var deltaDfJoined = deltaDfLatest.join(deltaDfLatest, "c1") + var sharingDfJoined = sharingDfLatest.join(sharingDfLatest, "c1") + // CheckAnswer ensures that delta sharing produces the same result as delta. + // The check on the size is used to double check that a valid dataframe is generated. + checkAnswer(deltaDfJoined, sharingDfJoined) + assert(sharingDfJoined.count() > 0) + + // Query the same versionAsOf + deltaDfJoined = deltaDfV1.join(deltaDfV1, "c1") + sharingDfJoined = sharingDfV1.join(sharingDfV1, "c1") + checkAnswer(deltaDfJoined, sharingDfJoined) + assert(sharingDfJoined.count() > 0) + + // Query with different versions + deltaDfJoined = deltaDfLatest.join(deltaDfV1, "c1") + sharingDfJoined = sharingDfLatest.join(sharingDfV1, "c1") + checkAnswer(deltaDfJoined, sharingDfJoined) + // Size is 6 because for each of the 6 rows in latest, there is 1 row with the same c1 + // value in v1. + assert(sharingDfJoined.count() > 0) + } + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testJoin(s"${profileFile.getCanonicalPath}#share1.default.$sharedTableName") + } + } + } + } + + test("DeltaSharingDataSource able to read empty data") { + withTempDir { tempDir => + val deltaTableName = "delta_table_empty" + withTable(deltaTableName) { + createSimpleTable(deltaTableName, enableCdf = true) + sql(s"""INSERT INTO $deltaTableName VALUES (1, "first"), (2, "first")""") + sql(s"""INSERT INTO $deltaTableName VALUES (1, "second"), (2, "second")""") + sql(s"DELETE FROM $deltaTableName WHERE c1 <= 2") + // This command is just to create an empty table version at version 4. + spark.sql(s"ALTER TABLE $deltaTableName SET TBLPROPERTIES('delta.minReaderVersion' = 1)") + + val sharedTableName = "shared_table_empty" + prepareMockedClientAndFileSystemResult(deltaTableName, sharedTableName) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + + def testEmpty(tablePath: String): Unit = { + val deltaDf = spark.read.format("delta").table(deltaTableName) + val sharingDf = + spark.read.format("deltaSharing").option("responseFormat", "delta").load(tablePath) + checkAnswer(deltaDf, sharingDf) + assert(sharingDf.count() == 0) + + val deltaCdfDf = spark.read + .format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", 4) + .table(deltaTableName) + val sharingCdfDf = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", 4) + .load(tablePath) + checkAnswer(deltaCdfDf, sharingCdfDf) + assert(sharingCdfDf.count() == 0) + } + + // There's only metadata change but not actual files in version 4. 
+ prepareMockedClientAndFileSystemResultForCdf( + deltaTableName, + sharedTableName, + startingVersion = 4 + ) + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testEmpty(s"${profileFile.getCanonicalPath}#share1.default.$sharedTableName") + } + } + } + } + + /** + * cdf queries + */ + test("DeltaSharingDataSource able to read data for simple cdf query") { + withTempDir { tempDir => + val deltaTableName = "delta_table_cdf" + withTable(deltaTableName) { + sql(s""" + |CREATE TABLE $deltaTableName (c1 INT, c2 STRING) USING DELTA PARTITIONED BY (c2) + |TBLPROPERTIES (delta.enableChangeDataFeed = true) + |""".stripMargin) + // 2 inserts in version 1, 1 with c1=2 + sql(s"""INSERT INTO $deltaTableName VALUES (1, "one"), (2, "two")""") + // 1 insert in version 2, 0 with c1=2 + sql(s"""INSERT INTO $deltaTableName VALUES (3, "two")""") + // 0 operations in version 3 + sql(s"""OPTIMIZE $deltaTableName""") + // 2 updates in version 4, 2 with c1=2 + sql(s"""UPDATE $deltaTableName SET c2="new two" where c1=2""") + // 1 delete in version 5, 1 with c1=2 + sql(s"""DELETE FROM $deltaTableName WHERE c1 = 2""") + + val sharedTableName = "shard_table_cdf" + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + + Seq(0, 1, 2, 3, 4, 5).foreach { startingVersion => + val ts = getTimeStampForVersion(deltaTableName, startingVersion) + val startingTimestamp = DateTimeUtils.toJavaTimestamp(ts * 1000).toInstant.toString + prepareMockedClientAndFileSystemResultForCdf( + deltaTableName, + sharedTableName, + startingVersion, + Some(startingTimestamp) + ) + + def test(tablePath: String): Unit = { + val expectedSchema: StructType = new StructType() + .add("c1", IntegerType) + .add("c2", StringType) + .add("_change_type", StringType) + .add("_commit_version", LongType) + .add("_commit_timestamp", TimestampType) + val schema = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .load(tablePath) + .schema + assert(expectedSchema == schema) + + val expected = spark.read + .format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .table(deltaTableName) + val df = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .load(tablePath) + checkAnswer(df, expected) + assert(df.count() > 0) + } + + def testFiltersAndSelect(tablePath: String): Unit = { + val expectedSchema: StructType = new StructType() + .add("c2", StringType) + .add("_change_type", StringType) + .add("_commit_version", LongType) + val schema = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .load(tablePath) + .select("c2", "_change_type", "_commit_version") + .schema + assert(expectedSchema == schema) + + val expected = spark.read + .format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .table(deltaTableName) + .select("c2", "_change_type", "_commit_version") + val dfVersion = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .load(tablePath) + .select("c2", "_change_type", "_commit_version") + checkAnswer(dfVersion, expected) + val dfTime = spark.read + .format("deltaSharing") + 
.option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingTimestamp", startingTimestamp) + .load(tablePath) + .select("c2", "_change_type", "_commit_version") + checkAnswer(dfTime, expected) + assert(dfTime.count() > 0) + + val expectedFiltered = spark.read + .format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .table(deltaTableName) + .select("c2", "_change_type", "_commit_version") + .filter(col("c1") === 2) + val dfFiltered = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .load(tablePath) + .select("c2", "_change_type", "_commit_version") + .filter(col("c1") === 2) + checkAnswer(dfFiltered, expectedFiltered) + assert(dfFiltered.count() > 0) + } + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + test(profileFile.getCanonicalPath + s"#share1.default.$sharedTableName") + testFiltersAndSelect( + profileFile.getCanonicalPath + s"#share1.default.$sharedTableName" + ) + } + } + + // test join on the same table in cdf query + def testJoin(tablePath: String): Unit = { + val deltaV0 = spark.read + .format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", 0) + .table(deltaTableName) + val deltaV3 = spark.read + .format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", 3) + .table(deltaTableName) + val sharingV0 = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", 0) + .load(tablePath) + val sharingV3 = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", 3) + .load(tablePath) + + def testJoinedDf( + deltaLeft: DataFrame, + deltaRight: DataFrame, + sharingLeft: DataFrame, + sharingRight: DataFrame, + expectedSize: Int): Unit = { + val deltaJoined = deltaLeft.join(deltaRight, usingColumns = Seq("c1", "c2")) + val sharingJoined = sharingLeft.join(sharingRight, usingColumns = Seq("c1", "c2")) + checkAnswer(deltaJoined, sharingJoined) + assert(sharingJoined.count() > 0) + } + testJoinedDf(deltaV0, deltaV0, sharingV0, sharingV0, 10) + testJoinedDf(deltaV3, deltaV3, sharingV3, sharingV3, 5) + testJoinedDf(deltaV0, deltaV3, sharingV0, sharingV3, 6) + } + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testJoin(profileFile.getCanonicalPath + s"#share1.default.$sharedTableName") + } + } + } + } + + test("DeltaSharingDataSource able to read data for cdf query with more entries") { + withTempDir { tempDir => + val deltaTableName = "delta_table_cdf_more" + withTable(deltaTableName) { + createSimpleTable(deltaTableName, enableCdf = true) + // The table operations take about 20~30 seconds. 
+ for (i <- 0 to 9) { + val iteration = s"iteration $i" + val valuesBuilder = Seq.newBuilder[String] + for (j <- 0 to 49) { + valuesBuilder += s"""(${i * 10 + j}, "$iteration")""" + } + sql(s"INSERT INTO $deltaTableName VALUES ${valuesBuilder.result().mkString(",")}") + sql(s"""UPDATE $deltaTableName SET c1 = c1 + 100 where c2 = "${iteration}"""") + sql(s"""DELETE FROM $deltaTableName where c2 = "${iteration}"""") + } + + val sharedTableName = "shard_table_cdf_more" + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + Seq(0, 10, 20, 30).foreach { startingVersion => + prepareMockedClientAndFileSystemResultForCdf( + deltaTableName, + sharedTableName, + startingVersion + ) + + val expected = spark.read + .format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .table(deltaTableName) + + def test(tablePath: String): Unit = { + val df = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .load(tablePath) + checkAnswer(df, expected) + assert(df.count() > 0) + } + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + test(profileFile.getCanonicalPath + s"#share1.default.$sharedTableName") + } + } + } + } + } + + /** + * deletion vector tests + */ + test("DeltaSharingDataSource able to read data for dv table") { + withTempDir { tempDir => + val deltaTableName = "delta_table_dv" + withTable(deltaTableName) { + spark + .range(start = 0, end = 100) + .withColumn("partition", col("id").divide(10).cast("int")) + .write + .partitionBy("partition") + .format("delta") + .saveAsTable(deltaTableName) + spark + .range(start = 100, end = 200) + .withColumn("partition", col("id").mod(100).divide(10).cast("int")) + .write + .mode("append") + .partitionBy("partition") + .format("delta") + .saveAsTable(deltaTableName) + spark.sql( + s"ALTER TABLE $deltaTableName SET TBLPROPERTIES('delta.enableDeletionVectors' = true)" + ) + + // Delete 2 rows per partition. + sql(s"""DELETE FROM $deltaTableName where mod(id, 10) < 2""") + // Delete 1 more row per partition. + sql(s"""DELETE FROM $deltaTableName where mod(id, 10) = 3""") + // Delete 1 more row per partition. 
+ sql(s"""DELETE FROM $deltaTableName where mod(id, 10) = 6""") + + Seq(true, false).foreach { skippingEnabled => + val sharedTableName = s"shared_table_dv_$skippingEnabled" + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableName, + assertMultipleDvsInOneFile = true + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + + def testReadDVTable(tablePath: String): Unit = { + val expectedSchema: StructType = new StructType() + .add("id", LongType) + .add("partition", IntegerType) + assert( + expectedSchema == spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .schema + ) + + val sharingDf = + spark.read.format("deltaSharing").option("responseFormat", "delta").load(tablePath) + val deltaDf = spark.read.format("delta").table(deltaTableName) + val filteredSharingDf = + spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .filter(col("id").mod(10) > 5) + val filteredDeltaDf = + spark.read + .format("delta") + .table(deltaTableName) + .filter(col("id").mod(10) > 5) + + if (!skippingEnabled) { + def assertError(dataFrame: DataFrame): Unit = { + val ex = intercept[IllegalArgumentException] { + dataFrame.collect() + } + assert(ex.getMessage contains + "Cannot work with a non-pinned table snapshot of the TahoeFileIndex") + } + assertError(sharingDf) + assertError(filteredDeltaDf) + } else { + checkAnswer(sharingDf, deltaDf) + assert(sharingDf.count() > 0) + checkAnswer(filteredSharingDf, filteredDeltaDf) + assert(filteredSharingDf.count() > 0) + } + } + + val additionalConfigs = Map( + DeltaSQLConf.DELTA_STATS_SKIPPING.key -> skippingEnabled.toString + ) + withSQLConf((additionalConfigs ++ getDeltaSharingClassesSQLConf).toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testReadDVTable(s"${profileFile.getCanonicalPath}#share1.default.$sharedTableName") + } + } + } + } + } + + test("DeltaSharingDataSource able to read data for dv and cdf") { + withTempDir { tempDir => + val deltaTableName = "delta_table_dv_cdf" + withTable(deltaTableName) { + createDVTableWithCdf(deltaTableName) + // version 1: 20 inserts + spark + .range(start = 0, end = 20) + .select(col("id").cast("int").as("c1")) + .withColumn("partition", col("c1").divide(10).cast("int")) + .write + .mode("append") + .format("delta") + .saveAsTable(deltaTableName) + // version 2: 20 inserts + spark + .range(start = 100, end = 120) + .select(col("id").cast("int").as("c1")) + .withColumn("partition", col("c1").mod(100).divide(10).cast("int")) + .write + .mode("append") + .format("delta") + .saveAsTable(deltaTableName) + // version 3: 20 updates + sql(s"""UPDATE $deltaTableName SET c1=c1+5 where partition=0""") + // This deletes will create one DV file used by AddFile from both version 1 and version 2. 
+ // version 4: 14 deletes + sql(s"""DELETE FROM $deltaTableName WHERE mod(c1, 100)<=10""") + + val sharedTableName = "shard_table_dv_cdf" + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + + Seq(0, 1, 2, 3, 4).foreach { startingVersion => + prepareMockedClientAndFileSystemResultForCdf( + deltaTableName, + sharedTableName, + startingVersion, + assertMultipleDvsInOneFile = true + ) + + def testReadDVCdf(tablePath: String): Unit = { + val schema = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .load(tablePath) + .schema + val expectedSchema: StructType = new StructType() + .add("c1", IntegerType) + .add("partition", IntegerType) + .add("_change_type", StringType) + .add("_commit_version", LongType) + .add("_commit_timestamp", TimestampType) + assert(expectedSchema == schema) + + val deltaDf = spark.read + .format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .table(deltaTableName) + val sharingDf = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", startingVersion) + .load(tablePath) + checkAnswer(sharingDf, deltaDf) + assert(sharingDf.count() > 0) + } + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testReadDVCdf(s"${profileFile.getCanonicalPath}#share1.default.$sharedTableName") + } + } + } + } + } + + test("DeltaSharingDataSource able to read data for inline dv") { + import org.apache.spark.sql.delta.deletionvectors.RoaringBitmapArrayFormat + Seq(RoaringBitmapArrayFormat.Portable, RoaringBitmapArrayFormat.Native).foreach { format => + withTempDir { tempDir => + val deltaTableName = s"delta_table_inline_dv_$format" + withTable(deltaTableName) { + createDVTableWithCdf(deltaTableName) + // Use divide 10 to set partition column to 0 for all values, then use repartition to + // ensure the 5 values are written in one file. 
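+ // Keeping the 5 rows in one file matters because the mocked response attaches an inline
+ // deletion vector marking row ordinals 0 and 2 of that file as deleted, so only
+ // c1 = 1, 3 and 4 are expected to remain in the snapshot read.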
+ spark + .range(start = 0, end = 5) + .select(col("id").cast("int").as("c1")) + .withColumn("partition", col("c1").divide(10).cast("int")) + .repartition(1) + .write + .mode("append") + .format("delta") + .saveAsTable(deltaTableName) + + val sharedTableName = s"shared_table_inline_dv_$format" + prepareMockedClientAndFileSystemResult( + deltaTable = deltaTableName, + sharedTable = sharedTableName, + inlineDvFormat = Some(format) + ) + prepareMockedClientGetTableVersion(deltaTableName, sharedTableName) + prepareMockedClientAndFileSystemResultForCdf( + deltaTableName, + sharedTableName, + startingVersion = 1, + inlineDvFormat = Some(format) + ) + + def testReadInlineDVCdf(tablePath: String): Unit = { + val deltaDf = spark.read + .format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", 1) + .table(deltaTableName) + .filter(col("c1") > 1) + val sharingDf = spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .option("readChangeFeed", "true") + .option("startingVersion", 1) + .load(tablePath) + checkAnswer(sharingDf, deltaDf) + assert(sharingDf.count() > 0) + } + + def testReadInlineDV(tablePath: String): Unit = { + val expectedSchema: StructType = new StructType() + .add("c1", IntegerType) + .add("partition", IntegerType) + assert( + expectedSchema == spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .schema + ) + + val sharingDf = + spark.read.format("deltaSharing").option("responseFormat", "delta").load(tablePath) + val expectedDf = Seq(Row(1, 0), Row(3, 0), Row(4, 0)) + checkAnswer(sharingDf, expectedDf) + + val filteredSharingDf = + spark.read + .format("deltaSharing") + .option("responseFormat", "delta") + .load(tablePath) + .filter(col("c1") < 4) + val expectedFilteredDf = Seq(Row(1, 0), Row(3, 0)) + checkAnswer(filteredSharingDf, expectedFilteredDf) + } + + withSQLConf(getDeltaSharingClassesSQLConf.toSeq: _*) { + val profileFile = prepareProfileFile(tempDir) + testReadInlineDV(s"${profileFile.getCanonicalPath}#share1.default.$sharedTableName") + testReadInlineDVCdf(s"${profileFile.getCanonicalPath}#share1.default.$sharedTableName") + } + } + } + } + } +} + +class DeltaSharingDataSourceDeltaSuite extends DeltaSharingDataSourceDeltaSuiteBase {} diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaTestUtils.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaTestUtils.scala new file mode 100644 index 00000000000..64aa3f3678b --- /dev/null +++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaTestUtils.scala @@ -0,0 +1,661 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.sharing.spark + +import java.io.File +import java.nio.charset.StandardCharsets.UTF_8 + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.delta.{DeltaLog, Snapshot} +import org.apache.spark.sql.delta.actions.{ + Action, + AddCDCFile, + AddFile, + DeletionVectorDescriptor, + Metadata, + RemoveFile +} +import org.apache.spark.sql.delta.deletionvectors.{ + RoaringBitmapArray, + RoaringBitmapArrayFormat +} +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} +import com.google.common.hash.Hashing +import io.delta.sharing.client.model.{ + AddFile => ClientAddFile, + Metadata => ClientMetadata, + Protocol => ClientProtocol +} +import io.delta.sharing.spark.model.{ + DeltaSharingFileAction, + DeltaSharingMetadata, + DeltaSharingProtocol +} +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.SparkConf +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.test.SharedSparkSession + +trait DeltaSharingDataSourceDeltaTestUtils extends SharedSparkSession { + + override def beforeAll(): Unit = { + super.beforeAll() + // close DeltaSharingFileSystem to avoid impact from other unit tests. + FileSystem.closeAll() + } + + override protected def sparkConf: SparkConf = { + super.sparkConf + .set("spark.delta.sharing.preSignedUrl.expirationMs", "30000") + .set("spark.delta.sharing.driver.refreshCheckIntervalMs", "3000") + .set("spark.delta.sharing.driver.refreshThresholdMs", "10000") + .set("spark.delta.sharing.driver.accessThresholdToExpireMs", "300000") + } + + private[spark] def removePartitionPrefix(filePath: String): String = { + filePath.split("/").last + } + + private def getResponseDVAndId( + sharedTable: String, + deletionVector: DeletionVectorDescriptor): (DeletionVectorDescriptor, String) = { + if (deletionVector != null) { + if (deletionVector.storageType == DeletionVectorDescriptor.INLINE_DV_MARKER) { + (deletionVector, Hashing.sha256().hashString(deletionVector.uniqueId, UTF_8).toString) + } else { + val dvPath = deletionVector.absolutePath(new Path("not-used")) + ( + deletionVector.copy( + pathOrInlineDv = TestDeltaSharingFileSystem.encode(sharedTable, dvPath.getName), + storageType = DeletionVectorDescriptor.PATH_DV_MARKER + ), + Hashing.sha256().hashString(deletionVector.uniqueId, UTF_8).toString + ) + } + } else { + (null, null) + } + } + + private def isDataFile(filePath: String): Boolean = { + filePath.endsWith(".parquet") || filePath.endsWith(".bin") + } + + // Convert from delta AddFile to DeltaSharingFileAction to serialize to json. + private def getDeltaSharingFileActionForAddFile( + addFile: AddFile, + sharedTable: String, + version: Long, + timestamp: Long): DeltaSharingFileAction = { + val parquetFile = removePartitionPrefix(addFile.path) + + val (responseDV, dvFileId) = getResponseDVAndId(sharedTable, addFile.deletionVector) + + DeltaSharingFileAction( + id = Hashing.sha256().hashString(parquetFile, UTF_8).toString, + version = version, + timestamp = timestamp, + deletionVectorFileId = dvFileId, + deltaSingleAction = addFile + .copy( + path = TestDeltaSharingFileSystem.encode(sharedTable, parquetFile), + deletionVector = responseDV + ) + .wrap + ) + } + + // Convert from delta RemoveFile to DeltaSharingFileAction to serialize to json. 
+ // scalastyle:off removeFile + private def getDeltaSharingFileActionForRemoveFile( + removeFile: RemoveFile, + sharedTable: String, + version: Long, + timestamp: Long): DeltaSharingFileAction = { + val parquetFile = removePartitionPrefix(removeFile.path) + + val (responseDV, dvFileId) = getResponseDVAndId(sharedTable, removeFile.deletionVector) + + DeltaSharingFileAction( + id = Hashing.sha256().hashString(parquetFile, UTF_8).toString, + version = version, + timestamp = timestamp, + deletionVectorFileId = dvFileId, + deltaSingleAction = removeFile + .copy( + path = TestDeltaSharingFileSystem.encode(sharedTable, parquetFile), + deletionVector = responseDV + ) + .wrap + ) + // scalastyle:on removeFile + } + + // Reset the result for client.GetTableVersion for the sharedTable based on the latest table + // version of the deltaTable, use BlockManager to store the result. + private[spark] def prepareMockedClientGetTableVersion( + deltaTable: String, + sharedTable: String, + inputVersion: Option[Long] = None): Unit = { + DeltaSharingUtils.overrideSingleBlock[Long]( + blockId = TestClientForDeltaFormatSharing.getBlockId(sharedTable, "getTableVersion"), + value = inputVersion.getOrElse(getSnapshotToUse(deltaTable, None).version) + ) + } + + def getTimeStampForVersion(deltaTable: String, version: Long): Long = { + val snapshotToUse = getSnapshotToUse(deltaTable, None) + FileUtils + .listFiles(new File(snapshotToUse.deltaLog.logPath.toUri()), null, true) + .asScala + .foreach { f => + if (FileNames.isDeltaFile(new Path(f.getName))) { + if (FileNames.getFileVersion(new Path(f.getName)) == version) { + return f.lastModified + } + } + } + 0 + } + + // Prepare the result(Protocol and Metadata) for client.GetMetadata for the sharedTable based on + // the latest table info of the deltaTable, store them in BlockManager. + private[spark] def prepareMockedClientMetadata(deltaTable: String, sharedTable: String): Unit = { + val snapshotToUse = getSnapshotToUse(deltaTable, None) + val dsProtocol: DeltaSharingProtocol = DeltaSharingProtocol(snapshotToUse.protocol) + val dsMetadata: DeltaSharingMetadata = DeltaSharingMetadata( + deltaMetadata = snapshotToUse.metadata + ) + + // Put the metadata in blockManager for DeltaSharingClient to return for getMetadata. + DeltaSharingUtils.overrideIteratorBlock[String]( + blockId = TestClientForDeltaFormatSharing.getBlockId(sharedTable, "getMetadata"), + values = Seq(dsProtocol.json, dsMetadata.json).toIterator + ) + } + + private def updateAddFileWithInlineDV( + addFile: AddFile, + inlineDvFormat: RoaringBitmapArrayFormat.Value, + bitmap: RoaringBitmapArray): AddFile = { + val dv = DeletionVectorDescriptor.inlineInLog( + bitmap.serializeAsByteArray(inlineDvFormat), + bitmap.cardinality + ) + addFile + .removeRows( + deletionVector = dv, + updateStats = true + ) + ._1 + } + + private def updateDvPathToCount( + addFile: AddFile, + pathToCount: scala.collection.mutable.Map[String, Int]): Unit = { + if (addFile.deletionVector != null && + addFile.deletionVector.storageType != DeletionVectorDescriptor.INLINE_DV_MARKER) { + val dvPath = addFile.deletionVector.pathOrInlineDv + pathToCount.put(dvPath, pathToCount.getOrElse(dvPath, 0) + 1) + } + } + + // Sort by id in decreasing order. + private def deltaSharingFileActionDecreaseOrderFunc( + f1: model.DeltaSharingFileAction, + f2: model.DeltaSharingFileAction): Boolean = { + f1.id > f2.id + } + + // Sort by id in increasing order. 
+ private def deltaSharingFileActionIncreaseOrderFunc( + f1: model.DeltaSharingFileAction, + f2: model.DeltaSharingFileAction): Boolean = { + f1.id < f2.id + } + + private def getSnapshotToUse(deltaTable: String, versionAsOf: Option[Long]): Snapshot = { + val deltaLog = DeltaLog.forTable(spark, new TableIdentifier(deltaTable)) + if (versionAsOf.isDefined) { + deltaLog.getSnapshotAt(versionAsOf.get) + } else { + deltaLog.update() + } + } + + // This function does 2 jobs: + // 1. Prepare the result for functions of delta sharing rest client, i.e., (Protocol, Metadata) + // for getMetadata, (Protocol, Metadata, and list of lines from delta actions) for getFiles, use + // BlockManager to store the data to make them available across different classes. All the lines + // are for responseFormat=parquet. + // 2. Put the parquet file in blockManager for DeltaSharingFileSystem to load bytes out of it. + private[spark] def prepareMockedClientAndFileSystemResultForParquet( + deltaTable: String, + sharedTable: String, + versionAsOf: Option[Long] = None): Unit = { + val lines = Seq.newBuilder[String] + var totalSize = 0L + val clientAddFilesArrayBuffer = ArrayBuffer[ClientAddFile]() + + // To prepare faked delta sharing responses with needed files for DeltaSharingClient. + val snapshotToUse = getSnapshotToUse(deltaTable, versionAsOf) + + snapshotToUse.allFiles.collect().foreach { addFile => + val parquetFile = removePartitionPrefix(addFile.path) + val clientAddFile = ClientAddFile( + url = TestDeltaSharingFileSystem.encode(sharedTable, parquetFile), + id = Hashing.md5().hashString(parquetFile, UTF_8).toString, + partitionValues = addFile.partitionValues, + size = addFile.size, + stats = null, + version = snapshotToUse.version, + timestamp = snapshotToUse.timestamp + ) + totalSize = totalSize + addFile.size + clientAddFilesArrayBuffer += clientAddFile + } + + // Scan through the parquet files of the local delta table, and prepare the data of parquet file + // reading in DeltaSharingFileSystem. + val files = + FileUtils.listFiles(new File(snapshotToUse.deltaLog.dataPath.toUri()), null, true).asScala + files.foreach { f => + val filePath = f.getCanonicalPath + if (isDataFile(filePath)) { + // Put the parquet file in blockManager for DeltaSharingFileSystem to load bytes out of it. + DeltaSharingUtils.overrideIteratorBlock[Byte]( + blockId = TestDeltaSharingFileSystem.getBlockId(sharedTable, f.getName), + values = FileUtils.readFileToByteArray(f).toIterator + ) + } + } + + val clientProtocol = ClientProtocol(minReaderVersion = 1) + // This is specifically to set the size of the metadata. + val deltaMetadata = snapshotToUse.metadata + val clientMetadata = ClientMetadata( + id = deltaMetadata.id, + name = deltaMetadata.name, + description = deltaMetadata.description, + schemaString = deltaMetadata.schemaString, + configuration = deltaMetadata.configuration, + partitionColumns = deltaMetadata.partitionColumns, + size = totalSize + ) + lines += JsonUtils.toJson(clientProtocol.wrap) + lines += JsonUtils.toJson(clientMetadata.wrap) + clientAddFilesArrayBuffer.toSeq.foreach { clientAddFile => + lines += JsonUtils.toJson(clientAddFile.wrap) + } + + // Put the metadata in blockManager for DeltaSharingClient to return metadata when being asked. 
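+ // The blocks written below mirror the NDJSON shape of a delta sharing response: getMetadata
+ // returns a protocol line followed by a metaData line, and getFiles repeats those two lines
+ // followed by one line per file action.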
+ DeltaSharingUtils.overrideIteratorBlock[String]( + blockId = TestClientForDeltaFormatSharing.getBlockId( + sharedTableName = sharedTable, + queryType = "getMetadata", + versionAsOf = versionAsOf + ), + values = Seq( + JsonUtils.toJson(clientProtocol.wrap), + JsonUtils.toJson(clientMetadata.wrap) + ).toIterator + ) + + // Put the delta log (list of actions) in blockManager for DeltaSharingClient to return as the + // http response when getFiles is called. + DeltaSharingUtils.overrideIteratorBlock[String]( + blockId = TestClientForDeltaFormatSharing.getBlockId( + sharedTableName = sharedTable, + queryType = "getFiles", + versionAsOf = versionAsOf + ), + values = lines.result().toIterator + ) + } + + // This function does 2 jobs: + // 1. Prepare the result for functions of delta sharing rest client, i.e., (Protocol, Metadata) + // for getMetadata, (Protocol, Metadata, and list of lines from delta actions) for getFiles, use + // BlockManager to store the data to make them available across different classes. + // 2. Put the parquet file in blockManager for DeltaSharingFileSystem to load bytes out of it. + private[spark] def prepareMockedClientAndFileSystemResult( + deltaTable: String, + sharedTable: String, + versionAsOf: Option[Long] = None, + timestampAsOf: Option[String] = None, + inlineDvFormat: Option[RoaringBitmapArrayFormat.Value] = None, + assertMultipleDvsInOneFile: Boolean = false, + reverseFileOrder: Boolean = false): Unit = { + val lines = Seq.newBuilder[String] + var totalSize = 0L + + // To prepare faked delta sharing responses with needed files for DeltaSharingClient. + val snapshotToUse = getSnapshotToUse(deltaTable, versionAsOf) + val fileActionsArrayBuffer = ArrayBuffer[model.DeltaSharingFileAction]() + val dvPathToCount = scala.collection.mutable.Map[String, Int]() + snapshotToUse.allFiles.collect().foreach { addFile => + if (assertMultipleDvsInOneFile) { + updateDvPathToCount(addFile, dvPathToCount) + } + + val updatedAdd = if (inlineDvFormat.isDefined) { + // Remove row 0 and 2 in the AddFile. + updateAddFileWithInlineDV(addFile, inlineDvFormat.get, RoaringBitmapArray(0L, 2L)) + } else { + addFile + } + + val dsAddFile = getDeltaSharingFileActionForAddFile( + updatedAdd, + sharedTable, + snapshotToUse.version, + snapshotToUse.timestamp + ) + totalSize = totalSize + addFile.size + fileActionsArrayBuffer += dsAddFile + } + val fileActionSeq = if (reverseFileOrder) { + fileActionsArrayBuffer.toSeq.sortWith(deltaSharingFileActionDecreaseOrderFunc) + } else { + fileActionsArrayBuffer.toSeq.sortWith(deltaSharingFileActionIncreaseOrderFunc) + } + var previousIdOpt: Option[String] = None + fileActionSeq.foreach { fileAction => + if (reverseFileOrder) { + assert( + // Using < instead of <= because there can be a removeFile and addFile pointing to the + // same parquet file which result in the same file id, since id is a hash of file path. + // This is ok because eventually it can read data out of the correct parquet file. + !previousIdOpt.exists(_ < fileAction.id), + s"fileActions must be in decreasing order by id: ${previousIdOpt} is not smaller than" + + s" ${fileAction.id}." + ) + previousIdOpt = Some(fileAction.id) + } + lines += fileAction.json + } + if (assertMultipleDvsInOneFile) { + assert(dvPathToCount.max._2 > 1) + } + + // Scan through the parquet files of the local delta table, and prepare the data of parquet file + // reading in DeltaSharingFileSystem. 
+ val files = + FileUtils.listFiles(new File(snapshotToUse.deltaLog.dataPath.toUri()), null, true).asScala + files.foreach { f => + val filePath = f.getCanonicalPath + if (isDataFile(filePath)) { + // Put the parquet file in blockManager for DeltaSharingFileSystem to load bytes out of it. + DeltaSharingUtils.overrideIteratorBlock[Byte]( + blockId = TestDeltaSharingFileSystem.getBlockId(sharedTable, f.getName), + values = FileUtils.readFileToByteArray(f).toIterator + ) + } + } + + // This is specifically to set the size of the metadata. + val dsMetadata = DeltaSharingMetadata( + deltaMetadata = snapshotToUse.metadata, + size = totalSize + ) + val dsProtocol = DeltaSharingProtocol(deltaProtocol = snapshotToUse.protocol) + // Put the metadata in blockManager for DeltaSharingClient to return metadata when being asked. + DeltaSharingUtils.overrideIteratorBlock[String]( + blockId = TestClientForDeltaFormatSharing.getBlockId( + sharedTableName = sharedTable, + queryType = "getMetadata", + versionAsOf = versionAsOf, + timestampAsOf = timestampAsOf + ), + values = Seq(dsProtocol.json, dsMetadata.json).toIterator + ) + + lines += dsProtocol.json + lines += dsMetadata.json + // Put the delta log (list of actions) in blockManager for DeltaSharingClient to return as the + // http response when getFiles is called. + DeltaSharingUtils.overrideIteratorBlock[String]( + blockId = TestClientForDeltaFormatSharing.getBlockId( + sharedTableName = sharedTable, + queryType = "getFiles", + versionAsOf = versionAsOf, + timestampAsOf = timestampAsOf + ), + values = lines.result().toIterator + ) + } + + private[spark] def prepareMockedClientAndFileSystemResultForStreaming( + deltaTable: String, + sharedTable: String, + startingVersion: Long, + endingVersion: Long, + assertDVExists: Boolean = false): Unit = { + val actionLines = Seq.newBuilder[String] + + var maxVersion = -1L + var totalSize = 0L + + val deltaLog = DeltaLog.forTable(spark, new TableIdentifier(deltaTable)) + val startingSnapshot = deltaLog.getSnapshotAt(startingVersion) + actionLines += DeltaSharingProtocol(deltaProtocol = startingSnapshot.protocol).json + actionLines += DeltaSharingMetadata( + deltaMetadata = startingSnapshot.metadata, + version = startingVersion + ).json + + val logFiles = + FileUtils.listFiles(new File(deltaLog.logPath.toUri()), null, true).asScala + var dvExists = false + logFiles.foreach { f => + if (FileNames.isDeltaFile(new Path(f.getName))) { + val version = FileNames.getFileVersion(new Path(f.getName)) + if (version >= startingVersion && version <= endingVersion) { + // protocol/metadata are processed from startingSnapshot, only process versions greater + // than startingVersion for real actions and possible metadata changes. + maxVersion = maxVersion.max(version) + val timestamp = f.lastModified + + FileUtils.readLines(f).asScala.foreach { l => + val action = Action.fromJson(l) + action match { + case m: Metadata => + actionLines += DeltaSharingMetadata( + deltaMetadata = m, + version = version + ).json + case addFile: AddFile if addFile.dataChange => + // Convert from delta AddFile to DeltaSharingAddFile to serialize to json. 
+ val dsAddFile = + getDeltaSharingFileActionForAddFile(addFile, sharedTable, version, timestamp) + dvExists = dvExists || (dsAddFile.deletionVectorFileId != null) + totalSize = totalSize + addFile.size + actionLines += dsAddFile.json + case removeFile: RemoveFile if removeFile.dataChange => + // scalastyle:off removeFile + val dsRemoveFile = getDeltaSharingFileActionForRemoveFile( + removeFile, + sharedTable, + version, + timestamp + ) + // scalastyle:on removeFile + dvExists = dvExists || (dsRemoveFile.deletionVectorFileId != null) + totalSize = totalSize + removeFile.size.getOrElse(0L) + actionLines += dsRemoveFile.json + case _ => // ignore all other actions such as CommitInfo. + } + } + } + } + } + val dataFiles = + FileUtils.listFiles(new File(deltaLog.dataPath.toUri()), null, true).asScala + dataFiles.foreach { f => + if (isDataFile(f.getCanonicalPath)) { + DeltaSharingUtils.overrideIteratorBlock[Byte]( + blockId = TestDeltaSharingFileSystem.getBlockId(sharedTable, f.getName), + values = FileUtils.readFileToByteArray(f).toIterator + ) + } + } + + if (assertDVExists) { + assert(dvExists, "There should be DV in the files returned from server.") + } + + DeltaSharingUtils.overrideIteratorBlock[String]( + blockId = TestClientForDeltaFormatSharing.getBlockId( + sharedTable, + s"getFiles_${startingVersion}_$endingVersion" + ), + values = actionLines.result().toIterator + ) + } + + private[spark] def prepareMockedClientAndFileSystemResultForCdf( + deltaTable: String, + sharedTable: String, + startingVersion: Long, + startingTimestamp: Option[String] = None, + inlineDvFormat: Option[RoaringBitmapArrayFormat.Value] = None, + assertMultipleDvsInOneFile: Boolean = false): Unit = { + val actionLines = Seq.newBuilder[String] + + var maxVersion = -1L + var totalSize = 0L + + val deltaLog = DeltaLog.forTable(spark, new TableIdentifier(deltaTable)) + val startingSnapshot = deltaLog.getSnapshotAt(startingVersion) + actionLines += DeltaSharingProtocol(deltaProtocol = startingSnapshot.protocol).json + actionLines += DeltaSharingMetadata( + deltaMetadata = startingSnapshot.metadata, + version = startingVersion + ).json + + val dvPathToCount = scala.collection.mutable.Map[String, Int]() + val files = + FileUtils.listFiles(new File(deltaLog.logPath.toUri()), null, true).asScala + files.foreach { f => + if (FileNames.isDeltaFile(new Path(f.getName))) { + val version = FileNames.getFileVersion(new Path(f.getName)) + if (version >= startingVersion) { + // protocol/metadata are processed from startingSnapshot, only process versions greater + // than startingVersion for real actions and possible metadata changes. + maxVersion = maxVersion.max(version) + val timestamp = f.lastModified + FileUtils.readLines(f).asScala.foreach { l => + val action = Action.fromJson(l) + action match { + case m: Metadata => + actionLines += DeltaSharingMetadata( + deltaMetadata = m, + version = version + ).json + case addFile: AddFile if addFile.dataChange => + if (assertMultipleDvsInOneFile) { + updateDvPathToCount(addFile, dvPathToCount) + } + val updatedAdd = if (inlineDvFormat.isDefined) { + // Remove row 0 and 1 in the AddFile. 
+ updateAddFileWithInlineDV(addFile, inlineDvFormat.get, RoaringBitmapArray(0L, 1L)) + } else { + addFile + } + val dsAddFile = + getDeltaSharingFileActionForAddFile(updatedAdd, sharedTable, version, timestamp) + totalSize = totalSize + updatedAdd.size + actionLines += dsAddFile.json + case removeFile: RemoveFile if removeFile.dataChange => + // scalastyle:off removeFile + val dsRemoveFile = getDeltaSharingFileActionForRemoveFile( + removeFile, + sharedTable, + version, + timestamp + ) + // scalastyle:on removeFile + totalSize = totalSize + removeFile.size.getOrElse(0L) + actionLines += dsRemoveFile.json + case cdcFile: AddCDCFile => + val parquetFile = removePartitionPrefix(cdcFile.path) + + // Convert from delta AddCDCFile to DeltaSharingFileAction to serialize to json. + val dsCDCFile = DeltaSharingFileAction( + id = Hashing.sha256().hashString(parquetFile, UTF_8).toString, + version = version, + timestamp = timestamp, + deltaSingleAction = cdcFile + .copy( + path = TestDeltaSharingFileSystem.encode(sharedTable, parquetFile) + ) + .wrap + ) + totalSize = totalSize + cdcFile.size + actionLines += dsCDCFile.json + case _ => // ignore other lines + } + } + } + } + } + val dataFiles = + FileUtils.listFiles(new File(deltaLog.dataPath.toUri()), null, true).asScala + dataFiles.foreach { f => + val filePath = f.getCanonicalPath + if (isDataFile(filePath)) { + DeltaSharingUtils.overrideIteratorBlock[Byte]( + blockId = TestDeltaSharingFileSystem.getBlockId(sharedTable, f.getName), + values = FileUtils.readFileToByteArray(f).toIterator + ) + } + } + + if (assertMultipleDvsInOneFile) { + assert(dvPathToCount.max._2 > 1) + } + + DeltaSharingUtils.overrideIteratorBlock[String]( + blockId = + TestClientForDeltaFormatSharing.getBlockId(sharedTable, s"getCDFFiles_$startingVersion"), + values = actionLines.result().toIterator + ) + if (startingTimestamp.isDefined) { + DeltaSharingUtils.overrideIteratorBlock[String]( + blockId = TestClientForDeltaFormatSharing.getBlockId( + sharedTable, + s"getCDFFiles_${startingTimestamp.get}" + ), + values = actionLines.result().toIterator + ) + } + } + + protected def getDeltaSharingClassesSQLConf: Map[String, String] = { + Map( + "fs.delta-sharing.impl" -> classOf[TestDeltaSharingFileSystem].getName, + "spark.delta.sharing.client.class" -> + classOf[TestClientForDeltaFormatSharing].getName, + "spark.delta.sharing.profile.provider.class" -> + "io.delta.sharing.client.DeltaSharingFileProfileProvider" + ) + } +} diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingFileIndexSuite.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingFileIndexSuite.scala new file mode 100644 index 00000000000..74bce1bb8b4 --- /dev/null +++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingFileIndexSuite.scala @@ -0,0 +1,450 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.sharing.spark + +import java.io.File + +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import io.delta.sharing.client.{ + DeltaSharingClient, + DeltaSharingFileSystem, + DeltaSharingProfileProvider, + DeltaSharingRestClient +} +import io.delta.sharing.client.model.{DeltaTableFiles, DeltaTableMetadata, Table} +import io.delta.sharing.client.util.JsonUtils +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkEnv +import org.apache.spark.delta.sharing.{PreSignedUrlCache, PreSignedUrlFetcher} +import org.apache.spark.sql.{QueryTest, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{ + AttributeReference => SqlAttributeReference, + EqualTo => SqlEqualTo, + Literal => SqlLiteral +} +import org.apache.spark.sql.delta.sharing.DeltaSharingTestSparkUtils +import org.apache.spark.sql.types.{FloatType, IntegerType} + +private object TestUtils { + val paths = Seq("http://path1", "http://path2") + + val SparkConfForReturnExpTime = "spark.delta.sharing.fileindexsuite.returnexptime" + val SparkConfForUrlExpirationMs = "spark.delta.sharing.fileindexsuite.urlExpirationMs" + + // 10 seconds + val defaultUrlExpirationMs = 10000 + + def getExpirationTimestampStr(urlExpirationMs: Option[Int]): String = { + if (urlExpirationMs.isDefined) { + s""""expirationTimestamp":${System.currentTimeMillis() + urlExpirationMs.get},""" + } else { + "" + } + } + + // scalastyle:off line.size.limit + val protocolStr = + """{"protocol":{"deltaProtocol":{"minReaderVersion": 1, "minWriterVersion": 1}}}""" + val metaDataStr = + """{"metaData":{"size":809,"deltaMetadata":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c2"],"configuration":{},"createdTime":1691734718560}}}""" + val metaDataWithoutSizeStr = + """{"metaData":{"deltaMetadata":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c2"],"configuration":{},"createdTime":1691734718560}}}""" + def getAddFileStr1(path: String, urlExpirationMs: Option[Int] = None): String = { + s"""{"file":{"id":"11d9b72771a72f178a6f2839f7f08528",${getExpirationTimestampStr( + urlExpirationMs + )}"deltaSingleAction":{"add":{"path":"${path}",""" + """"partitionValues":{"c2":"one"},"size":809,"modificationTime":1691734726073,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"c1\":1,\"c2\":\"one\"},\"maxValues\":{\"c1\":2,\"c2\":\"one\"},\"nullCount\":{\"c1\":0,\"c2\":0}}","tags":{"INSERTION_TIME":"1691734726073000","MIN_INSERTION_TIME":"1691734726073000","MAX_INSERTION_TIME":"1691734726073000","OPTIMIZE_TARGET_SIZE":"268435456"}}}}}""" + } + def getAddFileStr2(urlExpirationMs: Option[Int] = None): String = { + s"""{"file":{"id":"22d9b72771a72f178a6f2839f7f08529",${getExpirationTimestampStr( + urlExpirationMs + )}""" + 
""""deltaSingleAction":{"add":{"path":"http://path2","partitionValues":{"c2":"two"},"size":809,"modificationTime":1691734726073,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"c1\":1,\"c2\":\"two\"},\"maxValues\":{\"c1\":2,\"c2\":\"two\"},\"nullCount\":{\"c1\":0,\"c2\":0}}","tags":{"INSERTION_TIME":"1691734726073000","MIN_INSERTION_TIME":"1691734726073000","MAX_INSERTION_TIME":"1691734726073000","OPTIMIZE_TARGET_SIZE":"268435456"}}}}}""" + } + // scalastyle:on line.size.limit +} + +/** + * A mocked delta sharing client for unit tests. + */ +class TestDeltaSharingClientForFileIndex( + profileProvider: DeltaSharingProfileProvider, + timeoutInSeconds: Int = 120, + numRetries: Int = 10, + maxRetryDuration: Long = Long.MaxValue, + sslTrustAll: Boolean = false, + forStreaming: Boolean = false, + responseFormat: String = DeltaSharingRestClient.RESPONSE_FORMAT_DELTA, + readerFeatures: String = "", + queryTablePaginationEnabled: Boolean = false, + maxFilesPerReq: Int = 100000) + extends DeltaSharingClient { + + import TestUtils._ + + private lazy val returnExpirationTimestamp = SparkSession.active.sessionState.conf + .getConfString( + SparkConfForReturnExpTime, + "false" + ) + .toBoolean + private lazy val urlExpirationMsOpt = if (returnExpirationTimestamp) { + val urlExpirationMs = SparkSession.active.sessionState.conf + .getConfString( + SparkConfForUrlExpirationMs, + defaultUrlExpirationMs.toString + ) + .toInt + Some(urlExpirationMs) + } else { + None + } + + var numGetFileCalls: Int = -1 + + var savedLimits = Seq.empty[Long] + var savedJsonPredicateHints = Seq.empty[String] + + override def listAllTables(): Seq[Table] = throw new UnsupportedOperationException("not needed") + + override def getMetadata( + table: Table, + versionAsOf: Option[Long], + timestampAsOf: Option[String]): DeltaTableMetadata = { + throw new UnsupportedOperationException("getMetadata is not supported now.") + } + + override def getTableVersion(table: Table, startingTimestamp: Option[String] = None): Long = { + throw new UnsupportedOperationException("getTableVersion is not supported now.") + } + + override def getFiles( + table: Table, + predicates: Seq[String], + limit: Option[Long], + versionAsOf: Option[Long], + timestampAsOf: Option[String], + jsonPredicateHints: Option[String], + refreshToken: Option[String] + ): DeltaTableFiles = { + numGetFileCalls += 1 + limit.foreach(lim => savedLimits = savedLimits :+ lim) + jsonPredicateHints.foreach(p => { + savedJsonPredicateHints = savedJsonPredicateHints :+ p + }) + + DeltaTableFiles( + version = 0, + lines = Seq[String]( + protocolStr, + metaDataStr, + getAddFileStr1(paths(numGetFileCalls.min(1)), urlExpirationMsOpt), + getAddFileStr2(urlExpirationMsOpt) + ), + respondedFormat = DeltaSharingRestClient.RESPONSE_FORMAT_DELTA + ) + } + + override def getFiles( + table: Table, + startingVersion: Long, + endingVersion: Option[Long] + ): DeltaTableFiles = { + throw new UnsupportedOperationException(s"getFiles with startingVersion($startingVersion)") + } + + override def getCDFFiles( + table: Table, + cdfOptions: Map[String, String], + includeHistoricalMetadata: Boolean + ): DeltaTableFiles = { + throw new UnsupportedOperationException( + s"getCDFFiles with cdfOptions:[$cdfOptions], " + + s"includeHistoricalMetadata:$includeHistoricalMetadata" + ) + } + + override def getForStreaming(): Boolean = forStreaming + + override def getProfileProvider: DeltaSharingProfileProvider = profileProvider + + def clear() { + savedLimits = Seq.empty[Long] + savedJsonPredicateHints 
= Seq.empty[String] + } +} + +class DeltaSharingFileIndexSuite + extends QueryTest + with DeltaSQLCommandTest + with DeltaSharingDataSourceDeltaTestUtils + with DeltaSharingTestSparkUtils { + + import TestUtils._ + + private def getMockedDeltaSharingMetadata(metaData: String): model.DeltaSharingMetadata = { + JsonUtils.fromJson[model.DeltaSharingSingleAction](metaData).metaData + } + + private def getMockedDeltaSharingFileAction(id: String): model.DeltaSharingFileAction = { + if (id.startsWith("11")) { + JsonUtils.fromJson[model.DeltaSharingSingleAction](getAddFileStr1(paths(0))).file + } else { + JsonUtils.fromJson[model.DeltaSharingSingleAction](getAddFileStr2()).file + } + } + + private val shareName = "share" + private val schemaName = "default" + private val sharedTableName = "table" + + private def prepareDeltaSharingFileIndex( + profilePath: String, + metaData: String): (Path, DeltaSharingFileIndex, DeltaSharingClient) = { + val tablePath = new Path(s"$profilePath#$shareName.$schemaName.$sharedTableName") + val client = DeltaSharingRestClient(profilePath, false, "delta") + + val spark = SparkSession.active + val params = new DeltaSharingFileIndexParams( + tablePath, + spark, + DeltaSharingUtils.DeltaSharingTableMetadata( + version = 0, + protocol = JsonUtils.fromJson[model.DeltaSharingSingleAction](protocolStr).protocol, + metadata = getMockedDeltaSharingMetadata(metaData) + ), + new DeltaSharingOptions(Map("path" -> tablePath.toString)) + ) + val dsTable = Table(share = shareName, schema = schemaName, name = sharedTableName) + (tablePath, new DeltaSharingFileIndex(params, dsTable, client, None), client) + } + + test("basic functions works") { + withTempDir { tempDir => + val profileFile = new File(tempDir, "foo.share") + FileUtils.writeStringToFile( + profileFile, + s"""{ + | "shareCredentialsVersion": 1, + | "endpoint": "https://localhost:12345/not-used-endpoint", + | "bearerToken": "mock" + |}""".stripMargin, + "utf-8" + ) + withSQLConf( + "spark.delta.sharing.client.class" -> classOf[TestDeltaSharingClientForFileIndex].getName, + "fs.delta-sharing-log.impl" -> classOf[DeltaSharingLogFileSystem].getName, + "spark.delta.sharing.profile.provider.class" -> + "io.delta.sharing.client.DeltaSharingFileProfileProvider" + ) { + val (tablePath, fileIndex, _) = + prepareDeltaSharingFileIndex(profileFile.getCanonicalPath, metaDataStr) + + assert(fileIndex.sizeInBytes == 809) + assert(fileIndex.partitionSchema.toDDL == "c2 STRING") + assert(fileIndex.rootPaths.length == 1) + assert(fileIndex.rootPaths.head == tablePath) + + intercept[UnsupportedOperationException] { + fileIndex.inputFiles + } + + val partitionDirectoryList = fileIndex.listFiles(Seq.empty, Seq.empty) + assert(partitionDirectoryList.length == 2) + partitionDirectoryList.foreach { partitionDirectory => + assert(!partitionDirectory.values.anyNull) + assert( + partitionDirectory.values.getString(0) == "one" || + partitionDirectory.values.getString(0) == "two" + ) + + partitionDirectory.files.foreach { f => + // Verify that the path can be decoded + val decodedPath = DeltaSharingFileSystem.decode(f.fileStatus.getPath) + val dsFileAction = getMockedDeltaSharingFileAction(decodedPath.fileId) + assert(decodedPath.tablePath.startsWith(tablePath.toString)) + assert(decodedPath.fileId == dsFileAction.id) + assert(decodedPath.fileSize == dsFileAction.size) + + assert(f.fileStatus.getLen == dsFileAction.size) + assert(f.fileStatus.getModificationTime == 0) + assert(f.fileStatus.isDirectory == false) + } + } + + // Check exception is 
thrown when metadata doesn't have size + val (_, fileIndex2, _) = + prepareDeltaSharingFileIndex(profileFile.getCanonicalPath, metaDataWithoutSizeStr) + val ex = intercept[IllegalStateException] { + fileIndex2.sizeInBytes + } + assert(ex.toString.contains("size is null in the metadata")) + } + } + } + + test("refresh works") { + PreSignedUrlCache.registerIfNeeded(SparkEnv.get) + + withTempDir { tempDir => + val profileFile = new File(tempDir, "foo.share") + FileUtils.writeStringToFile( + profileFile, + s"""{ + | "shareCredentialsVersion": 1, + | "endpoint": "https://localhost:12345/not-used-endpoint", + | "bearerToken": "mock" + |}""".stripMargin, + "utf-8" + ) + + def test(): Unit = { + val (_, fileIndex, _) = + prepareDeltaSharingFileIndex(profileFile.getCanonicalPath, metaDataStr) + val preSignedUrlCacheRef = PreSignedUrlCache.getEndpointRefInExecutor(SparkEnv.get) + + val partitionDirectoryList = fileIndex.listFiles(Seq.empty, Seq.empty) + assert(partitionDirectoryList.length == 2) + partitionDirectoryList.foreach { partitionDirectory => + partitionDirectory.files.foreach { f => + val decodedPath = DeltaSharingFileSystem.decode(f.fileStatus.getPath) + if (decodedPath.fileId.startsWith("11")) { + val fetcher = new PreSignedUrlFetcher( + preSignedUrlCacheRef, + decodedPath.tablePath, + decodedPath.fileId, + 1000 + ) + // sleep for 25000ms to ensure that the urls are refreshed. + Thread.sleep(25000) + + // Verify that the url is refreshed as paths(1), not paths(0) anymore. + assert(fetcher.getUrl == paths(1)) + } + } + } + } + + withSQLConf( + "spark.delta.sharing.client.class" -> classOf[TestDeltaSharingClientForFileIndex].getName, + "fs.delta-sharing-log.impl" -> classOf[DeltaSharingLogFileSystem].getName, + "spark.delta.sharing.profile.provider.class" -> + "io.delta.sharing.client.DeltaSharingFileProfileProvider", + SparkConfForReturnExpTime -> "true" + ) { + test() + } + + withSQLConf( + "spark.delta.sharing.client.class" -> classOf[TestDeltaSharingClientForFileIndex].getName, + "fs.delta-sharing-log.impl" -> classOf[DeltaSharingLogFileSystem].getName, + "spark.delta.sharing.profile.provider.class" -> + "io.delta.sharing.client.DeltaSharingFileProfileProvider", + SparkConfForReturnExpTime -> "false" + ) { + test() + } + } + } + + test("jsonPredicate test") { + withTempDir { tempDir => + val profileFile = new File(tempDir, "foo.share") + FileUtils.writeStringToFile( + profileFile, + s"""{ + | "shareCredentialsVersion": 1, + | "endpoint": "https://localhost:12345/not-used-endpoint", + | "bearerToken": "mock" + |}""".stripMargin, + "utf-8" + ) + withSQLConf( + "spark.delta.sharing.client.class" -> classOf[TestDeltaSharingClientForFileIndex].getName, + "fs.delta-sharing-log.impl" -> classOf[DeltaSharingLogFileSystem].getName, + "spark.delta.sharing.profile.provider.class" -> + "io.delta.sharing.client.DeltaSharingFileProfileProvider", + SparkConfForReturnExpTime -> "true", + SparkConfForUrlExpirationMs -> "3600000" // 1h + ) { + val (tablePath, fileIndex, client) = + prepareDeltaSharingFileIndex(profileFile.getCanonicalPath, metaDataStr) + val testClient = client.asInstanceOf[TestDeltaSharingClientForFileIndex] + + val spark = SparkSession.active + spark.sessionState.conf + .setConfString("spark.delta.sharing.jsonPredicateHints.enabled", "true") + + // We will send an equal op on partition filters as a SQL expression tree. 
+ val partitionSqlEq = SqlEqualTo( + SqlAttributeReference("id", IntegerType)(), + SqlLiteral(23, IntegerType) + ) + // The client should get json for jsonPredicateHints. + val expectedJson = + """{"op":"equal", + |"children":[ + | {"op":"column","name":"id","valueType":"int"}, + | {"op":"literal","value":"23","valueType":"int"}] + |}""".stripMargin.replaceAll("\n", "").replaceAll(" ", "") + + fileIndex.listFiles(Seq(partitionSqlEq), Seq.empty) + assert(testClient.savedJsonPredicateHints.size === 1) + assert(expectedJson == testClient.savedJsonPredicateHints(0)) + testClient.clear() + + // We will send another equal op as a SQL expression tree for data filters. + val dataSqlEq = SqlEqualTo( + SqlAttributeReference("cost", FloatType)(), + SqlLiteral(23.5.toFloat, FloatType) + ) + + // With V2 predicates disabled, the client should get json for partition filters only. + fileIndex.listFiles(Seq(partitionSqlEq), Seq(dataSqlEq)) + assert(testClient.savedJsonPredicateHints.size === 1) + assert(expectedJson == testClient.savedJsonPredicateHints(0)) + testClient.clear() + + // With V2 predicates enabled, the client should get json for partition and data filters + // joined at the top level by an AND operation. + val expectedJson2 = + """{"op":"and","children":[ + | {"op":"equal","children":[ + | {"op":"column","name":"id","valueType":"int"}, + | {"op":"literal","value":"23","valueType":"int"}]}, + | {"op":"equal","children":[ + | {"op":"column","name":"cost","valueType":"float"}, + | {"op":"literal","value":"23.5","valueType":"float"}]} + |]}""".stripMargin.replaceAll("\n", "").replaceAll(" ", "") + spark.sessionState.conf.setConfString( + "spark.delta.sharing.jsonPredicateV2Hints.enabled", + "true" + ) + fileIndex.listFiles(Seq(partitionSqlEq), Seq(dataSqlEq)) + assert(testClient.savedJsonPredicateHints.size === 1) + assert(expectedJson2 == testClient.savedJsonPredicateHints(0)) + testClient.clear() + + // With json predicates disabled, we should not get anything. + spark.sessionState.conf + .setConfString("spark.delta.sharing.jsonPredicateHints.enabled", "false") + fileIndex.listFiles(Seq(partitionSqlEq), Seq.empty) + assert(testClient.savedJsonPredicateHints.size === 0) + } + } + } +} diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingLogFileSystemSuite.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingLogFileSystemSuite.scala new file mode 100644 index 00000000000..e79f82c401b --- /dev/null +++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingLogFileSystemSuite.scala @@ -0,0 +1,124 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.sharing.spark + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.{SharedSparkContext, SparkEnv, SparkFunSuite} +import org.apache.spark.sql.delta.sharing.DeltaSharingTestSparkUtils +import org.apache.spark.storage.StorageLevel + +class DeltaSharingLogFileSystemSuite extends SparkFunSuite with SharedSparkContext { + import DeltaSharingLogFileSystem._ + + var hadoopConf: Configuration = new Configuration + + var path: Path = null + var fs: FileSystem = null + override def beforeAll(): Unit = { + super.beforeAll() + conf.set( + s"spark.hadoop.fs.${DeltaSharingLogFileSystem.SCHEME}.impl", + classOf[DeltaSharingLogFileSystem].getName + ) + hadoopConf = DeltaSharingTestSparkUtils.getHadoopConf(conf) + + path = encode(table1) + fs = path.getFileSystem(hadoopConf) + } + + // constants for testing. + private val table1 = "table1" + private val table2 = "table2" + + test("encode and decode") { + assert(decode(encode(table1)) == table1) + } + + test("file system should be cached") { + assert(fs.isInstanceOf[DeltaSharingLogFileSystem]) + assert(fs eq path.getFileSystem(hadoopConf)) + + assert(fs.getScheme == "delta-sharing-log") + assert(fs.getWorkingDirectory == new Path("delta-sharing-log:/")) + } + + test("unsupported functions") { + intercept[UnsupportedOperationException] { fs.create(path) } + intercept[UnsupportedOperationException] { fs.append(path) } + intercept[UnsupportedOperationException] { fs.rename(path, new Path(path, "a")) } + intercept[UnsupportedOperationException] { fs.delete(path, true) } + intercept[UnsupportedOperationException] { fs.listStatusIterator(path) } + intercept[UnsupportedOperationException] { fs.setWorkingDirectory(path) } + intercept[UnsupportedOperationException] { fs.mkdirs(path) } + } + + test("open works ok") { + val content = "this is the content\nanother line\nthird line" + SparkEnv.get.blockManager.putSingle[String]( + blockId = getDeltaSharingLogBlockId(path.toString), + value = content, + level = StorageLevel.MEMORY_AND_DISK_SER, + tellMaster = true + ) + assert(scala.io.Source.fromInputStream(fs.open(path)).mkString == content) + } + + test("exists works ok") { + val newPath = encode(table1) + val fileAndSizeSeq = Seq[DeltaSharingLogFileStatus]( + DeltaSharingLogFileStatus("filea", 10, 100) + ) + SparkEnv.get.blockManager.putIterator[DeltaSharingLogFileStatus]( + blockId = getDeltaSharingLogBlockId(newPath.toString), + values = fileAndSizeSeq.toIterator, + level = StorageLevel.MEMORY_AND_DISK_SER, + tellMaster = true + ) + + assert(fs.exists(newPath)) + assert(!fs.exists(new Path(newPath, "A"))) + } + + test("listStatus works ok") { + val newPath = encode(table2) + val fileAndSizeSeq = Seq[DeltaSharingLogFileStatus]( + DeltaSharingLogFileStatus("file_a", 10, 100), + DeltaSharingLogFileStatus("file_b", 20, 200) + ) + SparkEnv.get.blockManager.putIterator[DeltaSharingLogFileStatus]( + blockId = getDeltaSharingLogBlockId(newPath.toString), + values = fileAndSizeSeq.toIterator, + level = StorageLevel.MEMORY_AND_DISK_SER, + tellMaster = true + ) + + val files = fs.listStatus(newPath) + assert(files.length == 2) + assert(files(0).getPath == new Path("file_a")) + assert(files(0).getLen == 10) + assert(files(0).getModificationTime == 100) + assert(files(1).getPath == new Path("file_b")) + assert(files(1).getLen == 20) + assert(files(1).getModificationTime == 200) + + intercept[java.io.FileNotFoundException] { + fs.listStatus(new Path(newPath, "random")) + } + } +} diff 
--git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingTestSparkUtils.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingTestSparkUtils.scala new file mode 100644 index 00000000000..6dad12a981b --- /dev/null +++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingTestSparkUtils.scala @@ -0,0 +1,129 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.sharing + +import java.io.File + +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{ + getZoneId, + stringToDate, + stringToTimestamp, + toJavaDate, + toJavaTimestamp +} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.unsafe.types.UTF8String + +trait DeltaSharingTestSparkUtils extends SQLTestUtils { + + /** + * Creates 3 temporary directories for use within a function. + * + * @param f function to be run with created temp directories + */ + protected def withTempDirs(f: (File, File, File) => Unit): Unit = { + withTempDir { file1 => + withTempDir { file2 => + withTempDir { file3 => + f(file1, file2, file3) + } + } + } + } + + protected def sqlDate(date: String): java.sql.Date = { + toJavaDate(stringToDate(UTF8String.fromString(date)).get) + } + + protected def sqlTimestamp(timestamp: String): java.sql.Timestamp = { + toJavaTimestamp( + stringToTimestamp( + UTF8String.fromString(timestamp), + getZoneId(SQLConf.get.sessionLocalTimeZone) + ).get + ) + } + + protected def createTable(tableName: String): Unit = { + sql(s"""CREATE TABLE $tableName (c1 INT, c2 STRING, c3 date, c4 timestamp) + |USING DELTA PARTITIONED BY (c2) + |""".stripMargin) + } + + protected def createTableForStreaming(tableName: String, enableDV: Boolean = false): Unit = { + val tablePropertiesStr = if (enableDV) { + "TBLPROPERTIES (delta.enableDeletionVectors = true)" + } else { + "" + } + sql(s""" + |CREATE TABLE $tableName (value STRING) + |USING DELTA + |$tablePropertiesStr + |""".stripMargin) + } + + protected def createSimpleTable(tableName: String, enableCdf: Boolean): Unit = { + val tablePropertiesStr = if (enableCdf) { + "TBLPROPERTIES (delta.enableChangeDataFeed = true)" + } else { + "" + } + sql(s"""CREATE TABLE $tableName (c1 INT, c2 STRING) + |USING DELTA PARTITIONED BY (c2) + |$tablePropertiesStr + |""".stripMargin) + } + + protected def createCMIdTableWithCdf(tableName: String): Unit = { + sql(s"""CREATE TABLE $tableName (c1 INT, c2 STRING) USING DELTA PARTITIONED BY (c2) + |TBLPROPERTIES ('delta.columnMapping.mode' = 'id', delta.enableChangeDataFeed = true) + |""".stripMargin) + } + + protected def createDVTableWithCdf(tableName: String): Unit = { + sql(s"""CREATE TABLE $tableName (c1 INT, partition INT) USING DELTA PARTITIONED BY (partition) + |TBLPROPERTIES 
(delta.enableDeletionVectors = true, delta.enableChangeDataFeed = true) + |""".stripMargin) + } + + protected def prepareProfileFile(tempDir: File): File = { + val profileFile = new File(tempDir, "foo.share") + FileUtils.writeStringToFile( + profileFile, + s"""{ + | "shareCredentialsVersion": 1, + | "endpoint": "https://localhost:12345/not-used-endpoint", + | "bearerToken": "mock" + |}""".stripMargin, + "utf-8" + ) + profileFile + } +} + +object DeltaSharingTestSparkUtils { + def getHadoopConf(sparkConf: SparkConf): Configuration = { + new SparkHadoopUtil().newConfiguration(sparkConf) + } +} diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingUtilsSuite.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingUtilsSuite.scala new file mode 100644 index 00000000000..b759ad1998a --- /dev/null +++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingUtilsSuite.scala @@ -0,0 +1,58 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sharing.spark + +import scala.reflect.ClassTag + +import org.apache.spark.{SharedSparkContext, SparkEnv, SparkFunSuite} +import org.apache.spark.storage.BlockId + +class DeltaSharingUtilsSuite extends SparkFunSuite with SharedSparkContext { + import DeltaSharingUtils._ + + test("override single block in blockmanager works") { + val blockId = BlockId(s"${DeltaSharingUtils.DELTA_SHARING_BLOCK_ID_PREFIX}_1") + overrideSingleBlock[Int](blockId, 1) + assert(SparkEnv.get.blockManager.getSingle[Int](blockId).get == 1) + SparkEnv.get.blockManager.releaseLock(blockId) + overrideSingleBlock[String](blockId, "2") + assert(SparkEnv.get.blockManager.getSingle[String](blockId).get == "2") + SparkEnv.get.blockManager.releaseLock(blockId) + } + + def getSeqFromBlockManager[T: ClassTag](blockId: BlockId): Seq[T] = { + val iterator = SparkEnv.get.blockManager + .get[T](blockId) + .map( + _.data.asInstanceOf[Iterator[T]] + ) + .get + val seqBuilder = Seq.newBuilder[T] + while (iterator.hasNext) { + seqBuilder += iterator.next() + } + seqBuilder.result() + } + + test("override iterator block in blockmanager works") { + val blockId = BlockId(s"${DeltaSharingUtils.DELTA_SHARING_BLOCK_ID_PREFIX}_1") + overrideIteratorBlock[Int](blockId, values = Seq(1, 2).toIterator) + assert(getSeqFromBlockManager[Int](blockId) == Seq(1, 2)) + overrideIteratorBlock[String](blockId, values = Seq("3", "4").toIterator) + assert(getSeqFromBlockManager[String](blockId) == Seq("3", "4")) + } +} diff --git a/sharing/src/test/scala/io/delta/sharing/spark/TestClientForDeltaFormatSharing.scala b/sharing/src/test/scala/io/delta/sharing/spark/TestClientForDeltaFormatSharing.scala new file mode 100644 index 00000000000..e5d867a4ea9 --- /dev/null +++ b/sharing/src/test/scala/io/delta/sharing/spark/TestClientForDeltaFormatSharing.scala @@ -0,0 +1,273 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sharing.spark + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.delta.util.JsonUtils +import io.delta.sharing.client.{ + DeltaSharingClient, + DeltaSharingProfileProvider, + DeltaSharingRestClient +} +import io.delta.sharing.client.model.{ + AddFile => ClientAddFile, + DeltaTableFiles, + DeltaTableMetadata, + SingleAction, + Table +} + +import org.apache.spark.SparkEnv +import org.apache.spark.storage.BlockId + +/** + * A mocked delta sharing client for DeltaFormatSharing. + * The test suite needs to prepare the mocked delta sharing RPC responses and store them in + * the BlockManager. This client then simply returns the stored response for each RPC call. + */ +private[spark] class TestClientForDeltaFormatSharing( + profileProvider: DeltaSharingProfileProvider, + timeoutInSeconds: Int = 120, + numRetries: Int = 10, + maxRetryDuration: Long = Long.MaxValue, + sslTrustAll: Boolean = false, + forStreaming: Boolean = false, + responseFormat: String = DeltaSharingRestClient.RESPONSE_FORMAT_DELTA, + readerFeatures: String = "", + queryTablePaginationEnabled: Boolean = false, + maxFilesPerReq: Int = 100000) + extends DeltaSharingClient { + + assert( + responseFormat == DeltaSharingRestClient.RESPONSE_FORMAT_PARQUET || + (readerFeatures.contains("deletionVectors") && readerFeatures.contains("columnMapping")), + "deletionVectors and columnMapping should be supported in all types of queries." + ) + + import TestClientForDeltaFormatSharing._ + + override def listAllTables(): Seq[Table] = throw new UnsupportedOperationException("not needed") + + override def getMetadata( + table: Table, + versionAsOf: Option[Long] = None, + timestampAsOf: Option[String] = None): DeltaTableMetadata = { + val iterator = SparkEnv.get.blockManager + .get[String](getBlockId(table.name, "getMetadata", versionAsOf, timestampAsOf)) + .map(_.data.asInstanceOf[Iterator[String]]) + .getOrElse { + throw new IllegalStateException( + s"getMetadata is missing for: ${table.name}, versionAsOf:$versionAsOf, " + + s"timestampAsOf:$timestampAsOf. This shouldn't happen in the unit test." + ) + } + // iterator.toSeq doesn't trigger CompletionIterator in BlockManager which releases the reader + // lock on the underlying block. iterator hasNext does trigger it.
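+ // For reference, the suite stages these mocked lines in the BlockManager up front, for + // example (a sketch based on the helpers exercised in DeltaSharingUtilsSuite; the table + // name and the staged JSON strings here are only illustrative): + //   DeltaSharingUtils.overrideIteratorBlock[String]( + //     TestClientForDeltaFormatSharing.getBlockId("shared_table", "getMetadata"), + //     values = Seq(protocolJsonStr, metadataJsonStr).toIterator)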
+ val linesBuilder = Seq.newBuilder[String] + while (iterator.hasNext) { + linesBuilder += iterator.next() + } + if (table.name.contains("shared_parquet_table")) { + val lines = linesBuilder.result() + val protocol = JsonUtils.fromJson[SingleAction](lines(0)).protocol + val metadata = JsonUtils.fromJson[SingleAction](lines(1)).metaData + DeltaTableMetadata( + version = versionAsOf.getOrElse(getTableVersion(table)), + protocol = protocol, + metadata = metadata, + respondedFormat = DeltaSharingRestClient.RESPONSE_FORMAT_PARQUET + ) + } else { + DeltaTableMetadata( + version = versionAsOf.getOrElse(getTableVersion(table)), + lines = linesBuilder.result(), + respondedFormat = DeltaSharingRestClient.RESPONSE_FORMAT_DELTA + ) + } + } + + override def getTableVersion(table: Table, startingTimestamp: Option[String] = None): Long = { + val versionOpt = SparkEnv.get.blockManager.getSingle[Long]( + getBlockId(table.name, "getTableVersion") + ) + val version = versionOpt.getOrElse { + throw new IllegalStateException( + s"getTableVersion is missing for: ${table.name}. This shouldn't happen in the unit test." + ) + } + SparkEnv.get.blockManager.releaseLock(getBlockId(table.name, "getTableVersion")) + version + } + + override def getFiles( + table: Table, + predicates: Seq[String], + limit: Option[Long], + versionAsOf: Option[Long], + timestampAsOf: Option[String], + jsonPredicateHints: Option[String], + refreshToken: Option[String] + ): DeltaTableFiles = { + val tableFullName = s"${table.share}.${table.schema}.${table.name}" + limit.foreach(lim => TestClientForDeltaFormatSharing.limits.put(tableFullName, lim)) + TestClientForDeltaFormatSharing.requestedFormat.put(tableFullName, responseFormat) + jsonPredicateHints.foreach(p => + TestClientForDeltaFormatSharing.jsonPredicateHints.put(tableFullName, p)) + + val iterator = SparkEnv.get.blockManager + .get[String](getBlockId(table.name, "getFiles", versionAsOf, timestampAsOf)) + .map(_.data.asInstanceOf[Iterator[String]]) + .getOrElse { + throw new IllegalStateException( + s"getFiles is missing for: ${table.name} versionAsOf:$versionAsOf, " + + s"timestampAsOf:$timestampAsOf. This shouldn't happen in the unit test." + ) + } + // iterator.toSeq doesn't trigger CompletionIterator in BlockManager which releases the reader + // lock on the underlying block. iterator hasNext does trigger it. + val linesBuilder = Seq.newBuilder[String] + while (iterator.hasNext) { + linesBuilder += iterator.next() + } + if (table.name.contains("shared_parquet_table")) { + val lines = linesBuilder.result() + val protocol = JsonUtils.fromJson[SingleAction](lines(0)).protocol + val metadata = JsonUtils.fromJson[SingleAction](lines(1)).metaData + val files = ArrayBuffer[ClientAddFile]() + lines.drop(2).foreach { line => + val action = JsonUtils.fromJson[SingleAction](line) + if (action.file != null) { + files.append(action.file) + } else { + throw new IllegalStateException(s"Unexpected Line:${line}") + } + } + DeltaTableFiles( + versionAsOf.getOrElse(getTableVersion(table)), + protocol, + metadata, + files.toSeq, + respondedFormat = DeltaSharingRestClient.RESPONSE_FORMAT_PARQUET + ) + } else { + DeltaTableFiles( + version = versionAsOf.getOrElse(getTableVersion(table)), + lines = linesBuilder.result(), + respondedFormat = DeltaSharingRestClient.RESPONSE_FORMAT_DELTA + ) + } + } + + override def getFiles( + table: Table, + startingVersion: Long, + endingVersion: Option[Long] + ): DeltaTableFiles = { + assert( + endingVersion.isDefined, + "endingVersion is not defined. 
This shouldn't happen in unit test." + ) + val iterator = SparkEnv.get.blockManager + .get[String](getBlockId(table.name, s"getFiles_${startingVersion}_${endingVersion.get}")) + .map(_.data.asInstanceOf[Iterator[String]]) + .getOrElse { + throw new IllegalStateException( + s"getFiles is missing for: ${table.name} with [${startingVersion}, " + + s"${endingVersion.get}]. This shouldn't happen in the unit test." + ) + } + // iterator.toSeq doesn't trigger CompletionIterator in BlockManager which releases the reader + // lock on the underlying block. iterator hasNext does trigger it. + val linesBuilder = Seq.newBuilder[String] + while (iterator.hasNext) { + linesBuilder += iterator.next() + } + DeltaTableFiles( + version = getTableVersion(table), + lines = linesBuilder.result(), + respondedFormat = DeltaSharingRestClient.RESPONSE_FORMAT_DELTA + ) + } + + override def getCDFFiles( + table: Table, + cdfOptions: Map[String, String], + includeHistoricalMetadata: Boolean + ): DeltaTableFiles = { + val suffix = cdfOptions + .get(DeltaSharingOptions.CDF_START_VERSION) + .getOrElse( + cdfOptions.get(DeltaSharingOptions.CDF_START_TIMESTAMP).get + ) + val iterator = SparkEnv.get.blockManager + .get[String]( + getBlockId( + table.name, + s"getCDFFiles_$suffix" + ) + ) + .map( + _.data.asInstanceOf[Iterator[String]] + ) + .getOrElse { + throw new IllegalStateException( + s"getCDFFiles is missing for: ${table.name}. This shouldn't happen in the unit test." + ) + } + // iterator.toSeq doesn't trigger CompletionIterator in BlockManager which releases the reader + // lock on the underlying block. iterator hasNext does trigger it. + val linesBuilder = Seq.newBuilder[String] + while (iterator.hasNext) { + linesBuilder += iterator.next() + } + DeltaTableFiles( + version = getTableVersion(table), + lines = linesBuilder.result(), + respondedFormat = DeltaSharingRestClient.RESPONSE_FORMAT_DELTA + ) + } + + override def getForStreaming(): Boolean = forStreaming + + override def getProfileProvider: DeltaSharingProfileProvider = profileProvider +} + +object TestClientForDeltaFormatSharing { + def getBlockId( + sharedTableName: String, + queryType: String, + versionAsOf: Option[Long] = None, + timestampAsOf: Option[String] = None): BlockId = { + assert(!(versionAsOf.isDefined && timestampAsOf.isDefined)) + val suffix = if (versionAsOf.isDefined) { + s"_v${versionAsOf.get}" + } else if (timestampAsOf.isDefined) { + s"_t${timestampAsOf.get}" + } else { + "" + } + BlockId( + s"${DeltaSharingUtils.DELTA_SHARING_BLOCK_ID_PREFIX}" + + s"_${sharedTableName}_$queryType$suffix" + ) + } + + val limits = scala.collection.mutable.Map[String, Long]() + val requestedFormat = scala.collection.mutable.Map[String, String]() + val jsonPredicateHints = scala.collection.mutable.Map[String, String]() +} diff --git a/sharing/src/test/scala/io/delta/sharing/spark/TestDeltaSharingFileSystem.scala b/sharing/src/test/scala/io/delta/sharing/spark/TestDeltaSharingFileSystem.scala new file mode 100644 index 00000000000..2d372afba75 --- /dev/null +++ b/sharing/src/test/scala/io/delta/sharing/spark/TestDeltaSharingFileSystem.scala @@ -0,0 +1,140 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sharing.spark + +import java.io.FileNotFoundException +import java.net.{URI, URLDecoder, URLEncoder} +import java.util.concurrent.TimeUnit + +import io.delta.sharing.client.DeltaSharingFileSystem +import org.apache.hadoop.fs._ +import org.apache.hadoop.fs.permission.FsPermission +import org.apache.hadoop.util.Progressable + +import org.apache.spark.SparkEnv +import org.apache.spark.delta.sharing.{PreSignedUrlCache, PreSignedUrlFetcher} +import org.apache.spark.storage.BlockId + +/** + * Read-only file system for DeltaSharingDataSourceDeltaSuite. + * To replace DeltaSharingFileSystem and return the content for parquet files. + */ +private[spark] class TestDeltaSharingFileSystem extends FileSystem { + import TestDeltaSharingFileSystem._ + + private lazy val preSignedUrlCacheRef = PreSignedUrlCache.getEndpointRefInExecutor(SparkEnv.get) + + override def getScheme: String = SCHEME + + override def getUri(): URI = URI.create(s"$SCHEME:///") + + override def open(f: Path, bufferSize: Int): FSDataInputStream = { + val path = DeltaSharingFileSystem.decode(f) + val fetcher = + new PreSignedUrlFetcher( + preSignedUrlCacheRef, + path.tablePath, + path.fileId, + TimeUnit.MINUTES.toMillis(10) + ) + val (tableName, parquetFilePath) = decode(fetcher.getUrl()) + val arrayBuilder = Array.newBuilder[Byte] + val iterator = SparkEnv.get.blockManager + .get[Byte](getBlockId(tableName, parquetFilePath)) + .map( + _.data.asInstanceOf[Iterator[Byte]] + ) + .getOrElse { + throw new FileNotFoundException(f.toString) + } + while (iterator.hasNext) { + arrayBuilder += iterator.next() + } + new FSDataInputStream(new SeekableByteArrayInputStream(arrayBuilder.result())) + } + + override def create( + f: Path, + permission: FsPermission, + overwrite: Boolean, + bufferSize: Int, + replication: Short, + blockSize: Long, + progress: Progressable): FSDataOutputStream = + throw new UnsupportedOperationException("create") + + override def append(f: Path, bufferSize: Int, progress: Progressable): FSDataOutputStream = + throw new UnsupportedOperationException("append") + + override def rename(src: Path, dst: Path): Boolean = + throw new UnsupportedOperationException("rename") + + override def delete(f: Path, recursive: Boolean): Boolean = + throw new UnsupportedOperationException("delete") + + override def listStatus(f: Path): Array[FileStatus] = + throw new UnsupportedOperationException("listStatus") + + override def setWorkingDirectory(new_dir: Path): Unit = + throw new UnsupportedOperationException("setWorkingDirectory") + + override def getWorkingDirectory: Path = new Path(getUri) + + override def mkdirs(f: Path, permission: FsPermission): Boolean = + throw new UnsupportedOperationException("mkdirs") + + override def getFileStatus(f: Path): FileStatus = { + val resolved = makeQualified(f) + new FileStatus(DeltaSharingFileSystem.decode(resolved).fileSize, false, 0, 1, 0, f) + } + + override def close(): Unit = { + super.close() + } +} + +private[spark] object TestDeltaSharingFileSystem { + val SCHEME = "delta-sharing" + + def getBlockId(tableName: String, parquetFilePath: String): 
BlockId = { + BlockId( + s"${DeltaSharingUtils.DELTA_SHARING_BLOCK_ID_PREFIX}_" + + s"{$tableName}_$parquetFilePath" + ) + } + + // The encoded string is purely for testing purposes and contains the table name and file path, + // which will be decoded and used to find the block in the block manager. + // In real traffic, it will be a pre-signed url. + def encode(tableName: String, parquetFilePath: String): String = { + val encodedTableName = URLEncoder.encode(tableName, "UTF-8") + val encodedParquetFilePath = URLEncoder.encode(parquetFilePath, "UTF-8") + // SCHEME:/// is needed for making this path an absolute path + s"$SCHEME:///$encodedTableName/$encodedParquetFilePath" + } + + def decode(encodedPath: String): (String, String) = { + val Array(tableName, parquetFilePath) = encodedPath + .stripPrefix(s"$SCHEME:///") + .stripPrefix(s"$SCHEME:/") + .split("/") + .map( + URLDecoder.decode(_, "UTF-8") + ) + (tableName, parquetFilePath) + } +} diff --git a/spark/src/main/antlr4/io/delta/sql/parser/DeltaSqlBase.g4 b/spark/src/main/antlr4/io/delta/sql/parser/DeltaSqlBase.g4 new file mode 100644 index 00000000000..5a16b863588 --- /dev/null +++ b/spark/src/main/antlr4/io/delta/sql/parser/DeltaSqlBase.g4 @@ -0,0 +1,395 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +grammar DeltaSqlBase; + +@members { + /** + * Verify whether current token is a valid decimal token (which contains dot). + * Returns true if the character that follows the token is not a digit or letter or underscore. + * + * For example: + * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. + * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. + * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. + * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed
34.E2 is a valid decimal token because it is followed by symbol '+' + * which is not a digit or letter or underscore. + */ + public boolean isValidDecimal() { + int nextChar = _input.LA(1); + if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || + nextChar == '_') { + return false; + } else { + return true; + } + } +} + +tokens { + DELIMITER +} + +singleStatement + : statement ';'* EOF + ; + +// If you add keywords here that should not be reserved, add them to 'nonReserved' list. +statement + : VACUUM (path=STRING | table=qualifiedName) + (RETAIN number HOURS)? (DRY RUN)? #vacuumTable + | (DESC | DESCRIBE) DETAIL (path=STRING | table=qualifiedName) #describeDeltaDetail + | GENERATE modeName=identifier FOR TABLE table=qualifiedName #generate + | (DESC | DESCRIBE) HISTORY (path=STRING | table=qualifiedName) + (LIMIT limit=INTEGER_VALUE)? #describeDeltaHistory + | CONVERT TO DELTA table=qualifiedName + (NO STATISTICS)? (PARTITIONED BY '(' colTypeList ')')? #convert + | RESTORE TABLE? table=qualifiedName TO? + clause=temporalClause #restore + | ALTER TABLE table=qualifiedName ADD CONSTRAINT name=identifier + constraint #addTableConstraint + | ALTER TABLE table=qualifiedName + DROP CONSTRAINT (IF EXISTS)? name=identifier #dropTableConstraint + | ALTER TABLE table=qualifiedName + DROP FEATURE featureName=featureNameValue (TRUNCATE HISTORY)? #alterTableDropFeature + | OPTIMIZE (path=STRING | table=qualifiedName) + (WHERE partitionPredicate=predicateToken)? + (zorderSpec)? #optimizeTable + | REORG TABLE table=qualifiedName + ( + (WHERE partitionPredicate=predicateToken)? APPLY LEFT_PAREN PURGE RIGHT_PAREN | + APPLY LEFT_PAREN UPGRADE UNIFORM LEFT_PAREN ICEBERG_COMPAT_VERSION EQ version=INTEGER_VALUE RIGHT_PAREN RIGHT_PAREN + ) #reorgTable + | cloneTableHeader SHALLOW CLONE source=qualifiedName clause=temporalClause? + (TBLPROPERTIES tableProps=propertyList)? + (LOCATION location=stringLit)? #clone + | .*? clusterBySpec+ .*? #clusterBy + | .*? #passThrough + ; + +createTableHeader + : CREATE TABLE (IF NOT EXISTS)? table=qualifiedName + ; + +replaceTableHeader + : (CREATE OR)? REPLACE TABLE table=qualifiedName + ; + +cloneTableHeader + : createTableHeader + | replaceTableHeader + ; + +zorderSpec + : ZORDER BY LEFT_PAREN interleave+=qualifiedName (COMMA interleave+=qualifiedName)* RIGHT_PAREN + | ZORDER BY interleave+=qualifiedName (COMMA interleave+=qualifiedName)* + ; + +clusterBySpec + : CLUSTER BY LEFT_PAREN interleave+=qualifiedName (COMMA interleave+=qualifiedName)* RIGHT_PAREN + ; + +temporalClause + : FOR? (SYSTEM_VERSION | VERSION) AS OF version=(INTEGER_VALUE | STRING) + | FOR? (SYSTEM_TIME | TIMESTAMP) AS OF timestamp=STRING + ; + +qualifiedName + : identifier ('.' identifier)* ('.' identifier)* + ; + +propertyList + : LEFT_PAREN property (COMMA property)* RIGHT_PAREN + ; + +property + : key=propertyKey (EQ? value=propertyValue)? 
+ ; + +propertyKey + : identifier (DOT identifier)* + | stringLit + ; + +propertyValue + : INTEGER_VALUE + | DECIMAL_VALUE + | booleanValue + | identifier LEFT_PAREN stringLit COMMA stringLit RIGHT_PAREN + | value=stringLit + ; + +featureNameValue + : identifier + | stringLit + ; + +stringLit + : STRING + | DOUBLEQUOTED_STRING + ; + +booleanValue + : TRUE | FALSE + ; + +identifier + : IDENTIFIER #unquotedIdentifier + | quotedIdentifier #quotedIdentifierAlternative + | nonReserved #unquotedIdentifier + ; + +quotedIdentifier + : BACKQUOTED_IDENTIFIER + ; + +colTypeList + : colType (',' colType)* + ; + +colType + : colName=identifier dataType (NOT NULL)? (COMMENT STRING)? + ; + +dataType + : identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType + ; + +number + : MINUS? DECIMAL_VALUE #decimalLiteral + | MINUS? INTEGER_VALUE #integerLiteral + | MINUS? BIGINT_LITERAL #bigIntLiteral + | MINUS? SMALLINT_LITERAL #smallIntLiteral + | MINUS? TINYINT_LITERAL #tinyIntLiteral + | MINUS? DOUBLE_LITERAL #doubleLiteral + | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral + ; + +constraint + : CHECK '(' exprToken+ ')' #checkConstraint + ; + +// We don't have an expression rule in our grammar here, so we just grab the tokens and defer +// parsing them to later. Although this is the same as `exprToken`, we have to re-define it to +// workaround an ANTLR issue (https://github.com/delta-io/delta/issues/1205) +predicateToken + : .+? + ; + +// We don't have an expression rule in our grammar here, so we just grab the tokens and defer +// parsing them to later. +exprToken + : .+? + ; + +// Add keywords here so that people's queries don't break if they have a column name as one of +// these tokens +nonReserved + : VACUUM | RETAIN | HOURS | DRY | RUN + | CONVERT | TO | DELTA | PARTITIONED | BY + | DESC | DESCRIBE | LIMIT | DETAIL + | GENERATE | FOR | TABLE | CHECK | EXISTS | OPTIMIZE + | REORG | APPLY | PURGE | UPGRADE | UNIFORM | ICEBERG_COMPAT_VERSION + | RESTORE | AS | OF + | ZORDER | LEFT_PAREN | RIGHT_PAREN + | NO | STATISTICS + | CLONE | SHALLOW + | FEATURE | TRUNCATE + | CLUSTER + ; + +// Define how the keywords above should appear in a user's SQL statement. 
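+// For example (illustrative only, not exhaustive), these keywords combine into statements such as: +//   VACUUM '/data/events' RETAIN 168 HOURS DRY RUN +//   OPTIMIZE events WHERE date >= '2017-01-01' ZORDER BY (eventType) +//   RESTORE TABLE events TO VERSION AS OF 10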
+ADD: 'ADD'; +ALTER: 'ALTER'; +APPLY: 'APPLY'; +AS: 'AS'; +BY: 'BY'; +CHECK: 'CHECK'; +CLONE: 'CLONE'; +CLUSTER: 'CLUSTER'; +COMMA: ','; +COMMENT: 'COMMENT'; +CONSTRAINT: 'CONSTRAINT'; +CONVERT: 'CONVERT'; +CREATE: 'CREATE'; +DELTA: 'DELTA'; +DESC: 'DESC'; +DESCRIBE: 'DESCRIBE'; +DETAIL: 'DETAIL'; +DOT: '.'; +DROP: 'DROP'; +DRY: 'DRY'; +EXISTS: 'EXISTS'; +FALSE: 'FALSE'; +FEATURE: 'FEATURE'; +FOR: 'FOR'; +GENERATE: 'GENERATE'; +HISTORY: 'HISTORY'; +HOURS: 'HOURS'; +ICEBERG_COMPAT_VERSION: 'ICEBERG_COMPAT_VERSION'; +IF: 'IF'; +LEFT_PAREN: '('; +LIMIT: 'LIMIT'; +LOCATION: 'LOCATION'; +MINUS: '-'; +NO: 'NO'; +NOT: 'NOT' | '!'; +NULL: 'NULL'; +OF: 'OF'; +OR: 'OR'; +OPTIMIZE: 'OPTIMIZE'; +REORG: 'REORG'; +PARTITIONED: 'PARTITIONED'; +PURGE: 'PURGE'; +REPLACE: 'REPLACE'; +RESTORE: 'RESTORE'; +RETAIN: 'RETAIN'; +RIGHT_PAREN: ')'; +RUN: 'RUN'; +SHALLOW: 'SHALLOW'; +SYSTEM_TIME: 'SYSTEM_TIME'; +SYSTEM_VERSION: 'SYSTEM_VERSION'; +TABLE: 'TABLE'; +TBLPROPERTIES: 'TBLPROPERTIES'; +TIMESTAMP: 'TIMESTAMP'; +TRUNCATE: 'TRUNCATE'; +TO: 'TO'; +TRUE: 'TRUE'; +UNIFORM: 'UNIFORM'; +UPGRADE: 'UPGRADE'; +VACUUM: 'VACUUM'; +VERSION: 'VERSION'; +WHERE: 'WHERE'; +ZORDER: 'ZORDER'; +STATISTICS: 'STATISTICS'; + +// Multi-character operator tokens need to be defined even though we don't explicitly reference +// them so that they can be recognized as single tokens when parsing. If we split them up and +// end up with expression text like 'a ! = b', Spark won't be able to parse '! =' back into the +// != operator. +EQ : '=' | '=='; +NSEQ: '<=>'; +NEQ : '<>'; +NEQJ: '!='; +LTE : '<=' | '!>'; +GTE : '>=' | '!<'; +CONCAT_PIPE: '||'; + +STRING + : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' + | '"' ( ~('"'|'\\') | ('\\' .) )* '"' + ; + +DOUBLEQUOTED_STRING + :'"' ( ~('"'|'\\') | ('\\' .) )* '"' + ; + +BIGINT_LITERAL + : DIGIT+ 'L' + ; + +SMALLINT_LITERAL + : DIGIT+ 'S' + ; + +TINYINT_LITERAL + : DIGIT+ 'Y' + ; + +INTEGER_VALUE + : DIGIT+ + ; + +DECIMAL_VALUE + : DIGIT+ EXPONENT + | DECIMAL_DIGITS EXPONENT? {isValidDecimal()}? + ; + +DOUBLE_LITERAL + : DIGIT+ EXPONENT? 'D' + | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}? + ; + +BIGDECIMAL_LITERAL + : DIGIT+ EXPONENT? 'BD' + | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? + ; + +IDENTIFIER + : (LETTER | DIGIT | '_')+ + ; + +BACKQUOTED_IDENTIFIER + : '`' ( ~'`' | '``' )* '`' + ; + +fragment DECIMAL_DIGITS + : DIGIT+ '.' DIGIT* + | '.' DIGIT+ + ; + +fragment EXPONENT + : 'E' [+-]? DIGIT+ + ; + +fragment DIGIT + : [0-9] + ; + +fragment LETTER + : [A-Z] + ; + +SIMPLE_COMMENT + : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) + ; + +BRACKETED_COMMENT + : '/*' .*? '*/' -> channel(HIDDEN) + ; + +WS : [ \r\n\t]+ -> channel(HIDDEN) + ; + +// Catch-all for anything we can't recognize. +// We use this to be able to ignore and recover all the text +// when splitting statements with DelimiterLexer +UNRECOGNIZED + : . + ; diff --git a/spark/src/main/java/org/apache/spark/sql/delta/RowIndexFilter.java b/spark/src/main/java/org/apache/spark/sql/delta/RowIndexFilter.java new file mode 100644 index 00000000000..c2d689e2f78 --- /dev/null +++ b/spark/src/main/java/org/apache/spark/sql/delta/RowIndexFilter.java @@ -0,0 +1,45 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta; + +import org.apache.spark.sql.execution.vectorized.WritableColumnVector; + +/** + * Provides filtering information for each row index within given range. + * Specific filters are implemented in subclasses. + */ +public interface RowIndexFilter { + + /** + * Materialize filtering information for all rows in the range [start, end) + * by filling a boolean column vector batch. + * + * @param start Beginning index of the filtering range (inclusive) + * @param end End index of the filtering range (exclusive) + * @param batch The column vector for the current batch to materialize the range into + */ + void materializeIntoVector(long start, long end, WritableColumnVector batch); + + /** + * Value that must be materialised for a row to be kept after filtering. + */ + public static final byte KEEP_ROW_VALUE = 0; + /** + * Value that must be materialised for a row to be dropped during filtering. + */ + public static final byte DROP_ROW_VALUE = 1; +} diff --git a/spark/src/main/java/org/apache/spark/sql/delta/RowIndexFilterType.java b/spark/src/main/java/org/apache/spark/sql/delta/RowIndexFilterType.java new file mode 100644 index 00000000000..34e553b8e1d --- /dev/null +++ b/spark/src/main/java/org/apache/spark/sql/delta/RowIndexFilterType.java @@ -0,0 +1,37 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta; + +/** Filter types corresponding to every row index filter implementations. */ +public enum RowIndexFilterType { + /** Corresponding to [[DropMarkedRowsFilter]]. */ + IF_CONTAINED(0), + /** Corresponding to [[KeepMarkedRowsFilter]]. */ + IF_NOT_CONTAINED(1), + /** Invalid filter type. 
*/ + UNKNOWN(-1); + + private final int id; + + RowIndexFilterType(int id) { + this.id = id; + } + + public int getId() { + return this.id; + } +} diff --git a/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister new file mode 100644 index 00000000000..bdf59969fb3 --- /dev/null +++ b/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -0,0 +1 @@ +org.apache.spark.sql.delta.sources.DeltaDataSource \ No newline at end of file diff --git a/spark/src/main/resources/NOTICE.txt b/spark/src/main/resources/NOTICE.txt new file mode 120000 index 00000000000..9f5b4605595 --- /dev/null +++ b/spark/src/main/resources/NOTICE.txt @@ -0,0 +1 @@ +../../../../NOTICE.txt \ No newline at end of file diff --git a/spark/src/main/resources/error/delta-error-classes.json b/spark/src/main/resources/error/delta-error-classes.json new file mode 100644 index 00000000000..fa50b5835f5 --- /dev/null +++ b/spark/src/main/resources/error/delta-error-classes.json @@ -0,0 +1,2637 @@ +{ + "DELTA_ACTIVE_SPARK_SESSION_NOT_FOUND" : { + "message" : [ + "Could not find active SparkSession" + ], + "sqlState" : "08003" + }, + "DELTA_ACTIVE_TRANSACTION_ALREADY_SET" : { + "message" : [ + "Cannot set a new txn as active when one is already active" + ], + "sqlState" : "0B000" + }, + "DELTA_ADDING_COLUMN_WITH_INTERNAL_NAME_FAILED" : { + "message" : [ + "Failed to add column because the name is reserved." + ], + "sqlState" : "42000" + }, + "DELTA_ADDING_DELETION_VECTORS_DISALLOWED" : { + "message" : [ + "The current operation attempted to add a deletion vector to a table that does not permit the creation of new deletion vectors. Please file a bug report." + ], + "sqlState" : "0A000" + }, + "DELTA_ADDING_DELETION_VECTORS_WITH_TIGHT_BOUNDS_DISALLOWED" : { + "message" : [ + "All operations that add deletion vectors should set the tightBounds column in statistics to false. Please file a bug report." + ], + "sqlState" : "42000" + }, + "DELTA_ADD_COLUMN_AT_INDEX_LESS_THAN_ZERO" : { + "message" : [ + "Index to add column is lower than 0" + ], + "sqlState" : "42KD3" + }, + "DELTA_ADD_COLUMN_PARENT_NOT_STRUCT" : { + "message" : [ + "Cannot add because its parent is not a StructType. Found " + ], + "sqlState" : "42KD3" + }, + "DELTA_ADD_COLUMN_STRUCT_NOT_FOUND" : { + "message" : [ + "Struct not found at position " + ], + "sqlState" : "42KD3" + }, + "DELTA_ADD_CONSTRAINTS" : { + "message" : [ + "Please use ALTER TABLE ADD CONSTRAINT to add CHECK constraints." + ], + "sqlState" : "0A000" + }, + "DELTA_AGGREGATE_IN_GENERATED_COLUMN" : { + "message" : [ + "Found . A generated column cannot use an aggregate expression" + ], + "sqlState" : "42621" + }, + "DELTA_AGGREGATION_NOT_SUPPORTED" : { + "message" : [ + "Aggregate functions are not supported in the ." + ], + "sqlState" : "42903" + }, + "DELTA_ALTER_TABLE_CHANGE_COL_NOT_SUPPORTED" : { + "message" : [ + "ALTER TABLE CHANGE COLUMN is not supported for changing column to " + ], + "sqlState" : "42837" + }, + "DELTA_ALTER_TABLE_SET_CLUSTERING_TABLE_FEATURE_NOT_ALLOWED" : { + "message" : [ + "Cannot enable table feature using ALTER TABLE SET TBLPROPERTIES. Please use CREATE OR REPLACE TABLE CLUSTER BY to create a Delta table with clustering." + ], + "sqlState" : "42000" + }, + "DELTA_AMBIGUOUS_DATA_TYPE_CHANGE" : { + "message" : [ + "Cannot change data type of from to . 
This change contains column removals and additions, therefore they are ambiguous. Please make these changes individually using ALTER TABLE [ADD | DROP | RENAME] COLUMN." + ], + "sqlState" : "429BQ" + }, + "DELTA_AMBIGUOUS_PARTITION_COLUMN" : { + "message" : [ + "Ambiguous partition column can be ." + ], + "sqlState" : "42702" + }, + "DELTA_AMBIGUOUS_PATHS_IN_CREATE_TABLE" : { + "message" : [ + "CREATE TABLE contains two different locations: and .", + "You can remove the LOCATION clause from the CREATE TABLE statement, or set", + " to true to skip this check.", + "" + ], + "sqlState" : "42613" + }, + "DELTA_BLOCK_COLUMN_MAPPING_AND_CDC_OPERATION" : { + "message" : [ + "Operation \"\" is not allowed when the table has enabled change data feed (CDF) and has undergone schema changes using DROP COLUMN or RENAME COLUMN." + ], + "sqlState" : "42KD4" + }, + "DELTA_BLOOM_FILTER_DROP_ON_NON_EXISTING_COLUMNS" : { + "message" : [ + "Cannot drop bloom filter indices for the following non-existent column(s): " + ], + "sqlState" : "42703" + }, + "DELTA_CANNOT_CHANGE_DATA_TYPE" : { + "message" : [ + "Cannot change data type: " + ], + "sqlState" : "429BQ" + }, + "DELTA_CANNOT_CHANGE_LOCATION" : { + "message" : [ + "Cannot change the 'location' of the Delta table using SET TBLPROPERTIES. Please use ALTER TABLE SET LOCATION instead." + ], + "sqlState" : "42601" + }, + "DELTA_CANNOT_CHANGE_PROVIDER" : { + "message" : [ + "'provider' is a reserved table property, and cannot be altered." + ], + "sqlState" : "42939" + }, + "DELTA_CANNOT_CONVERT_TO_FILEFORMAT" : { + "message" : [ + "Can not convert to FileFormat." + ], + "sqlState" : "XXKDS" + }, + "DELTA_CANNOT_CREATE_BLOOM_FILTER_NON_EXISTING_COL" : { + "message" : [ + "Cannot create bloom filter indices for the following non-existent column(s): " + ], + "sqlState" : "42703" + }, + "DELTA_CANNOT_CREATE_LOG_PATH" : { + "message" : [ + "Cannot create " + ], + "sqlState" : "42KD5" + }, + "DELTA_CANNOT_DESCRIBE_VIEW_HISTORY" : { + "message" : [ + "Cannot describe the history of a view." + ], + "sqlState" : "42809" + }, + "DELTA_CANNOT_DROP_BLOOM_FILTER_ON_NON_INDEXED_COLUMN" : { + "message" : [ + "Cannot drop bloom filter index on a non indexed column: " + ], + "sqlState" : "42703" + }, + "DELTA_CANNOT_EVALUATE_EXPRESSION" : { + "message" : [ + "Cannot evaluate expression: " + ], + "sqlState" : "0AKDC" + }, + "DELTA_CANNOT_FIND_VERSION" : { + "message" : [ + "Cannot find 'sourceVersion' in " + ], + "sqlState" : "XXKDS" + }, + "DELTA_CANNOT_GENERATE_CODE_FOR_EXPRESSION" : { + "message" : [ + "Cannot generate code for expression: " + ], + "sqlState" : "0AKDC" + }, + "DELTA_CANNOT_GENERATE_UPDATE_EXPRESSIONS" : { + "message" : [ + "Calling without generated columns should always return a update expression for each column" + ], + "sqlState" : "XXKDS" + }, + "DELTA_CANNOT_MODIFY_APPEND_ONLY" : { + "message" : [ + "This table is configured to only allow appends. If you would like to permit updates or deletes, use 'ALTER TABLE SET TBLPROPERTIES (=false)'." + ], + "sqlState" : "42809" + }, + "DELTA_CANNOT_MODIFY_TABLE_PROPERTY" : { + "message" : [ + "The Delta table configuration cannot be specified by the user" + ], + "sqlState" : "42939" + }, + "DELTA_CANNOT_RECONSTRUCT_PATH_FROM_URI" : { + "message" : [ + "A uri () which cannot be turned into a relative path was found in the transaction log." 
+ ], + "sqlState" : "22KD1" + }, + "DELTA_CANNOT_RENAME_PATH" : { + "message" : [ + "Cannot rename to " + ], + "sqlState" : "22KD1" + }, + "DELTA_CANNOT_REPLACE_MISSING_TABLE" : { + "message" : [ + "Table cannot be replaced as it does not exist. Use CREATE OR REPLACE TABLE to create the table." + ], + "sqlState" : "42P01" + }, + "DELTA_CANNOT_RESOLVE_COLUMN" : { + "message" : [ + "Can't resolve column in " + ], + "sqlState" : "42703" + }, + "DELTA_CANNOT_RESOLVE_SOURCE_COLUMN" : { + "message" : [ + "Couldn't resolve qualified source column within the source query." + ], + "sqlState" : "XXKDS" + }, + "DELTA_CANNOT_RESTORE_TABLE_VERSION" : { + "message" : [ + "Cannot restore table to version . Available versions: [, ]." + ], + "sqlState" : "22003" + }, + "DELTA_CANNOT_RESTORE_TIMESTAMP_GREATER" : { + "message" : [ + "Cannot restore table to timestamp () as it is after the latest version available. Please use a timestamp before ()" + ], + "sqlState" : "22003" + }, + "DELTA_CANNOT_SET_LOCATION_MULTIPLE_TIMES" : { + "message" : [ + "Can't set location multiple times. Found " + ], + "sqlState" : "XXKDS" + }, + "DELTA_CANNOT_SET_LOCATION_ON_PATH_IDENTIFIER" : { + "message" : [ + "Cannot change the location of a path based table." + ], + "sqlState" : "42613" + }, + "DELTA_CANNOT_UPDATE_ARRAY_FIELD" : { + "message" : [ + "Cannot update %1$s field %2$s type: update the element by updating %2$s.element" + ], + "sqlState" : "429BQ" + }, + "DELTA_CANNOT_UPDATE_MAP_FIELD" : { + "message" : [ + "Cannot update %1$s field %2$s type: update a map by updating %2$s.key or %2$s.value" + ], + "sqlState" : "429BQ" + }, + "DELTA_CANNOT_UPDATE_OTHER_FIELD" : { + "message" : [ + "Cannot update field of type " + ], + "sqlState" : "429BQ" + }, + "DELTA_CANNOT_UPDATE_STRUCT_FIELD" : { + "message" : [ + "Cannot update field type: update struct by adding, deleting, or updating its fields" + ], + "sqlState" : "429BQ" + }, + "DELTA_CANNOT_USE_ALL_COLUMNS_FOR_PARTITION" : { + "message" : [ + "Cannot use all columns for partition columns" + ], + "sqlState" : "428FT" + }, + "DELTA_CANNOT_WRITE_INTO_VIEW" : { + "message" : [ + " is a view. Writes to a view are not supported." + ], + "sqlState" : "0A000" + }, + "DELTA_CAST_OVERFLOW_IN_TABLE_WRITE" : { + "message" : [ + "Failed to write a value of type into the type column due to an overflow.", + "Use `try_cast` on the input value to tolerate overflow and return NULL instead.", + "If necessary, set to \"LEGACY\" to bypass this error or set to true to revert to the old behaviour and follow in UPDATE and MERGE." + ], + "sqlState" : "22003" + }, + "DELTA_CDC_NOT_ALLOWED_IN_THIS_VERSION" : { + "message" : [ + "Configuration delta.enableChangeDataFeed cannot be set. Change data feed from Delta is not yet available." + ], + "sqlState" : "0AKDC" + }, + "DELTA_CHANGE_DATA_FEED_INCOMPATIBLE_DATA_SCHEMA" : { + "message" : [ + "Retrieving table changes between version and failed because of an incompatible data schema.", + "Your read schema is at version , but we found an incompatible data schema at version .", + "If possible, please retrieve the table changes using the end version's schema by setting to `endVersion`, or contact support." 
+ ], + "sqlState" : "0AKDC" + }, + "DELTA_CHANGE_DATA_FEED_INCOMPATIBLE_SCHEMA_CHANGE" : { + "message" : [ + "Retrieving table changes between version and failed because of an incompatible schema change.", + "Your read schema is at version , but we found an incompatible schema change at version .", + "If possible, please query table changes separately from version to - 1, and from version to ." + ], + "sqlState" : "0AKDC" + }, + "DELTA_CHANGE_TABLE_FEED_DISABLED" : { + "message" : [ + "Cannot write to table with delta.enableChangeDataFeed set. Change data feed from Delta is not available." + ], + "sqlState" : "42807" + }, + "DELTA_CHECKPOINT_NON_EXIST_TABLE" : { + "message" : [ + "Cannot checkpoint a non-existing table . Did you manually delete files in the _delta_log directory?" + ], + "sqlState" : "42K03" + }, + "DELTA_CHECKPOINT_SNAPSHOT_MISMATCH" : { + "message" : [ + "State of the checkpoint doesn't match that of the snapshot." + ], + "sqlState" : "XXKDS" + }, + "DELTA_CLONE_AMBIGUOUS_TARGET" : { + "message" : [ + "", + "Two paths were provided as the CLONE target so it is ambiguous which to use. An external", + "location for CLONE was provided at at the same time as the path", + "." + ], + "sqlState" : "42613" + }, + "DELTA_CLONE_UNSUPPORTED_SOURCE" : { + "message" : [ + "Unsupported clone source '', whose format is .", + "The supported formats are 'delta', 'iceberg' and 'parquet'." + ], + "sqlState" : "0AKDC" + }, + "DELTA_CLUSTERING_COLUMNS_MISMATCH" : { + "message" : [ + "The provided clustering columns do not match the existing table's.", + "- provided: ", + "- existing: " + ], + "sqlState" : "42P10" + }, + "DELTA_CLUSTERING_COLUMN_MISSING_STATS" : { + "message" : [ + "Clustering requires clustering columns to have stats. Couldn't find clustering column(s) '' in stats schema:\n" + ], + "sqlState" : "22000" + }, + "DELTA_CLUSTERING_REPLACE_TABLE_WITH_PARTITIONED_TABLE" : { + "message" : [ + "Replacing a clustered Delta table with a partitioned table is not allowed." + ], + "sqlState" : "42000" + }, + "DELTA_CLUSTERING_WITH_PARTITION_PREDICATE" : { + "message" : [ + "OPTIMIZE command for Delta table with clustering doesn't support partition predicates. Please remove the predicates: ." + ], + "sqlState" : "0A000" + }, + "DELTA_CLUSTERING_WITH_ZORDER_BY" : { + "message" : [ + "OPTIMIZE command for Delta table with clustering cannot specify ZORDER BY. Please remove ZORDER BY ()." + ], + "sqlState" : "42613" + }, + "DELTA_CLUSTER_BY_INVALID_NUM_COLUMNS" : { + "message" : [ + "CLUSTER BY supports up to clustering columns, but the table has clustering columns. Please remove the extra clustering columns." + ], + "sqlState" : "54000" + }, + "DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_PARTITIONED_COLUMN" : { + "message" : [ + "Data skipping is not supported for partition column ''." + ], + "sqlState" : "0AKDC" + }, + "DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_TYPE" : { + "message" : [ + "Data skipping is not supported for column '' of type ." + ], + "sqlState" : "0AKDC" + }, + "DELTA_COLUMN_MAPPING_MAX_COLUMN_ID_NOT_SET" : { + "message" : [ + "The max column id property () is not set on a column mapping enabled table." + ], + "sqlState" : "42703" + }, + "DELTA_COLUMN_MAPPING_MAX_COLUMN_ID_NOT_SET_CORRECTLY" : { + "message" : [ + "The max column id property () on a column mapping enabled table is , which cannot be smaller than the max column id for all fields ()." 
+ ], + "sqlState" : "42703" + }, + "DELTA_COLUMN_NOT_FOUND" : { + "message" : [ + "Unable to find the column `` given []" + ], + "sqlState" : "42703" + }, + "DELTA_COLUMN_NOT_FOUND_IN_MERGE" : { + "message" : [ + "Unable to find the column '' of the target table from the INSERT columns: . INSERT clause must specify value for all the columns of the target table." + ], + "sqlState" : "42703" + }, + "DELTA_COLUMN_NOT_FOUND_IN_SCHEMA" : { + "message" : [ + "Couldn't find column in:\n" + ], + "sqlState" : "42703" + }, + "DELTA_COLUMN_PATH_NOT_NESTED" : { + "message" : [ + "Expected to be a nested data type, but found . Was looking for the", + "index of in a nested field", + "" + ], + "sqlState" : "42704" + }, + "DELTA_COLUMN_STRUCT_TYPE_MISMATCH" : { + "message" : [ + "Struct column cannot be inserted into a field in ." + ], + "sqlState" : "2200G" + }, + "DELTA_COMPLEX_TYPE_COLUMN_CONTAINS_NULL_TYPE" : { + "message" : [ + " Found nested NullType in column which is of . Delta doesn't support writing NullType in complex types." + ], + "sqlState" : "22005" + }, + "DELTA_CONFIGURE_SPARK_SESSION_WITH_EXTENSION_AND_CATALOG" : { + "message" : [ + "This Delta operation requires the SparkSession to be configured with the", + "DeltaSparkSessionExtension and the DeltaCatalog. Please set the necessary", + "configurations when creating the SparkSession as shown below.", + "", + " SparkSession.builder()", + " .config(\"spark.sql.extensions\", \"\")", + " .config(\"\", \"\")", + " ...", + " .getOrCreate()", + "", + "If you are using spark-shell/pyspark/spark-submit, you can add the required configurations to the command as show below:", + "--conf spark.sql.extensions= --conf =", + "" + ], + "sqlState" : "56038" + }, + "DELTA_CONFLICT_SET_COLUMN" : { + "message" : [ + "There is a conflict from these SET columns: ." + ], + "sqlState" : "42701" + }, + "DELTA_CONSTRAINT_ALREADY_EXISTS" : { + "message" : [ + "Constraint '' already exists. Please delete the old constraint first.", + "Old constraint:", + "" + ], + "sqlState" : "42710" + }, + "DELTA_CONSTRAINT_DOES_NOT_EXIST" : { + "message" : [ + "Cannot drop nonexistent constraint from table . To avoid throwing an error, provide the parameter IF EXISTS or set the SQL session configuration to ." + ], + "sqlState" : "42704" + }, + "DELTA_CONVERSION_NO_PARTITION_FOUND" : { + "message" : [ + "Found no partition information in the catalog for table . Have you run \"MSCK REPAIR TABLE\" on your table to discover partitions?" + ], + "sqlState" : "42KD6" + }, + "DELTA_CONVERSION_UNSUPPORTED_COLUMN_MAPPING" : { + "message" : [ + "The configuration '' cannot be set to `` when using CONVERT TO DELTA." + ], + "sqlState" : "0AKDC" + }, + "DELTA_CONVERT_NON_PARQUET_TABLE" : { + "message" : [ + "CONVERT TO DELTA only supports parquet tables, but you are trying to convert a source: " + ], + "sqlState" : "0AKDC" + }, + "DELTA_CONVERT_TO_DELTA_ROW_TRACKING_WITHOUT_STATS" : { + "message" : [ + "Cannot enable row tracking without collecting statistics.", + "If you want to enable row tracking, do the following:", + " 1. Enable statistics collection by running the command", + " SET = true", + " 2. Run CONVERT TO DELTA without the NO STATISTICS option.", + "", + "If you do not want to collect statistics, disable row tracking:", + " 1. Deactivate enabling the table feature by default by running the command:", + " RESET ", + " 2. 
Deactivate the table property by default by running:", + " SET = false" + ], + "sqlState" : "22000" + }, + "DELTA_CREATE_EXTERNAL_TABLE_WITHOUT_SCHEMA" : { + "message" : [ + "", + "You are trying to create an external table ", + "from `` using Delta, but the schema is not specified when the", + "input path is empty.", + "", + "To learn more about Delta, see " + ], + "sqlState" : "42601" + }, + "DELTA_CREATE_EXTERNAL_TABLE_WITHOUT_TXN_LOG" : { + "message" : [ + "", + "You are trying to create an external table ", + "from `%2$s` using Delta, but there is no transaction log present at", + "`%2$s/_delta_log`. Check the upstream job to make sure that it is writing using", + "format(\"delta\") and that the path is the root of the table.", + "", + "To learn more about Delta, see " + ], + "sqlState" : "42K03" + }, + "DELTA_CREATE_TABLE_SCHEME_MISMATCH" : { + "message" : [ + "The specified schema does not match the existing schema at .", + "", + "== Specified ==", + "", + "", + "== Existing ==", + "", + "", + "== Differences ==", + "", + "", + "If your intention is to keep the existing schema, you can omit the", + "schema from the create table command. Otherwise please ensure that", + "the schema matches." + ], + "sqlState" : "42KD7" + }, + "DELTA_CREATE_TABLE_SET_CLUSTERING_TABLE_FEATURE_NOT_ALLOWED" : { + "message" : [ + "Cannot enable table feature using TBLPROPERTIES. Please use CREATE OR REPLACE TABLE CLUSTER BY to create a Delta table with clustering." + ], + "sqlState" : "42000" + }, + "DELTA_CREATE_TABLE_WITH_DIFFERENT_PARTITIONING" : { + "message" : [ + "The specified partitioning does not match the existing partitioning at .", + "", + "== Specified ==", + "", + "", + "== Existing ==", + "", + "" + ], + "sqlState" : "42KD7" + }, + "DELTA_CREATE_TABLE_WITH_DIFFERENT_PROPERTY" : { + "message" : [ + "The specified properties do not match the existing properties at .", + "", + "== Specified ==", + "", + "", + "== Existing ==", + "", + "" + ], + "sqlState" : "42KD7" + }, + "DELTA_CREATE_TABLE_WITH_NON_EMPTY_LOCATION" : { + "message" : [ + "Cannot create table (''). The associated location ('') is not empty and also not a Delta table." + ], + "sqlState" : "42601" + }, + "DELTA_DATA_CHANGE_FALSE" : { + "message" : [ + "Cannot change table metadata because the 'dataChange' option is set to false. Attempted operation: ''." + ], + "sqlState" : "0AKDE" + }, + "DELTA_DELETION_VECTOR_CARDINALITY_MISMATCH" : { + "message" : [ + "Deletion vector integrity check failed. Encountered a cardinality mismatch." + ], + "sqlState" : "XXKDS" + }, + "DELTA_DELETION_VECTOR_CHECKSUM_MISMATCH" : { + "message" : [ + "Could not verify deletion vector integrity, CRC checksum verification failed." + ], + "sqlState" : "XXKDS" + }, + "DELTA_DELETION_VECTOR_INVALID_ROW_INDEX" : { + "message" : [ + "Deletion vector integrity check failed. Encountered an invalid row index." + ], + "sqlState" : "XXKDS" + }, + "DELTA_DELETION_VECTOR_MISSING_NUM_RECORDS" : { + "message" : [ + "It is invalid to commit files with deletion vectors that are missing the numRecords statistic." + ], + "sqlState" : "2D521" + }, + "DELTA_DELETION_VECTOR_SIZE_MISMATCH" : { + "message" : [ + "Deletion vector integrity check failed. Encountered a size mismatch." + ], + "sqlState" : "XXKDS" + }, + "DELTA_DOMAIN_METADATA_NOT_SUPPORTED" : { + "message" : [ + "Detected DomainMetadata action(s) for domains , but DomainMetadataTableFeature is not enabled." 
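DELTA_CREATE_EXTERNAL_TABLE_WITHOUT_TXN_LOG above asks the upstream writer to use format("delta") and to point the external table at the root of the table directory. A hedged sketch of that flow, with the path, table name, and DataFrame `df` purely illustrative:

{{{
  // Write Delta data at the table root, then register an external table
  // over that same location.
  df.write.format("delta").mode("overwrite").save("/mnt/warehouse/events")
  spark.sql("CREATE TABLE events USING DELTA LOCATION '/mnt/warehouse/events'")
}}}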
+ ], + "sqlState" : "0A000" + }, + "DELTA_DROP_COLUMN_AT_INDEX_LESS_THAN_ZERO" : { + "message" : [ + "Index to drop column is lower than 0" + ], + "sqlState" : "42KD8" + }, + "DELTA_DUPLICATE_COLUMNS_FOUND" : { + "message" : [ + "Found duplicate column(s) : " + ], + "sqlState" : "42711" + }, + "DELTA_DUPLICATE_COLUMNS_ON_INSERT" : { + "message" : [ + "Duplicate column names in INSERT clause" + ], + "sqlState" : "42701" + }, + "DELTA_DUPLICATE_COLUMNS_ON_UPDATE_TABLE" : { + "message" : [ + "", + "Please remove duplicate columns before you update your table." + ], + "sqlState" : "42701" + }, + "DELTA_DUPLICATE_DATA_SKIPPING_COLUMNS" : { + "message" : [ + "Duplicated data skipping columns found: ." + ], + "sqlState" : "42701" + }, + "DELTA_DUPLICATE_DOMAIN_METADATA_INTERNAL_ERROR" : { + "message" : [ + "Internal error: two DomainMetadata actions within the same transaction have the same domain " + ], + "sqlState" : "42601" + }, + "DELTA_DYNAMIC_PARTITION_OVERWRITE_DISABLED" : { + "message" : [ + "Dynamic partition overwrite mode is specified by session config or write options, but it is disabled by `delta.dynamicPartitionOverwrite.enabled=false`." + ], + "sqlState" : "0A000" + }, + "DELTA_EMPTY_DATA" : { + "message" : [ + "Data used in creating the Delta table doesn't have any columns." + ], + "sqlState" : "428GU" + }, + "DELTA_EMPTY_DIRECTORY" : { + "message" : [ + "No file found in the directory: ." + ], + "sqlState" : "42K03" + }, + "DELTA_EXCEED_CHAR_VARCHAR_LIMIT" : { + "message" : [ + "Exceeds char/varchar type length limitation. Failed check: ." + ], + "sqlState" : "22001" + }, + "DELTA_EXPRESSIONS_NOT_FOUND_IN_GENERATED_COLUMN" : { + "message" : [ + "Cannot find the expressions in the generated column " + ], + "sqlState" : "XXKDS" + }, + "DELTA_EXTRACT_REFERENCES_FIELD_NOT_FOUND" : { + "message" : [ + "Field could not be found when extracting references." + ], + "sqlState" : "XXKDS" + }, + "DELTA_FAILED_CAST_PARTITION_VALUE" : { + "message" : [ + "Failed to cast partition value `` to " + ], + "sqlState" : "22018" + }, + "DELTA_FAILED_FIND_ATTRIBUTE_IN_OUTPUT_COLUMNS" : { + "message" : [ + "Could not find among the existing target output " + ], + "sqlState" : "42703" + }, + "DELTA_FAILED_FIND_PARTITION_COLUMN_IN_OUTPUT_PLAN" : { + "message" : [ + "Could not find in output plan." + ], + "sqlState" : "XXKDS" + }, + "DELTA_FAILED_INFER_SCHEMA" : { + "message" : [ + "Failed to infer schema from the given list of files." + ], + "sqlState" : "42KD9" + }, + "DELTA_FAILED_MERGE_SCHEMA_FILE" : { + "message" : [ + "Failed to merge schema of file :", + "" + ], + "sqlState" : "42KDA" + }, + "DELTA_FAILED_READ_FILE_FOOTER" : { + "message" : [ + "Could not read footer for file: " + ], + "sqlState" : "KD001" + }, + "DELTA_FAILED_RECOGNIZE_PREDICATE" : { + "message" : [ + "Cannot recognize the predicate ''" + ], + "sqlState" : "42601" + }, + "DELTA_FAILED_SCAN_WITH_HISTORICAL_VERSION" : { + "message" : [ + "Expect a full scan of the latest version of the Delta source, but found a historical scan of version " + ], + "sqlState" : "KD002" + }, + "DELTA_FAILED_TO_MERGE_FIELDS" : { + "message" : [ + "Failed to merge fields '' and ''. " + ], + "sqlState" : "22005" + }, + "DELTA_FAIL_RELATIVIZE_PATH" : { + "message" : [ + "Failed to relativize the path (). 
This can happen when absolute paths make", + "it into the transaction log, which start with the scheme", + "s3://, wasbs:// or adls://.", + "", + "If this table is NOT USED IN PRODUCTION, you can set the SQL configuration", + " to true.", + "Using this SQL configuration could lead to accidental data loss, therefore we do", + "not recommend the use of this flag unless this is for testing purposes." + ], + "sqlState" : "XXKDS" + }, + "DELTA_FEATURES_PROTOCOL_METADATA_MISMATCH" : { + "message" : [ + "Unable to operate on this table because the following table features are enabled in metadata but not listed in protocol: ." + ], + "sqlState" : "0AKDE" + }, + "DELTA_FEATURES_REQUIRE_MANUAL_ENABLEMENT" : { + "message" : [ + "Your table schema requires manually enablement of the following table feature(s): .", + "", + "To do this, run the following command for each of features listed above:", + " ALTER TABLE table_name SET TBLPROPERTIES ('delta.feature.feature_name' = 'supported')", + "Replace \"table_name\" and \"feature_name\" with real values.", + "", + "Current supported feature(s): ." + ], + "sqlState" : "0AKDE" + }, + "DELTA_FEATURE_DROP_CONFLICT_REVALIDATION_FAIL" : { + "message" : [ + "Cannot drop feature because a concurrent transaction modified the table.", + "Please try the operation again.", + "" + ], + "sqlState" : "0AKDE" + }, + "DELTA_FEATURE_DROP_FEATURE_NOT_PRESENT" : { + "message" : [ + "Cannot drop from this table because it is not currently present in the table's protocol." + ], + "sqlState" : "0AKDE" + }, + "DELTA_FEATURE_DROP_HISTORICAL_VERSIONS_EXIST" : { + "message" : [ + "Cannot drop because the Delta log contains historical versions that use the feature.", + "Please wait until the history retention period (=) ", + "has passed since the feature was last active.", + "", + "Alternatively, please wait for the TRUNCATE HISTORY retention period to expire ()", + "and then run:", + " ALTER TABLE table_name DROP FEATURE feature_name TRUNCATE HISTORY" + ], + "sqlState" : "0AKDE" + }, + "DELTA_FEATURE_DROP_HISTORY_TRUNCATION_NOT_ALLOWED" : { + "message" : [ + "History truncation is only relevant for reader features." + ], + "sqlState" : "0AKDE" + }, + "DELTA_FEATURE_DROP_NONREMOVABLE_FEATURE" : { + "message" : [ + "Cannot drop because dropping this feature is not supported." + ], + "sqlState" : "0AKDE" + }, + "DELTA_FEATURE_DROP_UNSUPPORTED_CLIENT_FEATURE" : { + "message" : [ + "Cannot drop because it is not supported by this Delta version.", + "Consider using Delta with a higher version." + ], + "sqlState" : "0AKDE" + }, + "DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD" : { + "message" : [ + "Dropping was partially successful.", + "", + "The feature is now no longer used in the current version of the table. However, the feature", + "is still present in historical versions of the table. The table feature cannot be dropped", + "from the table protocol until these historical versions have expired.", + "", + "To drop the table feature from the protocol, please wait for the historical versions to", + "expire, and then repeat this command. 
The retention period for historical versions is", + "currently configured as =.", + "", + "Alternatively, please wait for the TRUNCATE HISTORY retention period to expire ()", + "and then run:", + " ALTER TABLE table_name DROP FEATURE feature_name TRUNCATE HISTORY" + ], + "sqlState" : "0AKDE" + }, + "DELTA_FEATURE_REQUIRES_HIGHER_READER_VERSION" : { + "message" : [ + "Unable to enable table feature because it requires a higher reader protocol version (current ). Consider upgrading the table's reader protocol version to , or to a version which supports reader table features. Refer to for more information on table protocol versions." + ], + "sqlState" : "0AKDE" + }, + "DELTA_FEATURE_REQUIRES_HIGHER_WRITER_VERSION" : { + "message" : [ + "Unable to enable table feature because it requires a higher writer protocol version (current ). Consider upgrading the table's writer protocol version to , or to a version which supports writer table features. Refer to for more information on table protocol versions." + ], + "sqlState" : "0AKDE" + }, + "DELTA_FILE_ALREADY_EXISTS" : { + "message" : [ + "Existing file path " + ], + "sqlState" : "42K04" + }, + "DELTA_FILE_LIST_AND_PATTERN_STRING_CONFLICT" : { + "message" : [ + "Cannot specify both file list and pattern string." + ], + "sqlState" : "42613" + }, + "DELTA_FILE_NOT_FOUND" : { + "message" : [ + "File path " + ], + "sqlState" : "42K03" + }, + "DELTA_FILE_OR_DIR_NOT_FOUND" : { + "message" : [ + "No such file or directory: " + ], + "sqlState" : "42K03" + }, + "DELTA_FILE_TO_OVERWRITE_NOT_FOUND" : { + "message" : [ + "File () to be rewritten not found among candidate files:\n" + ], + "sqlState" : "42K03" + }, + "DELTA_FOUND_MAP_TYPE_COLUMN" : { + "message" : [ + "A MapType was found. In order to access the key or value of a MapType, specify one", + "of:", + " or", + "", + "followed by the name of the column (only if that column is a struct type).", + "e.g. mymap.key.mykey", + "If the column is a basic type, mymap.key or mymap.value is sufficient." + ], + "sqlState" : "KD003" + }, + "DELTA_GENERATED_COLUMNS_DATA_TYPE_MISMATCH" : { + "message" : [ + "Column is a generated column or a column used by a generated column. The data type is . It doesn't accept data type " + ], + "sqlState" : "42K09" + }, + "DELTA_GENERATED_COLUMNS_EXPR_TYPE_MISMATCH" : { + "message" : [ + "The expression type of the generated column is , but the column type is " + ], + "sqlState" : "42K09" + }, + "DELTA_GENERATED_COLUMN_UPDATE_TYPE_MISMATCH" : { + "message" : [ + "Column is a generated column or a column used by a generated column. The data type is and cannot be converted to data type " + ], + "sqlState" : "42K09" + }, + "DELTA_ICEBERG_COMPAT_VIOLATION" : { + "message" : [ + "The validation of IcebergCompatV has failed." + ], + "subClass" : { + "CHANGE_VERSION_NEED_REWRITE" : { + "message" : [ + "Changing to IcebergCompatV requires rewriting the table. Please run REORG TABLE APPLY (UPGRADE UNIFORM ('ICEBERG_COMPAT_VERSION = '));", + "Note that REORG enables table feature IcebergCompatV and other Delta lake clients without that table feature support may not be able to write to the table." + ] + }, + "COMPAT_VERSION_NOT_SUPPORTED" : { + "message" : [ + "IcebergCompatVersion = is not supported. Supported versions are between 1 and " + ] + }, + "DELETION_VECTORS_NOT_PURGED" : { + "message" : [ + "IcebergCompatV requires Deletion Vectors to be completely purged from the table. Please run the REORG TABLE APPLY (PURGE) command." 
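The drop-feature messages above describe a two-step workflow: first remove any remaining traces of the feature from the current table version, then drop it from the protocol once the retention window has passed. A sketch using deletion vectors as the example feature, with the table name illustrative:

{{{
  // Rewrite files so no deletion vectors remain in the current version.
  spark.sql("REORG TABLE events APPLY (PURGE)")
  // After the retention period, remove the feature from the table protocol.
  spark.sql("ALTER TABLE events DROP FEATURE deletionVectors TRUNCATE HISTORY")
}}}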
+ ] + }, + "DELETION_VECTORS_SHOULD_BE_DISABLED" : { + "message" : [ + "IcebergCompatV requires Deletion Vectors to be disabled on the table. Please use the ALTER TABLE DROP FEATURE command to disable Deletion Vectors and to remove the existing Deletion Vectors from the table." + ] + }, + "DISABLING_REQUIRED_TABLE_FEATURE" : { + "message" : [ + "IcebergCompatV requires feature to be supported and enabled. You cannot drop it from the table. Instead, please disable IcebergCompatV first." + ] + }, + "FILES_NOT_ICEBERG_COMPAT" : { + "message" : [ + "Enabling Uniform Iceberg with IcebergCompatV requires all files to be iceberg compatible.", + "There are files in table version and files are not iceberg compatible, which is usually a result of concurrent write.", + "Please run the REORG TABLE table APPLY (UPGRADE UNIFORM (ICEBERG_COMPAT_VERSION=) command again." + ] + }, + "INCOMPATIBLE_TABLE_FEATURE" : { + "message" : [ + "IcebergCompatV is incompatible with feature ." + ] + }, + "MISSING_REQUIRED_TABLE_FEATURE" : { + "message" : [ + "IcebergCompatV requires feature to be supported and enabled." + ] + }, + "REPLACE_TABLE_CHANGE_PARTITION_NAMES" : { + "message" : [ + "IcebergCompatV doesn't support replacing partitioned tables with a differently-named partition spec, because Iceberg-Spark 1.1.0 doesn't.", + "Prev Partition Spec: ", + "New Partition Spec: " + ] + }, + "REWRITE_DATA_FAILED" : { + "message" : [ + "Rewriting data to IcebergCompatV failed.", + "Please run the REORG TABLE table APPLY (UPGRADE UNIFORM (ICEBERG_COMPAT_VERSION=) command again." + ] + }, + "UNSUPPORTED_DATA_TYPE" : { + "message" : [ + "IcebergCompatV does not support the data type in your schema. Your schema:", + "" + ] + }, + "VERSION_MUTUAL_EXCLUSIVE" : { + "message" : [ + "Only one IcebergCompat version can be enabled." + ] + }, + "WRONG_REQUIRED_TABLE_PROPERTY" : { + "message" : [ + "IcebergCompatV requires table property '' to be set to ''. Current value: ''." + ] + } + }, + "sqlState" : "KD00E" + }, + "DELTA_ILLEGAL_FILE_FOUND" : { + "message" : [ + "Illegal files found in a dataChange = false transaction. Files: " + ], + "sqlState" : "XXKDS" + }, + "DELTA_ILLEGAL_OPTION" : { + "message" : [ + "Invalid value '' for option '', " + ], + "sqlState" : "42616" + }, + "DELTA_ILLEGAL_USAGE" : { + "message" : [ + "The usage of
'.", + "Another stream may be reusing the same schema location, which is not allowed.", + "Please provide a new unique `schemaTrackingLocation` path or `streamingSourceTrackingId` as a reader option for one of the streams from this table." + ], + "sqlState" : "22000" + }, + "DELTA_STREAMING_SCHEMA_LOCATION_NOT_UNDER_CHECKPOINT" : { + "message" : [ + "Schema location '' must be placed under checkpoint location ''." + ], + "sqlState" : "22000" + }, + "DELTA_STREAMING_SCHEMA_LOG_DESERIALIZE_FAILED" : { + "message" : [ + "Incomplete log file in the Delta streaming source schema log at ''.", + "The schema log may have been corrupted. Please pick a new schema location." + ], + "sqlState" : "22000" + }, + "DELTA_STREAMING_SCHEMA_LOG_INCOMPATIBLE_DELTA_TABLE_ID" : { + "message" : [ + "Detected incompatible Delta table id when trying to read Delta stream.", + "Persisted table id: , Table id: ", + "The schema log might have been reused. Please pick a new schema location." + ], + "sqlState" : "22000" + }, + "DELTA_STREAMING_SCHEMA_LOG_INCOMPATIBLE_PARTITION_SCHEMA" : { + "message" : [ + "Detected incompatible partition schema when trying to read Delta stream.", + "Persisted schema: , Delta partition schema: ", + "Please pick a new schema location to reinitialize the schema log if you have manually changed the table's partition schema recently." + ], + "sqlState" : "22000" + }, + "DELTA_STREAMING_SCHEMA_LOG_INIT_FAILED_INCOMPATIBLE_METADATA" : { + "message" : [ + "We could not initialize the Delta streaming source schema log because", + "we detected an incompatible schema or protocol change while serving a streaming batch from table version to ." + ], + "sqlState" : "22000" + }, + "DELTA_STREAMING_SCHEMA_LOG_PARSE_SCHEMA_FAILED" : { + "message" : [ + "Failed to parse the schema from the Delta streaming source schema log.", + "The schema log may have been corrupted. Please pick a new schema location." + ], + "sqlState" : "22000" + }, + "DELTA_TABLE_ALREADY_CONTAINS_CDC_COLUMNS" : { + "message" : [ + "Unable to enable Change Data Capture on the table. The table already contains", + "reserved columns that will", + "be used internally as metadata for the table's Change Data Feed. To enable", + "Change Data Feed on the table rename/drop these columns.", + "" + ], + "sqlState" : "42711" + }, + "DELTA_TABLE_ALREADY_EXISTS" : { + "message" : [ + "Table already exists." + ], + "sqlState" : "42P07" + }, + "DELTA_TABLE_FOR_PATH_UNSUPPORTED_HADOOP_CONF" : { + "message" : [ + "Currently DeltaTable.forPath only supports hadoop configuration keys starting with but got " + ], + "sqlState" : "0AKDC" + }, + "DELTA_TABLE_FOUND_IN_EXECUTOR" : { + "message" : [ + "DeltaTable cannot be used in executors" + ], + "sqlState" : "XXKDS" + }, + "DELTA_TABLE_LOCATION_MISMATCH" : { + "message" : [ + "The location of the existing table is . It doesn't match the specified location ." + ], + "sqlState" : "42613" + }, + "DELTA_TABLE_NOT_FOUND" : { + "message" : [ + "Delta table doesn't exist." + ], + "sqlState" : "42P01" + }, + "DELTA_TABLE_NOT_SUPPORTED_IN_OP" : { + "message" : [ + "Table is not supported in . Please use a path instead." + ], + "sqlState" : "42809" + }, + "DELTA_TABLE_ONLY_OPERATION" : { + "message" : [ + " is not a Delta table. is only supported for Delta tables." + ], + "sqlState" : "0AKDD" + }, + "DELTA_TARGET_TABLE_FINAL_SCHEMA_EMPTY" : { + "message" : [ + "Target table final schema is empty." 
+ ], + "sqlState" : "428GU" + }, + "DELTA_TIMESTAMP_GREATER_THAN_COMMIT" : { + "message" : [ + "The provided timestamp () is after the latest version available to this", + "table (). Please use a timestamp before or at ." + ], + "sqlState" : "42816" + }, + "DELTA_TIMESTAMP_INVALID" : { + "message" : [ + "The provided timestamp () cannot be converted to a valid timestamp." + ], + "sqlState" : "42816" + }, + "DELTA_TIME_TRAVEL_INVALID_BEGIN_VALUE" : { + "message" : [ + " needs to be a valid begin value." + ], + "sqlState" : "42604" + }, + "DELTA_TRUNCATED_TRANSACTION_LOG" : { + "message" : [ + ": Unable to reconstruct state at version as the transaction log has been truncated due to manual deletion or the log retention policy (=) and checkpoint retention policy (=)" + ], + "sqlState" : "42K03" + }, + "DELTA_TRUNCATE_TABLE_PARTITION_NOT_SUPPORTED" : { + "message" : [ + "Operation not allowed: TRUNCATE TABLE on Delta tables does not support partition predicates; use DELETE to delete specific partitions or rows." + ], + "sqlState" : "0AKDC" + }, + "DELTA_TXN_LOG_FAILED_INTEGRITY" : { + "message" : [ + "The transaction log has failed integrity checks. Failed verification at version of:", + "" + ], + "sqlState" : "XXKDS" + }, + "DELTA_UDF_IN_GENERATED_COLUMN" : { + "message" : [ + "Found . A generated column cannot use a user-defined function" + ], + "sqlState" : "42621" + }, + "DELTA_UNEXPECTED_ACTION_EXPRESSION" : { + "message" : [ + "Unexpected action expression ." + ], + "sqlState" : "42601" + }, + "DELTA_UNEXPECTED_ALIAS" : { + "message" : [ + "Expected Alias but got " + ], + "sqlState" : "XXKDS" + }, + "DELTA_UNEXPECTED_ATTRIBUTE_REFERENCE" : { + "message" : [ + "Expected AttributeReference but got " + ], + "sqlState" : "XXKDS" + }, + "DELTA_UNEXPECTED_CHANGE_FILES_FOUND" : { + "message" : [ + "Change files found in a dataChange = false transaction. Files:", + "" + ], + "sqlState" : "XXKDS" + }, + "DELTA_UNEXPECTED_NUM_PARTITION_COLUMNS_FROM_FILE_NAME" : { + "message" : [ + "Expecting partition column(s): , but found partition column(s): from parsing the file name: " + ], + "sqlState" : "KD009" + }, + "DELTA_UNEXPECTED_PARTIAL_SCAN" : { + "message" : [ + "Expect a full scan of Delta sources, but found a partial scan. path:" + ], + "sqlState" : "KD00A" + }, + "DELTA_UNEXPECTED_PARTITION_COLUMN_FROM_FILE_NAME" : { + "message" : [ + "Expecting partition column , but found partition column from parsing the file name: " + ], + "sqlState" : "KD009" + }, + "DELTA_UNEXPECTED_PARTITION_SCHEMA_FROM_USER" : { + "message" : [ + "CONVERT TO DELTA was called with a partition schema different from the partition schema inferred from the catalog, please avoid providing the schema so that the partition schema can be chosen from the catalog.", + "", + "catalog partition schema:", + "", + "provided partition schema:", + "" + ], + "sqlState" : "KD009" + }, + "DELTA_UNEXPECTED_PROJECT" : { + "message" : [ + "Expected Project but got " + ], + "sqlState" : "XXKDS" + }, + "DELTA_UNIVERSAL_FORMAT_VIOLATION" : { + "message" : [ + "The validation of Universal Format () has failed: " + ], + "sqlState" : "KD00E" + }, + "DELTA_UNKNOWN_CONFIGURATION" : { + "message" : [ + "Unknown configuration was specified: ", + "To disable this check, set =true in the Spark session configuration." 
+ ], + "sqlState" : "F0000" + }, + "DELTA_UNKNOWN_PRIVILEGE" : { + "message" : [ + "Unknown privilege: " + ], + "sqlState" : "42601" + }, + "DELTA_UNKNOWN_READ_LIMIT" : { + "message" : [ + "Unknown ReadLimit: " + ], + "sqlState" : "42601" + }, + "DELTA_UNRECOGNIZED_COLUMN_CHANGE" : { + "message" : [ + "Unrecognized column change . You may be running an out-of-date Delta Lake version." + ], + "sqlState" : "42601" + }, + "DELTA_UNRECOGNIZED_FILE_ACTION" : { + "message" : [ + "Unrecognized file action with type ." + ], + "sqlState" : "XXKDS" + }, + "DELTA_UNRECOGNIZED_INVARIANT" : { + "message" : [ + "Unrecognized invariant. Please upgrade your Spark version." + ], + "sqlState" : "56038" + }, + "DELTA_UNRECOGNIZED_LOGFILE" : { + "message" : [ + "Unrecognized log file " + ], + "sqlState" : "KD00B" + }, + "DELTA_UNSET_NON_EXISTENT_PROPERTY" : { + "message" : [ + "Attempted to unset non-existent property '' in table " + ], + "sqlState" : "42616" + }, + "DELTA_UNSUPPORTED_ABS_PATH_ADD_FILE" : { + "message" : [ + " does not support adding files with an absolute path" + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_ALTER_TABLE_REPLACE_COL_OP" : { + "message" : [ + "Unsupported ALTER TABLE REPLACE COLUMNS operation. Reason:
", + "", + "Failed to change schema from:", + "", + "to:", + "" + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_CLONE_REPLACE_SAME_TABLE" : { + "message" : [ + "", + "You tried to REPLACE an existing table () with CLONE. This operation is", + "unsupported. Try a different target for CLONE or delete the table at the current target.", + "" + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_COLUMN_MAPPING_MODE_CHANGE" : { + "message" : [ + "Changing column mapping mode from '' to '' is not supported." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_COLUMN_MAPPING_PROTOCOL" : { + "message" : [ + "", + "Your current table protocol version does not support changing column mapping modes", + "using .", + "", + "Required Delta protocol version for column mapping:", + "", + "Your table's current Delta protocol version:", + "", + "" + ], + "sqlState" : "KD004" + }, + "DELTA_UNSUPPORTED_COLUMN_MAPPING_SCHEMA_CHANGE" : { + "message" : [ + "", + "Schema change is detected:", + "", + "old schema:", + "", + "", + "new schema:", + "", + "", + "Schema changes are not allowed during the change of column mapping mode.", + "", + "" + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_COLUMN_MAPPING_WRITE" : { + "message" : [ + "Writing data with column mapping mode is not supported." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_COLUMN_TYPE_IN_BLOOM_FILTER" : { + "message" : [ + "Creating a bloom filter index on a column with type is unsupported: " + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_DATA_TYPES" : { + "message" : [ + "Found columns using unsupported data types: . You can set '' to 'false' to disable the type check. Disabling this type check may allow users to create unsupported Delta tables and should only be used when trying to read/write legacy tables." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_DESCRIBE_DETAIL_VIEW" : { + "message" : [ + " is a view. DESCRIBE DETAIL is only supported for tables." + ], + "sqlState" : "42809" + }, + "DELTA_UNSUPPORTED_DROP_CLUSTERING_COLUMN" : { + "message" : [ + "Dropping clustering columns () is not allowed." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_DROP_COLUMN" : { + "message" : [ + "DROP COLUMN is not supported for your Delta table. " + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_DROP_NESTED_COLUMN_FROM_NON_STRUCT_TYPE" : { + "message" : [ + "Can only drop nested columns from StructType. Found " + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_DROP_PARTITION_COLUMN" : { + "message" : [ + "Dropping partition columns () is not allowed." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_EXPRESSION" : { + "message" : [ + "Unsupported expression type() for . The supported types are []." + ], + "sqlState" : "0A000" + }, + "DELTA_UNSUPPORTED_EXPRESSION_GENERATED_COLUMN" : { + "message" : [ + " cannot be used in a generated column" + ], + "sqlState" : "42621" + }, + "DELTA_UNSUPPORTED_FEATURES_FOR_READ" : { + "message" : [ + "Unsupported Delta read feature: table \"\" requires reader table feature(s) that are unsupported by Delta Lake \"\": ." + ], + "sqlState" : "56038" + }, + "DELTA_UNSUPPORTED_FEATURES_FOR_WRITE" : { + "message" : [ + "Unsupported Delta write feature: table \"\" requires writer table feature(s) that are unsupported by Delta Lake \"\": ." + ], + "sqlState" : "56038" + }, + "DELTA_UNSUPPORTED_FEATURES_IN_CONFIG" : { + "message" : [ + "Table features configured in the following Spark configs or Delta table properties are not recognized by this version of Delta Lake: ." 
+ ], + "sqlState" : "56038" + }, + "DELTA_UNSUPPORTED_FEATURE_STATUS" : { + "message" : [ + "Expecting the status for table feature to be \"supported\", but got \"\"." + ], + "sqlState" : "0AKDE" + }, + "DELTA_UNSUPPORTED_FIELD_UPDATE_NON_STRUCT" : { + "message" : [ + "Updating nested fields is only supported for StructType, but you are trying to update a field of , which is of type: ." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_GENERATE_WITH_DELETION_VECTORS" : { + "message" : [ + "The 'GENERATE symlink_format_manifest' command is not supported on table versions with deletion vectors.", + "If you need to generate manifests, consider disabling deletion vectors on this table using 'ALTER TABLE table SET TBLPROPERTIES (delta.enableDeletionVectors = false)'." + ], + "sqlState" : "0A000" + }, + "DELTA_UNSUPPORTED_INVARIANT_NON_STRUCT" : { + "message" : [ + "Invariants on nested fields other than StructTypes are not supported." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_IN_SUBQUERY" : { + "message" : [ + "In subquery is not supported in the condition." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_MANIFEST_GENERATION_WITH_COLUMN_MAPPING" : { + "message" : [ + "Manifest generation is not supported for tables that leverage column mapping, as external readers cannot read these Delta tables. See Delta documentation for more details." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_MULTI_COL_IN_PREDICATE" : { + "message" : [ + "Multi-column In predicates are not supported in the condition." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_NESTED_COLUMN_IN_BLOOM_FILTER" : { + "message" : [ + "Creating a bloom filer index on a nested column is currently unsupported: " + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_NESTED_FIELD_IN_OPERATION" : { + "message" : [ + "Nested field is not supported in the (field = )." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_NON_EMPTY_CLONE" : { + "message" : [ + "The clone destination table is non-empty. Please TRUNCATE or DELETE FROM the table before running CLONE." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_OUTPUT_MODE" : { + "message" : [ + "Data source does not support output mode" + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_PARTITION_COLUMN_IN_BLOOM_FILTER" : { + "message" : [ + "Creating a bloom filter index on a partitioning column is unsupported: " + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_RENAME_COLUMN" : { + "message" : [ + "Column rename is not supported for your Delta table. " + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_SCHEMA_DURING_READ" : { + "message" : [ + "Delta does not support specifying the schema at read time." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_SOURCE" : { + "message" : [ + " destination only supports Delta sources.\n" + ], + "sqlState" : "0AKDD" + }, + "DELTA_UNSUPPORTED_STATIC_PARTITIONS" : { + "message" : [ + "Specifying static partitions in the partition spec is currently not supported during inserts" + ], + "sqlState" : "0AKDD" + }, + "DELTA_UNSUPPORTED_STATS_RECOMPUTE_WITH_DELETION_VECTORS" : { + "message" : [ + "Statistics re-computation on a Delta table with deletion vectors is not yet supported." + ], + "sqlState" : "0AKDD" + }, + "DELTA_UNSUPPORTED_SUBQUERY" : { + "message" : [ + "Subqueries are not supported in the (condition = )." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_SUBQUERY_IN_PARTITION_PREDICATES" : { + "message" : [ + "Subquery is not supported in partition predicates." 
+ ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_TIME_TRAVEL_MULTIPLE_FORMATS" : { + "message" : [ + "Cannot specify time travel in multiple formats." + ], + "sqlState" : "42613" + }, + "DELTA_UNSUPPORTED_TIME_TRAVEL_VIEWS" : { + "message" : [ + "Cannot time travel views, subqueries, streams or change data feed queries." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_VACUUM_SPECIFIC_PARTITION" : { + "message" : [ + "Please provide the base path () when Vacuuming Delta tables. Vacuuming specific partitions is currently not supported." + ], + "sqlState" : "0AKDC" + }, + "DELTA_UNSUPPORTED_WRITES_STAGED_TABLE" : { + "message" : [ + "Table implementation does not support writes: " + ], + "sqlState" : "42807" + }, + "DELTA_UPDATE_SCHEMA_MISMATCH_EXPRESSION" : { + "message" : [ + "Cannot cast to . All nested columns must match." + ], + "sqlState" : "42846" + }, + "DELTA_VERSIONS_NOT_CONTIGUOUS" : { + "message" : [ + "Versions () are not contiguous." + ], + "sqlState" : "KD00C" + }, + "DELTA_VIOLATE_CONSTRAINT_WITH_VALUES" : { + "message" : [ + "CHECK constraint violated by row with values:", + "" + ], + "sqlState" : "23001" + }, + "DELTA_VIOLATE_TABLE_PROPERTY_VALIDATION_FAILED" : { + "message" : [ + "The validation of the properties of table
has been violated:" + ], + "subClass" : { + "EXISTING_DELETION_VECTORS_WITH_INCREMENTAL_MANIFEST_GENERATION" : { + "message" : [ + "Symlink manifest generation is unsupported while deletion vectors are present in the table.", + "In order to produce a version of the table without deletion vectors, run 'REORG TABLE
APPLY (PURGE)'." + ] + }, + "PERSISTENT_DELETION_VECTORS_IN_NON_PARQUET_TABLE" : { + "message" : [ + "Persistent deletion vectors are only supported on Parquet-based Delta tables." + ] + }, + "PERSISTENT_DELETION_VECTORS_WITH_INCREMENTAL_MANIFEST_GENERATION" : { + "message" : [ + "Persistent deletion vectors and incremental symlink manifest generation are mutually exclusive." + ] + } + }, + "sqlState" : "0A000" + }, + "DELTA_ZORDERING_COLUMN_DOES_NOT_EXIST" : { + "message" : [ + "Z-Ordering column does not exist in data schema." + ], + "sqlState" : "42703" + }, + "DELTA_ZORDERING_ON_COLUMN_WITHOUT_STATS" : { + "message" : [ + "Z-Ordering on will be", + " ineffective, because we currently do not collect stats for these columns.", + " You can disable", + " this check by setting", + " '%%sql set = false'" + ], + "sqlState" : "KD00D" + }, + "DELTA_ZORDERING_ON_PARTITION_COLUMN" : { + "message" : [ + " is a partition column. Z-Ordering can only be performed on data columns" + ], + "sqlState" : "42P10" + }, + "DIFFERENT_DELTA_TABLE_READ_BY_STREAMING_SOURCE" : { + "message" : [ + "The streaming query was reading from an unexpected Delta table (id = ''). ", + "It used to read from another Delta table (id = '') according to checkpoint. ", + "This may happen when you changed the code to read from a new table or you deleted and ", + "re-created a table. Please revert your change or delete your streaming query checkpoint ", + "to restart from scratch." + ], + "sqlState" : "55019" + }, + "INCORRECT_NUMBER_OF_ARGUMENTS" : { + "message" : [ + ", requires at least arguments and at most arguments." + ], + "sqlState" : "22023" + }, + "RESERVED_CDC_COLUMNS_ON_WRITE" : { + "message" : [ + "", + "The write contains reserved columns that are used", + "internally as metadata for Change Data Feed. To write to the table either rename/drop", + "these columns or disable Change Data Feed on the table by setting", + " to false." + ], + "sqlState" : "42939" + }, + "WRONG_COLUMN_DEFAULTS_FOR_DELTA_ALTER_TABLE_ADD_COLUMN_NOT_SUPPORTED" : { + "message" : [ + "Failed to execute the command because DEFAULT values are not supported when adding new", + "columns to previously existing Delta tables; please add the column without a default", + "value first, then run a second ALTER TABLE ALTER COLUMN SET DEFAULT command to apply", + "for future inserted rows instead." + ], + "sqlState" : "0AKDC" + }, + "WRONG_COLUMN_DEFAULTS_FOR_DELTA_FEATURE_NOT_ENABLED" : { + "message" : [ + "Failed to execute command because it assigned a column DEFAULT value,", + "but the corresponding table feature was not enabled. Please retry the command again", + "after executing ALTER TABLE tableName SET", + "TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'supported')." + ], + "sqlState" : "0AKDE" + } +} diff --git a/spark/src/main/scala/com/databricks/spark/util/DatabricksLogging.scala b/spark/src/main/scala/com/databricks/spark/util/DatabricksLogging.scala new file mode 100644 index 00000000000..d1e8f135c27 --- /dev/null +++ b/spark/src/main/scala/com/databricks/spark/util/DatabricksLogging.scala @@ -0,0 +1,166 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.databricks.spark.util + +import scala.collection.mutable.ArrayBuffer + +/** + * This file contains stub implementation for logging that exists in Databricks. + */ + +/** Used to return a recorded usage record for testing. */ +case class UsageRecord( + metric: String, + quantity: Double, + blob: String, + tags: Map[String, String] = Map.empty, + opType: Option[OpType] = None, + opTarget: Option[String] = None) + +class TagDefinition(val name: String) { + def this() = this("BACKWARD COMPATIBILITY") +} + +object TagDefinitions { + object TAG_TAHOE_PATH extends TagDefinition("tahoePath") + object TAG_TAHOE_ID extends TagDefinition("tahoeId") + object TAG_ASYNC extends TagDefinition("async") + object TAG_LOG_STORE_CLASS extends TagDefinition("logStore") + object TAG_OP_TYPE extends TagDefinition("opType") +} + +case class OpType(typeName: String, description: String) + +class MetricDefinition(val name: String) { + def this() = this("BACKWARD COMPATIBILITY") +} + +object MetricDefinitions { + object EVENT_LOGGING_FAILURE extends MetricDefinition("loggingFailureEvent") + object EVENT_TAHOE extends MetricDefinition("tahoeEvent") with CentralizableMetric + val METRIC_OPERATION_DURATION = new MetricDefinition("sparkOperationDuration") + with CentralizableMetric +} + +object Log4jUsageLogger { + @volatile var usageTracker: ArrayBuffer[UsageRecord] = null + + /** + * Records and returns all usage logs that are emitted while running the given function. + * Intended for testing metrics that we expect to report. Note that this class does not + * support nested invocations of the tracker. 
+ */ + def track(f: => Unit): Seq[UsageRecord] = { + synchronized { + assert(usageTracker == null, "Usage tracking does not support nested invocation.") + usageTracker = new ArrayBuffer[UsageRecord]() + } + var records: ArrayBuffer[UsageRecord] = null + try { + f + } finally { + records = usageTracker + synchronized { + usageTracker = null + } + } + records.toSeq + } +} + +trait DatabricksLogging { + import MetricDefinitions._ + + // scalastyle:off println + def logConsole(line: String): Unit = println(line) + // scalastyle:on println + + def recordUsage( + metric: MetricDefinition, + quantity: Double, + additionalTags: Map[TagDefinition, String] = Map.empty, + blob: String = null, + forceSample: Boolean = false, + trimBlob: Boolean = true, + silent: Boolean = false): Unit = { + Log4jUsageLogger.synchronized { + if (Log4jUsageLogger.usageTracker != null) { + val record = + UsageRecord(metric.name, quantity, blob, additionalTags.map(kv => (kv._1.name, kv._2))) + Log4jUsageLogger.usageTracker.append(record) + } + } + } + + def recordEvent( + metric: MetricDefinition, + additionalTags: Map[TagDefinition, String] = Map.empty, + blob: String = null, + trimBlob: Boolean = true): Unit = { + recordUsage(metric, 1, additionalTags, blob, trimBlob) + } + + def recordOperation[S]( + opType: OpType, + opTarget: String = null, + extraTags: Map[TagDefinition, String], + isSynchronous: Boolean = true, + alwaysRecordStats: Boolean = false, + allowAuthTags: Boolean = false, + killJvmIfStuck: Boolean = false, + outputMetric: MetricDefinition = METRIC_OPERATION_DURATION, + silent: Boolean = true)(thunk: => S): S = { + try { + thunk + } finally { + Log4jUsageLogger.synchronized { + if (Log4jUsageLogger.usageTracker != null) { + val record = UsageRecord(outputMetric.name, 0, null, + extraTags.map(kv => (kv._1.name, kv._2)), Some(opType), Some(opTarget)) + Log4jUsageLogger.usageTracker.append(record) + } + } + } + } + + def recordProductUsage( + metric: MetricDefinition with CentralizableMetric, + quantity: Double, + additionalTags: Map[TagDefinition, String] = Map.empty, + blob: String = null, + forceSample: Boolean = false, + trimBlob: Boolean = true, + silent: Boolean = false): Unit = { + Log4jUsageLogger.synchronized { + if (Log4jUsageLogger.usageTracker != null) { + val record = + UsageRecord(metric.name, quantity, blob, additionalTags.map(kv => (kv._1.name, kv._2))) + Log4jUsageLogger.usageTracker.append(record) + } + } + } + + def recordProductEvent( + metric: MetricDefinition with CentralizableMetric, + additionalTags: Map[TagDefinition, String] = Map.empty, + blob: String = null, + trimBlob: Boolean = true): Unit = { + recordProductUsage(metric, 1, additionalTags, blob, trimBlob) + } +} + +trait CentralizableMetric diff --git a/spark/src/main/scala/io/delta/exceptions/DeltaConcurrentExceptions.scala b/spark/src/main/scala/io/delta/exceptions/DeltaConcurrentExceptions.scala new file mode 100644 index 00000000000..ba0d1d290cc --- /dev/null +++ b/spark/src/main/scala/io/delta/exceptions/DeltaConcurrentExceptions.scala @@ -0,0 +1,110 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.exceptions + +import org.apache.spark.annotation.Evolving + +/** + * :: Evolving :: + * + * The basic class for all Delta commit conflict exceptions. + * + * @since 1.0.0 + */ +@Evolving +abstract class DeltaConcurrentModificationException(message: String) + extends org.apache.spark.sql.delta.DeltaConcurrentModificationException(message) + +/** + * :: Evolving :: + * + * Thrown when a concurrent transaction has written data after the current transaction read the + * table. + * + * @since 1.0.0 + */ +@Evolving +class ConcurrentWriteException(message: String) + extends org.apache.spark.sql.delta.ConcurrentWriteException(message) + +/** + * :: Evolving :: + * + * Thrown when the metadata of the Delta table has changed between the time of read + * and the time of commit. + * + * @since 1.0.0 + */ +@Evolving +class MetadataChangedException(message: String) + extends org.apache.spark.sql.delta.MetadataChangedException(message) + +/** + * :: Evolving :: + * + * Thrown when the protocol version has changed between the time of read + * and the time of commit. + * + * @since 1.0.0 + */ +@Evolving +class ProtocolChangedException(message: String) + extends org.apache.spark.sql.delta.ProtocolChangedException(message) + +/** + * :: Evolving :: + * + * Thrown when files are added that would have been read by the current transaction. + * + * @since 1.0.0 + */ +@Evolving +class ConcurrentAppendException(message: String) + extends org.apache.spark.sql.delta.ConcurrentAppendException(message) + +/** + * :: Evolving :: + * + * Thrown when the current transaction reads data that was deleted by a concurrent transaction. + * + * @since 1.0.0 + */ +@Evolving +class ConcurrentDeleteReadException(message: String) + extends org.apache.spark.sql.delta.ConcurrentDeleteReadException(message) + +/** + * :: Evolving :: + * + * Thrown when the current transaction deletes data that was deleted by a concurrent transaction. + * + * @since 1.0.0 + */ +@Evolving +class ConcurrentDeleteDeleteException(message: String) + extends org.apache.spark.sql.delta.ConcurrentDeleteDeleteException(message) + +/** + * :: Evolving :: + * + * Thrown when concurrent transaction both attempt to update the same idempotent transaction. + * + * @since 1.0.0 + */ +@Evolving +class ConcurrentTransactionException(message: String) + extends org.apache.spark.sql.delta.ConcurrentTransactionException(message) diff --git a/spark/src/main/scala/io/delta/implicits/package.scala b/spark/src/main/scala/io/delta/implicits/package.scala new file mode 100644 index 00000000000..1cc2a50ca2a --- /dev/null +++ b/spark/src/main/scala/io/delta/implicits/package.scala @@ -0,0 +1,76 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta + +import org.apache.spark.sql.{DataFrame, DataFrameReader, DataFrameWriter} +import org.apache.spark.sql.streaming.{DataStreamReader, DataStreamWriter, StreamingQuery} + +package object implicits { + + /** + * Extends the DataFrameReader API by adding a delta function + * Usage: + * {{{ + * spark.read.delta(path) + * }}} + */ + implicit class DeltaDataFrameReader(val reader: DataFrameReader) extends AnyVal { + def delta(path: String): DataFrame = { + reader.format("delta").load(path) + } + } + + /** + * Extends the DataStreamReader API by adding a delta function + * Usage: + * {{{ + * spark.readStream.delta(path) + * }}} + */ + implicit class DeltaDataStreamReader(val dataStreamReader: DataStreamReader) extends AnyVal { + def delta(path: String): DataFrame = { + dataStreamReader.format("delta").load(path) + } + } + + /** + * Extends the DataFrameWriter API by adding a delta function + * Usage: + * {{{ + * df.write.delta(path) + * }}} + */ + implicit class DeltaDataFrameWriter[T](val dfWriter: DataFrameWriter[T]) extends AnyVal { + def delta(output: String): Unit = { + dfWriter.format("delta").save(output) + } + } + + /** + * Extends the DataStreamWriter API by adding a delta function + * Usage: + * {{{ + * ds.writeStream.delta(path) + * }}} + */ + implicit class DeltaDataStreamWriter[T] + (val dataStreamWriter: DataStreamWriter[T]) extends AnyVal { + def delta(path: String): StreamingQuery = { + dataStreamWriter.format("delta").start(path) + } + } +} diff --git a/spark/src/main/scala/io/delta/sql/DeltaSparkSessionExtension.scala b/spark/src/main/scala/io/delta/sql/DeltaSparkSessionExtension.scala new file mode 100644 index 00000000000..19e8493b551 --- /dev/null +++ b/spark/src/main/scala/io/delta/sql/DeltaSparkSessionExtension.scala @@ -0,0 +1,178 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sql + +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.optimizer.RangePartitionIdRewrite +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.PrepareDeltaScan +import io.delta.sql.parser.DeltaSqlParser + +import org.apache.spark.sql.SparkSessionExtensions +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.delta.PreprocessTimeTravel +import org.apache.spark.sql.internal.SQLConf + +/** + * An extension for Spark SQL to activate Delta SQL parser to support Delta SQL grammar. 
+ * + * Scala example to create a `SparkSession` with the Delta SQL parser: + * {{{ + * import org.apache.spark.sql.SparkSession + * + * val spark = SparkSession + * .builder() + * .appName("...") + * .master("...") + * .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + * .getOrCreate() + * }}} + * + * Java example to create a `SparkSession` with the Delta SQL parser: + * {{{ + * import org.apache.spark.sql.SparkSession; + * + * SparkSession spark = SparkSession + * .builder() + * .appName("...") + * .master("...") + * .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + * .getOrCreate(); + * }}} + * + * Python example to create a `SparkSession` with the Delta SQL parser (PySpark doesn't pick up the + * SQL conf "spark.sql.extensions" in Apache Spark 2.4.x, hence we need to activate it manually in + * 2.4.x. However, because `SparkSession` has been created and everything has been materialized, we + * need to clone a new session to trigger the initialization. See SPARK-25003): + * {{{ + * from pyspark.sql import SparkSession + * + * spark = SparkSession \ + * .builder \ + * .appName("...") \ + * .master("...") \ + * .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + * .getOrCreate() + * if spark.sparkContext().version < "3.": + * spark.sparkContext()._jvm.io.delta.sql.DeltaSparkSessionExtension() \ + * .apply(spark._jsparkSession.extensions()) + * spark = SparkSession(spark.sparkContext(), spark._jsparkSession.cloneSession()) + * }}} + * + * @since 0.4.0 + */ +class DeltaSparkSessionExtension extends (SparkSessionExtensions => Unit) { + override def apply(extensions: SparkSessionExtensions): Unit = { + extensions.injectParser { (session, parser) => + new DeltaSqlParser(parser) + } + extensions.injectResolutionRule { session => + ResolveDeltaPathTable(session) + } + extensions.injectResolutionRule { session => + new PreprocessTimeTravel(session) + } + extensions.injectResolutionRule { session => + // To ensure the parquet field id reader is turned on, these fields are required to support + // id column mapping mode for Delta. + // Spark has the read flag default off, so we have to turn it on manually for Delta. + session.sessionState.conf.setConf(SQLConf.PARQUET_FIELD_ID_READ_ENABLED, true) + session.sessionState.conf.setConf(SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED, true) + new DeltaAnalysis(session) + } + // [SPARK-45383] Spark CheckAnalysis rule misses a case for RelationTimeTravel, and so a + // non-existent table throws an internal spark error instead of the expected AnalysisException. 
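The rules injected in apply() only take effect when the session is actually built with this extension, and the DELTA_CONFIGURE_SPARK_SESSION_WITH_EXTENSION_AND_CATALOG message earlier in the diff additionally expects the Delta catalog to be configured. A sketch of the usual pairing, with the application name illustrative:

{{{
  import org.apache.spark.sql.SparkSession

  val spark = SparkSession.builder()
    .appName("delta-app")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog",
      "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
}}}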
+ extensions.injectCheckRule { session => + new CheckUnresolvedRelationTimeTravel(session) + } + extensions.injectCheckRule { session => + new DeltaUnsupportedOperationsCheck(session) + } + // Rule for rewriting the place holder for range_partition_id to manually construct the + // `RangePartitioner` (which requires an RDD to be sampled in order to determine + // range partition boundaries) + extensions.injectOptimizerRule { session => + new RangePartitionIdRewrite(session) + } + extensions.injectPostHocResolutionRule { session => + new PreprocessTableUpdate(session.sessionState.conf) + } + extensions.injectPostHocResolutionRule { session => + new PreprocessTableMerge(session.sessionState.conf) + } + extensions.injectPostHocResolutionRule { session => + new PreprocessTableDelete(session.sessionState.conf) + } + // Resolve new UpCast expressions that might have been introduced by [[PreprocessTableUpdate]] + // and [[PreprocessTableMerge]]. + extensions.injectPostHocResolutionRule { session => + PostHocResolveUpCast(session) + } + // We don't use `injectOptimizerRule` here as we won't want to apply further optimizations after + // `PrepareDeltaScan`. + // For example, `ConstantFolding` will break unit tests in `OptimizeGeneratedColumnSuite`. + extensions.injectPreCBORule { session => + new PrepareDeltaScan(session) + } + + // Tries to load PrepareDeltaSharingScan class with class reflection, when delta-sharing-spark + // 3.1+ package is installed, this will be loaded and delta sharing batch queries with + // DeltaSharingFileIndex will be handled by the rule. + // When the package is not installed or upon any other issues, it should do nothing and not + // affect all the existing rules. + try { + // scalastyle:off classforname + val constructor = Class.forName("io.delta.sharing.spark.PrepareDeltaSharingScan") + .getConstructor(classOf[org.apache.spark.sql.SparkSession]) + // scalastyle:on classforname + extensions.injectPreCBORule { session => + try { + // Inject the PrepareDeltaSharingScan rule if enabled, otherwise, inject the no op + // rule. It can be disabled if there are any issues so all existing rules are not blocked. + if ( + session.conf.get(DeltaSQLConf.DELTA_SHARING_ENABLE_DELTA_FORMAT_BATCH.key) == "true" + ) { + constructor.newInstance(session).asInstanceOf[Rule[LogicalPlan]] + } else { + new NoOpRule + } + } catch { + // Inject a no op rule which doesn't apply any changes to the logical plan. + case NonFatal(_) => new NoOpRule + } + } + } catch { + case NonFatal(_) => // Do nothing + } + + DeltaTableValueFunctions.supportedFnNames.foreach { fnName => + extensions.injectTableFunction( + DeltaTableValueFunctions.getTableValueFunctionInjection(fnName)) + } + } + + /** + * An no op rule which doesn't apply any changes to the LogicalPlan. Used to be injected upon + * exceptions. + */ + class NoOpRule extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan + } +} diff --git a/spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala b/spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala new file mode 100644 index 00000000000..3fc0cb4c0eb --- /dev/null +++ b/spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala @@ -0,0 +1,696 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sql.parser + +import java.util.Locale + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.catalyst.TimeTravel +import org.apache.spark.sql.delta.skipping.clustering.temp.{ClusterByParserUtils, ClusterByPlan, ClusterBySpec} + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.commands._ +import io.delta.sql.parser.DeltaSqlBaseParser._ +import io.delta.tables.execution.VacuumTableCommand +import org.antlr.v4.runtime._ +import org.antlr.v4.runtime.atn.PredictionMode +import org.antlr.v4.runtime.misc.{Interval, ParseCancellationException} +import org.antlr.v4.runtime.tree._ + +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.parser.{ParseErrorListener, ParseException, ParserInterface} +import org.apache.spark.sql.catalyst.parser.ParserUtils.{checkDuplicateClauses, string, withOrigin} +import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddConstraint, AlterTableDropConstraint, AlterTableDropFeature, CloneTableStatement, LogicalPlan, RestoreTableStatement} +import org.apache.spark.sql.catalyst.trees.Origin +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, TableCatalog} +import org.apache.spark.sql.errors.QueryParsingErrors +import org.apache.spark.sql.internal.{SQLConf, VariableSubstitution} +import org.apache.spark.sql.types._ + +/** + * A SQL parser that tries to parse Delta commands. If failing to parse the SQL text, it will + * forward the call to `delegate`. 
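+ *
+ * Illustrative sketch (not additional API): this parser is normally installed by
+ * [[io.delta.sql.DeltaSparkSessionExtension]], which simply wraps the session's existing
+ * parser as the delegate, e.g.
+ * {{{
+ *   extensions.injectParser { (session, delegate) => new DeltaSqlParser(delegate) }
+ * }}}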
+ */ +class DeltaSqlParser(val delegate: ParserInterface) extends ParserInterface { + private val builder = new DeltaSqlAstBuilder + private val substitution = new VariableSubstitution + + override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser => + builder.visit(parser.singleStatement()) match { + case clusterByPlan: ClusterByPlan => + ClusterByParserUtils(clusterByPlan, delegate).parsePlan(sqlText) + case plan: LogicalPlan => plan + case _ => delegate.parsePlan(sqlText) + } + } + + /** + * This API is used just for parsing the SELECT queries. Delta parser doesn't override + * the Spark parser, that means this can be delegated directly to the Spark parser. + */ + override def parseQuery(sqlText: String): LogicalPlan = delegate.parseQuery(sqlText) + + // scalastyle:off line.size.limit + /** + * Fork from `org.apache.spark.sql.catalyst.parser.AbstractSqlParser#parse(java.lang.String, scala.Function1)`. + * + * @see https://github.com/apache/spark/blob/v2.4.4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala#L81 + */ + // scalastyle:on + protected def parse[T](command: String)(toResult: DeltaSqlBaseParser => T): T = { + val lexer = new DeltaSqlBaseLexer( + new UpperCaseCharStream(CharStreams.fromString(substitution.substitute(command)))) + lexer.removeErrorListeners() + lexer.addErrorListener(ParseErrorListener) + + val tokenStream = new CommonTokenStream(lexer) + val parser = new DeltaSqlBaseParser(tokenStream) + parser.addParseListener(PostProcessor) + parser.removeErrorListeners() + parser.addErrorListener(ParseErrorListener) + + try { + try { + // first, try parsing with potentially faster SLL mode + parser.getInterpreter.setPredictionMode(PredictionMode.SLL) + toResult(parser) + } catch { + case e: ParseCancellationException => + // if we fail, parse with LL mode + tokenStream.seek(0) // rewind input stream + parser.reset() + + // Try Again. + parser.getInterpreter.setPredictionMode(PredictionMode.LL) + toResult(parser) + } + } catch { + case e: ParseException if e.command.isDefined => + throw e + case e: ParseException => + throw e.withCommand(command) + case e: AnalysisException => + val position = Origin(e.line, e.startPosition) + throw new ParseException( + command = Option(command), + start = position, + stop = position, + errorClass = "DELTA_PARSING_ANALYSIS_ERROR", + messageParameters = Map("msg" -> e.message)) + } + } + + override def parseExpression(sqlText: String): Expression = delegate.parseExpression(sqlText) + + override def parseTableIdentifier(sqlText: String): TableIdentifier = + delegate.parseTableIdentifier(sqlText) + + override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier = + delegate.parseFunctionIdentifier(sqlText) + + override def parseMultipartIdentifier (sqlText: String): Seq[String] = + delegate.parseMultipartIdentifier(sqlText) + + override def parseTableSchema(sqlText: String): StructType = delegate.parseTableSchema(sqlText) + + override def parseDataType(sqlText: String): DataType = delegate.parseDataType(sqlText) +} + +/** + * Define how to convert an AST generated from `DeltaSqlBase.g4` to a `LogicalPlan`. The parent + * class `DeltaSqlBaseBaseVisitor` defines all visitXXX methods generated from `#` instructions in + * `DeltaSqlBase.g4` (such as `#vacuumTable`). + */ +class DeltaSqlAstBuilder extends DeltaSqlBaseBaseVisitor[AnyRef] { + + import org.apache.spark.sql.catalyst.parser.ParserUtils._ + + /** + * Convert a property list into a key-value map. 
+ * This should be called through [[visitPropertyKeyValues]] or [[visitPropertyKeys]]. + */ + override def visitPropertyList( + ctx: PropertyListContext): Map[String, String] = withOrigin(ctx) { + val properties = ctx.property.asScala.map { property => + val key = visitPropertyKey(property.key) + val value = visitPropertyValue(property.value) + key -> value + } + // Check for duplicate property names. + checkDuplicateKeys(properties.toSeq, ctx) + properties.toMap + } + + /** + * Parse a key-value map from a [[PropertyListContext]], assuming all values are specified. + */ + def visitPropertyKeyValues(ctx: PropertyListContext): Map[String, String] = { + val props = visitPropertyList(ctx) + val badKeys = props.collect { case (key, null) => key } + if (badKeys.nonEmpty) { + operationNotAllowed( + s"Values must be specified for key(s): ${badKeys.mkString("[", ",", "]")}", ctx) + } + props + } + + /** + * Parse a list of keys from a [[PropertyListContext]], assuming no values are specified. + */ + def visitPropertyKeys(ctx: PropertyListContext): Seq[String] = { + val props = visitPropertyList(ctx) + val badKeys = props.filter { case (_, v) => v != null }.keys + if (badKeys.nonEmpty) { + operationNotAllowed( + s"Values should not be specified for key(s): ${badKeys.mkString("[", ",", "]")}", ctx) + } + props.keys.toSeq + } + + /** + * A property key can either be String or a collection of dot separated elements. This + * function extracts the property key based on whether its a string literal or a property + * identifier. + */ + override def visitPropertyKey(key: PropertyKeyContext): String = { + if (key.stringLit() != null) { + string(visitStringLit(key.stringLit())) + } else { + key.getText + } + } + + /** + * A property value can be String, Integer, Boolean or Decimal. This function extracts + * the property value based on whether its a string, integer, boolean or decimal literal. + */ + override def visitPropertyValue(value: PropertyValueContext): String = { + if (value == null) { + null + } else if (value.identifier != null) { + value.identifier.getText + } else if (value.value != null) { + string(visitStringLit(value.value)) + } else if (value.booleanValue != null) { + value.getText.toLowerCase(Locale.ROOT) + } else { + value.getText + } + } + + override def visitStringLit(ctx: StringLitContext): Token = { + if (ctx != null) { + if (ctx.STRING != null) { + ctx.STRING.getSymbol + } else { + ctx.DOUBLEQUOTED_STRING.getSymbol + } + } else { + null + } + } + + /** + * Parse either create table header or replace table header. + * @return TableIdentifier for the target table + * Boolean for whether we are creating a table + * Boolean for whether we are replacing a table + * Boolean for whether we are creating a table if not exists + */ + override def visitCloneTableHeader( + ctx: CloneTableHeaderContext): (TableIdentifier, Boolean, Boolean, Boolean) = withOrigin(ctx) { + ctx.children.asScala.head match { + case createHeader: CreateTableHeaderContext => + (visitTableIdentifier(createHeader.table), true, false, createHeader.EXISTS() != null) + case replaceHeader: ReplaceTableHeaderContext => + (visitTableIdentifier(replaceHeader.table), replaceHeader.CREATE() != null, true, false) + case _ => + throw new DeltaParseException(ctx, "DELTA_PARSING_INCORRECT_CLONE_HEADER") + } + } + + /** + * Creates a [[CloneTableStatement]] logical plan. 
Example SQL: + * {{{ + * CREATE [OR REPLACE] TABLE SHALLOW CLONE + * [TBLPROPERTIES ('propA' = 'valueA', ...)] + * [LOCATION '/path/to/cloned/table'] + * }}} + */ + override def visitClone(ctx: CloneContext): LogicalPlan = withOrigin(ctx) { + val (target, isCreate, isReplace, ifNotExists) = visitCloneTableHeader(ctx.cloneTableHeader()) + + if (!isCreate && ifNotExists) { + throw new DeltaParseException( + ctx.cloneTableHeader(), + "DELTA_PARSING_MUTUALLY_EXCLUSIVE_CLAUSES", + Map("clauseOne" -> "IF NOT EXISTS", "clauseTwo" -> "REPLACE") + ) + } + + // Get source for clone (and time travel source if necessary) + // The source relation can be an Iceberg table in form of `catalog.db.table` so we visit + // a multipart identifier instead of TableIdentifier (which does not support 3L namespace) + // in Spark 3.3. In Spark 3.4 we should have TableIdentifier supporting 3L namespace so we + // could revert back to that. + val sourceRelation = new UnresolvedRelation(visitMultipartIdentifier(ctx.source)) + val maybeTimeTravelSource = maybeTimeTravelChild(ctx.clause, sourceRelation) + val targetRelation = UnresolvedRelation(target.nameParts) + + val tablePropertyOverrides = Option(ctx.tableProps) + .map(visitPropertyKeyValues) + .getOrElse(Map.empty[String, String]) + + CloneTableStatement( + maybeTimeTravelSource, + targetRelation, + ifNotExists, + isReplace, + isCreate, + tablePropertyOverrides, + Option(ctx.location).map(s => string(visitStringLit(s)))) + } + + /** + * Create a [[VacuumTableCommand]] logical plan. Example SQL: + * {{{ + * VACUUM ('/path/to/dir' | delta.`/path/to/dir`) [RETAIN number HOURS] [DRY RUN]; + * }}} + */ + override def visitVacuumTable(ctx: VacuumTableContext): AnyRef = withOrigin(ctx) { + VacuumTableCommand( + Option(ctx.path).map(string), + Option(ctx.table).map(visitTableIdentifier), + Option(ctx.number).map(_.getText.toDouble), + ctx.RUN != null) + } + + /** Provides a list of unresolved attributes for multi dimensional clustering. */ + override def visitZorderSpec(ctx: ZorderSpecContext): Seq[UnresolvedAttribute] = { + ctx.interleave.asScala + .map(_.identifier.asScala.map(_.getText).toSeq) + .map(new UnresolvedAttribute(_)).toSeq + } + + /** + * Create a [[OptimizeTableCommand]] logical plan. + * Syntax: + * {{{ + * OPTIMIZE + * [WHERE predicate-using-partition-columns] + * [ZORDER BY [(] col1, col2 ..[)]] + * }}} + * Examples: + * {{{ + * OPTIMIZE '/path/to/delta/table'; + * OPTIMIZE delta_table_name; + * OPTIMIZE delta.`/path/to/delta/table`; + * OPTIMIZE delta_table_name WHERE partCol = 25; + * OPTIMIZE delta_table_name WHERE partCol = 25 ZORDER BY col2, col2; + * }}} + */ + override def visitOptimizeTable(ctx: OptimizeTableContext): AnyRef = withOrigin(ctx) { + if (ctx.path == null && ctx.table == null) { + throw new DeltaParseException( + ctx, + "DELTA_PARSING_MISSING_TABLE_NAME_OR_PATH", + Map("command" -> "OPTIMIZE") + ) + } + val interleaveBy = Option(ctx.zorderSpec).map(visitZorderSpec).getOrElse(Seq.empty) + OptimizeTableCommand( + Option(ctx.path).map(string), + Option(ctx.table).map(visitTableIdentifier), + Option(ctx.partitionPredicate).map(extractRawText(_)).toSeq)(interleaveBy) + } + + /** + * Creates a [[DeltaReorgTable]] logical plan. + * Examples: + * {{{ + * -- Physically delete dropped rows and columns of target table + * REORG TABLE (delta.`/path/to/table` | delta_table_name) + * [WHERE partition_predicate] APPLY (PURGE) + * + * -- Rewrite the files in UNIFORM(ICEBERG) compliant way. 
+ * REORG TABLE table_name (delta.`/path/to/table` | catalog.db.table) + * APPLY (UPGRADE UNIFORM(ICEBERG_COMPAT_VERSION=version)) + * }}} + */ + override def visitReorgTable(ctx: ReorgTableContext): AnyRef = withOrigin(ctx) { + if (ctx.table == null) { + throw new DeltaParseException( + ctx, + "DELTA_PARSING_MISSING_TABLE_NAME_OR_PATH", + Map("command" -> "REORG") + ) + } + + val targetIdentifier = visitTableIdentifier(ctx.table) + val tableNameParts = targetIdentifier.database.toSeq :+ targetIdentifier.table + val targetTable = createUnresolvedTable(tableNameParts, "REORG") + + val reorgTableSpec = if (ctx.PURGE != null) { + DeltaReorgTableSpec(DeltaReorgTableMode.PURGE, None) + } else if (ctx.ICEBERG_COMPAT_VERSION != null) { + DeltaReorgTableSpec(DeltaReorgTableMode.UNIFORM_ICEBERG, Option(ctx.version).map(_.getText.toInt)) + } else { + throw new ParseException( + "Invalid syntax: REORG TABLE only support PURGE/UPGRADE UNIFORM.", + ctx) + } + + DeltaReorgTable(targetTable, reorgTableSpec)(Option(ctx.partitionPredicate).map(extractRawText(_)).toSeq) + } + + override def visitDescribeDeltaDetail( + ctx: DescribeDeltaDetailContext): LogicalPlan = withOrigin(ctx) { + DescribeDeltaDetailCommand( + Option(ctx.path).map(string), + Option(ctx.table).map(visitTableIdentifier), + Map.empty) + } + + override def visitDescribeDeltaHistory( + ctx: DescribeDeltaHistoryContext): LogicalPlan = withOrigin(ctx) { + DescribeDeltaHistory( + Option(ctx.path).map(string), + Option(ctx.table).map(visitTableIdentifier), + Option(ctx.limit).map(_.getText.toInt)) + } + + override def visitGenerate(ctx: GenerateContext): LogicalPlan = withOrigin(ctx) { + DeltaGenerateCommand( + modeName = ctx.modeName.getText, + tableId = visitTableIdentifier(ctx.table), + Map.empty) + } + + override def visitConvert(ctx: ConvertContext): LogicalPlan = withOrigin(ctx) { + ConvertToDeltaCommand( + visitTableIdentifier(ctx.table), + Option(ctx.colTypeList).map(colTypeList => StructType(visitColTypeList(colTypeList))), + ctx.STATISTICS() == null, None) + } + + override def visitRestore(ctx: RestoreContext): LogicalPlan = withOrigin(ctx) { + val tableRelation = UnresolvedRelation(visitTableIdentifier(ctx.table).nameParts) + val timeTravelTableRelation = maybeTimeTravelChild(ctx.clause, tableRelation) + RestoreTableStatement(timeTravelTableRelation.asInstanceOf[TimeTravel]) + } + + /** + * Captures any CLUSTER BY clause and creates a [[ClusterByPlan]] logical plan. + * The plan will be used as a sentinel for DeltaSqlParser to process it further. + */ + override def visitClusterBy(ctx: ClusterByContext): LogicalPlan = withOrigin(ctx) { + val clusterBySpecCtx = ctx.clusterBySpec.asScala.head + checkDuplicateClauses(ctx.clusterBySpec, "CLUSTER BY", clusterBySpecCtx) + val columnNames = + clusterBySpecCtx.interleave.asScala + .map(_.identifier.asScala.map(_.getText).toSeq) + .map(_.asInstanceOf[Seq[String]]).toSeq + // get CLUSTER BY clause positions. + val startIndex = clusterBySpecCtx.getStart.getStartIndex + val stopIndex = clusterBySpecCtx.getStop.getStopIndex + + // get CLUSTER BY parenthesis positions. + val parenStartIndex = clusterBySpecCtx.LEFT_PAREN().getSymbol.getStartIndex + val parenStopIndex = clusterBySpecCtx.RIGHT_PAREN().getSymbol.getStopIndex + ClusterByPlan( + ClusterBySpec(columnNames), + startIndex, + stopIndex, + parenStartIndex, + parenStopIndex, + clusterBySpecCtx) + } + + /** + * Time travel the table to the given version or timestamp. 
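+ *
+ * For illustration (the table name is a placeholder), this handles the optional temporal
+ * clause in commands such as:
+ * {{{
+ *   RESTORE TABLE my_table TO VERSION AS OF 1
+ *   RESTORE TABLE my_table TO TIMESTAMP AS OF '2021-01-01'
+ * }}}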
+ */ + private def maybeTimeTravelChild(ctx: TemporalClauseContext, child: LogicalPlan): LogicalPlan = { + if (ctx == null) return child + TimeTravel( + child, + Option(ctx.timestamp).map(token => Literal(token.getText.replaceAll("^'|'$", ""))), + Option(ctx.version).map(_.getText.toLong), + Some("sql")) + } + + override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) { + visit(ctx.statement).asInstanceOf[LogicalPlan] + } + + protected def visitTableIdentifier(ctx: QualifiedNameContext): TableIdentifier = withOrigin(ctx) { + ctx.identifier.asScala.toSeq match { + case Seq(tbl) => TableIdentifier(tbl.getText) + case Seq(db, tbl) => TableIdentifier(tbl.getText, Some(db.getText)) + case Seq(catalog, db, tbl) => + TableIdentifier(tbl.getText, Some(db.getText), Some(catalog.getText)) + case _ => throw new DeltaParseException( + ctx, + "DELTA_PARSING_ILLEGAL_TABLE_NAME", + Map("table" -> ctx.getText)) + } + } + + protected def visitMultipartIdentifier(ctx: QualifiedNameContext): Seq[String] = withOrigin(ctx) { + ctx.identifier.asScala.map(_.getText).toSeq + } + + override def visitPassThrough(ctx: PassThroughContext): LogicalPlan = null + + override def visitColTypeList(ctx: ColTypeListContext): Seq[StructField] = withOrigin(ctx) { + ctx.colType().asScala.map(visitColType).toSeq + } + + override def visitColType(ctx: ColTypeContext): StructField = withOrigin(ctx) { + import ctx._ + + val builder = new MetadataBuilder + + StructField( + ctx.colName.getText, + typedVisit[DataType](ctx.dataType), + nullable = NOT == null, + builder.build()) + } + + private def createUnresolvedTable( + tableName: Seq[String], + commandName: String): UnresolvedTable = { + UnresolvedTable(tableName, commandName, relationTypeMismatchHint = None) + } + + // Build the text of the CHECK constraint expression. The user-specified whitespace is in the + // HIDDEN channel where we can't get to it, so we just paste together all the tokens with a single + // space. This produces some strange spacing (e.g. `structCol . arr [ 0 ]`), but right now we + // think that's preferable to the additional complexity involved in trying to produce cleaner + // output. + private def buildCheckConstraintText(tokens: Seq[ExprTokenContext]): String = { + tokens.map(_.getText).mkString(" ") + } + + private def extractRawText(exprContext: ParserRuleContext): String = { + // Extract the raw expression which will be parsed later + exprContext.getStart.getInputStream.getText(new Interval( + exprContext.getStart.getStartIndex, + exprContext.getStop.getStopIndex)) + } + + override def visitAddTableConstraint( + ctx: AddTableConstraintContext): LogicalPlan = withOrigin(ctx) { + val checkConstraint = ctx.constraint().asInstanceOf[CheckConstraintContext] + + AlterTableAddConstraint( + createUnresolvedTable(ctx.table.identifier.asScala.map(_.getText).toSeq, + "ALTER TABLE ... ADD CONSTRAINT"), + ctx.name.getText, + buildCheckConstraintText(checkConstraint.exprToken().asScala.toSeq)) + } + + override def visitDropTableConstraint( + ctx: DropTableConstraintContext): LogicalPlan = withOrigin(ctx) { + AlterTableDropConstraint( + createUnresolvedTable(ctx.table.identifier.asScala.map(_.getText).toSeq, + "ALTER TABLE ... DROP CONSTRAINT"), + ctx.name.getText, + ifExists = ctx.EXISTS != null) + } + + /** + * A featureNameValue can either be String or an identifier. This function extracts + * the featureNameValue based on whether its a string literal or an identifier. 
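+ *
+ * For example (illustrative; table and feature names are placeholders), the feature name may
+ * be given either as an identifier or as a string literal:
+ * {{{
+ *   ALTER TABLE tbl DROP FEATURE deletionVectors
+ *   ALTER TABLE tbl DROP FEATURE 'deletionVectors' TRUNCATE HISTORY
+ * }}}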
+ */ + override def visitFeatureNameValue(featureNameValue: FeatureNameValueContext): String = { + if (featureNameValue.stringLit() != null) { + string(visitStringLit(featureNameValue.stringLit())) + } else { + featureNameValue.getText + } + } + + /** + * Parse an ALTER TABLE DROP FEATURE command. + */ + override def visitAlterTableDropFeature(ctx: AlterTableDropFeatureContext): LogicalPlan = { + val truncateHistory = ctx.TRUNCATE != null && ctx.HISTORY != null + AlterTableDropFeature( + createUnresolvedTable(ctx.table.identifier.asScala.map(_.getText).toSeq, + "ALTER TABLE ... DROP FEATURE"), + visitFeatureNameValue(ctx.featureName), + truncateHistory) + } + + protected def typedVisit[T](ctx: ParseTree): T = { + ctx.accept(this).asInstanceOf[T] + } + + override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = withOrigin(ctx) { + val dataType = ctx.identifier.getText.toLowerCase(Locale.ROOT) + (dataType, ctx.INTEGER_VALUE().asScala.toList) match { + case ("boolean", Nil) => BooleanType + case ("tinyint" | "byte", Nil) => ByteType + case ("smallint" | "short", Nil) => ShortType + case ("int" | "integer", Nil) => IntegerType + case ("bigint" | "long", Nil) => LongType + case ("float", Nil) => FloatType + case ("double", Nil) => DoubleType + case ("date", Nil) => DateType + case ("timestamp", Nil) => TimestampType + case ("string", Nil) => StringType + case ("char", length :: Nil) => CharType(length.getText.toInt) + case ("varchar", length :: Nil) => VarcharType(length.getText.toInt) + case ("binary", Nil) => BinaryType + case ("decimal", Nil) => DecimalType.USER_DEFAULT + case ("decimal", precision :: Nil) => DecimalType(precision.getText.toInt, 0) + case ("decimal", precision :: scale :: Nil) => + DecimalType(precision.getText.toInt, scale.getText.toInt) + case ("interval", Nil) => CalendarIntervalType + case (dt, params) => + val dtStr = if (params.nonEmpty) s"$dt(${params.mkString(",")})" else dt + throw new DeltaParseException( + ctx, + "DELTA_PARSING_UNSUPPORTED_DATA_TYPE", + Map("dataType" -> dtStr) + ) + } + } +} + +// scalastyle:off line.size.limit +/** + * Fork from `org.apache.spark.sql.catalyst.parser.UpperCaseCharStream`. + * + * @see https://github.com/apache/spark/blob/v2.4.4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala#L157 + */ +// scalastyle:on +class UpperCaseCharStream(wrapped: CodePointCharStream) extends CharStream { + override def consume(): Unit = wrapped.consume + override def getSourceName(): String = wrapped.getSourceName + override def index(): Int = wrapped.index + override def mark(): Int = wrapped.mark + override def release(marker: Int): Unit = wrapped.release(marker) + override def seek(where: Int): Unit = wrapped.seek(where) + override def size(): Int = wrapped.size + + override def getText(interval: Interval): String = { + // ANTLR 4.7's CodePointCharStream implementations have bugs when + // getText() is called with an empty stream, or intervals where + // the start > end. See + // https://github.com/antlr/antlr4/commit/ac9f7530 for one fix + // that is not yet in a released ANTLR artifact. + if (size() > 0 && (interval.b - interval.a >= 0)) { + wrapped.getText(interval) + } else { + "" + } + } + + override def LA(i: Int): Int = { + val la = wrapped.LA(i) + if (la == 0 || la == IntStream.EOF) la + else Character.toUpperCase(la) + } +} + +// scalastyle:off line.size.limit +/** + * Fork from `org.apache.spark.sql.catalyst.parser.PostProcessor`. 
+ * + * @see https://github.com/apache/spark/blob/v2.4.4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala#L248 + */ +// scalastyle:on +case object PostProcessor extends DeltaSqlBaseBaseListener { + + /** Remove the back ticks from an Identifier. */ + override def exitQuotedIdentifier(ctx: QuotedIdentifierContext): Unit = { + replaceTokenByIdentifier(ctx, 1) { token => + // Remove the double back ticks in the string. + token.setText(token.getText.replace("``", "`")) + token + } + } + + /** Treat non-reserved keywords as Identifiers. */ + override def exitNonReserved(ctx: NonReservedContext): Unit = { + replaceTokenByIdentifier(ctx, 0)(identity) + } + + private def replaceTokenByIdentifier( + ctx: ParserRuleContext, + stripMargins: Int)( + f: CommonToken => CommonToken = identity): Unit = { + val parent = ctx.getParent + parent.removeLastChild() + val token = ctx.getChild(0).getPayload.asInstanceOf[Token] + val newToken = new CommonToken( + new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, token.getInputStream), + DeltaSqlBaseParser.IDENTIFIER, + token.getChannel, + token.getStartIndex + stripMargins, + token.getStopIndex - stripMargins) + parent.addChild(new TerminalNodeImpl(f(newToken))) + } +} diff --git a/spark/src/main/scala/io/delta/tables/DeltaColumnBuilder.scala b/spark/src/main/scala/io/delta/tables/DeltaColumnBuilder.scala new file mode 100644 index 00000000000..9ad567e95ea --- /dev/null +++ b/spark/src/main/scala/io/delta/tables/DeltaColumnBuilder.scala @@ -0,0 +1,139 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.tables + +import org.apache.spark.sql.delta.DeltaErrors +import org.apache.spark.sql.delta.sources.DeltaSourceUtils.GENERATION_EXPRESSION_METADATA_KEY + +import org.apache.spark.annotation._ +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.types.{DataType, MetadataBuilder, StructField} + +/** + * :: Evolving :: + * + * Builder to specify a table column. + * + * See [[DeltaTableBuilder]] for examples. + * @since 1.0.0 + */ +@Evolving +class DeltaColumnBuilder private[tables]( + private val spark: SparkSession, + private val colName: String) { + private var dataType: DataType = _ + private var nullable: Boolean = true + private var generationExpr: Option[String] = None + private var comment: Option[String] = None + + /** + * :: Evolving :: + * + * Specify the column data type. + * + * @param dataType string column data type + * @since 1.0.0 + */ + @Evolving + def dataType(dataType: String): DeltaColumnBuilder = { + this.dataType = spark.sessionState.sqlParser.parseDataType(dataType) + this + } + + /** + * :: Evolving :: + * + * Specify the column data type. + * + * @param dataType DataType column data type + * @since 1.0.0 + */ + @Evolving + def dataType(dataType: DataType): DeltaColumnBuilder = { + this.dataType = dataType + this + } + + /** + * :: Evolving :: + * + * Specify whether the column can be null. 
+ * + * @param nullable boolean whether the column can be null or not. + * @since 1.0.0 + */ + @Evolving + def nullable(nullable: Boolean): DeltaColumnBuilder = { + this.nullable = nullable + this + } + + /** + * :: Evolving :: + * + * Specify a expression if the column is always generated as a function of other columns. + * + * @param expr string the the generation expression + * @since 1.0.0 + */ + @Evolving + def generatedAlwaysAs(expr: String): DeltaColumnBuilder = { + this.generationExpr = Option(expr) + this + } + + /** + * :: Evolving :: + * + * Specify a column comment. + * + * @param comment string column description + * @since 1.0.0 + */ + @Evolving + def comment(comment: String): DeltaColumnBuilder = { + this.comment = Option(comment) + this + } + + /** + * :: Evolving :: + * + * Build the column as a structField. + * + * @since 1.0.0 + */ + @Evolving + def build(): StructField = { + val metadataBuilder = new MetadataBuilder() + if (generationExpr.nonEmpty) { + metadataBuilder.putString(GENERATION_EXPRESSION_METADATA_KEY, generationExpr.get) + } + if (comment.nonEmpty) { + metadataBuilder.putString("comment", comment.get) + } + val fieldMetadata = metadataBuilder.build() + if (dataType == null) { + throw DeltaErrors.analysisException(s"The data type of the column $colName is not provided") + } + StructField( + colName, + dataType, + nullable = nullable, + metadata = fieldMetadata) + } +} diff --git a/spark/src/main/scala/io/delta/tables/DeltaMergeBuilder.scala b/spark/src/main/scala/io/delta/tables/DeltaMergeBuilder.scala new file mode 100644 index 00000000000..8f429fddbb4 --- /dev/null +++ b/spark/src/main/scala/io/delta/tables/DeltaMergeBuilder.scala @@ -0,0 +1,684 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.tables + +import scala.collection.JavaConverters._ +import scala.collection.Map + +import org.apache.spark.sql.delta.{DeltaErrors, PostHocResolveUpCast, PreprocessTableMerge} +import org.apache.spark.sql.delta.DeltaTableUtils.withActiveSession +import org.apache.spark.sql.delta.DeltaViewHelper +import org.apache.spark.sql.delta.commands.MergeIntoCommand +import org.apache.spark.sql.delta.util.AnalysisHelper + +import org.apache.spark.annotation._ +import org.apache.spark.internal.Logging +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, NamedExpression} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.functions.expr +import org.apache.spark.sql.internal.SQLConf + +/** + * Builder to specify how to merge data from source DataFrame into the target Delta table. + * You can specify any number of `whenMatched` and `whenNotMatched` clauses. + * Here are the constraints on these clauses. + * + * - `whenMatched` clauses: + * + * - The condition in a `whenMatched` clause is optional. 
However, if there are multiple
+ * `whenMatched` clauses, then only the last one may omit the condition.
+ *
+ * - When there is more than one `whenMatched` clause and their conditions (or the lack
+ * thereof) are such that a row satisfies multiple clauses, then the action for the first
+ * clause satisfied is executed. In other words, the order of the `whenMatched` clauses matters.
+ *
+ * - If none of the `whenMatched` clauses match a source-target row pair that satisfies
+ * the merge condition, then the target rows will not be updated or deleted.
+ *
+ * - If you want to update all the columns of the target Delta table with the
+ * corresponding columns of the source DataFrame, then you can use
+ * `whenMatched(...).updateAll()`. This is equivalent to
+ *
+ *         whenMatched(...).updateExpr(Map(
+ *           ("col1", "source.col1"),
+ *           ("col2", "source.col2"),
+ *           ...))
+ *       
+ *
+ * - `whenNotMatched` clauses:
+ *
+ * - The condition in a `whenNotMatched` clause is optional. However, if there are
+ * multiple `whenNotMatched` clauses, then only the last one may omit the condition.
+ *
+ * - When there is more than one `whenNotMatched` clause and their conditions (or the
+ * lack thereof) are such that a row satisfies multiple clauses, then the action for the first
+ * clause satisfied is executed. In other words, the order of the `whenNotMatched` clauses matters.
+ *
+ * - If no `whenNotMatched` clause is present, or if it is present but the non-matching source
+ * row does not satisfy the condition, then the source row is not inserted.
+ *
+ * - If you want to insert all the columns of the target Delta table with the
+ * corresponding columns of the source DataFrame, then you can use
+ * `whenNotMatched(...).insertAll()`. This is equivalent to
+ *
+ *         whenNotMatched(...).insertExpr(Map(
+ *           ("col1", "source.col1"),
+ *           ("col2", "source.col2"),
+ *           ...))
+ *       
+ * + * - `whenNotMatchedBySource` clauses: + * + * - The condition in a `whenNotMatchedBySource` clause is optional. However, if there are + * multiple `whenNotMatchedBySource` clauses, then only the last one may omit the condition. + * + * - When there are more than one `whenNotMatchedBySource` clauses and there are conditions (or + * the lack of) such that a row satisfies multiple clauses, then the action for the first + * clause satisfied is executed. In other words, the order of the `whenNotMatchedBySource` + * clauses matters. + * + * - If no `whenNotMatchedBySource` clause is present or if it is present but the + * non-matching target row does not satisfy any of the `whenNotMatchedBySource` clause + * condition, then the target row will not be updated or deleted. + * + * + * Scala example to update a key-value Delta table with new key-values from a source DataFrame: + * {{{ + * deltaTable + * .as("target") + * .merge( + * source.as("source"), + * "target.key = source.key") + * .whenMatched() + * .updateExpr(Map( + * "value" -> "source.value")) + * .whenNotMatched() + * .insertExpr(Map( + * "key" -> "source.key", + * "value" -> "source.value")) + * .whenNotMatchedBySource() + * .updateExpr(Map( + * "value" -> "target.value + 1")) + * .execute() + * }}} + * + * Java example to update a key-value Delta table with new key-values from a source DataFrame: + * {{{ + * deltaTable + * .as("target") + * .merge( + * source.as("source"), + * "target.key = source.key") + * .whenMatched() + * .updateExpr( + * new HashMap() {{ + * put("value", "source.value"); + * }}) + * .whenNotMatched() + * .insertExpr( + * new HashMap() {{ + * put("key", "source.key"); + * put("value", "source.value"); + * }}) + * .whenNotMatchedBySource() + * .updateExpr( + * new HashMap() {{ + * put("value", "target.value + 1"); + * }}) + * .execute(); + * }}} + * + * @since 0.3.0 + */ +class DeltaMergeBuilder private( + private val targetTable: DeltaTable, + private val source: DataFrame, + private val onCondition: Column, + private val whenClauses: Seq[DeltaMergeIntoClause]) + extends AnalysisHelper + with Logging + { + + /** + * Build the actions to perform when the merge condition was matched. This returns + * [[DeltaMergeMatchedActionBuilder]] object which can be used to specify how + * to update or delete the matched target table row with the source row. + * @since 0.3.0 + */ + def whenMatched(): DeltaMergeMatchedActionBuilder = { + DeltaMergeMatchedActionBuilder(this, None) + } + + /** + * Build the actions to perform when the merge condition was matched and + * the given `condition` is true. This returns [[DeltaMergeMatchedActionBuilder]] object + * which can be used to specify how to update or delete the matched target table row with the + * source row. + * + * @param condition boolean expression as a SQL formatted string + * @since 0.3.0 + */ + def whenMatched(condition: String): DeltaMergeMatchedActionBuilder = { + whenMatched(expr(condition)) + } + + /** + * Build the actions to perform when the merge condition was matched and + * the given `condition` is true. This returns a [[DeltaMergeMatchedActionBuilder]] object + * which can be used to specify how to update or delete the matched target table row with the + * source row. 
+ * + * @param condition boolean expression as a Column object + * @since 0.3.0 + */ + def whenMatched(condition: Column): DeltaMergeMatchedActionBuilder = { + DeltaMergeMatchedActionBuilder(this, Some(condition)) + } + + /** + * Build the action to perform when the merge condition was not matched. This returns + * [[DeltaMergeNotMatchedActionBuilder]] object which can be used to specify how + * to insert the new sourced row into the target table. + * @since 0.3.0 + */ + def whenNotMatched(): DeltaMergeNotMatchedActionBuilder = { + DeltaMergeNotMatchedActionBuilder(this, None) + } + + /** + * Build the actions to perform when the merge condition was not matched and + * the given `condition` is true. This returns [[DeltaMergeMatchedActionBuilder]] object + * which can be used to specify how to insert the new sourced row into the target table. + * + * @param condition boolean expression as a SQL formatted string + * @since 0.3.0 + */ + def whenNotMatched(condition: String): DeltaMergeNotMatchedActionBuilder = { + whenNotMatched(expr(condition)) + } + + /** + * Build the actions to perform when the merge condition was not matched and + * the given `condition` is true. This returns [[DeltaMergeMatchedActionBuilder]] object + * which can be used to specify how to insert the new sourced row into the target table. + * + * @param condition boolean expression as a Column object + * @since 0.3.0 + */ + def whenNotMatched(condition: Column): DeltaMergeNotMatchedActionBuilder = { + DeltaMergeNotMatchedActionBuilder(this, Some(condition)) + } + + /** + * Build the actions to perform when the merge condition was not matched by the source. This + * returns [[DeltaMergeNotMatchedBySourceActionBuilder]] object which can be used to specify how + * to update or delete the target table row. + * @since 2.3.0 + */ + def whenNotMatchedBySource(): DeltaMergeNotMatchedBySourceActionBuilder = { + DeltaMergeNotMatchedBySourceActionBuilder(this, None) + } + + /** + * Build the actions to perform when the merge condition was not matched by the source and the + * given `condition` is true. This returns [[DeltaMergeNotMatchedBySourceActionBuilder]] object + * which can be used to specify how to update or delete the target table row. + * + * @param condition boolean expression as a SQL formatted string + * @since 2.3.0 + */ + def whenNotMatchedBySource(condition: String): DeltaMergeNotMatchedBySourceActionBuilder = { + whenNotMatchedBySource(expr(condition)) + } + + /** + * Build the actions to perform when the merge condition was not matched by the source and the + * given `condition` is true. This returns [[DeltaMergeNotMatchedBySourceActionBuilder]] object + * which can be used to specify how to update or delete the target table row . + * + * @param condition boolean expression as a Column object + * @since 2.3.0 + */ + def whenNotMatchedBySource(condition: Column): DeltaMergeNotMatchedBySourceActionBuilder = { + DeltaMergeNotMatchedBySourceActionBuilder(this, Some(condition)) + } + + /** + * Execute the merge operation based on the built matched and not matched actions. 
+ * + * @since 0.3.0 + */ + def execute(): Unit = improveUnsupportedOpError { + val sparkSession = targetTable.toDF.sparkSession + withActiveSession(sparkSession) { + // Note: We are explicitly resolving DeltaMergeInto plan rather than going to through the + // Analyzer using `Dataset.ofRows()` because the Analyzer incorrectly resolves all + // references in the DeltaMergeInto using both source and target child plans, even before + // DeltaAnalysis rule kicks in. This is because the Analyzer understands only MergeIntoTable, + // and handles that separately by skipping resolution (for Delta) and letting the + // DeltaAnalysis rule do the resolving correctly. This can be solved by generating + // MergeIntoTable instead, which blocked by the different issue with MergeIntoTable as + // explained in the function `mergePlan` and + // https://issues.apache.org/jira/browse/SPARK-34962. + val resolvedMergeInto = + DeltaMergeInto.resolveReferencesAndSchema(mergePlan, sparkSession.sessionState.conf)( + tryResolveReferencesForExpressions(sparkSession)) + if (!resolvedMergeInto.resolved) { + throw DeltaErrors.analysisException("Failed to resolve\n", plan = Some(resolvedMergeInto)) + } + val strippedMergeInto = resolvedMergeInto.copy( + target = DeltaViewHelper.stripTempViewForMerge(resolvedMergeInto.target, SQLConf.get) + ) + // Preprocess the actions and verify + var mergeIntoCommand = + PreprocessTableMerge(sparkSession.sessionState.conf)(strippedMergeInto) + // Resolve UpCast expressions that `PreprocessTableMerge` may have introduced. + mergeIntoCommand = PostHocResolveUpCast(sparkSession).apply(mergeIntoCommand) + sparkSession.sessionState.analyzer.checkAnalysis(mergeIntoCommand) + mergeIntoCommand.asInstanceOf[MergeIntoCommand].run(sparkSession) + } + } + + /** + * :: Unstable :: + * + * Private method for internal usage only. Do not call this directly. + */ + @Unstable + private[delta] def withClause(clause: DeltaMergeIntoClause): DeltaMergeBuilder = { + new DeltaMergeBuilder( + this.targetTable, this.source, this.onCondition, this.whenClauses :+ clause) + } + + private def mergePlan: DeltaMergeInto = { + var targetPlan = targetTable.toDF.queryExecution.analyzed + val sourcePlan = source.queryExecution.analyzed + + // If source and target have duplicate, pre-resolved references (can happen with self-merge), + // then rewrite the references in target with new exprId to avoid ambiguity. + // We rewrite the target instead of ths source because the source plan can be arbitrary and + // we know that the target plan is simple combination of LogicalPlan and an + // optional SubqueryAlias. + val duplicateResolvedRefs = targetPlan.outputSet.intersect(sourcePlan.outputSet) + if (duplicateResolvedRefs.nonEmpty) { + val refReplacementMap = duplicateResolvedRefs.toSeq.flatMap { + case a: AttributeReference => + Some(a.exprId -> a.withExprId(NamedExpression.newExprId)) + case _ => None + }.toMap + targetPlan = targetPlan.transformAllExpressions { + case a: AttributeReference if refReplacementMap.contains(a.exprId) => + refReplacementMap(a.exprId) + } + logInfo("Rewritten duplicate refs between target and source plans: " + + refReplacementMap.toSeq.mkString(", ")) + } + + // Note: The Scala API cannot generate MergeIntoTable just like the SQL parser because + // UpdateAction in MergeIntoTable does not have any way to differentiate between + // the representations of `updateAll()` and `update(some-condition, empty-actions)`. 
+ // More specifically, UpdateAction with a list of empty Assignments implicitly represents + // `updateAll()`, so there is no way to represent `update()` with zero column assignments + // (possible in Scala API, but syntactically not possible in SQL). This issue is tracked + // by https://issues.apache.org/jira/browse/SPARK-34962. + val merge = DeltaMergeInto(targetPlan, sourcePlan, onCondition.expr, whenClauses) + val finalMerge = if (duplicateResolvedRefs.nonEmpty) { + // If any expression contain duplicate, pre-resolved references, we can't simply + // replace the references in the same way as the target because we don't know + // whether the user intended to refer to the source or the target columns. Instead, + // we unresolve them (only the duplicate refs) and let the analysis resolve the ambiguity + // and throw the usual error messages when needed. + merge.transformExpressions { + case a: AttributeReference if duplicateResolvedRefs.contains(a) => + UnresolvedAttribute(a.qualifier :+ a.name) + } + } else merge + logDebug("Generated merged plan:\n" + finalMerge) + finalMerge + } +} + +object DeltaMergeBuilder { + /** + * :: Unstable :: + * + * Private method for internal usage only. Do not call this directly. + */ + @Unstable + private[delta] def apply( + targetTable: DeltaTable, + source: DataFrame, + onCondition: Column): DeltaMergeBuilder = { + new DeltaMergeBuilder(targetTable, source, onCondition, Nil) + } +} + +/** + * Builder class to specify the actions to perform when a target table row has matched a + * source row based on the given merge condition and optional match condition. + * + * See [[DeltaMergeBuilder]] for more information. + * + * @since 0.3.0 + */ +class DeltaMergeMatchedActionBuilder private( + private val mergeBuilder: DeltaMergeBuilder, + private val matchCondition: Option[Column]) { + + /** + * Update the matched table rows based on the rules defined by `set`. + * + * @param set rules to update a row as a Scala map between target column names and + * corresponding update expressions as Column objects. + * @since 0.3.0 + */ + def update(set: Map[String, Column]): DeltaMergeBuilder = { + addUpdateClause(set) + } + + /** + * Update the matched table rows based on the rules defined by `set`. + * + * @param set rules to update a row as a Scala map between target column names and + * corresponding update expressions as SQL formatted strings. + * @since 0.3.0 + */ + def updateExpr(set: Map[String, String]): DeltaMergeBuilder = { + addUpdateClause(toStrColumnMap(set)) + } + + /** + * Update a matched table row based on the rules defined by `set`. + * + * @param set rules to update a row as a Java map between target column names and + * corresponding expressions as Column objects. + * @since 0.3.0 + */ + def update(set: java.util.Map[String, Column]): DeltaMergeBuilder = { + addUpdateClause(set.asScala) + } + + /** + * Update a matched table row based on the rules defined by `set`. + * + * @param set rules to update a row as a Java map between target column names and + * corresponding expressions as SQL formatted strings. + * @since 0.3.0 + */ + def updateExpr(set: java.util.Map[String, String]): DeltaMergeBuilder = { + addUpdateClause(toStrColumnMap(set.asScala)) + } + + /** + * Update all the columns of the matched table row with the values of the + * corresponding columns in the source row. 
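+ *
+ * Usage sketch (table and column names are illustrative):
+ * {{{
+ *   deltaTable.as("t")
+ *     .merge(source.as("s"), "t.key = s.key")
+ *     .whenMatched()
+ *     .updateAll()
+ *     .execute()
+ * }}}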
+ * @since 0.3.0 + */ + def updateAll(): DeltaMergeBuilder = { + val updateClause = DeltaMergeIntoMatchedUpdateClause( + matchCondition.map(_.expr), + DeltaMergeIntoClause.toActions(Nil, Nil)) + mergeBuilder.withClause(updateClause) + } + + /** + * Delete a matched row from the table. + * @since 0.3.0 + */ + def delete(): DeltaMergeBuilder = { + val deleteClause = DeltaMergeIntoMatchedDeleteClause(matchCondition.map(_.expr)) + mergeBuilder.withClause(deleteClause) + } + + private def addUpdateClause(set: Map[String, Column]): DeltaMergeBuilder = { + if (set.isEmpty && matchCondition.isEmpty) { + // This is a catch all clause that doesn't update anything: we can ignore it. + mergeBuilder + } else { + val setActions = set.toSeq + val updateActions = DeltaMergeIntoClause.toActions( + colNames = setActions.map(x => UnresolvedAttribute.quotedString(x._1)), + exprs = setActions.map(x => x._2.expr), + isEmptySeqEqualToStar = false) + val updateClause = DeltaMergeIntoMatchedUpdateClause( + matchCondition.map(_.expr), + updateActions) + mergeBuilder.withClause(updateClause) + } + } + + private def toStrColumnMap(map: Map[String, String]): Map[String, Column] = + map.mapValues(functions.expr(_)).toMap +} + +object DeltaMergeMatchedActionBuilder { + /** + * :: Unstable :: + * + * Private method for internal usage only. Do not call this directly. + */ + @Unstable + private[delta] def apply( + mergeBuilder: DeltaMergeBuilder, + matchCondition: Option[Column]): DeltaMergeMatchedActionBuilder = { + new DeltaMergeMatchedActionBuilder(mergeBuilder, matchCondition) + } +} + + +/** + * Builder class to specify the actions to perform when a source row has not matched any target + * Delta table row based on the merge condition, but has matched the additional condition + * if specified. + * + * See [[DeltaMergeBuilder]] for more information. + * + * @since 0.3.0 + */ +class DeltaMergeNotMatchedActionBuilder private( + private val mergeBuilder: DeltaMergeBuilder, + private val notMatchCondition: Option[Column]) { + + /** + * Insert a new row to the target table based on the rules defined by `values`. + * + * @param values rules to insert a row as a Scala map between target column names and + * corresponding expressions as Column objects. + * @since 0.3.0 + */ + def insert(values: Map[String, Column]): DeltaMergeBuilder = { + addInsertClause(values) + } + + /** + * Insert a new row to the target table based on the rules defined by `values`. + * + * @param values rules to insert a row as a Scala map between target column names and + * corresponding expressions as SQL formatted strings. + * @since 0.3.0 + */ + def insertExpr(values: Map[String, String]): DeltaMergeBuilder = { + addInsertClause(toStrColumnMap(values)) + } + + /** + * Insert a new row to the target table based on the rules defined by `values`. + * + * @param values rules to insert a row as a Java map between target column names and + * corresponding expressions as Column objects. + * @since 0.3.0 + */ + def insert(values: java.util.Map[String, Column]): DeltaMergeBuilder = { + addInsertClause(values.asScala) + } + + /** + * Insert a new row to the target table based on the rules defined by `values`. + * + * @param values rules to insert a row as a Java map between target column names and + * corresponding expressions as SQL formatted strings. 
+ * + * @since 0.3.0 + */ + def insertExpr(values: java.util.Map[String, String]): DeltaMergeBuilder = { + addInsertClause(toStrColumnMap(values.asScala)) + } + + /** + * Insert a new target Delta table row by assigning the target columns to the values of the + * corresponding columns in the source row. + * @since 0.3.0 + */ + def insertAll(): DeltaMergeBuilder = { + val insertClause = DeltaMergeIntoNotMatchedInsertClause( + notMatchCondition.map(_.expr), + DeltaMergeIntoClause.toActions(Nil, Nil)) + mergeBuilder.withClause(insertClause) + } + + private def addInsertClause(setValues: Map[String, Column]): DeltaMergeBuilder = { + val values = setValues.toSeq + val insertActions = DeltaMergeIntoClause.toActions( + colNames = values.map(x => UnresolvedAttribute.quotedString(x._1)), + exprs = values.map(x => x._2.expr), + isEmptySeqEqualToStar = false) + val insertClause = DeltaMergeIntoNotMatchedInsertClause( + notMatchCondition.map(_.expr), + insertActions) + mergeBuilder.withClause(insertClause) + } + + private def toStrColumnMap(map: Map[String, String]): Map[String, Column] = + map.mapValues(functions.expr(_)).toMap +} + +object DeltaMergeNotMatchedActionBuilder { + /** + * :: Unstable :: + * + * Private method for internal usage only. Do not call this directly. + */ + @Unstable + private[delta] def apply( + mergeBuilder: DeltaMergeBuilder, + notMatchCondition: Option[Column]): DeltaMergeNotMatchedActionBuilder = { + new DeltaMergeNotMatchedActionBuilder(mergeBuilder, notMatchCondition) + } +} + +/** + * Builder class to specify the actions to perform when a target table row has no match in the + * source table based on the given merge condition and optional match condition. + * + * See [[DeltaMergeBuilder]] for more information. + * + * @since 2.3.0 + */ +class DeltaMergeNotMatchedBySourceActionBuilder private( + private val mergeBuilder: DeltaMergeBuilder, + private val notMatchBySourceCondition: Option[Column]) { + + /** + * Update an unmatched target table row based on the rules defined by `set`. + * + * @param set rules to update a row as a Scala map between target column names and + * corresponding update expressions as Column objects. + * @since 2.3.0 + */ + def update(set: Map[String, Column]): DeltaMergeBuilder = { + addUpdateClause(set) + } + + /** + * Update an unmatched target table row based on the rules defined by `set`. + * + * @param set rules to update a row as a Scala map between target column names and + * corresponding update expressions as SQL formatted strings. + * @since 2.3.0 + */ + def updateExpr(set: Map[String, String]): DeltaMergeBuilder = { + addUpdateClause(toStrColumnMap(set)) + } + + /** + * Update an unmatched target table row based on the rules defined by `set`. + * + * @param set rules to update a row as a Java map between target column names and + * corresponding expressions as Column objects. + * @since 2.3.0 + */ + def update(set: java.util.Map[String, Column]): DeltaMergeBuilder = { + addUpdateClause(set.asScala) + } + + /** + * Update an unmatched target table row based on the rules defined by `set`. + * + * @param set rules to update a row as a Java map between target column names and + * corresponding expressions as SQL formatted strings. + * @since 2.3.0 + */ + def updateExpr(set: java.util.Map[String, String]): DeltaMergeBuilder = { + addUpdateClause(toStrColumnMap(set.asScala)) + } + + /** + * Delete an unmatched row from the target table. 
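+ *
+ * Usage sketch (names are illustrative): delete target rows that no longer have a matching
+ * source row:
+ * {{{
+ *   deltaTable.as("t")
+ *     .merge(source.as("s"), "t.key = s.key")
+ *     .whenNotMatchedBySource()
+ *     .delete()
+ *     .execute()
+ * }}}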
+ * @since 2.3.0 + */ + def delete(): DeltaMergeBuilder = { + val deleteClause = + DeltaMergeIntoNotMatchedBySourceDeleteClause(notMatchBySourceCondition.map(_.expr)) + mergeBuilder.withClause(deleteClause) + } + + private def addUpdateClause(set: Map[String, Column]): DeltaMergeBuilder = { + if (set.isEmpty && notMatchBySourceCondition.isEmpty) { + // This is a catch all clause that doesn't update anything: we can ignore it. + mergeBuilder + } else { + val setActions = set.toSeq + val updateActions = DeltaMergeIntoClause.toActions( + colNames = setActions.map(x => UnresolvedAttribute.quotedString(x._1)), + exprs = setActions.map(x => x._2.expr), + isEmptySeqEqualToStar = false) + val updateClause = DeltaMergeIntoNotMatchedBySourceUpdateClause( + notMatchBySourceCondition.map(_.expr), + updateActions) + mergeBuilder.withClause(updateClause) + } + } + + private def toStrColumnMap(map: Map[String, String]): Map[String, Column] = + map.mapValues(functions.expr(_)).toMap +} + +object DeltaMergeNotMatchedBySourceActionBuilder { + /** + * :: Unstable :: + * + * Private method for internal usage only. Do not call this directly. + */ + @Unstable + private[delta] def apply( + mergeBuilder: DeltaMergeBuilder, + notMatchBySourceCondition: Option[Column]): DeltaMergeNotMatchedBySourceActionBuilder = { + new DeltaMergeNotMatchedBySourceActionBuilder(mergeBuilder, notMatchBySourceCondition) + } +} diff --git a/spark/src/main/scala/io/delta/tables/DeltaOptimizeBuilder.scala b/spark/src/main/scala/io/delta/tables/DeltaOptimizeBuilder.scala new file mode 100644 index 00000000000..7b33c9626fc --- /dev/null +++ b/spark/src/main/scala/io/delta/tables/DeltaOptimizeBuilder.scala @@ -0,0 +1,114 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.tables + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaTableUtils.withActiveSession +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands.DeltaOptimizeContext +import org.apache.spark.sql.delta.commands.OptimizeTableCommand +import org.apache.spark.sql.delta.util.AnalysisHelper + +import org.apache.spark.annotation._ +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{ResolvedTable, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} + +/** + * Builder class for constructing OPTIMIZE command and executing. + * + * @param sparkSession SparkSession to use for execution + * @param tableIdentifier Id of the table on which to + * execute the optimize + * @param options Hadoop file system options for read and write. 
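+ *
+ * Usage sketch (table and column names are illustrative); the builder is typically obtained
+ * via `DeltaTable.optimize()`:
+ * {{{
+ *   DeltaTable.forName(spark, "events")
+ *     .optimize()
+ *     .where("date = '2021-11-18'")
+ *     .executeZOrderBy("eventType")
+ * }}}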
+ * @since 2.0.0 + */ +class DeltaOptimizeBuilder private(table: DeltaTableV2) extends AnalysisHelper { + private var partitionFilter: Seq[String] = Seq.empty + + private lazy val tableIdentifier: String = + table.tableIdentifier.getOrElse(s"delta.`${table.deltaLog.dataPath.toString}`") + + /** + * Apply partition filter on this optimize command builder to limit + * the operation on selected partitions. + * @param partitionFilter The partition filter to apply + * @return [[DeltaOptimizeBuilder]] with partition filter applied + * @since 2.0.0 + */ + def where(partitionFilter: String): DeltaOptimizeBuilder = { + this.partitionFilter = this.partitionFilter :+ partitionFilter + this + } + + /** + * Compact the small files in selected partitions. + * @return DataFrame containing the OPTIMIZE execution metrics + * @since 2.0.0 + */ + def executeCompaction(): DataFrame = { + execute(Seq.empty) + } + + /** + * Z-Order the data in selected partitions using the given columns. + * @param columns Zero or more columns to order the data + * using Z-Order curves + * @return DataFrame containing the OPTIMIZE execution metrics + * @since 2.0.0 + */ + @scala.annotation.varargs + def executeZOrderBy(columns: String *): DataFrame = { + val attrs = columns.map(c => UnresolvedAttribute(c)) + execute(attrs) + } + + private def execute(zOrderBy: Seq[UnresolvedAttribute]): DataFrame = { + val sparkSession = table.spark + withActiveSession(sparkSession) { + val tableId: TableIdentifier = sparkSession + .sessionState + .sqlParser + .parseTableIdentifier(tableIdentifier) + val id = Identifier.of(tableId.database.toArray, tableId.identifier) + val catalogPlugin = sparkSession.sessionState.catalogManager.currentCatalog + val catalog = catalogPlugin match { + case tableCatalog: TableCatalog => tableCatalog + case _ => throw new IllegalArgumentException( + s"Catalog ${catalogPlugin.name} does not support tables") + } + val resolvedTable = ResolvedTable.create(catalog, id, table) + val optimize = OptimizeTableCommand( + resolvedTable, partitionFilter, DeltaOptimizeContext())(zOrderBy = zOrderBy) + toDataset(sparkSession, optimize) + } + } +} + +private[delta] object DeltaOptimizeBuilder { + /** + * :: Unstable :: + * + * Private method for internal usage only. Do not call this directly. + */ + @Unstable + private[delta] def apply(table: DeltaTableV2): DeltaOptimizeBuilder = + new DeltaOptimizeBuilder(table) +} diff --git a/spark/src/main/scala/io/delta/tables/DeltaTable.scala b/spark/src/main/scala/io/delta/tables/DeltaTable.scala new file mode 100644 index 00000000000..17a1f348e2f --- /dev/null +++ b/spark/src/main/scala/io/delta/tables/DeltaTable.scala @@ -0,0 +1,1020 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.tables + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.DeltaTableUtils.withActiveSession +import org.apache.spark.sql.delta.actions.{Protocol, TableFeatureProtocolUtils} +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands.AlterTableSetPropertiesDeltaCommand +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import io.delta.tables.execution._ +import org.apache.hadoop.fs.Path + +import org.apache.spark.annotation._ +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.types.StructType + +/** + * Main class for programmatically interacting with Delta tables. + * You can create DeltaTable instances using the static methods. + * {{{ + * DeltaTable.forPath(sparkSession, pathToTheDeltaTable) + * }}} + * + * @since 0.3.0 + */ +class DeltaTable private[tables]( + @transient private val _df: Dataset[Row], + @transient private val table: DeltaTableV2) + extends DeltaTableOperations with Serializable { + + protected def deltaLog: DeltaLog = { + /** Assert the codes run in the driver. */ + if (table == null) { + throw DeltaErrors.deltaTableFoundInExecutor() + } + + table.deltaLog + } + + protected def df: Dataset[Row] = { + /** Assert the codes run in the driver. */ + if (_df == null) { + throw DeltaErrors.deltaTableFoundInExecutor() + } + + _df + } + + /** + * Apply an alias to the DeltaTable. This is similar to `Dataset.as(alias)` or + * SQL `tableName AS alias`. + * + * @since 0.3.0 + */ + def as(alias: String): DeltaTable = new DeltaTable(df.as(alias), table) + + /** + * Apply an alias to the DeltaTable. This is similar to `Dataset.as(alias)` or + * SQL `tableName AS alias`. + * + * @since 0.3.0 + */ + def alias(alias: String): DeltaTable = as(alias) + + /** + * Get a DataFrame (that is, Dataset[Row]) representation of this Delta table. + * + * @since 0.3.0 + */ + def toDF: Dataset[Row] = df + + /** + * Recursively delete files and directories in the table that are not needed by the table for + * maintaining older versions up to the given retention threshold. This method will return an + * empty DataFrame on successful completion. + * + * @param retentionHours The retention threshold in hours. Files required by the table for + * reading versions earlier than this will be preserved and the + * rest of them will be deleted. + * @since 0.3.0 + */ + def vacuum(retentionHours: Double): DataFrame = { + executeVacuum(deltaLog, Some(retentionHours), table.getTableIdentifierIfExists) + } + + /** + * Recursively delete files and directories in the table that are not needed by the table for + * maintaining older versions up to the given retention threshold. This method will return an + * empty DataFrame on successful completion. + * + * note: This will use the default retention period of 7 days. + * + * @since 0.3.0 + */ + def vacuum(): DataFrame = { + executeVacuum(deltaLog, None, table.getTableIdentifierIfExists) + } + + /** + * Get the information of the latest `limit` commits on this table as a Spark DataFrame. + * The information is in reverse chronological order. + * + * @param limit The number of previous commands to get history for + * + * @since 0.3.0 + */ + def history(limit: Int): DataFrame = { + executeHistory(deltaLog, Some(limit), table.getTableIdentifierIfExists) + } + + /** + * Get the information available commits on this table as a Spark DataFrame. 
+ * The information is in reverse chronological order. + * + * @since 0.3.0 + */ + def history(): DataFrame = { + executeHistory(deltaLog, tableId = table.getTableIdentifierIfExists) + } + + /** + * :: Evolving :: + * + * Get the details of a Delta table such as the format, name, and size. + * + * @since 2.1.0 + */ + @Evolving + def detail(): DataFrame = { + executeDetails(deltaLog.dataPath.toString, table.getTableIdentifierIfExists) + } + + /** + * Generate a manifest for the given Delta Table + * + * @param mode Specifies the mode for the generation of the manifest. + * The valid modes are as follows (not case sensitive): + * - "symlink_format_manifest" : This will generate manifests in symlink format + * for Presto and Athena read support. + * See the online documentation for more information. + * @since 0.5.0 + */ + def generate(mode: String): Unit = { + val tableId = table.tableIdentifier.getOrElse(s"delta.`${deltaLog.dataPath.toString}`") + executeGenerate(tableId, mode) + } + + /** + * Delete data from the table that match the given `condition`. + * + * @param condition Boolean SQL expression + * + * @since 0.3.0 + */ + def delete(condition: String): Unit = { + delete(functions.expr(condition)) + } + + /** + * Delete data from the table that match the given `condition`. + * + * @param condition Boolean SQL expression + * + * @since 0.3.0 + */ + def delete(condition: Column): Unit = { + executeDelete(Some(condition.expr)) + } + + /** + * Delete data from the table. + * + * @since 0.3.0 + */ + def delete(): Unit = { + executeDelete(None) + } + + /** + * Optimize the data layout of the table. This returns + * a [[DeltaOptimizeBuilder]] object that can be used to specify + * the partition filter to limit the scope of optimize and + * also execute different optimization techniques such as file + * compaction or order data using Z-Order curves. + * + * See the [[DeltaOptimizeBuilder]] for a full description + * of this operation. + * + * Scala example to run file compaction on a subset of + * partitions in the table: + * {{{ + * deltaTable + * .optimize() + * .where("date='2021-11-18'") + * .executeCompaction(); + * }}} + * + * @since 2.0.0 + */ + def optimize(): DeltaOptimizeBuilder = DeltaOptimizeBuilder(table) + + /** + * Update rows in the table based on the rules defined by `set`. + * + * Scala example to increment the column `data`. + * {{{ + * import org.apache.spark.sql.functions._ + * + * deltaTable.update(Map("data" -> col("data") + 1)) + * }}} + * + * @param set rules to update a row as a Scala map between target column names and + * corresponding update expressions as Column objects. + * @since 0.3.0 + */ + def update(set: Map[String, Column]): Unit = { + executeUpdate(set, None) + } + + /** + * Update rows in the table based on the rules defined by `set`. + * + * Java example to increment the column `data`. + * {{{ + * import org.apache.spark.sql.Column; + * import org.apache.spark.sql.functions; + * + * deltaTable.update( + * new HashMap() {{ + * put("data", functions.col("data").plus(1)); + * }} + * ); + * }}} + * + * @param set rules to update a row as a Java map between target column names and + * corresponding update expressions as Column objects. + * @since 0.3.0 + */ + def update(set: java.util.Map[String, Column]): Unit = { + executeUpdate(set.asScala, None) + } + + /** + * Update data from the table on the rows that match the given `condition` + * based on the rules defined by `set`. + * + * Scala example to increment the column `data`. 
+ * {{{ + * import org.apache.spark.sql.functions._ + * + * deltaTable.update( + * col("date") > "2018-01-01", + * Map("data" -> col("data") + 1)) + * }}} + * + * @param condition boolean expression as Column object specifying which rows to update. + * @param set rules to update a row as a Scala map between target column names and + * corresponding update expressions as Column objects. + * @since 0.3.0 + */ + def update(condition: Column, set: Map[String, Column]): Unit = { + executeUpdate(set, Some(condition)) + } + + /** + * Update data from the table on the rows that match the given `condition` + * based on the rules defined by `set`. + * + * Java example to increment the column `data`. + * {{{ + * import org.apache.spark.sql.Column; + * import org.apache.spark.sql.functions; + * + * deltaTable.update( + * functions.col("date").gt("2018-01-01"), + * new HashMap() {{ + * put("data", functions.col("data").plus(1)); + * }} + * ); + * }}} + * + * @param condition boolean expression as Column object specifying which rows to update. + * @param set rules to update a row as a Java map between target column names and + * corresponding update expressions as Column objects. + * @since 0.3.0 + */ + def update(condition: Column, set: java.util.Map[String, Column]): Unit = { + executeUpdate(set.asScala, Some(condition)) + } + + /** + * Update rows in the table based on the rules defined by `set`. + * + * Scala example to increment the column `data`. + * {{{ + * deltaTable.updateExpr(Map("data" -> "data + 1"))) + * }}} + * + * @param set rules to update a row as a Scala map between target column names and + * corresponding update expressions as SQL formatted strings. + * @since 0.3.0 + */ + def updateExpr(set: Map[String, String]): Unit = { + executeUpdate(toStrColumnMap(set), None) + } + + /** + * Update rows in the table based on the rules defined by `set`. + * + * Java example to increment the column `data`. + * {{{ + * deltaTable.updateExpr( + * new HashMap() {{ + * put("data", "data + 1"); + * }} + * ); + * }}} + * + * @param set rules to update a row as a Java map between target column names and + * corresponding update expressions as SQL formatted strings. + * @since 0.3.0 + */ + def updateExpr(set: java.util.Map[String, String]): Unit = { + executeUpdate(toStrColumnMap(set.asScala), None) + } + + /** + * Update data from the table on the rows that match the given `condition`, + * which performs the rules defined by `set`. + * + * Scala example to increment the column `data`. + * {{{ + * deltaTable.update( + * "date > '2018-01-01'", + * Map("data" -> "data + 1")) + * }}} + * + * @param condition boolean expression as SQL formatted string object specifying + * which rows to update. + * @param set rules to update a row as a Scala map between target column names and + * corresponding update expressions as SQL formatted strings. + * @since 0.3.0 + */ + def updateExpr(condition: String, set: Map[String, String]): Unit = { + executeUpdate(toStrColumnMap(set), Some(functions.expr(condition))) + } + + /** + * Update data from the table on the rows that match the given `condition`, + * which performs the rules defined by `set`. + * + * Java example to increment the column `data`. + * {{{ + * deltaTable.update( + * "date > '2018-01-01'", + * new HashMap() {{ + * put("data", "data + 1"); + * }} + * ); + * }}} + * + * @param condition boolean expression as SQL formatted string object specifying + * which rows to update. 
+ * @param set rules to update a row as a Java map between target column names and + * corresponding update expressions as SQL formatted strings. + * @since 0.3.0 + */ + def updateExpr(condition: String, set: java.util.Map[String, String]): Unit = { + executeUpdate(toStrColumnMap(set.asScala), Some(functions.expr(condition))) + } + + /** + * Merge data from the `source` DataFrame based on the given merge `condition`. This returns + * a [[DeltaMergeBuilder]] object that can be used to specify the update, delete, or insert + * actions to be performed on rows based on whether the rows matched the condition or not. + * + * See the [[DeltaMergeBuilder]] for a full description of this operation and what combinations of + * update, delete and insert operations are allowed. + * + * Scala example to update a key-value Delta table with new key-values from a source DataFrame: + * {{{ + * deltaTable + * .as("target") + * .merge( + * source.as("source"), + * "target.key = source.key") + * .whenMatched + * .updateExpr(Map( + * "value" -> "source.value")) + * .whenNotMatched + * .insertExpr(Map( + * "key" -> "source.key", + * "value" -> "source.value")) + * .execute() + * }}} + * + * Java example to update a key-value Delta table with new key-values from a source DataFrame: + * {{{ + * deltaTable + * .as("target") + * .merge( + * source.as("source"), + * "target.key = source.key") + * .whenMatched + * .updateExpr( + * new HashMap() {{ + * put("value" -> "source.value"); + * }}) + * .whenNotMatched + * .insertExpr( + * new HashMap() {{ + * put("key", "source.key"); + * put("value", "source.value"); + * }}) + * .execute(); + * }}} + * + * @param source source Dataframe to be merged. + * @param condition boolean expression as SQL formatted string + * @since 0.3.0 + */ + def merge(source: DataFrame, condition: String): DeltaMergeBuilder = { + merge(source, functions.expr(condition)) + } + + /** + * Merge data from the `source` DataFrame based on the given merge `condition`. This returns + * a [[DeltaMergeBuilder]] object that can be used to specify the update, delete, or insert + * actions to be performed on rows based on whether the rows matched the condition or not. + * + * See the [[DeltaMergeBuilder]] for a full description of this operation and what combinations of + * update, delete and insert operations are allowed. + * + * Scala example to update a key-value Delta table with new key-values from a source DataFrame: + * {{{ + * deltaTable + * .as("target") + * .merge( + * source.as("source"), + * "target.key = source.key") + * .whenMatched + * .updateExpr(Map( + * "value" -> "source.value")) + * .whenNotMatched + * .insertExpr(Map( + * "key" -> "source.key", + * "value" -> "source.value")) + * .execute() + * }}} + * + * Java example to update a key-value Delta table with new key-values from a source DataFrame: + * {{{ + * deltaTable + * .as("target") + * .merge( + * source.as("source"), + * "target.key = source.key") + * .whenMatched + * .updateExpr( + * new HashMap() {{ + * put("value" -> "source.value") + * }}) + * .whenNotMatched + * .insertExpr( + * new HashMap() {{ + * put("key", "source.key"); + * put("value", "source.value"); + * }}) + * .execute() + * }}} + * + * @param source source Dataframe to be merged. 
+ * @param condition boolean expression as a Column object + * @since 0.3.0 + */ + def merge(source: DataFrame, condition: Column): DeltaMergeBuilder = { + DeltaMergeBuilder(this, source, condition) + } + + /** + * Restore the DeltaTable to an older version of the table specified by version number. + * + * An example would be + * {{{ io.delta.tables.DeltaTable.restoreToVersion(7) }}} + * + * @since 1.2.0 + */ + def restoreToVersion(version: Long): DataFrame = { + executeRestore(table, Some(version), None) + } + + /** + * Restore the DeltaTable to an older version of the table specified by a timestamp. + * + * Timestamp can be of the format yyyy-MM-dd or yyyy-MM-dd HH:mm:ss + * + * An example would be + * {{{ io.delta.tables.DeltaTable.restoreToTimestamp("2019-01-01") }}} + * + * @since 1.2.0 + */ + def restoreToTimestamp(timestamp: String): DataFrame = { + executeRestore(table, None, Some(timestamp)) + } + + /** + * Updates the protocol version of the table to leverage new features. Upgrading the reader + * version will prevent all clients that have an older version of Delta Lake from accessing this + * table. Upgrading the writer version will prevent older versions of Delta Lake to write to this + * table. The reader or writer version cannot be downgraded. + * + * See online documentation and Delta's protocol specification at PROTOCOL.md for more details. + * + * @since 0.8.0 + */ + def upgradeTableProtocol(readerVersion: Int, writerVersion: Int): Unit = + withActiveSession(sparkSession) { + val alterTableCmd = AlterTableSetPropertiesDeltaCommand( + table, + DeltaConfigs.validateConfigurations( + Map( + "delta.minReaderVersion" -> readerVersion.toString, + "delta.minWriterVersion" -> writerVersion.toString))) + toDataset(sparkSession, alterTableCmd) + } + + /** + * Modify the protocol to add a supported feature, and if the table does not support table + * features, upgrade the protocol automatically. In such a case when the provided feature is + * writer-only, the table's writer version will be upgraded to `7`, and when the provided + * feature is reader-writer, both reader and writer versions will be upgraded, to `(3, 7)`. + * + * See online documentation and Delta's protocol specification at PROTOCOL.md for more details. + * + * @since 2.3.0 + */ + def addFeatureSupport(featureName: String): Unit = withActiveSession(sparkSession) { + // Do not check for the correctness of the provided feature name. The ALTER TABLE command will + // do that in a transaction. + val alterTableCmd = AlterTableSetPropertiesDeltaCommand( + table, + Map( + TableFeatureProtocolUtils.propertyKey(featureName) -> + TableFeatureProtocolUtils.FEATURE_PROP_SUPPORTED)) + toDataset(sparkSession, alterTableCmd) + } +} + +/** + * Companion object to create DeltaTable instances. + * + * {{{ + * DeltaTable.forPath(sparkSession, pathToTheDeltaTable) + * }}} + * + * @since 0.3.0 + */ +object DeltaTable { + + /** + * Create a DeltaTable from the given parquet table and partition schema. + * Takes an existing parquet table and constructs a delta transaction log in the base path of + * that table. + * + * Note: Any changes to the table during the conversion process may not result in a consistent + * state at the end of the conversion. Users should stop any changes to the table before the + * conversion is started. 
+ * + * An example usage would be + * {{{ + * io.delta.tables.DeltaTable.convertToDelta( + * spark, + * "parquet.`/path`", + * new StructType().add(StructField("key1", LongType)).add(StructField("key2", StringType))) + * }}} + * + * @since 0.4.0 + */ + def convertToDelta( + spark: SparkSession, + identifier: String, + partitionSchema: StructType): DeltaTable = { + val tableId: TableIdentifier = spark.sessionState.sqlParser.parseTableIdentifier(identifier) + DeltaConvert.executeConvert(spark, tableId, Some(partitionSchema), None) + } + + /** + * Create a DeltaTable from the given parquet table and partition schema. + * Takes an existing parquet table and constructs a delta transaction log in the base path of + * that table. + * + * Note: Any changes to the table during the conversion process may not result in a consistent + * state at the end of the conversion. Users should stop any changes to the table before the + * conversion is started. + * + * An example usage would be + * {{{ + * io.delta.tables.DeltaTable.convertToDelta( + * spark, + * "parquet.`/path`", + * "key1 long, key2 string") + * }}} + * + * @since 0.4.0 + */ + def convertToDelta( + spark: SparkSession, + identifier: String, + partitionSchema: String): DeltaTable = { + val tableId: TableIdentifier = spark.sessionState.sqlParser.parseTableIdentifier(identifier) + DeltaConvert.executeConvert(spark, tableId, Some(StructType.fromDDL(partitionSchema)), None) + } + + /** + * Create a DeltaTable from the given parquet table. Takes an existing parquet table and + * constructs a delta transaction log in the base path of the table. + * + * Note: Any changes to the table during the conversion process may not result in a consistent + * state at the end of the conversion. Users should stop any changes to the table before the + * conversion is started. + * + * An Example would be + * {{{ + * io.delta.tables.DeltaTable.convertToDelta( + * spark, + * "parquet.`/path`" + * }}} + * + * @since 0.4.0 + */ + def convertToDelta( + spark: SparkSession, + identifier: String): DeltaTable = { + val tableId: TableIdentifier = spark.sessionState.sqlParser.parseTableIdentifier(identifier) + DeltaConvert.executeConvert(spark, tableId, None, None) + } + + /** + * Instantiate a [[DeltaTable]] object representing the data at the given path, If the given + * path is invalid (i.e. either no table exists or an existing table is not a Delta table), + * it throws a `not a Delta table` error. + * + * Note: This uses the active SparkSession in the current thread to read the table data. Hence, + * this throws error if active SparkSession has not been set, that is, + * `SparkSession.getActiveSession()` is empty. + * + * @since 0.3.0 + */ + def forPath(path: String): DeltaTable = { + val sparkSession = SparkSession.getActiveSession.getOrElse { + throw DeltaErrors.activeSparkSessionNotFound() + } + forPath(sparkSession, path) + } + + /** + * Instantiate a [[DeltaTable]] object representing the data at the given path, If the given + * path is invalid (i.e. either no table exists or an existing table is not a Delta table), + * it throws a `not a Delta table` error. + * + * @since 0.3.0 + */ + def forPath(sparkSession: SparkSession, path: String): DeltaTable = { + forPath(sparkSession, path, Map.empty[String, String]) + } + + /** + * Instantiate a [[DeltaTable]] object representing the data at the given path, If the given + * path is invalid (i.e. either no table exists or an existing table is not a Delta table), + * it throws a `not a Delta table` error. 
+ * + * @param hadoopConf Hadoop configuration starting with "fs." or "dfs." will be picked up + * by `DeltaTable` to access the file system when executing queries. + * Other configurations will not be allowed. + * + * {{{ + * val hadoopConf = Map( + * "fs.s3a.access.key" -> "", + * "fs.s3a.secret.key" -> "" + * ) + * DeltaTable.forPath(spark, "/path/to/table", hadoopConf) + * }}} + * @since 2.2.0 + */ + def forPath( + sparkSession: SparkSession, + path: String, + hadoopConf: scala.collection.Map[String, String]): DeltaTable = { + // We only pass hadoopConf so that we won't pass any unsafe options to Delta. + val badOptions = hadoopConf.filterKeys { k => + !DeltaTableUtils.validDeltaTableHadoopPrefixes.exists(k.startsWith) + }.toMap + if (!badOptions.isEmpty) { + throw DeltaErrors.unsupportedDeltaTableForPathHadoopConf(badOptions) + } + val fileSystemOptions: Map[String, String] = hadoopConf.toMap + val hdpPath = new Path(path) + if (DeltaTableUtils.isDeltaTable(sparkSession, hdpPath, fileSystemOptions)) { + new DeltaTable(sparkSession.read.format("delta").options(fileSystemOptions).load(path), + DeltaTableV2( + spark = sparkSession, + path = hdpPath, + options = fileSystemOptions)) + } else { + throw DeltaErrors.notADeltaTableException(DeltaTableIdentifier(path = Some(path))) + } + } + + /** + * Java friendly API to instantiate a [[DeltaTable]] object representing the data at the given + * path, If the given path is invalid (i.e. either no table exists or an existing table is not a + * Delta table), it throws a `not a Delta table` error. + * + * @param hadoopConf Hadoop configuration starting with "fs." or "dfs." will be picked up + * by `DeltaTable` to access the file system when executing queries. + * Other configurations will be ignored. + * + * {{{ + * val hadoopConf = Map( + * "fs.s3a.access.key" -> "", + * "fs.s3a.secret.key", "" + * ) + * DeltaTable.forPath(spark, "/path/to/table", hadoopConf) + * }}} + * @since 2.2.0 + */ + def forPath( + sparkSession: SparkSession, + path: String, + hadoopConf: java.util.Map[String, String]): DeltaTable = { + val fsOptions = hadoopConf.asScala.toMap + forPath(sparkSession, path, fsOptions) + } + + /** + * Instantiate a [[DeltaTable]] object using the given table name. If the given + * tableOrViewName is invalid (i.e. either no table exists or an existing table is not a + * Delta table), it throws a `not a Delta table` error. Note: Passing a view name will also + * result in this error as views are not supported. + * + * The given tableOrViewName can also be the absolute path of a delta datasource (i.e. + * delta.`path`), If so, instantiate a [[DeltaTable]] object representing the data at + * the given path (consistent with the [[forPath]]). + * + * Note: This uses the active SparkSession in the current thread to read the table data. Hence, + * this throws error if active SparkSession has not been set, that is, + * `SparkSession.getActiveSession()` is empty. + */ + def forName(tableOrViewName: String): DeltaTable = { + val sparkSession = SparkSession.getActiveSession.getOrElse { + throw DeltaErrors.activeSparkSessionNotFound() + } + forName(sparkSession, tableOrViewName) + } + + /** + * Instantiate a [[DeltaTable]] object using the given table name using the given + * SparkSession. If the given tableName is invalid (i.e. either no table exists or an + * existing table is not a Delta table), it throws a `not a Delta table` error. Note: + * Passing a view name will also result in this error as views are not supported. 
+ * + * The given tableName can also be the absolute path of a delta datasource (i.e. + * delta.`path`), If so, instantiate a [[DeltaTable]] object representing the data at + * the given path (consistent with the [[forPath]]). + */ + def forName(sparkSession: SparkSession, tableName: String): DeltaTable = { + val tableId = sparkSession.sessionState.sqlParser.parseTableIdentifier(tableName) + if (DeltaTableUtils.isDeltaTable(sparkSession, tableId)) { + val tbl = sparkSession.sessionState.catalog.getTableMetadata(tableId) + new DeltaTable( + sparkSession.table(tableName), + DeltaTableV2(sparkSession, new Path(tbl.location), Some(tbl), Some(tableName))) + } else if (DeltaTableUtils.isValidPath(tableId)) { + forPath(sparkSession, tableId.table) + } else { + throw DeltaErrors.notADeltaTableException(DeltaTableIdentifier(table = Some(tableId))) + } + } + + /** + * Check if the provided `identifier` string, in this case a file path, + * is the root of a Delta table using the given SparkSession. + * + * An example would be + * {{{ + * DeltaTable.isDeltaTable(spark, "path/to/table") + * }}} + * + * @since 0.4.0 + */ + def isDeltaTable(sparkSession: SparkSession, identifier: String): Boolean = { + val identifierPath = new Path(identifier) + if (sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_STRICT_CHECK_DELTA_TABLE)) { + val rootOption = DeltaTableUtils.findDeltaTableRoot(sparkSession, identifierPath) + rootOption.isDefined && DeltaLog.forTable(sparkSession, rootOption.get).tableExists + } else { + DeltaTableUtils.isDeltaTable(sparkSession, identifierPath) + } + } + + /** + * Check if the provided `identifier` string, in this case a file path, + * is the root of a Delta table. + * + * Note: This uses the active SparkSession in the current thread to search for the table. Hence, + * this throws error if active SparkSession has not been set, that is, + * `SparkSession.getActiveSession()` is empty. + * + * An example would be + * {{{ + * DeltaTable.isDeltaTable(spark, "/path/to/table") + * }}} + * + * @since 0.4.0 + */ + def isDeltaTable(identifier: String): Boolean = { + val sparkSession = SparkSession.getActiveSession.getOrElse { + throw DeltaErrors.activeSparkSessionNotFound() + } + isDeltaTable(sparkSession, identifier) + } + + /** + * :: Evolving :: + * + * Return an instance of [[DeltaTableBuilder]] to create a Delta table, + * error if the table exists (the same as SQL `CREATE TABLE`). + * Refer to [[DeltaTableBuilder]] for more details. + * + * Note: This uses the active SparkSession in the current thread to read the table data. Hence, + * this throws error if active SparkSession has not been set, that is, + * `SparkSession.getActiveSession()` is empty. + * + * @since 1.0.0 + */ + @Evolving + def create(): DeltaTableBuilder = { + val sparkSession = SparkSession.getActiveSession.getOrElse { + throw DeltaErrors.activeSparkSessionNotFound() + } + create(sparkSession) + } + + /** + * :: Evolving :: + * + * Return an instance of [[DeltaTableBuilder]] to create a Delta table, + * error if the table exists (the same as SQL `CREATE TABLE`). + * Refer to [[DeltaTableBuilder]] for more details. 
+ * + * @param spark sparkSession sparkSession passed by the user + * @since 1.0.0 + */ + @Evolving + def create(spark: SparkSession): DeltaTableBuilder = { + new DeltaTableBuilder(spark, CreateTableOptions(ifNotExists = false)) + } + + /** + * :: Evolving :: + * + * Return an instance of [[DeltaTableBuilder]] to create a Delta table, + * if it does not exists (the same as SQL `CREATE TABLE IF NOT EXISTS`). + * Refer to [[DeltaTableBuilder]] for more details. + * + * Note: This uses the active SparkSession in the current thread to read the table data. Hence, + * this throws error if active SparkSession has not been set, that is, + * `SparkSession.getActiveSession()` is empty. + * + * @since 1.0.0 + */ + @Evolving + def createIfNotExists(): DeltaTableBuilder = { + val sparkSession = SparkSession.getActiveSession.getOrElse { + throw DeltaErrors.activeSparkSessionNotFound() + } + createIfNotExists(sparkSession) + } + + /** + * :: Evolving :: + * + * Return an instance of [[DeltaTableBuilder]] to create a Delta table, + * if it does not exists (the same as SQL `CREATE TABLE IF NOT EXISTS`). + * Refer to [[DeltaTableBuilder]] for more details. + * + * @param spark sparkSession sparkSession passed by the user + * @since 1.0.0 + */ + @Evolving + def createIfNotExists(spark: SparkSession): DeltaTableBuilder = { + new DeltaTableBuilder(spark, CreateTableOptions(ifNotExists = true)) + } + + /** + * :: Evolving :: + * + * Return an instance of [[DeltaTableBuilder]] to replace a Delta table, + * error if the table doesn't exist (the same as SQL `REPLACE TABLE`) + * Refer to [[DeltaTableBuilder]] for more details. + * + * Note: This uses the active SparkSession in the current thread to read the table data. Hence, + * this throws error if active SparkSession has not been set, that is, + * `SparkSession.getActiveSession()` is empty. + * + * @since 1.0.0 + */ + @Evolving + def replace(): DeltaTableBuilder = { + val sparkSession = SparkSession.getActiveSession.getOrElse { + throw DeltaErrors.activeSparkSessionNotFound() + } + replace(sparkSession) + } + + /** + * :: Evolving :: + * + * Return an instance of [[DeltaTableBuilder]] to replace a Delta table, + * error if the table doesn't exist (the same as SQL `REPLACE TABLE`) + * Refer to [[DeltaTableBuilder]] for more details. + * + * @param spark sparkSession sparkSession passed by the user + * @since 1.0.0 + */ + @Evolving + def replace(spark: SparkSession): DeltaTableBuilder = { + new DeltaTableBuilder(spark, ReplaceTableOptions(orCreate = false)) + } + + /** + * :: Evolving :: + * + * Return an instance of [[DeltaTableBuilder]] to replace a Delta table + * or create table if not exists (the same as SQL `CREATE OR REPLACE TABLE`) + * Refer to [[DeltaTableBuilder]] for more details. + * + * Note: This uses the active SparkSession in the current thread to read the table data. Hence, + * this throws error if active SparkSession has not been set, that is, + * `SparkSession.getActiveSession()` is empty. + * + * @since 1.0.0 + */ + @Evolving + def createOrReplace(): DeltaTableBuilder = { + val sparkSession = SparkSession.getActiveSession.getOrElse { + throw DeltaErrors.activeSparkSessionNotFound() + } + createOrReplace(sparkSession) + } + + /** + * :: Evolving :: + * + * Return an instance of [[DeltaTableBuilder]] to replace a Delta table, + * or create table if not exists (the same as SQL `CREATE OR REPLACE TABLE`) + * Refer to [[DeltaTableBuilder]] for more details. + * + * @param spark sparkSession sparkSession passed by the user. 
+ * @since 1.0.0 + */ + @Evolving + def createOrReplace(spark: SparkSession): DeltaTableBuilder = { + new DeltaTableBuilder(spark, ReplaceTableOptions(orCreate = true)) + } + + /** + * :: Evolving :: + * + * Return an instance of [[DeltaColumnBuilder]] to specify a column. + * Refer to [[DeltaTableBuilder]] for examples and [[DeltaColumnBuilder]] detailed APIs. + * + * Note: This uses the active SparkSession in the current thread to read the table data. Hence, + * this throws error if active SparkSession has not been set, that is, + * `SparkSession.getActiveSession()` is empty. + * + * @param colName string the column name + * @since 1.0.0 + */ + @Evolving + def columnBuilder(colName: String): DeltaColumnBuilder = { + val sparkSession = SparkSession.getActiveSession.getOrElse { + throw DeltaErrors.activeSparkSessionNotFound() + } + columnBuilder(sparkSession, colName) + } + + /** + * :: Evolving :: + * + * Return an instance of [[DeltaColumnBuilder]] to specify a column. + * Refer to [[DeltaTableBuilder]] for examples and [[DeltaColumnBuilder]] detailed APIs. + * + * @param spark sparkSession sparkSession passed by the user + * @param colName string the column name + * @since 1.0.0 + */ + @Evolving + def columnBuilder(spark: SparkSession, colName: String): DeltaColumnBuilder = { + new DeltaColumnBuilder(spark, colName) + } +} diff --git a/spark/src/main/scala/io/delta/tables/DeltaTableBuilder.scala b/spark/src/main/scala/io/delta/tables/DeltaTableBuilder.scala new file mode 100644 index 00000000000..293b1d89b2f --- /dev/null +++ b/spark/src/main/scala/io/delta/tables/DeltaTableBuilder.scala @@ -0,0 +1,370 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.tables + +import scala.collection.mutable + +import org.apache.spark.sql.delta.{DeltaErrors, DeltaTableUtils} +import org.apache.spark.sql.delta.DeltaTableUtils.withActiveSession +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import io.delta.tables.execution._ + +import org.apache.spark.annotation._ +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.plans.logical.{CreateTable, LogicalPlan, ReplaceTable} +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.types.{DataType, StructField, StructType} + +/** + * :: Evolving :: + * + * Builder to specify how to create / replace a Delta table. + * You must specify the table name or the path before executing the builder. + * You can specify the table columns, the partitioning columns, the location of the data, + * the table comment and the property, and how you want to create / replace the Delta table. + * + * After executing the builder, an instance of [[DeltaTable]] is returned. 
+ * + * Scala example to create a Delta table with generated columns, using the table name: + * {{{ + * val table: DeltaTable = DeltaTable.create() + * .tableName("testTable") + * .addColumn("c1", dataType = "INT", nullable = false) + * .addColumn( + * DeltaTable.columnBuilder("c2") + * .dataType("INT") + * .generatedAlwaysAs("c1 + 10") + * .build() + * ) + * .addColumn( + * DeltaTable.columnBuilder("c3") + * .dataType("INT") + * .comment("comment") + * .nullable(true) + * .build() + * ) + * .partitionedBy("c1", "c2") + * .execute() + * }}} + * + * Scala example to create a delta table using the location: + * {{{ + * val table: DeltaTable = DeltaTable.createIfNotExists(spark) + * .location("/foo/`bar`") + * .addColumn("c1", dataType = "INT", nullable = false) + * .addColumn( + * DeltaTable.columnBuilder(spark, "c2") + * .dataType("INT") + * .generatedAlwaysAs("c1 + 10") + * .build() + * ) + * .addColumn( + * DeltaTable.columnBuilder(spark, "c3") + * .dataType("INT") + * .comment("comment") + * .nullable(true) + * .build() + * ) + * .partitionedBy("c1", "c2") + * .execute() + * }}} + * + * Java Example to replace a table: + * {{{ + * DeltaTable table = DeltaTable.replace() + * .tableName("db.table") + * .addColumn("c1", "INT", false) + * .addColumn( + * DeltaTable.columnBuilder("c2") + * .dataType("INT") + * .generatedAlwaysBy("c1 + 10") + * .build() + * ) + * .execute(); + * }}} + * + * @since 1.0.0 + */ +@Evolving +class DeltaTableBuilder private[tables]( + spark: SparkSession, + builderOption: DeltaTableBuilderOptions) { + private var identifier: String = null + private var partitioningColumns: Option[Seq[String]] = None + private var columns: mutable.Seq[StructField] = mutable.Seq.empty + private var location: Option[String] = None + private var tblComment: Option[String] = None + private var properties = + if (spark.sessionState.conf.getConf(DeltaSQLConf.TABLE_BUILDER_FORCE_TABLEPROPERTY_LOWERCASE)) { + CaseInsensitiveMap(Map.empty[String, String]) + } else { + Map.empty[String, String] + } + + + private val FORMAT_NAME: String = "delta" + + /** + * :: Evolving :: + * + * Specify the table name, optionally qualified with a database name [database_name.] table_name + * + * @param identifier string the table name + * @since 1.0.0 + */ + @Evolving + def tableName(identifier: String): DeltaTableBuilder = { + this.identifier = identifier + this + } + + /** + * :: Evolving :: + * + * Specify the table comment to describe the table. + * + * @param comment string table comment + * @since 1.0.0 + */ + @Evolving + def comment(comment: String): DeltaTableBuilder = { + tblComment = Option(comment) + this + } + + /** + * :: Evolving :: + * + * Specify the path to the directory where table data is stored, + * which could be a path on distributed storage. + * + * @param location string the data location + * @since 1.0.0 + */ + @Evolving + def location(location: String): DeltaTableBuilder = { + this.location = Option(location) + this + } + + /** + * :: Evolving :: + * + * Specify a column. + * + * @param colName string the column name + * @param dataType string the DDL data type + * @since 1.0.0 + */ + @Evolving + def addColumn(colName: String, dataType: String): DeltaTableBuilder = { + addColumn( + DeltaTable.columnBuilder(spark, colName).dataType(dataType).build() + ) + this + } + + /** + * :: Evolving :: + * + * Specify a column. 
+ * + * @param colName string the column name + * @param dataType dataType the DDL data type + * @since 1.0.0 + */ + @Evolving + def addColumn(colName: String, dataType: DataType): DeltaTableBuilder = { + addColumn( + DeltaTable.columnBuilder(spark, colName).dataType(dataType).build() + ) + this + } + + /** + * :: Evolving :: + * + * Specify a column. + * + * @param colName string the column name + * @param dataType string the DDL data type + * @param nullable boolean whether the column is nullable + * @since 1.0.0 + */ + @Evolving + def addColumn(colName: String, dataType: String, nullable: Boolean): DeltaTableBuilder = { + addColumn( + DeltaTable.columnBuilder(spark, colName).dataType(dataType).nullable(nullable).build() + ) + this + } + + /** + * :: Evolving :: + * + * Specify a column. + * + * @param colName string the column name + * @param dataType dataType the DDL data type + * @param nullable boolean whether the column is nullable + * @since 1.0.0 + */ + @Evolving + def addColumn(colName: String, dataType: DataType, nullable: Boolean): DeltaTableBuilder = { + addColumn( + DeltaTable.columnBuilder(spark, colName).dataType(dataType).nullable(nullable).build() + ) + this + } + + /** + * :: Evolving :: + * + * Specify a column. + * + * @param col structField the column struct + * @since 1.0.0 + */ + @Evolving + def addColumn(col: StructField): DeltaTableBuilder = { + columns = columns :+ col + this + } + + + /** + * :: Evolving :: + * + * Specify columns with an existing schema. + * + * @param cols structType the existing schema for columns + * @since 1.0.0 + */ + @Evolving + def addColumns(cols: StructType): DeltaTableBuilder = { + columns = columns ++ cols.toSeq + this + } + + /** + * :: Evolving :: + * + * Specify the columns to partition the output on the file system. + * + * Note: This should only include table columns already defined in schema. + * + * @param colNames string* column names for partitioning + * @since 1.0.0 + */ + @Evolving + @scala.annotation.varargs + def partitionedBy(colNames: String*): DeltaTableBuilder = { + partitioningColumns = Option(colNames) + this + } + + /** + * :: Evolving :: + * + * Specify a key-value pair to tag the table definition. + * + * @param key string the table property key + * @param value string the table property value + * @since 1.0.0 + */ + @Evolving + def property(key: String, value: String): DeltaTableBuilder = { + this.properties = this.properties + (key -> value) + this + } + + /** + * :: Evolving :: + * + * Execute the command to create / replace a Delta table and returns a instance of [[DeltaTable]]. + * + * @since 1.0.0 + */ + @Evolving + def execute(): DeltaTable = withActiveSession(spark) { + if (identifier == null && location.isEmpty) { + throw DeltaErrors.analysisException("Table name or location has to be specified") + } + + if (this.identifier == null) { + identifier = s"delta.`${location.get}`" + } + + // Return DeltaTable Object. + val tableId: TableIdentifier = spark.sessionState.sqlParser.parseTableIdentifier(identifier) + + if (DeltaTableUtils.isValidPath(tableId) && location.nonEmpty + && tableId.table != location.get) { + throw DeltaErrors.analysisException( + s"Creating path-based Delta table with a different location isn't supported. 
" + + s"Identifier: $identifier, Location: ${location.get}") + } + + val table = spark.sessionState.sqlParser.parseMultipartIdentifier(identifier) + + val partitioning = partitioningColumns.map { colNames => + colNames.map(name => DeltaTableUtils.parseColToTransform(name)) + }.getOrElse(Seq.empty[Transform]) + + val tableSpec = org.apache.spark.sql.catalyst.plans.logical.TableSpec( + properties = properties, + provider = Some(FORMAT_NAME), + options = Map.empty, + location = location, + serde = None, + comment = tblComment, + external = false + ) + + val stmt = builderOption match { + case CreateTableOptions(ifNotExists) => + val unresolvedTable = org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier(table) + CreateTable( + unresolvedTable, + StructType(columns.toSeq), + partitioning, + tableSpec, + ifNotExists) + case ReplaceTableOptions(orCreate) => + val unresolvedTable = org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier(table) + ReplaceTable( + unresolvedTable, + StructType(columns.toSeq), + partitioning, + tableSpec, + orCreate) + } + val qe = spark.sessionState.executePlan(stmt) + // call `QueryExecution.toRDD` to trigger the execution of commands. + SQLExecution.withNewExecutionId(qe, Some("create delta table"))(qe.toRdd) + + // Return DeltaTable Object. + if (DeltaTableUtils.isValidPath(tableId)) { + DeltaTable.forPath(spark, location.get) + } else { + DeltaTable.forName(spark, this.identifier) + } + } +} diff --git a/spark/src/main/scala/io/delta/tables/execution/DeltaConvert.scala b/spark/src/main/scala/io/delta/tables/execution/DeltaConvert.scala new file mode 100644 index 00000000000..905fe7956d4 --- /dev/null +++ b/spark/src/main/scala/io/delta/tables/execution/DeltaConvert.scala @@ -0,0 +1,44 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.tables.execution + +import org.apache.spark.sql.delta.DeltaTableUtils.withActiveSession +import org.apache.spark.sql.delta.commands.ConvertToDeltaCommand +import io.delta.tables.DeltaTable + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.types.StructType + +trait DeltaConvertBase { + def executeConvert( + spark: SparkSession, + tableIdentifier: TableIdentifier, + partitionSchema: Option[StructType], + deltaPath: Option[String]): DeltaTable = withActiveSession(spark) { + val cvt = ConvertToDeltaCommand(tableIdentifier, partitionSchema, collectStats = true, + deltaPath) + cvt.run(spark) + if (cvt.isCatalogTable(spark.sessionState.analyzer, tableIdentifier)) { + DeltaTable.forName(spark, tableIdentifier.toString) + } else { + DeltaTable.forPath(spark, tableIdentifier.table) + } + } +} + +object DeltaConvert extends DeltaConvertBase {} diff --git a/spark/src/main/scala/io/delta/tables/execution/DeltaTableBuilderOptions.scala b/spark/src/main/scala/io/delta/tables/execution/DeltaTableBuilderOptions.scala new file mode 100644 index 00000000000..0279cf1d588 --- /dev/null +++ b/spark/src/main/scala/io/delta/tables/execution/DeltaTableBuilderOptions.scala @@ -0,0 +1,38 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.tables.execution + +/** + * DeltaTableBuilder option to indicate whether it's to create / replace the table. + */ +sealed trait DeltaTableBuilderOptions + +/** + * Specify that the builder is to create a Delta table. + * + * @param ifNotExists boolean whether to ignore if the table already exists. + */ +case class CreateTableOptions(ifNotExists: Boolean) extends DeltaTableBuilderOptions + +/** + * Specify that the builder is to replace a Delta table. + * + * @param orCreate boolean whether to create the table if the table doesn't exist. + */ +case class ReplaceTableOptions(orCreate: Boolean) extends DeltaTableBuilderOptions + + diff --git a/spark/src/main/scala/io/delta/tables/execution/DeltaTableOperations.scala b/spark/src/main/scala/io/delta/tables/execution/DeltaTableOperations.scala new file mode 100644 index 00000000000..56f60a28035 --- /dev/null +++ b/spark/src/main/scala/io/delta/tables/execution/DeltaTableOperations.scala @@ -0,0 +1,120 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.tables.execution + +import scala.collection.Map + +import org.apache.spark.sql.catalyst.TimeTravel +import org.apache.spark.sql.delta.DeltaLog +import org.apache.spark.sql.delta.DeltaTableUtils.withActiveSession +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands.{DeltaGenerateCommand, DescribeDeltaDetailCommand, VacuumCommand} +import org.apache.spark.sql.delta.util.AnalysisHelper +import io.delta.tables.DeltaTable + +import org.apache.spark.sql.{functions, Column, DataFrame} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.connector.catalog.Identifier +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation + +/** + * Interface to provide the actual implementations of DeltaTable operations. + */ +trait DeltaTableOperations extends AnalysisHelper { self: DeltaTable => + + protected def executeDelete(condition: Option[Expression]): Unit = improveUnsupportedOpError { + withActiveSession(sparkSession) { + val delete = DeleteFromTable( + self.toDF.queryExecution.analyzed, + condition.getOrElse(Literal.TrueLiteral)) + toDataset(sparkSession, delete) + } + } + + protected def executeHistory( + deltaLog: DeltaLog, + limit: Option[Int] = None, + tableId: Option[TableIdentifier] = None): DataFrame = withActiveSession(sparkSession) { + val history = deltaLog.history + sparkSession.createDataFrame(history.getHistory(limit)) + } + + protected def executeDetails( + path: String, + tableIdentifier: Option[TableIdentifier]): DataFrame = withActiveSession(sparkSession) { + val details = DescribeDeltaDetailCommand(Option(path), tableIdentifier, self.deltaLog.options) + toDataset(sparkSession, details) + } + + protected def executeGenerate(tblIdentifier: String, mode: String): Unit = + withActiveSession(sparkSession) { + val tableId: TableIdentifier = sparkSession + .sessionState + .sqlParser + .parseTableIdentifier(tblIdentifier) + val generate = DeltaGenerateCommand(mode, tableId, self.deltaLog.options) + toDataset(sparkSession, generate) + } + + protected def executeUpdate( + set: Map[String, Column], + condition: Option[Column]): Unit = improveUnsupportedOpError { + withActiveSession(sparkSession) { + val assignments = set.map { case (targetColName, column) => + Assignment(UnresolvedAttribute.quotedString(targetColName), column.expr) + }.toSeq + val update = + UpdateTable(self.toDF.queryExecution.analyzed, assignments, condition.map(_.expr)) + toDataset(sparkSession, update) + } + } + + protected def executeVacuum( + deltaLog: DeltaLog, + retentionHours: Option[Double], + tableId: Option[TableIdentifier] = None): DataFrame = withActiveSession(sparkSession) { + VacuumCommand.gc(sparkSession, deltaLog, false, retentionHours) + sparkSession.emptyDataFrame + } + + protected def executeRestore( + table: DeltaTableV2, + versionAsOf: Option[Long], + timestampAsOf: Option[String]): DataFrame = withActiveSession(sparkSession) { + val identifier = table.getTableIdentifierIfExists.map( + id => Identifier.of(id.database.toArray, id.table)) + val sourceRelation = DataSourceV2Relation.create(table, None, identifier) + + val restore = RestoreTableStatement( + TimeTravel( + sourceRelation, + timestampAsOf.map(Literal(_)), + versionAsOf, + Some("deltaTable")) + ) + toDataset(sparkSession, restore) + } + + 
protected def toStrColumnMap(map: Map[String, String]): Map[String, Column] = { + map.toSeq.map { case (k, v) => k -> functions.expr(v) }.toMap + } + + protected def sparkSession = self.toDF.sparkSession +} diff --git a/spark/src/main/scala/io/delta/tables/execution/VacuumTableCommand.scala b/spark/src/main/scala/io/delta/tables/execution/VacuumTableCommand.scala new file mode 100644 index 00000000000..3d7933467c3 --- /dev/null +++ b/spark/src/main/scala/io/delta/tables/execution/VacuumTableCommand.scala @@ -0,0 +1,69 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.tables.execution + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier, DeltaTableUtils, UnresolvedDeltaPathOrIdentifier} +import org.apache.spark.sql.delta.commands.DeltaCommand +import org.apache.spark.sql.delta.commands.VacuumCommand +import org.apache.spark.sql.delta.commands.VacuumCommand.getDeltaTable +import org.apache.spark.sql.execution.command.{LeafRunnableCommand, RunnableCommand} +import org.apache.spark.sql.types.StringType + +/** + * The `vacuum` command implementation for Spark SQL. Example SQL: + * {{{ + * VACUUM ('/path/to/dir' | delta.`/path/to/dir`) [RETAIN number HOURS] [DRY RUN]; + * }}} + */ +case class VacuumTableCommand( + override val child: LogicalPlan, + horizonHours: Option[Double], + dryRun: Boolean) extends RunnableCommand with UnaryNode with DeltaCommand { + + override val output: Seq[Attribute] = + Seq(AttributeReference("path", StringType, nullable = true)()) + + override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = + copy(child = newChild) + + override def run(sparkSession: SparkSession): Seq[Row] = { + val deltaTable = getDeltaTable(child, "VACUUM") + // The VACUUM command is only supported on existing delta tables. If the target table doesn't + // exist or it is based on a partition directory, an exception will be thrown. 
+ if (!deltaTable.tableExists || deltaTable.hasPartitionFilters) { + throw DeltaErrors.notADeltaTableException( + "VACUUM", + DeltaTableIdentifier(path = Some(deltaTable.path.toString))) + } + VacuumCommand.gc(sparkSession, deltaTable.deltaLog, dryRun, horizonHours).collect() + } +} + +object VacuumTableCommand { + def apply( + path: Option[String], + table: Option[TableIdentifier], + horizonHours: Option[Double], + dryRun: Boolean): VacuumTableCommand = { + val child = UnresolvedDeltaPathOrIdentifier(path, table, "VACUUM") + VacuumTableCommand(child, horizonHours, dryRun) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/catalyst/TimeTravel.scala b/spark/src/main/scala/org/apache/spark/sql/catalyst/TimeTravel.scala new file mode 100644 index 00000000000..17ae33a67e1 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/catalyst/TimeTravel.scala @@ -0,0 +1,44 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst + +// scalastyle:off import.ordering.noEmptyLine + +import com.databricks.spark.util.DatabricksLogging + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} + +/** + * A logical node used to time travel the child relation to the given `timestamp` or `version`. + * The `child` must support time travel, e.g. Delta, and cannot be a view, subquery or stream. + * The timestamp expression cannot be a subquery. It must be a timestamp expression. + * @param creationSource The API used to perform time travel, e.g. `atSyntax`, `dfReader` or SQL + */ +case class TimeTravel( + relation: LogicalPlan, + timestamp: Option[Expression], + version: Option[Long], + creationSource: Option[String]) extends LeafNode with DatabricksLogging { + + assert(version.isEmpty ^ timestamp.isEmpty, + "Either the version or timestamp should be provided for time travel") + + override def output: Seq[Attribute] = Nil + + override lazy val resolved: Boolean = false +} diff --git a/spark/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregation/BitmapAggregator.scala b/spark/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregation/BitmapAggregator.scala new file mode 100644 index 00000000000..1e893b2ac23 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregation/BitmapAggregator.scala @@ -0,0 +1,105 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions.aggregation + +import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArray, RoaringBitmapArrayFormat} + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Expression, GenericInternalRow, ImplicitCastInputTypes} +import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate} +import org.apache.spark.sql.catalyst.trees.UnaryLike +import org.apache.spark.sql.types._ + +/** + * This function returns a bitmap representing the set of values of the underlying column. + * + * The bitmap is simply a compressed representation of the set of all integral values that + * appear in the column being aggregated over. + * + * @param child child expression that can produce a column value with `child.eval(inputRow)` + */ +case class BitmapAggregator( + child: Expression, + override val mutableAggBufferOffset: Int, + override val inputAggBufferOffset: Int, + // Take the format as string instead of [[RoaringBitmapArrayFormat.Value]], + // because String is safe to serialize. + serializationFormatString: String) + extends TypedImperativeAggregate[RoaringBitmapArray] with ImplicitCastInputTypes + with UnaryLike[Expression] { + + def this(child: Expression, serializationFormat: RoaringBitmapArrayFormat.Value) = + this(child, 0, 0, serializationFormat.toString) + + override def createAggregationBuffer(): RoaringBitmapArray = new RoaringBitmapArray() + + override def update(buffer: RoaringBitmapArray, input: InternalRow): RoaringBitmapArray = { + val value = child.eval(input) + // Ignore empty rows + if (value != null) { + buffer.add(value.asInstanceOf[Long]) + } + buffer + } + + override def merge(buffer: RoaringBitmapArray, input: RoaringBitmapArray): RoaringBitmapArray = { + buffer.merge(input) + buffer + } + + /** + * Return bitmap cardinality, last and serialized bitmap. 
+ */ + override def eval(bitmapIntegerSet: RoaringBitmapArray): GenericInternalRow = { + // reduce the serialized size via RLE optimisation + bitmapIntegerSet.runOptimize() + new GenericInternalRow(Array( + bitmapIntegerSet.cardinality, + bitmapIntegerSet.last.getOrElse(null), + serialize(bitmapIntegerSet))) + } + + override def serialize(buffer: RoaringBitmapArray): Array[Byte] = { + val serializationFormat = RoaringBitmapArrayFormat.withName(serializationFormatString) + buffer.serializeAsByteArray(serializationFormat) + } + + override def deserialize(storageFormat: Array[Byte]): RoaringBitmapArray = { + RoaringBitmapArray.readFrom(storageFormat) + } + + override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int) + : ImperativeAggregate = copy(mutableAggBufferOffset = newMutableAggBufferOffset) + + override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int) + : ImperativeAggregate = copy(inputAggBufferOffset = newInputAggBufferOffset) + + override def nullable: Boolean = false + + override def dataType: StructType = StructType( + Seq( + StructField("cardinality", LongType), + StructField("last", LongType), + StructField("bitmap", BinaryType) + ) + ) + + override def inputTypes: Seq[AbstractDataType] = Seq(LongType) + + override protected def withNewChildInternal(newChild: Expression): BitmapAggregator = + copy(child = newChild) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CloneTableStatement.scala b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CloneTableStatement.scala new file mode 100644 index 00000000000..fc3b19fb657 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CloneTableStatement.scala @@ -0,0 +1,49 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
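A rough usage sketch of the `BitmapAggregator` defined above. The class is internal to Delta's deletion-vector writes, so this is illustrative only; it assumes the internal `RoaringBitmapArrayFormat.Portable` value and a DataFrame with a LongType `rowIndex` column.

```scala
import org.apache.spark.sql.Column
import org.apache.spark.sql.catalyst.expressions.aggregation.BitmapAggregator
import org.apache.spark.sql.delta.deletionvectors.RoaringBitmapArrayFormat
import org.apache.spark.sql.functions.col

// Wrap the typed imperative aggregate so it can be used in a select.
val agg = new BitmapAggregator(col("rowIndex").expr, RoaringBitmapArrayFormat.Portable)
val bitmapColumn = new Column(agg.toAggregateExpression())

// df.select(bitmapColumn) would yield a single struct row of
// (cardinality, last set value, serialized bitmap bytes), as described by dataType above.
```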
+ */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.expressions.Attribute + +/** + * CLONE TABLE statement, as parsed from SQL + * + * @param source source plan for table to be cloned + * @param target target path or table name where clone should be instantiated + * @param ifNotExists if a table exists at the target, we should not go through with the clone + * @param isReplaceCommand when true, replace the target table if one exists + * @param isCreateCommand when true, create the target table if none exists + * @param tablePropertyOverrides user-defined table properties that should override any properties + * with the same key from the source table + * @param targetLocation if target is a table name then user can provide a targetLocation to + * create an external table with this location + */ +case class CloneTableStatement( + source: LogicalPlan, + target: LogicalPlan, + ifNotExists: Boolean, + isReplaceCommand: Boolean, + isCreateCommand: Boolean, + tablePropertyOverrides: Map[String, String], + targetLocation: Option[String]) extends BinaryNode { + override def output: Seq[Attribute] = Nil + + override def left: LogicalPlan = source + override def right: LogicalPlan = target + override protected def withNewChildrenInternal( + newLeft: LogicalPlan, newRight: LogicalPlan): CloneTableStatement = + copy(source = newLeft, target = newRight) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DeltaDelete.scala b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DeltaDelete.scala new file mode 100644 index 00000000000..1fd785c19a2 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DeltaDelete.scala @@ -0,0 +1,32 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} + +// This only used by Delta which needs to be compatible with DBR 6 and can't use the new class +// added in Spark 3.0: `DeleteFromTable`. +case class DeltaDelete( + child: LogicalPlan, + condition: Option[Expression]) + extends UnaryNode { + override def output: Seq[Attribute] = Seq.empty + + override protected def withNewChildInternal(newChild: LogicalPlan): DeltaDelete = + copy(child = newChild) +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DeltaUpdateTable.scala b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DeltaUpdateTable.scala new file mode 100644 index 00000000000..e2d4d829837 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DeltaUpdateTable.scala @@ -0,0 +1,88 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
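`CloneTableStatement` above backs the clone DDL; a hedged SQL sketch follows. Table names and the location are illustrative, and shallow-clone support and the optional LOCATION clause depend on the Delta release in use.

```scala
// Assumes an active `spark` session configured with the Delta extensions.
spark.sql("""
  CREATE TABLE IF NOT EXISTS events_clone
  SHALLOW CLONE events
  LOCATION '/tmp/delta/events_clone'
""")
```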
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Expression, ExtractValue, GetStructField} + +/** + * Perform UPDATE on a table + * + * @param child the logical plan representing target table + * @param updateColumns: the to-be-updated target columns + * @param updateExpressions: the corresponding update expression if the condition is matched + * @param condition: Only rows that match the condition will be updated + */ +case class DeltaUpdateTable( + child: LogicalPlan, + updateColumns: Seq[Expression], + updateExpressions: Seq[Expression], + condition: Option[Expression]) + extends UnaryNode { + + assert(updateColumns.size == updateExpressions.size) + + override def output: Seq[Attribute] = Seq.empty + + override protected def withNewChildInternal(newChild: LogicalPlan): DeltaUpdateTable = + copy(child = newChild) +} + +object DeltaUpdateTable { + + /** + * Extracts name parts from a resolved expression referring to a nested or non-nested column + * - For non-nested column, the resolved expression will be like `AttributeReference(...)`. + * - For nested column, the resolved expression will be like `Alias(GetStructField(...))`. + * + * In the nested case, the function recursively traverses through the expression to find + * the name parts. For example, a nested field of a.b.c would be resolved to an expression + * + * `Alias(c, GetStructField(c, GetStructField(b, AttributeReference(a)))` + * + * for which this method recursively extracts the name parts as follows: + * + * `Alias(c, GetStructField(c, GetStructField(b, AttributeReference(a)))` + * -> `GetStructField(c, GetStructField(b, AttributeReference(a)))` + * -> `GetStructField(b, AttributeReference(a))` ++ Seq(c) + * -> `AttributeReference(a)` ++ Seq(b, c) + * -> [a, b, c] + */ + def getTargetColNameParts(resolvedTargetCol: Expression, errMsg: String = null): Seq[String] = { + + def fail(extraMsg: String): Nothing = { + val msg = Option(errMsg).map(_ + " - ").getOrElse("") + extraMsg + throw new AnalysisException(msg) + } + + def extractRecursively(expr: Expression): Seq[String] = expr match { + case attr: Attribute => Seq(attr.name) + + case Alias(c, _) => extractRecursively(c) + + case GetStructField(c, _, Some(name)) => extractRecursively(c) :+ name + + case _: ExtractValue => + fail("Updating nested fields is only supported for StructType.") + + case other => + fail(s"Found unsupported expression '$other' while parsing target column name parts") + } + + extractRecursively(resolvedTargetCol) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RestoreTableStatement.scala b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RestoreTableStatement.scala new file mode 100644 index 00000000000..0d2464a88f7 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RestoreTableStatement.scala @@ -0,0 +1,37 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
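A small worked example of the recursion documented above for `getTargetColNameParts`, built directly from catalyst expressions; the schema and names are made up for illustration.

```scala
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GetStructField}
import org.apache.spark.sql.catalyst.plans.logical.DeltaUpdateTable
import org.apache.spark.sql.types._

// a: struct<b: struct<c: int>>
val schema = StructType(Seq(
  StructField("b", StructType(Seq(StructField("c", IntegerType))))))
val a = AttributeReference("a", schema)()
val resolvedCol = GetStructField(GetStructField(a, 0, Some("b")), 0, Some("c"))

// Unwinds the GetStructField chain down to the attribute: Seq("a", "b", "c").
DeltaUpdateTable.getTargetColNameParts(resolvedCol)
```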
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.TimeTravel + +import org.apache.spark.sql.catalyst.expressions.Attribute + +/** + * RESTORE TABLE statement as parsed from SQL + * + * @param table - logical node of the table that will be restored, internally contains either + * version or timestamp. + */ +case class RestoreTableStatement(table: TimeTravel) extends UnaryNode { + + override def child: LogicalPlan = table + + override def output: Seq[Attribute] = Nil + + override protected def withNewChildInternal(newChild: LogicalPlan): RestoreTableStatement = + copy(table = newChild.asInstanceOf[TimeTravel]) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/deltaConstraints.scala b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/deltaConstraints.scala new file mode 100644 index 00000000000..c955acd988d --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/deltaConstraints.scala @@ -0,0 +1,41 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.delta.constraints.{AddConstraint, DropConstraint} + +import org.apache.spark.sql.connector.catalog.TableChange + +/** + * The logical plan of the ALTER TABLE ... ADD CONSTRAINT command. + */ +case class AlterTableAddConstraint( + table: LogicalPlan, constraintName: String, expr: String) extends AlterTableCommand { + override def changes: Seq[TableChange] = Seq(AddConstraint(constraintName, expr)) + + protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(table = newChild) +} + +/** + * The logical plan of the ALTER TABLE ... DROP CONSTRAINT command. 
+ */ +case class AlterTableDropConstraint( + table: LogicalPlan, constraintName: String, ifExists: Boolean) extends AlterTableCommand { + override def changes: Seq[TableChange] = Seq(DropConstraint(constraintName, ifExists)) + + protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(table = newChild) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/deltaMerge.scala b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/deltaMerge.scala new file mode 100644 index 00000000000..30856385d6b --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/deltaMerge.scala @@ -0,0 +1,675 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import java.util.Locale + +import scala.collection.mutable + +import org.apache.spark.sql.delta.{DeltaAnalysisException, DeltaIllegalArgumentException, DeltaUnsupportedOperationException} +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, UnaryExpression} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{DataType, StructField, StructType} + +/** + * A copy of Spark SQL Unevaluable for cross-version compatibility. In 3.0, implementers of + * the original Unevaluable must explicitly override foldable to false; in 3.1 onwards, this + * explicit override is invalid. + */ +trait DeltaUnevaluable extends Expression { + final override def foldable: Boolean = false + + final override def eval(input: InternalRow = null): Any = { + throw new DeltaUnsupportedOperationException( + errorClass = "DELTA_CANNOT_EVALUATE_EXPRESSION", + messageParameters = Array(s"$this") + ) + } + + final override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = + throw new DeltaUnsupportedOperationException( + errorClass = "DELTA_CANNOT_GENERATE_CODE_FOR_EXPRESSION", + messageParameters = Array(s"$this") + ) +} + +/** + * Represents an action in MERGE's UPDATE or INSERT clause where a target columns is assigned the + * value of an expression + * + * @param targetColNameParts The name parts of the target column. This is a sequence to support + * nested fields as targets. + * @param expr Expression to generate the value of the target column. + * @param targetColNameResolved Whether the targetColNameParts have undergone resolution and checks + * for validity. 
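The two ALTER TABLE constraint plans above map onto the CHECK constraint DDL; a brief sketch, with the table and constraint names chosen only for illustration.

```scala
// Assumes an active `spark` session configured with the Delta extensions.
spark.sql("ALTER TABLE events ADD CONSTRAINT validDate CHECK (eventDate >= '2020-01-01')")
spark.sql("ALTER TABLE events DROP CONSTRAINT validDate")
```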
+ */ +case class DeltaMergeAction( + targetColNameParts: Seq[String], + expr: Expression, + targetColNameResolved: Boolean = false) + extends UnaryExpression with DeltaUnevaluable { + override def child: Expression = expr + override def dataType: DataType = expr.dataType + override lazy val resolved: Boolean = { + childrenResolved && checkInputDataTypes().isSuccess && targetColNameResolved + } + override def sql: String = s"$targetColString = ${expr.sql}" + override def toString: String = s"$targetColString = $expr" + private lazy val targetColString: String = targetColNameParts.mkString("`", "`.`", "`") + + override protected def withNewChildInternal(newChild: Expression): DeltaMergeAction = + copy(expr = newChild) +} + + +/** + * Trait that represents a WHEN clause in MERGE. See [[DeltaMergeInto]]. It extends [[Expression]] + * so that Catalyst can find all the expressions in the clause implementations. + */ +sealed trait DeltaMergeIntoClause extends Expression with DeltaUnevaluable { + /** Optional condition of the clause */ + def condition: Option[Expression] + + /** + * Sequence of actions represented as expressions. Note that this can be only be either + * UnresolvedStar, or MergeAction. + */ + def actions: Seq[Expression] + + /** + * Sequence of resolved actions represented as Aliases. Actions, once resolved, must + * be Aliases and not any other NamedExpressions. So it should be safe to do this casting + * as long as this is called after the clause has been resolved. + */ + def resolvedActions: Seq[DeltaMergeAction] = { + assert(actions.forall(_.resolved), "all actions have not been resolved yet") + actions.map(_.asInstanceOf[DeltaMergeAction]) + } + + /** + * String representation of the clause type: Update, Delete or Insert. + */ + def clauseType: String + + override def toString: String = { + val condStr = condition.map { c => s"condition: $c" } + val actionStr = if (actions.isEmpty) None else { + Some("actions: " + actions.mkString("[", ", ", "]")) + } + s"$clauseType " + Seq(condStr, actionStr).flatten.mkString("[", ", ", "]") + } + + override def nullable: Boolean = false + override def dataType: DataType = null + override def children: Seq[Expression] = condition.toSeq ++ actions + + /** Verify whether the expressions in the actions are of the right type */ + protected[logical] def verifyActions(): Unit = actions.foreach { + case _: UnresolvedStar => + case _: DeltaMergeAction => + case a => throw new DeltaIllegalArgumentException( + errorClass = "DELTA_UNEXPECTED_ACTION_EXPRESSION", + messageParameters = Array(s"$a")) + } +} + + +object DeltaMergeIntoClause { + /** + * Convert the parsed columns names and expressions into action for MergeInto. Note: + * - Size of column names and expressions must be the same. + * - If the sizes are zeros and `emptySeqIsStar` is true, this function assumes + * that query had `*` as an action, and therefore generates a single action + * with `UnresolvedStar`. This will be expanded later during analysis. + * - Otherwise, this will convert the names and expressions to MergeActions. 
+ */ + def toActions( + colNames: Seq[UnresolvedAttribute], + exprs: Seq[Expression], + isEmptySeqEqualToStar: Boolean = true): Seq[Expression] = { + assert(colNames.size == exprs.size) + if (colNames.isEmpty && isEmptySeqEqualToStar) { + Seq(UnresolvedStar(None)) + } else { + (colNames, exprs).zipped.map { (col, expr) => DeltaMergeAction(col.nameParts, expr) } + } + } + + def toActions(assignments: Seq[Assignment]): Seq[Expression] = { + if (assignments.isEmpty) { + Seq[Expression](UnresolvedStar(None)) + } else { + assignments.map { + case Assignment(key: UnresolvedAttribute, expr) => DeltaMergeAction(key.nameParts, expr) + case Assignment(key: Attribute, expr) => DeltaMergeAction(Seq(key.name), expr) + case other => + throw new DeltaAnalysisException( + errorClass = "DELTA_MERGE_UNEXPECTED_ASSIGNMENT_KEY", + messageParameters = Array(s"${other.getClass}", s"$other")) + } + } + } +} + +/** Trait that represents WHEN MATCHED clause in MERGE. See [[DeltaMergeInto]]. */ +sealed trait DeltaMergeIntoMatchedClause extends DeltaMergeIntoClause + +/** Represents the clause WHEN MATCHED THEN UPDATE in MERGE. See [[DeltaMergeInto]]. */ +case class DeltaMergeIntoMatchedUpdateClause( + condition: Option[Expression], + actions: Seq[Expression]) + extends DeltaMergeIntoMatchedClause { + + def this(cond: Option[Expression], cols: Seq[UnresolvedAttribute], exprs: Seq[Expression]) = + this(cond, DeltaMergeIntoClause.toActions(cols, exprs)) + + override def clauseType: String = "Update" + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): DeltaMergeIntoMatchedUpdateClause = { + if (condition.isDefined) { + copy(condition = Some(newChildren.head), actions = newChildren.tail) + } else { + copy(condition = None, actions = newChildren) + } + } +} + +/** Represents the clause WHEN MATCHED THEN DELETE in MERGE. See [[DeltaMergeInto]]. */ +case class DeltaMergeIntoMatchedDeleteClause(condition: Option[Expression]) + extends DeltaMergeIntoMatchedClause { + def this(condition: Option[Expression], actions: Seq[DeltaMergeAction]) = this(condition) + + override def clauseType: String = "Delete" + override def actions: Seq[Expression] = Seq.empty + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): DeltaMergeIntoMatchedDeleteClause = + copy(condition = if (condition.isDefined) Some(newChildren.head) else None) +} + +/** Trait that represents WHEN NOT MATCHED clause in MERGE. See [[DeltaMergeInto]]. */ +sealed trait DeltaMergeIntoNotMatchedClause extends DeltaMergeIntoClause + +/** Represents the clause WHEN NOT MATCHED THEN INSERT in MERGE. See [[DeltaMergeInto]]. */ +case class DeltaMergeIntoNotMatchedInsertClause( + condition: Option[Expression], + actions: Seq[Expression]) + extends DeltaMergeIntoNotMatchedClause { + + def this(cond: Option[Expression], cols: Seq[UnresolvedAttribute], exprs: Seq[Expression]) = + this(cond, DeltaMergeIntoClause.toActions(cols, exprs)) + + override def clauseType: String = "Insert" + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): DeltaMergeIntoNotMatchedInsertClause = + if (condition.isDefined) { + copy(condition = Some(newChildren.head), actions = newChildren.tail) + } else { + copy(condition = None, actions = newChildren) + } +} + +/** Trait that represents WHEN NOT MATCHED BY SOURCE clause in MERGE. See [[DeltaMergeInto]]. 
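Each clause class above has a counterpart in the `DeltaTable` merge builder; a hedged sketch, assuming a target table `events`, a source DataFrame `updatesDF`, and a Delta release recent enough to support `whenNotMatchedBySource`.

```scala
import io.delta.tables.DeltaTable

DeltaTable.forName(spark, "events").as("t")
  .merge(updatesDF.as("s"), "t.eventId = s.eventId")
  .whenMatched("s.deleted = true").delete() // DeltaMergeIntoMatchedDeleteClause
  .whenMatched().updateAll()                // DeltaMergeIntoMatchedUpdateClause (UPDATE SET *)
  .whenNotMatched().insertAll()             // DeltaMergeIntoNotMatchedInsertClause (INSERT *)
  .whenNotMatchedBySource().delete()        // DeltaMergeIntoNotMatchedBySourceDeleteClause
  .execute()
```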
*/ +sealed trait DeltaMergeIntoNotMatchedBySourceClause extends DeltaMergeIntoClause + +/** Represents the clause WHEN NOT MATCHED BY SOURCE THEN UPDATE in MERGE. See + * [[DeltaMergeInto]]. */ +case class DeltaMergeIntoNotMatchedBySourceUpdateClause( + condition: Option[Expression], + actions: Seq[Expression]) + extends DeltaMergeIntoNotMatchedBySourceClause { + + def this(cond: Option[Expression], cols: Seq[UnresolvedAttribute], exprs: Seq[Expression]) = + this(cond, DeltaMergeIntoClause.toActions(cols, exprs)) + + override def clauseType: String = "Update" + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): DeltaMergeIntoNotMatchedBySourceUpdateClause = { + if (condition.isDefined) { + copy(condition = Some(newChildren.head), actions = newChildren.tail) + } else { + copy(condition = None, actions = newChildren) + } + } +} + +/** Represents the clause WHEN NOT MATCHED BY SOURCE THEN DELETE in MERGE. See + * [[DeltaMergeInto]]. */ +case class DeltaMergeIntoNotMatchedBySourceDeleteClause(condition: Option[Expression]) + extends DeltaMergeIntoNotMatchedBySourceClause { + def this(condition: Option[Expression], actions: Seq[DeltaMergeAction]) = this(condition) + + override def clauseType: String = "Delete" + override def actions: Seq[Expression] = Seq.empty + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): DeltaMergeIntoNotMatchedBySourceDeleteClause = + copy(condition = if (condition.isDefined) Some(newChildren.head) else None) +} + +/** + * Merges changes specified in the source plan into a target table, based on the given search + * condition and the actions to perform when the condition is matched or not matched by the rows. + * + * The syntax of the MERGE statement is as follows. + * {{{ + * MERGE INTO + * USING + * ON + * [ WHEN MATCHED [ AND ] THEN ] + * [ WHEN MATCHED [ AND ] THEN ] + * ... + * [ WHEN NOT MATCHED [BY TARGET] [ AND ] THEN ] + * [ WHEN NOT MATCHED [BY TARGET] [ AND ] THEN ] + * ... + * [ WHEN NOT MATCHED BY SOURCE [ AND ] THEN ] + * [ WHEN NOT MATCHED BY SOURCE [ AND ] THEN ] + * ... + * + * + * where + * = + * DELETE | + * UPDATE SET column1 = value1 [, column2 = value2 ...] | + * UPDATE SET * [EXCEPT (column1, ...)] + * = INSERT (column1 [, column2 ...]) VALUES (expr1 [, expr2 ...]) + * = + * DELETE | + * UPDATE SET column1 = value1 [, column2 = value2 ...] + * }}} + * + * - There can be any number of WHEN clauses. + * - WHEN MATCHED clauses: + * - Each WHEN MATCHED clause can have an optional condition. However, if there are multiple + * WHEN MATCHED clauses, only the last can omit the condition. + * - WHEN MATCHED clauses are dependent on their ordering; that is, the first clause that + * satisfies the clause's condition has its corresponding action executed. + * - WHEN NOT MATCHED clause: + * - Can only have the INSERT action. If present, they must follow the last WHEN MATCHED clause. + * - Each WHEN NOT MATCHED clause can have an optional condition. However, if there are multiple + * clauses, only the last can omit the condition. + * - WHEN NOT MATCHED clauses are dependent on their ordering; that is, the first clause that + * satisfies the clause's condition has its corresponding action executed. + * - WHEN NOT MATCHED BY SOURCE clauses: + * - Each WHEN NOT MATCHED BY SOURCE clause can have an optional condition. However, if there are + * multiple WHEN NOT MATCHED BY SOURCE clauses, only the last can omit the condition. 
+ * - WHEN NOT MATCHED BY SOURCE clauses are dependent on their ordering; that is, the first + * clause that satisfies the clause's condition has its corresponding action executed. + */ +case class DeltaMergeInto( + target: LogicalPlan, + source: LogicalPlan, + condition: Expression, + matchedClauses: Seq[DeltaMergeIntoMatchedClause], + notMatchedClauses: Seq[DeltaMergeIntoNotMatchedClause], + notMatchedBySourceClauses: Seq[DeltaMergeIntoNotMatchedBySourceClause], + migrateSchema: Boolean, + finalSchema: Option[StructType]) + extends Command with SupportsSubquery { + + (matchedClauses ++ notMatchedClauses ++ notMatchedBySourceClauses).foreach(_.verifyActions()) + + // TODO: extend BinaryCommand once the new Spark version is released + override def children: Seq[LogicalPlan] = Seq(target, source) + override def output: Seq[Attribute] = Seq.empty + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[LogicalPlan]): DeltaMergeInto = + copy(target = newChildren(0), source = newChildren(1)) +} + +object DeltaMergeInto { + def apply( + target: LogicalPlan, + source: LogicalPlan, + condition: Expression, + whenClauses: Seq[DeltaMergeIntoClause]): DeltaMergeInto = { + val notMatchedClauses = whenClauses.collect { case x: DeltaMergeIntoNotMatchedClause => x } + val matchedClauses = whenClauses.collect { case x: DeltaMergeIntoMatchedClause => x } + val notMatchedBySourceClauses = + whenClauses.collect { case x: DeltaMergeIntoNotMatchedBySourceClause => x } + + // grammar enforcement goes here. + if (whenClauses.isEmpty) { + throw new DeltaAnalysisException( + errorClass = "DELTA_MERGE_MISSING_WHEN", + messageParameters = Array.empty + ) + } + + // Check that only the last MATCHED clause omits the condition. + if (matchedClauses.length > 1 && !matchedClauses.init.forall(_.condition.nonEmpty)) { + throw new DeltaAnalysisException( + errorClass = "DELTA_NON_LAST_MATCHED_CLAUSE_OMIT_CONDITION", + messageParameters = Array.empty) + } + + // Check that only the last NOT MATCHED clause omits the condition. + if (notMatchedClauses.length > 1 && !notMatchedClauses.init.forall(_.condition.nonEmpty)) { + throw new DeltaAnalysisException( + errorClass = "DELTA_NON_LAST_NOT_MATCHED_CLAUSE_OMIT_CONDITION", + messageParameters = Array.empty) + } + + // Check that only the last NOT MATCHED BY SOURCE clause omits the condition. + if (notMatchedBySourceClauses.length > 1 && + !notMatchedBySourceClauses.init.forall(_.condition.nonEmpty)) { + throw new DeltaAnalysisException( + errorClass = "DELTA_NON_LAST_NOT_MATCHED_BY_SOURCE_CLAUSE_OMIT_CONDITION", + messageParameters = Array.empty) + } + + DeltaMergeInto( + target, + source, + condition, + matchedClauses, + notMatchedClauses, + notMatchedBySourceClauses, + migrateSchema = false, + finalSchema = Some(target.schema)) + } + + def resolveReferencesAndSchema(merge: DeltaMergeInto, conf: SQLConf)( + resolveExprs: (Seq[Expression], Seq[LogicalPlan]) => Seq[Expression]): DeltaMergeInto = { + val DeltaMergeInto( + target, + source, + condition, + matchedClauses, + notMatchedClauses, + notMatchedBySourceClauses, + _, + _) = merge + + /** + * Resolves expressions against given plans or fail using given message. It makes a best-effort + * attempt to throw specific error messages on which part of the query has a problem. 
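The same grammar in SQL form, roughly as the class comment above lays it out; table and column names are illustrative, and the NOT MATCHED BY SOURCE clause assumes a Delta release that supports it.

```scala
// Assumes an active `spark` session configured with the Delta extensions.
spark.sql("""
  MERGE INTO events t
  USING updates s
  ON t.eventId = s.eventId
  WHEN MATCHED AND s.deleted = true THEN DELETE
  WHEN MATCHED THEN UPDATE SET t.data = s.data
  WHEN NOT MATCHED THEN INSERT (eventId, data) VALUES (s.eventId, s.data)
  WHEN NOT MATCHED BY SOURCE THEN DELETE
""")
```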
+ */ + def resolveOrFail( + exprs: Seq[Expression], + plansToResolveExprs: Seq[LogicalPlan], + mergeClauseType: String) + : Seq[Expression] = { + val resolvedExprs = resolveExprs(exprs, plansToResolveExprs) + resolvedExprs.foreach(assertResolved(_, plansToResolveExprs, mergeClauseType)) + resolvedExprs + } + + /** + * Convenience wrapper around `resolveOrFail()` when resolving a single expression. + */ + def resolveSingleExprOrFail( + expr: Expression, + plansToResolveExpr: Seq[LogicalPlan], + mergeClauseType: String) + : Expression = resolveOrFail(Seq(expr), plansToResolveExpr, mergeClauseType).head + + def assertResolved(expr: Expression, plans: Seq[LogicalPlan], mergeClauseType: String): Unit = { + expr.flatMap(_.references).filter(!_.resolved).foreach { a => + // Note: This will throw error only on unresolved attribute issues, + // not other resolution errors like mismatched data types. + val cols = "columns " + plans.flatMap(_.output).map(_.sql).mkString(", ") + throw new DeltaAnalysisException( + errorClass = "DELTA_MERGE_UNRESOLVED_EXPRESSION", + messageParameters = Array(a.sql, mergeClauseType, cols), + origin = Some(a.origin)) + } + } + + val canAutoMigrate = conf.getConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE) + /** + * Resolves a clause using the given plans (used for resolving the action exprs) and + * returns the resolved clause. + */ + def resolveClause[T <: DeltaMergeIntoClause]( + clause: T, + plansToResolveAction: Seq[LogicalPlan]): T = { + + /* + * Returns the sequence of [[DeltaMergeActions]] corresponding to + * [ `columnName = sourceColumnBySameName` ] for every column name in the schema. Nested + * columns are unfolded to create an assignment for each leaf. + * + * @param currSchema: schema to generate DeltaMergeAction for every 'leaf' + * @param qualifier: used to recurse to leaves; represents the qualifier of the current schema + * @return seq of DeltaMergeActions corresponding to columnName = sourceColumnName updates + */ + def getActions(currSchema: StructType, qualifier: Seq[String] = Nil): Seq[DeltaMergeAction] = + currSchema.flatMap { + case StructField(name, struct: StructType, _, _) => + getActions(struct, qualifier :+ name) + case StructField(name, _, _, _) => + val nameParts = qualifier :+ name + val sourceExpr = source.resolve(nameParts, conf.resolver).getOrElse { + // if we use getActions to expand target columns, this will fail on target columns not + // present in the source + throw new DeltaIllegalArgumentException( + errorClass = "DELTA_CANNOT_RESOLVE_SOURCE_COLUMN", + messageParameters = Array(s"${UnresolvedAttribute(nameParts).name}") + ) + } + Seq(DeltaMergeAction(nameParts, sourceExpr, targetColNameResolved = true)) + } + + val typ = clause.clauseType.toUpperCase(Locale.ROOT) + + val resolvedActions: Seq[DeltaMergeAction] = clause.actions.flatMap { action => + action match { + // For actions like `UPDATE SET *` or `INSERT *` + case _: UnresolvedStar if !canAutoMigrate => + // Expand `*` into seq of [ `columnName = sourceColumnBySameName` ] for every target + // column name. The target columns do not need resolution. The right hand side + // expression (i.e. sourceColumnBySameName) needs to be resolved only by the source + // plan. 
+ val unresolvedExprs = target.output.map { attr => + UnresolvedAttribute.quotedString(s"`${attr.name}`") + } + val resolvedExprs = resolveOrFail(unresolvedExprs, Seq(source), s"$typ clause") + (resolvedExprs, target.output.map(_.name)) + .zipped + .map { (resolvedExpr, targetColName) => + DeltaMergeAction(Seq(targetColName), resolvedExpr, targetColNameResolved = true) + } + case _: UnresolvedStar if canAutoMigrate => + clause match { + case _: DeltaMergeIntoNotMatchedInsertClause => + // Expand `*` into seq of [ `columnName = sourceColumnBySameName` ] for every source + // column name. Target columns not present in the source will be filled in + // with null later. + source.output.map { attr => + DeltaMergeAction(Seq(attr.name), attr, targetColNameResolved = true) + } + case _: DeltaMergeIntoMatchedUpdateClause => + // Expand `*` into seq of [ `columnName = sourceColumnBySameName` ] for every source + // column name. Target columns not present in the source will be filled in with + // no-op actions later. + // Nested columns are unfolded to accommodate the case where a source struct has a + // subset of the nested columns in the target. If a source struct (a, b) is writing + // into a target (a, b, c), the final struct after filling in the no-op actions will + // be (s.a, s.b, t.c). + getActions(source.schema, Seq.empty) + } + + + // For actions like `UPDATE SET x = a, y = b` or `INSERT (x, y) VALUES (a, b)` + case d @ DeltaMergeAction(colNameParts, expr, _) if !d.resolved => + val unresolvedAttrib = UnresolvedAttribute(colNameParts) + val resolutionErrorMsg = + s"Cannot resolve ${unresolvedAttrib.sql} in target columns in $typ " + + s"clause given columns ${target.output.map(_.sql).mkString(", ")}" + + // Resolve the target column name without database/table/view qualifiers + // If clause allows nested field to be target, then this will return the all the + // parts of the name (e.g., "a.b" -> Seq("a", "b")). Otherwise, this will + // return only one string. + val resolvedKey = try { + resolveSingleExprOrFail( + expr = unresolvedAttrib, + plansToResolveExpr = Seq(target), + mergeClauseType = s"$typ clause") + } catch { + // Allow schema evolution for update and insert non-star when the column is not in + // the target. + case _: AnalysisException + if canAutoMigrate && (clause.isInstanceOf[DeltaMergeIntoMatchedUpdateClause] || + clause.isInstanceOf[DeltaMergeIntoNotMatchedClause]) => + resolveSingleExprOrFail( + expr = unresolvedAttrib, + plansToResolveExpr = Seq(source), + mergeClauseType = s"$typ clause") + case e: Throwable => throw e + } + + val resolvedNameParts = + DeltaUpdateTable.getTargetColNameParts(resolvedKey, resolutionErrorMsg) + + val resolvedExpr = resolveExprs(Seq(expr), plansToResolveAction).head + assertResolved(resolvedExpr, plansToResolveAction, s"$typ clause") + Seq(DeltaMergeAction(resolvedNameParts, resolvedExpr, targetColNameResolved = true)) + + case d: DeltaMergeAction => + // Already resolved + Seq(d) + + case _ => + action.failAnalysis("INTERNAL_ERROR", + Map("message" -> s"Unexpected action expression '$action' in clause $clause")) + } + } + + val resolvedCondition = clause.condition.map { + resolveSingleExprOrFail(_, plansToResolveAction, mergeClauseType = s"$typ condition") + } + clause.makeCopy(Array(resolvedCondition, resolvedActions)).asInstanceOf[T] + } + + // We must do manual resolution as the expressions in different clauses of the MERGE have + // visibility of the source, the target or both. 
+ val resolvedCond = resolveSingleExprOrFail( + expr = condition, + plansToResolveExpr = Seq(target, source), + mergeClauseType = "search condition") + val resolvedMatchedClauses = matchedClauses.map { + resolveClause(_, plansToResolveAction = Seq(target, source)) + } + val resolvedNotMatchedClauses = notMatchedClauses.map { + resolveClause(_, plansToResolveAction = Seq(source)) + } + val resolvedNotMatchedBySourceClauses = notMatchedBySourceClauses.map { + resolveClause(_, plansToResolveAction = Seq(target)) + } + + val finalSchema = if (canAutoMigrate) { + // When schema evolution is enabled, add to the target table new columns or nested fields that + // are assigned to in merge actions and not already part of the target schema. This is done by + // collecting all assignments from merge actions and using them to filter out the source + // schema before merging it with the target schema. We don't consider NOT MATCHED BY SOURCE + // clauses since these can't by definition reference source columns and thus can't introduce + // new columns in the target schema. + val actions = (matchedClauses ++ notMatchedClauses).flatMap(_.actions) + val assignments = actions.collect { case a: DeltaMergeAction => a.targetColNameParts } + val containsStarAction = actions.exists { + case _: UnresolvedStar => true + case _ => false + } + + + // Filter the source schema to retain only fields that are referenced by at least one merge + // clause, then merge this schema with the target to give the final schema. + def filterSchema(sourceSchema: StructType, basePath: Seq[String]): StructType = + StructType(sourceSchema.flatMap { field => + val fieldPath = basePath :+ field.name + + // Helper method to check if a given field path is a prefix of another path. Delegates + // equality to conf.resolver to correctly handle case sensitivity. + def isPrefix(prefix: Seq[String], path: Seq[String]): Boolean = + prefix.length <= path.length && prefix.zip(path).forall { + case (prefixNamePart, pathNamePart) => conf.resolver(prefixNamePart, pathNamePart) + } + + // Helper method to check if a given field path is equal to another path. + def isEqual(path1: Seq[String], path2: Seq[String]): Boolean = + path1.length == path2.length && isPrefix(path1, path2) + + + field.dataType match { + // Specifically assigned to in one clause: always keep, including all nested attributes + case _ if assignments.exists(isEqual(_, fieldPath)) => Some(field) + // If this is a struct and one of the children is being assigned to in a merge clause, + // keep it and continue filtering children. + case struct: StructType if assignments.exists(isPrefix(fieldPath, _)) => + Some(field.copy(dataType = filterSchema(struct, fieldPath))) + // The field isn't assigned to directly or indirectly (i.e. its children) in any non-* + // clause. Check if it should be kept with any * action. + case struct: StructType if containsStarAction => + Some(field.copy(dataType = filterSchema(struct, fieldPath))) + case _ if containsStarAction => Some(field) + // The field and its children are not assigned to in any * or non-* action, drop it. + case _ => None + } + }) + + val migrationSchema = filterSchema(source.schema, Seq.empty) + // The implicit conversions flag allows any type to be merged from source to target if Spark + // SQL considers the source type implicitly castable to the target. Normally, mergeSchemas + // enforces Parquet-level write compatibility, which would mean an INT source can't be merged + // into a LONG target. 
+ SchemaMergingUtils.mergeSchemas( + target.schema, + migrationSchema, + allowImplicitConversions = true) + } else { + target.schema + } + + val resolvedMerge = DeltaMergeInto( + target, + source, + resolvedCond, + resolvedMatchedClauses, + resolvedNotMatchedClauses, + resolvedNotMatchedBySourceClauses, + migrateSchema = canAutoMigrate, + finalSchema = Some(finalSchema)) + + // Its possible that pre-resolved expressions (e.g. `sourceDF("key") = targetDF("key")`) have + // attribute references that are not present in the output attributes of the children (i.e., + // incorrect DataFrame was used in the `df("col")` form). + if (resolvedMerge.missingInput.nonEmpty) { + val missingAttributes = resolvedMerge.missingInput.mkString(",") + val input = resolvedMerge.inputSet.mkString(",") + throw new DeltaAnalysisException( + errorClass = "DELTA_MERGE_RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT", + messageParameters = Array(missingAttributes, input, + resolvedMerge.simpleString(SQLConf.get.maxToStringFields)), + origin = Some(resolvedMerge.origin) + ) + } + + resolvedMerge + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/deltaTableFeatures.scala b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/deltaTableFeatures.scala new file mode 100644 index 00000000000..1d50973332a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/deltaTableFeatures.scala @@ -0,0 +1,32 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.delta.tablefeatures.DropFeature + +import org.apache.spark.sql.connector.catalog.TableChange + +/** + * The logical plan of the ALTER TABLE ... DROP FEATURE command. + */ +case class AlterTableDropFeature( + table: LogicalPlan, + featureName: String, + truncateHistory: Boolean) extends AlterTableCommand { + override def changes: Seq[TableChange] = Seq(DropFeature(featureName, truncateHistory)) + protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(table = newChild) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/CheckUnresolvedRelationTimeTravel.scala b/spark/src/main/scala/org/apache/spark/sql/delta/CheckUnresolvedRelationTimeTravel.scala new file mode 100644 index 00000000000..c9ad91efd8a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/CheckUnresolvedRelationTimeTravel.scala @@ -0,0 +1,39 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
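Two of the knobs referenced above are user-facing: `DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE` gates MERGE schema evolution, and `AlterTableDropFeature` backs the DROP FEATURE DDL. A hedged sketch, with the conf key and syntax as documented for recent Delta releases and the table/feature names illustrative.

```scala
// Enable automatic schema evolution for MERGE, so * clauses may add new source columns.
spark.conf.set("spark.databricks.delta.schema.autoMerge.enabled", "true")

// Drop a previously enabled table feature; a later `... DROP FEATURE x TRUNCATE HISTORY`
// can follow once the retention window has passed.
spark.sql("ALTER TABLE events DROP FEATURE deletionVectors")
```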
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.RelationTimeTravel +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.trees.TreePattern.RELATION_TIME_TRAVEL + +/** + * Custom check rule that compensates for [SPARK-45383]. It checks the (unresolved) child relation + * of each [[RelationTimeTravel]] in the plan, in order to trigger a helpful table-not-found + * [[AnalysisException]] instead of the internal spark error that would otherwise result. + */ +class CheckUnresolvedRelationTimeTravel(spark: SparkSession) extends (LogicalPlan => Unit) { + override def apply(plan: LogicalPlan): Unit = { + // Short circuit: We only care about (unresolved) plans containing [[RelationTimeTravel]]. + if (plan.containsPattern(RELATION_TIME_TRAVEL)) { + plan.foreachUp { + case tt: RelationTimeTravel => spark.sessionState.analyzer.checkAnalysis0(tt.relation) + case _ => () + } + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/CheckpointProvider.scala b/spark/src/main/scala/org/apache/spark/sql/delta/CheckpointProvider.scala new file mode 100644 index 00000000000..93fb89ae649 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/CheckpointProvider.scala @@ -0,0 +1,479 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import scala.collection.mutable.ArrayBuffer +import scala.concurrent.duration.Duration +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.SnapshotManagement.checkpointV2ThreadPool +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.storage.LogStore +import org.apache.spark.sql.delta.util.FileNames._ +import org.apache.spark.sql.delta.util.threads.NonFateSharingFuture +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.sql.Dataset +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.types.StructType + +/** + * Represents basic information about a checkpoint. + * This is the info we always can know about a checkpoint, without doing any additional I/O. + */ +trait UninitializedCheckpointProvider { + + /** True if the checkpoint provider is empty (does not refer to a valid checkpoint) */ + def isEmpty: Boolean = version < 0 + + /** Checkpoint version */ + def version: Long + + /** + * Top level files that represents this checkpoint. + * These files could be reused again to initialize the [[CheckpointProvider]]. + */ + def topLevelFiles: Seq[FileStatus] + + /** + * File index which could help derive actions stored in top level files + * for the checkpoint. 
+ * This could be used to get [[Protocol]], [[Metadata]] etc from a checkpoint. + * This could also be used if we want to shallow copy a checkpoint. + */ + def topLevelFileIndex: Option[DeltaLogFileIndex] +} + +/** + * A trait which provides information about a checkpoint to the Snapshot. + */ +trait CheckpointProvider extends UninitializedCheckpointProvider { + + /** Effective size of checkpoint across all files */ + def effectiveCheckpointSizeInBytes(): Long + + /** + * List of different file indexes which could help derive full state-reconstruction + * for the checkpoint. + */ + def allActionsFileIndexes(): Seq[DeltaLogFileIndex] +} + +object CheckpointProvider extends DeltaLogging { + + /** Helper method to convert non-empty checkpoint files to DeltaLogFileIndex */ + def checkpointFileIndex(checkpointFiles: Seq[FileStatus]): DeltaLogFileIndex = { + assert(checkpointFiles.nonEmpty, "checkpointFiles must not be empty") + DeltaLogFileIndex(DeltaLogFileIndex.CHECKPOINT_FILE_FORMAT_PARQUET, checkpointFiles).get + } + + /** Converts an [[UninitializedCheckpointProvider]] into a [[CheckpointProvider]] */ + def apply( + spark: SparkSession, + snapshotDescriptor: SnapshotDescriptor, + checksumOpt: Option[VersionChecksum], + uninitializedCheckpointProvider: UninitializedCheckpointProvider) + : CheckpointProvider = uninitializedCheckpointProvider match { + // Note: snapshotDescriptor.protocol should be accessed as late as possible inside the futures + // as it might need I/O. + case uninitializedV2CheckpointProvider: UninitializedV2CheckpointProvider => + new LazyCompleteCheckpointProvider(uninitializedV2CheckpointProvider) { + override def createCheckpointProvider(): CheckpointProvider = { + val (checkpointMetadataOpt, sidecarFiles) = + uninitializedV2CheckpointProvider.nonFateSharingCheckpointReadFuture.get(Duration.Inf) + // This must be a v2 checkpoint, so checkpointMetadataOpt must be non empty. + val checkpointMetadata = checkpointMetadataOpt.getOrElse { + val checkpointFile = uninitializedV2CheckpointProvider.topLevelFiles.head + throw new IllegalStateException(s"V2 Checkpoint ${checkpointFile.getPath} " + + s"has no CheckpointMetadata action") + } + require(isV2CheckpointEnabled(snapshotDescriptor.protocol)) + V2CheckpointProvider(uninitializedV2CheckpointProvider, checkpointMetadata, sidecarFiles) + } + } + case provider: UninitializedV1OrV2ParquetCheckpointProvider + if isV2CheckpointEnabled(checksumOpt).contains(false) => + // V2 checkpoints are specifically disabled, so it must be V1 + PreloadedCheckpointProvider(provider.topLevelFiles, provider.lastCheckpointInfoOpt) + case provider: UninitializedV1OrV2ParquetCheckpointProvider => + // Either v2 checkpoints are explicitly enabled, or we lack a Protocol to prove otherwise. + // We can't tell immediately whether it's V1 or V2, just by looking at the file name. + + // Start a future to start reading the v2 actions from the parquet checkpoint and return + // a lazy checkpoint provider wrapping the future. we won't wait on the future unless/until + // somebody calls a complete checkpoint provider method. 
+ val future = checkpointV2ThreadPool.submitNonFateSharing { spark: SparkSession => + readV2ActionsFromParquetCheckpoint( + spark, provider.logPath, provider.fileStatus, snapshotDescriptor.deltaLog.options) + } + new LazyCompleteCheckpointProvider(provider) { + override def createCheckpointProvider(): CheckpointProvider = { + val (checkpointMetadataOpt, sidecarFiles) = future.get(Duration.Inf) + checkpointMetadataOpt match { + case Some(cm) => + require(isV2CheckpointEnabled(snapshotDescriptor)) + V2CheckpointProvider(provider, cm, sidecarFiles) + case None => + PreloadedCheckpointProvider(provider.topLevelFiles, provider.lastCheckpointInfoOpt) + } + } + } + } + + private[delta] def isV2CheckpointEnabled(protocol: Protocol): Boolean = + protocol.isFeatureSupported(V2CheckpointTableFeature) + + /** + * Returns whether V2 Checkpoints are enabled or not. + * This means an underlying checkpoint in this table could be a V2Checkpoint with sidecar files. + */ + def isV2CheckpointEnabled(snapshotDescriptor: SnapshotDescriptor): Boolean = + isV2CheckpointEnabled(snapshotDescriptor.protocol) + + /** + * Returns: + * - Some(true) if V2 Checkpoints are enabled for the snapshot corresponding to the given + * `checksumOpt`. + * - Some(false) if V2 Checkpoints are disabled for the snapshot + * - None if the given checksumOpt is not sufficient to identify if v2 checkpoints are enabled or + * not. + */ + def isV2CheckpointEnabled(checksumOpt: Option[VersionChecksum]): Option[Boolean] = { + checksumOpt.flatMap(checksum => Option(checksum.protocol)).map(isV2CheckpointEnabled) + } + + private def sendEventForV2CheckpointRead( + startTimeMs: Long, + fileStatus: FileStatus, + fileType: String, + logPath: Path, + exception: Option[Throwable]): Unit = { + recordDeltaEvent( + deltaLog = null, + opType = "delta.checkpointV2.readV2ActionsFromCheckpoint", + data = Map( + "timeTakenMs" -> (System.currentTimeMillis() - startTimeMs), + "v2CheckpointPath" -> fileStatus.getPath.toString, + "v2CheckpointSize" -> fileStatus.getLen, + "errorMessage" -> exception.map(_.toString).getOrElse(""), + "fileType" -> fileType + ), + path = Some(logPath.getParent) + ) + } + + /** Reads and returns the [[CheckpointMetadata]] and [[SidecarFile]]s from a json v2 checkpoint */ + private[delta] def readV2ActionsFromJsonCheckpoint( + logStore: LogStore, + logPath: Path, + fileStatus: FileStatus, + hadoopConf: Configuration): (CheckpointMetadata, Seq[SidecarFile]) = { + val startTimeMs = System.currentTimeMillis() + try { + var checkpointMetadataOpt: Option[CheckpointMetadata] = None + val sidecarFileActions: ArrayBuffer[SidecarFile] = ArrayBuffer.empty + logStore.readAsIterator(fileStatus, hadoopConf).processAndClose { _ + .map(Action.fromJson) + .foreach { + case cm: CheckpointMetadata if checkpointMetadataOpt.isEmpty => + checkpointMetadataOpt = Some(cm) + case cm: CheckpointMetadata => + throw new IllegalStateException( + "More than 1 CheckpointMetadata actions found in the checkpoint file") + case sidecarFile: SidecarFile => + sidecarFileActions.append(sidecarFile) + case _ => () + } + } + val checkpointMetadata = checkpointMetadataOpt.getOrElse { + throw new IllegalStateException("Json V2 Checkpoint has no CheckpointMetadata action") + } + sendEventForV2CheckpointRead(startTimeMs, fileStatus, "json", logPath, exception = None) + (checkpointMetadata, sidecarFileActions.toSeq) + } catch { + case NonFatal(e) => + sendEventForV2CheckpointRead(startTimeMs, fileStatus, "json", logPath, exception = Some(e)) + throw e + } + } + + /** + * Reads 
and returns the optional [[CheckpointMetadata]], [[SidecarFile]]s from a parquet + * checkpoint. + * The checkpoint metadata returned might be None if the underlying parquet file is not a v2 + * checkpoint. + */ + private[delta] def readV2ActionsFromParquetCheckpoint( + spark: SparkSession, + logPath: Path, + fileStatus: FileStatus, + deltaLogOptions: Map[String, String]): (Option[CheckpointMetadata], Seq[SidecarFile]) = { + val startTimeMs = System.currentTimeMillis() + try { + val relation = DeltaLog.indexToRelation( + spark, checkpointFileIndex(Seq(fileStatus)), deltaLogOptions, Action.logSchema) + import implicits._ + val rows = Dataset.ofRows(spark, relation) + .select("checkpointMetadata", "sidecar") + .where("checkpointMetadata.version is not null or sidecar.path is not null") + .as[(CheckpointMetadata, SidecarFile)] + .collect() + + var checkpointMetadata: Option[CheckpointMetadata] = None + val checkpointSidecarFiles = ArrayBuffer.empty[SidecarFile] + rows.foreach { + case (cm: CheckpointMetadata, _) if checkpointMetadata.isEmpty => + checkpointMetadata = Some(cm) + case (cm: CheckpointMetadata, _) => + throw new IllegalStateException( + "More than 1 CheckpointMetadata actions found in the checkpoint file") + case (_, sf: SidecarFile) => + checkpointSidecarFiles.append(sf) + } + if (checkpointMetadata.isEmpty && checkpointSidecarFiles.nonEmpty) { + throw new IllegalStateException( + "sidecar files present in checkpoint even when checkpoint metadata is missing") + } + sendEventForV2CheckpointRead(startTimeMs, fileStatus, "parquet", logPath, exception = None) + (checkpointMetadata, checkpointSidecarFiles.toSeq) + } catch { + case NonFatal(e) => + sendEventForV2CheckpointRead(startTimeMs, fileStatus, "parquet", logPath, Some(e)) + throw e + } + } +} + +/** + * An implementation of [[CheckpointProvider]] where the information about checkpoint files + * (i.e. Seq[FileStatus]) is already known in advance. + * + * @param topLevelFiles - file statuses that describes the checkpoint + * @param lastCheckpointInfoOpt - optional [[LastCheckpointInfo]] corresponding to this checkpoint. + * This comes from _last_checkpoint file + */ +case class PreloadedCheckpointProvider( + override val topLevelFiles: Seq[FileStatus], + lastCheckpointInfoOpt: Option[LastCheckpointInfo]) + extends CheckpointProvider + with DeltaLogging { + + require(topLevelFiles.nonEmpty, "There should be atleast 1 checkpoint file") + private lazy val fileIndex = CheckpointProvider.checkpointFileIndex(topLevelFiles) + + override def version: Long = checkpointVersion(topLevelFiles.head) + + override def effectiveCheckpointSizeInBytes(): Long = fileIndex.sizeInBytes + + override def allActionsFileIndexes(): Seq[DeltaLogFileIndex] = Seq(fileIndex) + + override lazy val topLevelFileIndex: Option[DeltaLogFileIndex] = Some(fileIndex) +} + +/** + * An implementation for [[CheckpointProvider]] which could be used to represent a scenario when + * checkpoint doesn't exist. This helps us simplify the code by making + * [[LogSegment.checkpointProvider]] as non-optional. + * + * The [[CheckpointProvider.isEmpty]] method returns true for [[EmptyCheckpointProvider]]. Also + * version is returned as -1. + * For a real checkpoint, this will be returned true and version will be >= 0. 
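The V2 checkpoint checks above hinge on `V2CheckpointTableFeature`; per the Delta documentation this is normally opted into through a table property, sketched here with an illustrative table name.

```scala
// Assumes an active `spark` session configured with the Delta extensions.
spark.sql("ALTER TABLE events SET TBLPROPERTIES ('delta.checkpointPolicy' = 'v2')")
```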
+ */
+object EmptyCheckpointProvider extends CheckpointProvider {
+  override def version: Long = -1
+  override def topLevelFiles: Seq[FileStatus] = Nil
+  override def effectiveCheckpointSizeInBytes(): Long = 0L
+  override def allActionsFileIndexes(): Seq[DeltaLogFileIndex] = Nil
+  override def topLevelFileIndex: Option[DeltaLogFileIndex] = None
+}
+
+/** A trait representing a v2 [[UninitializedCheckpointProvider]] */
+trait UninitializedV2LikeCheckpointProvider extends UninitializedCheckpointProvider {
+  def fileStatus: FileStatus
+  def logPath: Path
+  def lastCheckpointInfoOpt: Option[LastCheckpointInfo]
+  def v2CheckpointFormat: V2Checkpoint.Format
+
+  override lazy val topLevelFiles: Seq[FileStatus] = Seq(fileStatus)
+  override lazy val topLevelFileIndex: Option[DeltaLogFileIndex] =
+    DeltaLogFileIndex(v2CheckpointFormat.fileFormat, topLevelFiles)
+}
+
+/**
+ * An implementation of [[UninitializedCheckpointProvider]] to represent a parquet checkpoint
+ * which could be either a v1 checkpoint or v2 checkpoint.
+ * This needs to be resolved into a [[PreloadedCheckpointProvider]] or a [[V2CheckpointProvider]]
+ * depending on whether the [[CheckpointMetadata]] action is present or not in the underlying
+ * parquet file.
+ */
+case class UninitializedV1OrV2ParquetCheckpointProvider(
+    override val version: Long,
+    override val fileStatus: FileStatus,
+    override val logPath: Path,
+    override val lastCheckpointInfoOpt: Option[LastCheckpointInfo]
+) extends UninitializedV2LikeCheckpointProvider {
+
+  override val v2CheckpointFormat: V2Checkpoint.Format = V2Checkpoint.Format.PARQUET
+}
+
+/**
+ * An implementation of [[UninitializedCheckpointProvider]] for v2 checkpoints.
+ * This needs to be resolved into a [[V2CheckpointProvider]].
+ * This class starts an I/O to fetch the V2 actions ([[CheckpointMetadata]], [[SidecarFile]]) as
+ * soon as the class is initialized so that the extra overhead could be parallelized with other
+ * operations like reading CRC.
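+ * The prefetched result is exposed through [[nonFateSharingCheckpointReadFuture]] below; when
+ * the v2 actions are already available from [[LastCheckpointInfo]], the future completes without
+ * re-reading the checkpoint file.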
+ */ +case class UninitializedV2CheckpointProvider( + override val version: Long, + override val fileStatus: FileStatus, + override val logPath: Path, + hadoopConf: Configuration, + deltaLogOptions: Map[String, String], + logStore: LogStore, + override val lastCheckpointInfoOpt: Option[LastCheckpointInfo] +) extends UninitializedV2LikeCheckpointProvider { + + override val v2CheckpointFormat: V2Checkpoint.Format = + V2Checkpoint.toFormat(fileStatus.getPath.getName) + + // Try to get the required actions from LastCheckpointInfo + private val v2ActionsFromLastCheckpointOpt: Option[(CheckpointMetadata, Seq[SidecarFile])] = { + lastCheckpointInfoOpt + .flatMap(_.v2Checkpoint) + .map(v2 => (v2.checkpointMetadataOpt, v2.sidecarFiles)) + .collect { + case (Some(checkpointMetadata), Some(sidecarFiles)) => + (checkpointMetadata, sidecarFiles) + } + } + + /** Helper method to do I/O and read v2 actions from the underlying v2 checkpoint file */ + private def readV2Actions(spark: SparkSession): (Option[CheckpointMetadata], Seq[SidecarFile]) = { + v2CheckpointFormat match { + case V2Checkpoint.Format.JSON => + val (checkpointMetadata, sidecars) = CheckpointProvider.readV2ActionsFromJsonCheckpoint( + logStore, logPath, fileStatus, hadoopConf) + (Some(checkpointMetadata), sidecars) + case V2Checkpoint.Format.PARQUET => + CheckpointProvider.readV2ActionsFromParquetCheckpoint( + spark, logPath, fileStatus, deltaLogOptions) + } + } + + val nonFateSharingCheckpointReadFuture + : NonFateSharingFuture[(Option[CheckpointMetadata], Seq[SidecarFile])] = { + checkpointV2ThreadPool.submitNonFateSharing { spark: SparkSession => + v2ActionsFromLastCheckpointOpt match { + case Some((cm, sidecars)) => Some(cm) -> sidecars + case None => readV2Actions(spark) + } + } + } +} + +/** + * A wrapper implementation of [[CheckpointProvider]] which wraps + * `underlyingCheckpointProviderFuture` and `uninitializedCheckpointProvider` for implementing all + * the [[UninitializedCheckpointProvider]] and [[CheckpointProvider]] APIs. + * + * @param uninitializedCheckpointProvider the underlying [[UninitializedCheckpointProvider]] + */ +abstract class LazyCompleteCheckpointProvider( + uninitializedCheckpointProvider: UninitializedCheckpointProvider) + extends CheckpointProvider { + + override def version: Long = uninitializedCheckpointProvider.version + override def topLevelFiles: Seq[FileStatus] = uninitializedCheckpointProvider.topLevelFiles + override def topLevelFileIndex: Option[DeltaLogFileIndex] = + uninitializedCheckpointProvider.topLevelFileIndex + + protected def createCheckpointProvider(): CheckpointProvider + + lazy val underlyingCheckpointProvider: CheckpointProvider = createCheckpointProvider() + + override def effectiveCheckpointSizeInBytes(): Long = + underlyingCheckpointProvider.effectiveCheckpointSizeInBytes() + + override def allActionsFileIndexes(): Seq[DeltaLogFileIndex] = + underlyingCheckpointProvider.allActionsFileIndexes() +} + +/** + * [[CheckpointProvider]] implementation for Json/Parquet V2 checkpoints. 
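+ * The top-level checkpoint file is exposed through [[topLevelFileIndex]], while
+ * [[allActionsFileIndexes]] additionally covers the sidecar files;
+ * [[effectiveCheckpointSizeInBytes]] is the size of the top-level file plus the sizes of all
+ * sidecars (see the overrides below).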
+ * + * @param version checkpoint version for the underlying checkpoint + * @param v2CheckpointFile [[FileStatus]] for the json/parquet v2 checkpoint file + * @param v2CheckpointFormat format (json/parquet) for the v2 checkpoint + * @param checkpointMetadata [[CheckpointMetadata]] for the v2 checkpoint + * @param sidecarFiles seq of [[SidecarFile]] for the v2 checkpoint + * @param lastCheckpointInfoOpt optional last checkpoint info for the v2 checkpoint + * @param logPath delta log path for the underlying delta table + */ +case class V2CheckpointProvider( + override val version: Long, + v2CheckpointFile: FileStatus, + v2CheckpointFormat: V2Checkpoint.Format, + checkpointMetadata: CheckpointMetadata, + sidecarFiles: Seq[SidecarFile], + lastCheckpointInfoOpt: Option[LastCheckpointInfo], + logPath: Path + ) extends CheckpointProvider with DeltaLogging { + + private[delta] def sidecarFileStatuses: Seq[FileStatus] = + sidecarFiles.map(_.toFileStatus(logPath)) + + protected lazy val fileIndexesForSidecarFiles: Seq[DeltaLogFileIndex] = { + // V2 checkpoints without sidecars are legal. + if (sidecarFileStatuses.isEmpty) { + Seq.empty + } else { + Seq(CheckpointProvider.checkpointFileIndex(sidecarFileStatuses)) + } + } + + protected lazy val fileIndexForV2Checkpoint: DeltaLogFileIndex = + DeltaLogFileIndex(v2CheckpointFormat.fileFormat, Seq(v2CheckpointFile)).head + + override lazy val topLevelFiles: Seq[FileStatus] = Seq(v2CheckpointFile) + override lazy val topLevelFileIndex: Option[DeltaLogFileIndex] = Some(fileIndexForV2Checkpoint) + override def effectiveCheckpointSizeInBytes(): Long = + sidecarFiles.map(_.sizeInBytes).sum + v2CheckpointFile.getLen + override def allActionsFileIndexes(): Seq[DeltaLogFileIndex] = + topLevelFileIndex ++: fileIndexesForSidecarFiles + +} + +object V2CheckpointProvider { + + /** Alternate constructor which uses [[UninitializedV2LikeCheckpointProvider]] */ + def apply( + uninitializedV2LikeCheckpointProvider: UninitializedV2LikeCheckpointProvider, + checkpointMetadata: CheckpointMetadata, + sidecarFiles: Seq[SidecarFile]): V2CheckpointProvider = { + V2CheckpointProvider( + uninitializedV2LikeCheckpointProvider.version, + uninitializedV2LikeCheckpointProvider.fileStatus, + uninitializedV2LikeCheckpointProvider.v2CheckpointFormat, + checkpointMetadata, + sidecarFiles, + uninitializedV2LikeCheckpointProvider.lastCheckpointInfoOpt, + uninitializedV2LikeCheckpointProvider.logPath) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/Checkpoints.scala b/spark/src/main/scala/org/apache/spark/sql/delta/Checkpoints.scala new file mode 100644 index 00000000000..5f5c7bd9f97 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/Checkpoints.scala @@ -0,0 +1,1114 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.FileNotFoundException +import java.util.UUID + +import scala.collection.mutable +import scala.math.Ordering.Implicits._ +import scala.util.control.NonFatal + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.actions.{Action, CheckpointMetadata, Metadata, SidecarFile, SingleAction} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.storage.LogStore +import org.apache.spark.sql.delta.util.DeltaFileOperations +import org.apache.spark.sql.delta.util.FileNames._ +import org.apache.spark.sql.delta.util.JsonUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.apache.hadoop.mapred.{JobConf, TaskAttemptContextImpl, TaskAttemptID} +import org.apache.hadoop.mapreduce.{Job, TaskType} + +import org.apache.spark.TaskContext +import org.apache.spark.paths.SparkPath +import org.apache.spark.sql.{Column, DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.{Cast, ElementAt, Literal} +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.datasources.FileFormat +import org.apache.spark.sql.execution.datasources.OutputWriter +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.functions.{coalesce, col, struct, when} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.SerializableConfiguration +import org.apache.spark.util.Utils + +/** + * A class to help with comparing checkpoints with each other, where we may have had concurrent + * writers that checkpoint with different number of parts. + * The `numParts` field will be present only for multipart checkpoints (represented by + * Format.WITH_PARTS). + * The `fileName` field is present only for V2 Checkpoints (represented by Format.V2) + * These additional fields are used as a tie breaker when comparing multiple checkpoint + * instance of same Format for the same `version`. + */ +case class CheckpointInstance( + version: Long, + format: CheckpointInstance.Format, + fileName: Option[String] = None, + numParts: Option[Int] = None) extends Ordered[CheckpointInstance] { + + // Assert that numParts are present when checkpoint format is Format.WITH_PARTS. + // For other formats, numParts must be None. + require((format == CheckpointInstance.Format.WITH_PARTS) == numParts.isDefined, + s"numParts ($numParts) must be present for checkpoint format" + + s" ${CheckpointInstance.Format.WITH_PARTS.name}") + // Assert that filePath is present only when checkpoint format is Format.V2. + // For other formats, filePath must be None. + require((format == CheckpointInstance.Format.V2) == fileName.isDefined, + s"fileName ($fileName) must be present for checkpoint format" + + s" ${CheckpointInstance.Format.V2.name}") + + /** + * Returns a [[CheckpointProvider]] which can tell the files corresponding to this + * checkpoint. + * The `lastCheckpointInfoHint` might be passed to [[CheckpointProvider]] so that underlying + * [[CheckpointProvider]] provides more precise info. 
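+ *
+ * Roughly (see the match below): Format.V2 resolves to an [[UninitializedV2CheckpointProvider]],
+ * Format.SINGLE to an [[UninitializedV1OrV2ParquetCheckpointProvider]] (a single-file checkpoint
+ * may turn out to be either v1 or v2), and Format.WITH_PARTS to a [[PreloadedCheckpointProvider]].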
+ */ + def getCheckpointProvider( + deltaLog: DeltaLog, + filesForCheckpointConstruction: Seq[FileStatus], + lastCheckpointInfoHint: Option[LastCheckpointInfo] = None) + : UninitializedCheckpointProvider = { + val logPath = deltaLog.logPath + val lastCheckpointInfo = lastCheckpointInfoHint.filter(cm => CheckpointInstance(cm) == this) + val cpFiles = filterFiles(deltaLog, filesForCheckpointConstruction) + format match { + // Treat single file checkpoints also as V2 Checkpoints because we don't know if it is + // actually a V2 checkpoint until we read it. + case CheckpointInstance.Format.V2 | CheckpointInstance.Format.SINGLE => + assert(cpFiles.size == 1) + val fileStatus = cpFiles.head + if (format == CheckpointInstance.Format.V2) { + val hadoopConf = deltaLog.newDeltaHadoopConf() + UninitializedV2CheckpointProvider( + version, + fileStatus, + logPath, + hadoopConf, + deltaLog.options, + deltaLog.store, + lastCheckpointInfo) + } else { + UninitializedV1OrV2ParquetCheckpointProvider( + version, fileStatus, logPath, lastCheckpointInfo) + } + case CheckpointInstance.Format.WITH_PARTS => + PreloadedCheckpointProvider(cpFiles, lastCheckpointInfo) + case CheckpointInstance.Format.SENTINEL => + throw DeltaErrors.assertionFailedError( + s"invalid checkpoint format ${CheckpointInstance.Format.SENTINEL}") + } + } + + def filterFiles(deltaLog: DeltaLog, + filesForCheckpointConstruction: Seq[FileStatus]) : Seq[FileStatus] = { + val logPath = deltaLog.logPath + format match { + // Treat Single File checkpoints also as V2 Checkpoints because we don't know if it is + // actually a V2 checkpoint until we read it. + case format if format.usesSidecars => + val checkpointFileName = format match { + case CheckpointInstance.Format.V2 => fileName.get + case CheckpointInstance.Format.SINGLE => checkpointFileSingular(logPath, version).getName + case other => + throw new IllegalStateException(s"Unknown checkpoint format $other supporting sidecars") + } + val fileStatus = filesForCheckpointConstruction + .find(_.getPath.getName == checkpointFileName) + .getOrElse { + throw new IllegalStateException("Failed in getting the file information for:\n" + + fileName.get + "\namong\n" + + filesForCheckpointConstruction.map(_.getPath.getName).mkString(" -", "\n -", "")) + } + Seq(fileStatus) + case CheckpointInstance.Format.WITH_PARTS | CheckpointInstance.Format.SINGLE => + val filePaths = if (format == CheckpointInstance.Format.WITH_PARTS) { + checkpointFileWithParts(logPath, version, numParts.get).toSet + } else { + Set(checkpointFileSingular(logPath, version)) + } + val newCheckpointFileArray = + filesForCheckpointConstruction.filter(f => filePaths.contains(f.getPath)) + assert(newCheckpointFileArray.length == filePaths.size, + "Failed in getting the file information for:\n" + + filePaths.mkString(" -", "\n -", "") + "\namong\n" + + filesForCheckpointConstruction.map(_.getPath).mkString(" -", "\n -", "")) + newCheckpointFileArray + case CheckpointInstance.Format.SENTINEL => + throw DeltaErrors.assertionFailedError( + s"invalid checkpoint format ${CheckpointInstance.Format.SENTINEL}") + } + } + + /** + * Comparison rules: + * 1. A [[CheckpointInstance]] with higher version is greater than the one with lower version. + * 2. For [[CheckpointInstance]]s with same version, a Multi-part checkpoint is greater than a + * Single part checkpoint. + * 3. For Multi-part [[CheckpointInstance]]s corresponding to same version, the one with more + * parts is greater than the one with less parts. + * 4. 
For V2 Checkpoints corresponding to same version, we use the fileName as tie breaker. + */ + override def compare(other: CheckpointInstance): Int = { + (version, format, numParts, fileName) compare + (other.version, other.format, other.numParts, other.fileName) + } +} + +object CheckpointInstance { + sealed abstract class Format(val ordinal: Int, val name: String) extends Ordered[Format] { + override def compare(other: Format): Int = ordinal compare other.ordinal + def usesSidecars: Boolean = this.isInstanceOf[FormatUsesSidecars] + } + trait FormatUsesSidecars + + object Format { + def unapply(name: String): Option[Format] = name match { + case SINGLE.name => Some(SINGLE) + case WITH_PARTS.name => Some(WITH_PARTS) + case V2.name => Some(V2) + case _ => None + } + + /** single-file checkpoint format */ + object SINGLE extends Format(0, "SINGLE") with FormatUsesSidecars + /** multi-file checkpoint format */ + object WITH_PARTS extends Format(1, "WITH_PARTS") + /** V2 Checkpoint format */ + object V2 extends Format(2, "V2") with FormatUsesSidecars + /** Sentinel, for internal use only */ + object SENTINEL extends Format(Int.MaxValue, "SENTINEL") + } + + def apply(path: Path): CheckpointInstance = { + // Three formats to worry about: + // * .checkpoint.parquet + // * .checkpoint...parquet + // * .checkpoint..parquet where u is a unique string + path.getName.split("\\.") match { + case Array(v, "checkpoint", uniqueStr, format) if Seq("json", "parquet").contains(format) => + CheckpointInstance( + version = v.toLong, + format = Format.V2, + numParts = None, + fileName = Some(path.getName)) + case Array(v, "checkpoint", "parquet") => + CheckpointInstance(v.toLong, Format.SINGLE, numParts = None) + case Array(v, "checkpoint", _, n, "parquet") => + CheckpointInstance(v.toLong, Format.WITH_PARTS, numParts = Some(n.toInt)) + case _ => + throw DeltaErrors.assertionFailedError(s"Unrecognized checkpoint path format: $path") + } + } + + def apply(version: Long): CheckpointInstance = { + CheckpointInstance(version, Format.SINGLE, numParts = None) + } + + def apply(metadata: LastCheckpointInfo): CheckpointInstance = { + CheckpointInstance( + version = metadata.version, + format = metadata.getFormatEnum(), + fileName = metadata.v2Checkpoint.map(_.path), + numParts = metadata.parts) + } + + val MaxValue: CheckpointInstance = sentinelValue(versionOpt = None) + + def sentinelValue(versionOpt: Option[Long]): CheckpointInstance = { + val version = versionOpt.getOrElse(Long.MaxValue) + CheckpointInstance(version, Format.SENTINEL, numParts = None) + } +} + +trait Checkpoints extends DeltaLogging { + self: DeltaLog => + + def logPath: Path + def dataPath: Path + protected def store: LogStore + + /** Used to clean up stale log files. */ + protected def doLogCleanup(snapshotToCleanup: Snapshot): Unit + + /** Returns the checkpoint interval for this log. Not transactional. */ + def checkpointInterval(metadata: Metadata): Int = + DeltaConfigs.CHECKPOINT_INTERVAL.fromMetaData(metadata) + + /** The path to the file that holds metadata about the most recent checkpoint. */ + val LAST_CHECKPOINT = new Path(logPath, Checkpoints.LAST_CHECKPOINT_FILE_NAME) + + /** + * Catch non-fatal exceptions related to checkpointing, since the checkpoint is written + * after the commit has completed. From the perspective of the user, the commit has + * completed successfully. However, throw if this is in a testing environment - + * that way any breaking changes can be caught in unit tests. 
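+ *
+ * Outside of tests, rethrowing can still be forced through
+ * [[DeltaSQLConf.DELTA_CHECKPOINT_THROW_EXCEPTION_WHEN_FAILED]] (see the implementation below).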
+ */ + protected def withCheckpointExceptionHandling( + deltaLog: DeltaLog, opType: String)(thunk: => Unit): Unit = { + try { + thunk + } catch { + case NonFatal(e) => + recordDeltaEvent( + deltaLog, + opType, + data = Map("exception" -> e.getMessage(), "stackTrace" -> e.getStackTrace()) + ) + logWarning(s"Error when writing checkpoint-related files", e) + val throwError = Utils.isTesting || + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_CHECKPOINT_THROW_EXCEPTION_WHEN_FAILED) + if (throwError) throw e + } + } + + /** + * Creates a checkpoint using the default snapshot. + * + * WARNING: This API is being deprecated, and will be removed in future versions. + * Please use the checkpoint(Snapshot) function below to write checkpoints to the delta log. + */ + @deprecated("This method is deprecated and will be removed in future versions.", "12.0") + def checkpoint(): Unit = checkpoint(unsafeVolatileSnapshot) + + /** + * Creates a checkpoint using snapshotToCheckpoint. By default it uses the current log version. + * Note that this function captures and logs all exceptions, since the checkpoint shouldn't fail + * the overall commit operation. + */ + def checkpoint(snapshotToCheckpoint: Snapshot): Unit = recordDeltaOperation( + this, "delta.checkpoint") { + withCheckpointExceptionHandling(snapshotToCheckpoint.deltaLog, "delta.checkpoint.sync.error") { + if (snapshotToCheckpoint.version < 0) { + throw DeltaErrors.checkpointNonExistTable(dataPath) + } + checkpointAndCleanUpDeltaLog(snapshotToCheckpoint) + } + } + + /** + * Creates a checkpoint at given version. Does not invoke metadata cleanup as part of it. + * @param version - version at which we want to create a checkpoint. + */ + def createCheckpointAtVersion(version: Long): Unit = + recordDeltaOperation(this, "delta.createCheckpointAtVersion") { + val snapshot = getSnapshotAt(version) + withCheckpointExceptionHandling(this, "delta.checkpoint.sync.error") { + if (snapshot.version < 0) { + throw DeltaErrors.checkpointNonExistTable(dataPath) + } + writeCheckpointFiles(snapshot) + } + } + + def checkpointAndCleanUpDeltaLog( + snapshotToCheckpoint: Snapshot): Unit = { + val lastCheckpointInfo = writeCheckpointFiles(snapshotToCheckpoint) + writeLastCheckpointFile( + snapshotToCheckpoint.deltaLog, lastCheckpointInfo, LastCheckpointInfo.checksumEnabled(spark)) + doLogCleanup(snapshotToCheckpoint) + } + + protected[delta] def writeLastCheckpointFile( + deltaLog: DeltaLog, + lastCheckpointInfo: LastCheckpointInfo, + addChecksum: Boolean): Unit = { + withCheckpointExceptionHandling(deltaLog, "delta.lastCheckpoint.write.error") { + val suppressOptionalFields = spark.sessionState.conf.getConf( + DeltaSQLConf.SUPPRESS_OPTIONAL_LAST_CHECKPOINT_FIELDS) + val lastCheckpointInfoToWrite = lastCheckpointInfo + val json = LastCheckpointInfo.serializeToJson( + lastCheckpointInfoToWrite, + addChecksum, + suppressOptionalFields) + store.write(LAST_CHECKPOINT, Iterator(json), overwrite = true, newDeltaHadoopConf()) + } + } + + protected def writeCheckpointFiles( + snapshotToCheckpoint: Snapshot): LastCheckpointInfo = { + Checkpoints.writeCheckpoint(spark, this, snapshotToCheckpoint) + } + + /** Returns information about the most recent checkpoint. */ + private[delta] def readLastCheckpointFile(): Option[LastCheckpointInfo] = { + loadMetadataFromFile(0) + } + + /** Loads the checkpoint metadata from the _last_checkpoint file. 
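+ * Retries up to three times on transient read or parse failures (tracked by the `tries`
+ * parameter); if the file is irrecoverably corrupted, falls back to listing the log and
+ * rebuilding the info from the last complete checkpoint found there.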
*/ + private def loadMetadataFromFile(tries: Int): Option[LastCheckpointInfo] = + recordDeltaOperation(self, "delta.deltaLog.loadMetadataFromFile") { + try { + val lastCheckpointInfoJson = store.read(LAST_CHECKPOINT, newDeltaHadoopConf()) + val validate = LastCheckpointInfo.checksumEnabled(spark) + Some(LastCheckpointInfo.deserializeFromJson(lastCheckpointInfoJson.head, validate)) + } catch { + case _: FileNotFoundException => + None + case NonFatal(e) if tries < 3 => + logWarning(s"Failed to parse $LAST_CHECKPOINT. This may happen if there was an error " + + "during read operation, or a file appears to be partial. Sleeping and trying again.", e) + Thread.sleep(1000) + loadMetadataFromFile(tries + 1) + case NonFatal(e) => + recordDeltaEvent( + self, + "delta.lastCheckpoint.read.corruptedJson", + data = Map("exception" -> Utils.exceptionString(e)) + ) + + logWarning(s"$LAST_CHECKPOINT is corrupted. Will search the checkpoint files directly", e) + // Hit a partial file. This could happen on Azure as overwriting _last_checkpoint file is + // not atomic. We will try to list all files to find the latest checkpoint and restore + // LastCheckpointInfo from it. + val verifiedCheckpoint = findLastCompleteCheckpointBefore(checkpointInstance = None) + verifiedCheckpoint.map(manuallyLoadCheckpoint) + } + } + + /** Loads the given checkpoint manually to come up with the [[LastCheckpointInfo]] */ + protected def manuallyLoadCheckpoint(cv: CheckpointInstance): LastCheckpointInfo = { + LastCheckpointInfo( + version = cv.version, + size = -1, + parts = cv.numParts, + sizeInBytes = None, + numOfAddFiles = None, + checkpointSchema = None + ) + } + + /** + * Finds the first verified, complete checkpoint before the given version. + * Note that the returned checkpoint will always be < `version`. + * @param version The checkpoint version to compare against + */ + protected def findLastCompleteCheckpointBefore(version: Long): Option[CheckpointInstance] = { + val upperBound = CheckpointInstance(version, CheckpointInstance.Format.SINGLE, numParts = None) + findLastCompleteCheckpointBefore(Some(upperBound)) + } + + /** + * Finds the first verified, complete checkpoint before the given [[CheckpointInstance]]. + * If `checkpointInstance` is passed as None, then we return the last complete checkpoint in the + * deltalog directory. + * @param checkpointInstance The checkpoint instance to compare against + */ + protected def findLastCompleteCheckpointBefore( + checkpointInstance: Option[CheckpointInstance] = None): Option[CheckpointInstance] = { + val (upperBoundCv, startVersion) = checkpointInstance + .collect { case cv if cv.version >= 0 => (cv, cv.version) } + .getOrElse((CheckpointInstance.sentinelValue(versionOpt = None), 0L)) + var cur = startVersion + val hadoopConf = newDeltaHadoopConf() + + logInfo(s"Try to find Delta last complete checkpoint before version $startVersion") + while (cur >= 0) { + val checkpoints = store.listFrom( + listingPrefix(logPath, math.max(0, cur - 1000)), + hadoopConf) + // Checkpoint files of 0 size are invalid but Spark will ignore them silently when reading + // such files, hence we drop them so that we never pick up such checkpoints. 
+ .filter { file => isCheckpointFile(file) && file.getLen != 0 } + .map{ file => CheckpointInstance(file.getPath) } + .takeWhile(tv => (cur == 0 || tv.version <= cur) && tv < upperBoundCv) + .toArray + val lastCheckpoint = + getLatestCompleteCheckpointFromList(checkpoints, Some(upperBoundCv.version)) + if (lastCheckpoint.isDefined) { + logInfo(s"Delta checkpoint is found at version ${lastCheckpoint.get.version}") + return lastCheckpoint + } else { + cur -= 1000 + } + } + logInfo(s"No checkpoint found for Delta table before version $startVersion") + None + } + + /** + * Given a list of checkpoint files, pick the latest complete checkpoint instance which is not + * later than `notLaterThan`. + */ + protected[delta] def getLatestCompleteCheckpointFromList( + instances: Array[CheckpointInstance], + notLaterThanVersion: Option[Long] = None): Option[CheckpointInstance] = { + val sentinelCv = CheckpointInstance.sentinelValue(notLaterThanVersion) + val complete = instances.filter(_ <= sentinelCv).groupBy(identity).filter { + case (ci, matchingCheckpointInstances) => + ci.format match { + case CheckpointInstance.Format.SINGLE => + matchingCheckpointInstances.length == 1 + case CheckpointInstance.Format.WITH_PARTS => + assert(ci.numParts.nonEmpty, "Multi-Part Checkpoint must have non empty numParts") + matchingCheckpointInstances.length == ci.numParts.get + case CheckpointInstance.Format.V2 => + matchingCheckpointInstances.length == 1 + case CheckpointInstance.Format.SENTINEL => + false + } + } + if (complete.isEmpty) None else Some(complete.keys.max) + } +} + +object Checkpoints + extends DeltaLogging + { + + /** The name of the last checkpoint file */ + val LAST_CHECKPOINT_FILE_NAME = "_last_checkpoint" + + /** + * Returns the checkpoint schema that should be written to the last checkpoint file based on + * [[DeltaSQLConf.CHECKPOINT_SCHEMA_WRITE_THRESHOLD_LENGTH]] conf. + */ + private[delta] def checkpointSchemaToWriteInLastCheckpointFile( + spark: SparkSession, + schema: StructType): Option[StructType] = { + val checkpointSchemaSizeThreshold = spark.sessionState.conf.getConf( + DeltaSQLConf.CHECKPOINT_SCHEMA_WRITE_THRESHOLD_LENGTH) + Some(schema).filter(s => JsonUtils.toJson(s).length <= checkpointSchemaSizeThreshold) + } + + /** + * Writes out the contents of a [[Snapshot]] into a checkpoint file that + * can be used to short-circuit future replays of the log. + * + * Returns the checkpoint metadata to be committed to a file. We will use the value + * in this file as the source of truth of the last valid checkpoint. + */ + private[delta] def writeCheckpoint( + spark: SparkSession, + deltaLog: DeltaLog, + snapshot: Snapshot): LastCheckpointInfo = recordFrameProfile( + "Delta", "Checkpoints.writeCheckpoint") { + val hadoopConf = deltaLog.newDeltaHadoopConf() + + // The writing of checkpoints doesn't go through log store, so we need to check with the + // log store and decide whether to use rename. + val useRename = deltaLog.store.isPartialWriteVisible(deltaLog.logPath, hadoopConf) + + val v2CheckpointFormatOpt = { + val policy = DeltaConfigs.CHECKPOINT_POLICY.fromMetaData(snapshot.metadata) + if (policy.needsV2CheckpointSupport) { + assert(CheckpointProvider.isV2CheckpointEnabled(snapshot)) + val v2Format = spark.conf.get(DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT) + // The format of the top level file in V2 checkpoints can be configured through + // the optional config [[DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT]]. + // If nothing is specified, we use the json format. 
In the future, we may + // write json/parquet dynamically based on heuristics. + v2Format match { + case Some(V2Checkpoint.Format.JSON.name) | None => Some(V2Checkpoint.Format.JSON) + case Some(V2Checkpoint.Format.PARQUET.name) => Some(V2Checkpoint.Format.PARQUET) + case _ => throw new IllegalStateException("unknown checkpoint format") + } + } else { + None + } + } + val v2CheckpointEnabled = v2CheckpointFormatOpt.nonEmpty + + val checkpointRowCount = spark.sparkContext.longAccumulator("checkpointRowCount") + val numOfFiles = spark.sparkContext.longAccumulator("numOfFiles") + + val sessionConf = spark.sessionState.conf + val checkpointPartSize = + sessionConf.getConf(DeltaSQLConf.DELTA_CHECKPOINT_PART_SIZE) + + val numParts = checkpointPartSize.map { partSize => + math.ceil((snapshot.numOfFiles + snapshot.numOfRemoves).toDouble / partSize).toLong + }.getOrElse(1L).toInt + val legacyMultiPartCheckpoint = !v2CheckpointEnabled && numParts > 1 + + val base = { + val repartitioned = snapshot.stateDS + .repartition(numParts, coalesce(col("add.path"), col("remove.path"))) + .map { action => + if (action.add != null) { + numOfFiles.add(1) + } + action + } + // commitInfo, cdc and remove.tags are not included in both classic and V2 checkpoints. + if (v2CheckpointEnabled) { + // When V2 Checkpoint is enabled, the baseCheckpoint refers to the sidecar files which will + // only have AddFile and RemoveFile actions. The other non-file actions will be written + // separately after sidecar files are written. + repartitioned + .select("add", "remove") + .withColumn("remove", col("remove").dropFields("tags", "stats")) + .where("add is not null or remove is not null") + } else { + // When V2 Checkpoint is disabled, the baseCheckpoint refers to the main classic checkpoint + // which has all actions except "commitInfo", "cdc", "checkpointMetadata", "sidecar". + repartitioned + .drop("commitInfo", "cdc", "checkpointMetadata", "sidecar") + .withColumn("remove", col("remove").dropFields("tags", "stats")) + } + } + + val chk = buildCheckpoint(base, snapshot) + val schema = chk.schema.asNullable + + val (factory, serConf) = { + val format = new ParquetFileFormat() + val job = Job.getInstance(hadoopConf) + (format.prepareWrite(spark, job, Map.empty, schema), + new SerializableConfiguration(job.getConfiguration)) + } + + // Use the SparkPath in the closure as Path is not Serializable. + val logSparkPath = SparkPath.fromPath(snapshot.path) + val version = snapshot.version + + // This is a hack to get spark to write directly to a file. + val qe = chk.queryExecution + def executeFinalCheckpointFiles(): Array[SerializableFileStatus] = qe + .executedPlan + .execute() + .mapPartitions { case iter => + val actualNumParts = Option(TaskContext.get()).map(_.numPartitions()) + .getOrElse(numParts) + val partition = TaskContext.getPartitionId() + val (writtenPath, finalPath) = Checkpoints.getCheckpointWritePath( + serConf.value, + logSparkPath.toPath, + version, + actualNumParts, + partition, + useRename, + v2CheckpointEnabled) + val fs = writtenPath.getFileSystem(serConf.value) + val writeAction = () => { + try { + val writer = factory.newInstance( + writtenPath.toString, + schema, + new TaskAttemptContextImpl( + new JobConf(serConf.value), + new TaskAttemptID("", 0, TaskType.REDUCE, 0, 0))) + + iter.foreach { row => + checkpointRowCount.add(1) + writer.write(row) + } + // Note: `writer.close()` is not put in a `finally` clause because we don't want to + // close it when an exception happens. 
Closing the file would flush the content to the + // storage and create an incomplete file. A concurrent reader might see it and fail. + // This would leak resources but we don't have a way to abort the storage request here. + writer.close() + } catch { + case e: org.apache.hadoop.fs.FileAlreadyExistsException if !useRename => + if (fs.exists(writtenPath)) { + // The file has been written by a zombie task. We can just use this checkpoint file + // rather than failing a Delta commit. + } else { + throw e + } + } + } + if (isGCSPath(serConf.value, writtenPath)) { + // GCS may upload an incomplete file when the current thread is interrupted, hence we move + // the write to a new thread so that the write cannot be interrupted. + // TODO Remove this hack when the GCS Hadoop connector fixes the issue. + DeltaFileOperations.runInNewThread("delta-gcs-checkpoint-write") { + writeAction() + } + } else { + writeAction() + } + if (useRename) { + renameAndCleanupTempPartFile(writtenPath, finalPath, fs) + } + val finalPathFileStatus = try { + fs.getFileStatus(finalPath) + } catch { + case _: FileNotFoundException if useRename => + throw DeltaErrors.failOnCheckpointRename(writtenPath, finalPath) + } + + Iterator(SerializableFileStatus.fromStatus(finalPathFileStatus)) + }.collect() + + val finalCheckpointFiles = SQLExecution.withNewExecutionId(qe, Some("Delta checkpoint")) { + executeFinalCheckpointFiles() + } + + if (numOfFiles.value != snapshot.numOfFiles) { + throw DeltaErrors.checkpointMismatchWithSnapshot + } + + val parquetFilesSizeInBytes = finalCheckpointFiles.map(_.length).sum + var overallCheckpointSizeInBytes = parquetFilesSizeInBytes + var overallNumCheckpointActions: Long = checkpointRowCount.value + var checkpointSchemaToWriteInLastCheckpoint: Option[StructType] = + Checkpoints.checkpointSchemaToWriteInLastCheckpointFile(spark, schema) + + val v2Checkpoint = if (v2CheckpointEnabled) { + val (v2CheckpointFileStatus, nonFileActionsWriten, v2Checkpoint, checkpointSchema) = + Checkpoints.writeTopLevelV2Checkpoint( + v2CheckpointFormatOpt.get, + finalCheckpointFiles, + spark, + schema, + snapshot, + deltaLog, + overallNumCheckpointActions, + parquetFilesSizeInBytes, + hadoopConf, + useRename + ) + overallCheckpointSizeInBytes += v2CheckpointFileStatus.getLen + overallNumCheckpointActions += nonFileActionsWriten.size + checkpointSchemaToWriteInLastCheckpoint = checkpointSchema + + Some(v2Checkpoint) + } else { + None + } + + if (!v2CheckpointEnabled && checkpointRowCount.value == 0) { + // In case of V2 Checkpoints, zero row count is possible. + logWarning(DeltaErrors.EmptyCheckpointErrorMessage) + } + + // If we don't parallelize, we use None for backwards compatibility + val checkpointParts = if (legacyMultiPartCheckpoint) Some(numParts) else None + + LastCheckpointInfo( + version = snapshot.version, + size = overallNumCheckpointActions, + parts = checkpointParts, + sizeInBytes = Some(overallCheckpointSizeInBytes), + numOfAddFiles = Some(snapshot.numOfFiles), + v2Checkpoint = v2Checkpoint, + checkpointSchema = checkpointSchemaToWriteInLastCheckpoint + ) + } + + /** + * Generate a tuple of the file to write the checkpoint and where it may later need + * to be copied. Should be used within a task, so that task or stage retries don't + * create the same files. 
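+ *
+ * When `useRename` is true, the first element is a hidden `.<name>.<uuid>.tmp` path next to
+ * the final file and the caller renames it afterwards; otherwise both elements are the final
+ * checkpoint path.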
+ */ + def getCheckpointWritePath( + conf: Configuration, + logPath: Path, + version: Long, + numParts: Int, + part: Int, + useRename: Boolean, + v2CheckpointEnabled: Boolean): (Path, Path) = { + def getCheckpointWritePath(path: Path): Path = { + if (useRename) { + val tempPath = + new Path(path.getParent, s".${path.getName}.${UUID.randomUUID}.tmp") + DeltaFileOperations.registerTempFileDeletionTaskFailureListener(conf, tempPath) + tempPath + } else { + path + } + } + val destinationName: Path = if (v2CheckpointEnabled) { + newV2CheckpointSidecarFile(logPath, version, numParts, part + 1) + } else { + if (numParts > 1) { + assert(part < numParts, s"Asked to create part: $part of max $numParts in checkpoint.") + checkpointFileWithParts(logPath, version, numParts)(part) + } else { + checkpointFileSingular(logPath, version) + } + } + + getCheckpointWritePath(destinationName) -> destinationName + } + + /** + * Writes a top-level V2 Checkpoint file which may point to multiple + * sidecar files. + * + * @param v2CheckpointFormat The format in which the top-level file should be + * written. Currently, json and parquet are supported. + * @param sidecarCheckpointFiles The list of sidecar files that have already been + * written. The top-level file will store this list. + * @param spark The current spark session + * @param sidecarSchema The schema of the sidecar parquet files. + * @param snapshot The snapshot for which the checkpoint is being written. + * @param deltaLog The deltaLog instance pointing to our tables deltaLog. + * @param rowsWrittenInCheckpointJob The number of rows that were written in total + * to the sidecar files. + * @param parquetFilesSizeInBytes The combined size of all sidecar files in bytes. + * @param hadoopConf The hadoopConf to use for the filesystem operation. + * @param useRename Whether we should first write to a temporary file and then + * rename it to the target file name during the write. + * @return A tuple containing + * 1. [[FileStatus]] of the newly created top-level V2Checkpoint. + * 2. The sequence of actions that were written to the top-level file. + * 3. An instance of the LastCheckpointV2 containing V2-checkpoint related + * metadata which can later be written to LAST_CHECKPOINT + * 4. Schema of the newly written top-level file (only for parquet files) + */ + protected[delta] def writeTopLevelV2Checkpoint( + v2CheckpointFormat: V2Checkpoint.Format, + sidecarCheckpointFiles: Array[SerializableFileStatus], + spark: SparkSession, + sidecarSchema: StructType, + snapshot: Snapshot, + deltaLog: DeltaLog, + rowsWrittenInCheckpointJob: Long, + parquetFilesSizeInBytes: Long, + hadoopConf: Configuration, + useRename: Boolean) : (FileStatus, Seq[Action], LastCheckpointV2, Option[StructType]) = { + // Write the main v2 checkpoint file. + val sidecarFilesWritten = sidecarCheckpointFiles.map(SidecarFile(_)).toSeq + // Filter out the sidecar schema if it is too large. 
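+ // (The cut-off is governed by DeltaSQLConf.CHECKPOINT_SCHEMA_WRITE_THRESHOLD_LENGTH via
+ // checkpointSchemaToWriteInLastCheckpointFile above.)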
+ val sidecarFileSchemaOpt = + Checkpoints.checkpointSchemaToWriteInLastCheckpointFile(spark, sidecarSchema) + val checkpointMetadata = CheckpointMetadata(snapshot.version) + + val nonFileActionsToWrite = + (checkpointMetadata +: sidecarFilesWritten) ++ snapshot.nonFileActions + val (v2CheckpointPath, checkpointSchemaToWriteInLastCheckpoint) = + if (v2CheckpointFormat == V2Checkpoint.Format.JSON) { + val v2CheckpointPath = newV2CheckpointJsonFile(deltaLog.logPath, snapshot.version) + deltaLog.store.write( + v2CheckpointPath, + nonFileActionsToWrite.map(_.json).toIterator, + overwrite = true, + hadoopConf = hadoopConf + ) + (v2CheckpointPath, None) + } else if (v2CheckpointFormat == V2Checkpoint.Format.PARQUET) { + val sparkSession = spark + // scalastyle:off sparkimplicits + import sparkSession.implicits._ + // scalastyle:on sparkimplicits + val dfToWrite = nonFileActionsToWrite.map(_.wrap).toDF() + val v2CheckpointPath = newV2CheckpointParquetFile(deltaLog.logPath, snapshot.version) + val schemaOfDfWritten = createCheckpointV2ParquetFile( + spark, dfToWrite, v2CheckpointPath, hadoopConf, useRename) + (v2CheckpointPath, Some(schemaOfDfWritten)) + } else { + throw DeltaErrors.assertionFailedError( + s"Unrecognized checkpoint V2 format: $v2CheckpointFormat") + } + // Main Checkpoint V2 File written successfully. Now create the last checkpoint v2 blob so + // that we can persist it in _last_checkpoint file. + val v2CheckpointFileStatus = + v2CheckpointPath.getFileSystem(hadoopConf).getFileStatus(v2CheckpointPath) + val unfilteredV2Checkpoint = LastCheckpointV2( + fileStatus = v2CheckpointFileStatus, + nonFileActions = Some((snapshot.nonFileActions :+ checkpointMetadata).map(_.wrap)), + sidecarFiles = Some(sidecarFilesWritten) + ) + ( + v2CheckpointFileStatus, + nonFileActionsToWrite, + trimLastCheckpointV2(unfilteredV2Checkpoint, spark), + checkpointSchemaToWriteInLastCheckpoint + ) + } + + /** + * Helper method to create a V2 Checkpoint parquet file or the V2 Checkpoint Compat file. + * V2 Checkpoint Compat files follow the same naming convention as classic checkpoints + * and they are needed so that V2Checkpoint-unaware readers can read them to understand + * that they don't have the capability to read table for which they were created. + * This is needed in cases where commit 0 has been cleaned up and the reader needs to + * read a checkpoint to read the [[Protocol]]. 
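+ *
+ * The input dataset is repartitioned to a single partition, so exactly one parquet file is
+ * produced at `finalPath`; the returned [[StructType]] is the (nullable) schema that was written.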
+ */ + def createCheckpointV2ParquetFile( + spark: SparkSession, + ds: Dataset[Row], + finalPath: Path, + hadoopConf: Configuration, + useRename: Boolean): StructType = recordFrameProfile( + "Checkpoints", "createCheckpointV2ParquetFile") { + val df = ds.select( + "txn", "add", "remove", "metaData", "protocol", "domainMetadata", + "checkpointMetadata", "sidecar") + val schema = df.schema.asNullable + val format = new ParquetFileFormat() + val job = Job.getInstance(hadoopConf) + val factory = format.prepareWrite(spark, job, Map.empty, schema) + val serConf = new SerializableConfiguration(job.getConfiguration) + val finalSparkPath = SparkPath.fromPath(finalPath) + + df.repartition(1) + .queryExecution + .executedPlan + .execute() + .mapPartitions { iter => + val actualNumParts = Option(TaskContext.get()).map(_.numPartitions()).getOrElse(1) + require(actualNumParts == 1, "The parquet V2 checkpoint must be written in 1 file") + val partition = TaskContext.getPartitionId() + val finalPath = finalSparkPath.toPath + val writePath = if (useRename) { + val tempPath = + new Path(finalPath.getParent, s".${finalPath.getName}.${UUID.randomUUID}.tmp") + DeltaFileOperations.registerTempFileDeletionTaskFailureListener(serConf.value, tempPath) + tempPath + } else { + finalPath + } + + val fs = writePath.getFileSystem(serConf.value) + + val attemptId = 0 + val taskAttemptContext = new TaskAttemptContextImpl( + new JobConf(serConf.value), + new TaskAttemptID("", 0, TaskType.REDUCE, partition, attemptId)) + + var writerOpt: Option[OutputWriter] = None + + try { + writerOpt = Some(factory.newInstance( + writePath.toString, + schema, + taskAttemptContext)) + + val writer = writerOpt.get + iter.foreach { row => + writer.write(row) + } + // Note: `writer.close()` is not put in a `finally` clause because we don't want to + // close it when an exception happens. Closing the file would flush the content to the + // storage and create an incomplete file. A concurrent reader might see it and fail. + // This would leak resources but we don't have a way to abort the storage request here. + writer.close() + } catch { + case _: org.apache.hadoop.fs.FileAlreadyExistsException + if !useRename && fs.exists(writePath) => + // The file has been written by a zombie task. We can just use this checkpoint file + // rather than failing a Delta commit. + case t: Throwable => + throw t + } + if (useRename) { + renameAndCleanupTempPartFile(writePath, finalPath, fs) + } + val finalPathFileStatus = try { + fs.getFileStatus(finalPath) + } catch { + case _: FileNotFoundException if useRename => + throw DeltaErrors.failOnCheckpointRename(writePath, finalPath) + } + Iterator(SerializableFileStatus.fromStatus(finalPathFileStatus)) + }.collect() + schema + } + + /** Bounds the size of a [[LastCheckpointV2]] by removing any oversized optional fields */ + def trimLastCheckpointV2( + lastCheckpointV2: LastCheckpointV2, + spark: SparkSession): LastCheckpointV2 = { + val nonFileActionThreshold = + spark.sessionState.conf.getConf(DeltaSQLConf.LAST_CHECKPOINT_NON_FILE_ACTIONS_THRESHOLD) + val sidecarThreshold = + spark.sessionState.conf.getConf(DeltaSQLConf.LAST_CHECKPOINT_SIDECARS_THRESHOLD) + lastCheckpointV2.copy( + sidecarFiles = lastCheckpointV2.sidecarFiles.filter(_.size <= sidecarThreshold), + nonFileActions = lastCheckpointV2.nonFileActions.filter(_.size <= nonFileActionThreshold)) + } + + /** + * Helper method to rename a `tempPath` checkpoint part file to `finalPath` checkpoint part file. 
+ * This also tries to handle any race conditions with Zombie tasks. + */ + private[delta] def renameAndCleanupTempPartFile( + tempPath: Path, finalPath: Path, fs: FileSystem): Unit = { + // If rename fails because the final path already exists, it's ok -- some zombie + // task probably got there first. + // We rely on the fact that all checkpoint writers write the same content to any given + // checkpoint part file. So it shouldn't matter which writer wins the race. + val renameSuccessful = try { + // Note that the fs.exists check here is redundant as fs.rename should fail if destination + // file already exists as per File System spec. But the LocalFS doesn't follow this and it + // overrides the final path even if it already exists. So we use exists here to handle that + // case. + // TODO: Remove isTesting and fs.exists check after fixing LocalFS + if (Utils.isTesting && fs.exists(finalPath)) { + false + } else { + fs.rename(tempPath, finalPath) + } + } catch { + case _: org.apache.hadoop.fs.FileAlreadyExistsException => false + } + if (!renameSuccessful) { + try { + fs.delete(tempPath, false) + } catch { case NonFatal(e) => + logWarning(s"Error while deleting the temporary checkpoint part file $tempPath", e) + } + } + } + + // scalastyle:off line.size.limit + /** + * All GCS paths can only have the scheme of "gs". Note: the scheme checking is case insensitive. + * See: + * - https://github.com/databricks/hadoop-connectors/blob/master/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemBase.java#L493 + * - https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/v2.2.3/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java#L88 + */ + // scalastyle:on line.size.limit + private[delta] def isGCSPath(hadoopConf: Configuration, path: Path): Boolean = { + val scheme = path.toUri.getScheme + if (scheme != null) { + scheme.equalsIgnoreCase("gs") + } else { + // When the schema is not available in the path, we check the file system scheme resolved from + // the path. + path.getFileSystem(hadoopConf).getScheme.equalsIgnoreCase("gs") + } + } + + /** + * Modify the contents of the add column based on the table properties + */ + private[delta] def buildCheckpoint(state: DataFrame, snapshot: Snapshot): DataFrame = { + val additionalCols = new mutable.ArrayBuffer[Column]() + val sessionConf = state.sparkSession.sessionState.conf + if (Checkpoints.shouldWriteStatsAsJson(snapshot)) { + additionalCols += col("add.stats").as("stats") + } + // We provide fine grained control using the session conf for now, until users explicitly + // opt in our out of the struct conf. 
+ val includeStructColumns = shouldWriteStatsAsStruct(sessionConf, snapshot) + if (includeStructColumns) { + val partitionValues = Checkpoints.extractPartitionValues( + snapshot.metadata.partitionSchema, "add.partitionValues") + additionalCols ++= partitionValues + } + state.withColumn("add", + when(col("add").isNotNull, struct(Seq( + col("add.path"), + col("add.partitionValues"), + col("add.size"), + col("add.modificationTime"), + col("add.dataChange"), // actually not really useful here + col("add.tags"), + col("add.deletionVector"), + col("add.baseRowId"), + col("add.defaultRowCommitVersion"), + col("add.clusteringProvider")) ++ + additionalCols: _* + )) + ) + } + + def shouldWriteStatsAsStruct(conf: SQLConf, snapshot: Snapshot): Boolean = { + DeltaConfigs.CHECKPOINT_WRITE_STATS_AS_STRUCT.fromMetaData(snapshot.metadata) + } + + def shouldWriteStatsAsJson(snapshot: Snapshot): Boolean = { + DeltaConfigs.CHECKPOINT_WRITE_STATS_AS_JSON.fromMetaData(snapshot.metadata) + } + + val STRUCT_PARTITIONS_COL_NAME = "partitionValues_parsed" + val STRUCT_STATS_COL_NAME = "stats_parsed" + + /** + * Creates a nested struct column of partition values that extract the partition values + * from the original MapType. + */ + def extractPartitionValues(partitionSchema: StructType, partitionValuesColName: String): + Option[Column] = { + val partitionValues = partitionSchema.map { field => + val physicalName = DeltaColumnMapping.getPhysicalName(field) + val attribute = UnresolvedAttribute.quotedString(partitionValuesColName) + new Column(Cast( + ElementAt( + attribute, + Literal(physicalName), + failOnError = false), + field.dataType, + ansiEnabled = false) + ).as(physicalName) + } + if (partitionValues.isEmpty) { + None + } else Some(struct(partitionValues: _*).as(STRUCT_PARTITIONS_COL_NAME)) + } +} + +object V2Checkpoint { + /** Format for V2 Checkpoints */ + sealed abstract class Format(val name: String) { + def fileFormat: FileFormat + } + + def toFormat(fileName: String): Format = fileName match { + case _ if fileName.endsWith(Format.JSON.name) => Format.JSON + case _ if fileName.endsWith(Format.PARQUET.name) => Format.PARQUET + case _ => throw new IllegalStateException(s"Unknown v2 checkpoint file format: ${fileName}") + } + + object Format { + /** json v2 checkpoint */ + object JSON extends Format("json") { + override def fileFormat: FileFormat = DeltaLogFileIndex.CHECKPOINT_FILE_FORMAT_JSON + } + + /** parquet v2 checkpoint */ + object PARQUET extends Format("parquet") { + override def fileFormat: FileFormat = DeltaLogFileIndex.CHECKPOINT_FILE_FORMAT_PARQUET + } + + /** All valid formats for the top level file of v2 checkpoints. */ + val ALL: Set[Format] = Set(Format.JSON, Format.PARQUET) + + /** The string representations of all the valid formats. */ + val ALL_AS_STRINGS: Set[String] = ALL.map(_.name) + } +} + +object CheckpointPolicy { + + sealed abstract class Policy(val name: String) { + override def toString: String = name + def needsV2CheckpointSupport: Boolean = true + } + + /** + * Write classic single file/multi-part checkpoints when this policy is enabled. + * Note that [[V2CheckpointTableFeature]] is not required for this checkpoint policy. + */ + case object Classic extends Policy("classic") { + override def needsV2CheckpointSupport: Boolean = false + } + + /** + * Write V2 checkpoints when this policy is enabled. + * This needs [[V2CheckpointTableFeature]] to be enabled on the table. 
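+ *
+ * For illustration, based on the definitions in this object:
+ * {{{
+ *   CheckpointPolicy.fromName("v2") == CheckpointPolicy.V2   // true
+ *   CheckpointPolicy.V2.needsV2CheckpointSupport             // true (inherited default)
+ * }}}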
+ */ + case object V2 extends Policy("v2") + + /** ALl checkpoint policies */ + val ALL: Seq[Policy] = Seq(Classic, V2) + + /** Converts a `name` String into a [[Policy]] */ + def fromName(name: String): Policy = ALL.find(_.name == name).getOrElse { + throw new IllegalArgumentException(s"Invalid policy $name") + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/Checksum.scala b/spark/src/main/scala/org/apache/spark/sql/delta/Checksum.scala new file mode 100644 index 00000000000..8a3ea9b6a7c --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/Checksum.scala @@ -0,0 +1,168 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.FileNotFoundException +import java.nio.charset.StandardCharsets.UTF_8 + +// scalastyle:off import.ordering.noEmptyLine +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.FileSizeHistogram +import org.apache.spark.sql.delta.storage.LogStore +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.streaming.CheckpointFileManager +import org.apache.spark.util.Utils + +/** + * Stats calculated within a snapshot, which we store along individual transactions for + * verification. + * + * @param txnId Optional transaction identifier + * @param tableSizeBytes The size of the table in bytes + * @param numFiles Number of `AddFile` actions in the snapshot + * @param numMetadata Number of `Metadata` actions in the snapshot + * @param numProtocol Number of `Protocol` actions in the snapshot + * @param histogramOpt Optional file size histogram + */ +case class VersionChecksum( + txnId: Option[String], + tableSizeBytes: Long, + numFiles: Long, + numMetadata: Long, + numProtocol: Long, + setTransactions: Option[Seq[SetTransaction]], + domainMetadata: Option[Seq[DomainMetadata]], + metadata: Metadata, + protocol: Protocol, + histogramOpt: Option[FileSizeHistogram], + allFiles: Option[Seq[AddFile]]) + +/** + * Record the state of the table as a checksum file along with a commit. 
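+ *
+ * The checksum is serialized as a single JSON line and written atomically to
+ * `FileNames.checksumFile(logPath, version)`; the write is skipped when
+ * [[DeltaSQLConf.DELTA_WRITE_CHECKSUM_ENABLED]] is disabled (see `writeChecksumFile` below).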
+ */ +trait RecordChecksum extends DeltaLogging { + val deltaLog: DeltaLog + protected def spark: SparkSession + + private lazy val writer = + CheckpointFileManager.create(deltaLog.logPath, deltaLog.newDeltaHadoopConf()) + + protected def writeChecksumFile(txnId: String, snapshot: Snapshot): Unit = { + if (!spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_WRITE_CHECKSUM_ENABLED)) { + return + } + + val version = snapshot.version + val checksum = snapshot.computeChecksum.copy(txnId = Some(txnId)) + val eventData = mutable.Map[String, Any]("operationSucceeded" -> false) + eventData("numAddFileActions") = checksum.allFiles.map(_.size).getOrElse(-1) + eventData("numSetTransactionActions") = checksum.setTransactions.map(_.size).getOrElse(-1) + val startTimeMs = System.currentTimeMillis() + try { + val toWrite = JsonUtils.toJson(checksum) + "\n" + eventData("jsonSerializationTimeTakenMs") = System.currentTimeMillis() - startTimeMs + eventData("checksumLength") = toWrite.length + val stream = writer.createAtomic( + FileNames.checksumFile(deltaLog.logPath, version), + overwriteIfPossible = false) + try { + stream.write(toWrite.getBytes(UTF_8)) + stream.close() + eventData("overallTimeTakenMs") = System.currentTimeMillis() - startTimeMs + eventData("operationSucceeded") = true + } catch { + case NonFatal(e) => + logWarning(s"Failed to write the checksum for version: $version", e) + stream.cancel() + } + } catch { + case NonFatal(e) => + logWarning(s"Failed to write the checksum for version: $version", e) + } + recordDeltaEvent( + deltaLog, + opType = "delta.checksum.write", + data = eventData) + } +} + +/** + * Read checksum files. + */ +trait ReadChecksum extends DeltaLogging { self: DeltaLog => + + val logPath: Path + private[delta] def store: LogStore + + private[delta] def readChecksum(version: Long): Option[VersionChecksum] = { + recordDeltaOperation(self, "delta.readChecksum") { + val checksumFile = FileNames.checksumFile(logPath, version) + + var exception: Option[String] = None + val content = try Some(store.read(checksumFile, newDeltaHadoopConf())) catch { + case NonFatal(e) => + // We expect FileNotFoundException; if it's another kind of exception, we still catch them + // here but we log them in the checksum error event below. + if (!e.isInstanceOf[FileNotFoundException]) { + exception = Some(Utils.exceptionString(e)) + } + None + } + + if (content.isEmpty) { + // We may not find the checksum file in two cases: + // - We just upgraded our Spark version from an old one + // - Race conditions where we commit a transaction, and before we can write the checksum + // this reader lists the new version, and uses it to create the snapshot. 
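+ // In either case, record the miss below and return None so the caller proceeds without
+ // a checksum.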
+ recordDeltaEvent( + this, + "delta.checksum.error.missing", + data = Map("version" -> version) ++ exception.map("exception" -> _)) + + return None + } + val checksumData = content.get + if (checksumData.isEmpty) { + recordDeltaEvent( + this, + "delta.checksum.error.empty", + data = Map("version" -> version)) + return None + } + try { + Option(JsonUtils.mapper.readValue[VersionChecksum](checksumData.head)) + } catch { + case NonFatal(e) => + recordDeltaEvent( + this, + "delta.checksum.error.parsing", + data = Map("exception" -> Utils.exceptionString(e))) + None + } + } + } +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/ColumnWithDefaultExprUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/ColumnWithDefaultExprUtils.scala new file mode 100644 index 00000000000..55b3e658784 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/ColumnWithDefaultExprUtils.scala @@ -0,0 +1,235 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import scala.collection.mutable + +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.constraints.{Constraint, Constraints} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.{DeltaSourceUtils, DeltaSQLConf} + +import org.apache.spark.sql.{Column, DataFrame, Dataset, Encoder} +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.EqualNullSafe +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._ +import org.apache.spark.sql.execution.QueryExecution +import org.apache.spark.sql.execution.streaming.{IncrementalExecution, StreamExecution} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType} + +/** + * Provide utilities to handle columns with default expressions. + */ +object ColumnWithDefaultExprUtils extends DeltaLogging { + val USE_NULL_AS_DEFAULT_DELTA_OPTION = "__use_null_as_default" + + // Returns true if column `field` is defined as an IDENTITY column. + def isIdentityColumn(field: StructField): Boolean = { + val md = field.metadata + val hasStart = md.contains(DeltaSourceUtils.IDENTITY_INFO_START) + val hasStep = md.contains(DeltaSourceUtils.IDENTITY_INFO_STEP) + val hasInsert = md.contains(DeltaSourceUtils.IDENTITY_INFO_ALLOW_EXPLICIT_INSERT) + // Verify that we have all or none of the three fields. 
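+ // That is, (hasStart, hasStep, hasInsert) must be all true or all false; any other
+ // combination indicates corrupted IDENTITY column metadata and we fail fast.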
+ if (!((hasStart == hasStep) && (hasStart == hasInsert))) { + throw DeltaErrors.identityColumnInconsistentMetadata(field.name, hasStart, hasStep, hasInsert) + } + hasStart && hasStep && hasInsert + } + + // Return true if `schema` contains any number of IDENTITY column. + def hasIdentityColumn(schema: StructType): Boolean = schema.exists(isIdentityColumn) + + // Return if `protocol` satisfies the requirement for IDENTITY columns. + def satisfiesIdentityColumnProtocol(protocol: Protocol): Boolean = + protocol.minWriterVersion == 6 || protocol.writerFeatureNames.contains("identityColumns") + + // Return true if the column `col` has default expressions (and can thus be omitted from the + // insertion list). + def columnHasDefaultExpr( + protocol: Protocol, + col: StructField, + nullAsDefault: Boolean): Boolean = { + col.metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY) || + (col.nullable && nullAsDefault) || + GeneratedColumn.isGeneratedColumn(protocol, col) + } + + // Return true if the column `col` cannot be included as the input data column of COPY INTO. + // TODO: ideally column with default value can be optionally excluded. + def shouldBeExcludedInCopyInto(protocol: Protocol, col: StructField): Boolean = { + GeneratedColumn.isGeneratedColumn(protocol, col) + } + + // Return true if the table with `metadata` has default expressions. + def tableHasDefaultExpr( + protocol: Protocol, + metadata: Metadata, + nullAsDefault: Boolean): Boolean = { + metadata.schema.exists { f => + f.metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY) || + (f.nullable && nullAsDefault) + } || + GeneratedColumn.enforcesGeneratedColumns(protocol, metadata) + } + + /** + * If there are columns with default expressions in `schema`, add a new project to generate + * those columns missing in the schema, and return constraints for generated columns existing in + * the schema. + * + * @param deltaLog The table's [[DeltaLog]] used for logging. + * @param queryExecution Used to check whether the original query is a streaming query or not. + * @param schema Table schema. + * @param data The data to be written into the table. + * @param nullAsDefault If true, use null literal as the default value for missing columns. + * @return The data with potentially additional default expressions projected and constraints + * from generated columns if any. + */ + def addDefaultExprsOrReturnConstraints( + deltaLog: DeltaLog, + protocol: Protocol, + queryExecution: QueryExecution, + schema: StructType, + data: DataFrame, + nullAsDefault: Boolean): (DataFrame, Seq[Constraint], Set[String]) = { + val topLevelOutputNames = CaseInsensitiveMap(data.schema.map(f => f.name -> f).toMap) + lazy val metadataOutputNames = CaseInsensitiveMap(schema.map(f => f.name -> f).toMap) + val constraints = mutable.ArrayBuffer[Constraint]() + val track = mutable.Set[String]() + var selectExprs = schema.flatMap { f => + GeneratedColumn.getGenerationExpression(f) match { + case Some(expr) if GeneratedColumn.satisfyGeneratedColumnProtocol(protocol) => + if (topLevelOutputNames.contains(f.name)) { + val column = SchemaUtils.fieldToColumn(f) + // Add a constraint to make sure the value provided by the user is the same as the value + // calculated by the generation expression. 
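For writes, the net effect of the null-as-default branch is that a top-level table column missing from the incoming DataFrame is projected as a typed null (generated columns, CDC columns, and explicit default values take the other branches). A rough sketch of that projection with the public DataFrame API, ignoring nested fields and case-sensitivity settings:

import org.apache.spark.sql.{DataFrame, functions => F}
import org.apache.spark.sql.types.StructType

object NullAsDefaultSketch {
  // Add a null literal, cast to the column's type, for every table column the
  // incoming data does not provide, then select in table-schema order.
  def alignToSchema(data: DataFrame, tableSchema: StructType): DataFrame = {
    val present = data.columns.map(_.toLowerCase).toSet
    val cols = tableSchema.fields.map { f =>
      if (present.contains(f.name.toLowerCase)) F.col(f.name)
      else F.lit(null).cast(f.dataType).as(f.name)
    }
    data.select(cols.toSeq: _*)
  }
}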
+ constraints += Constraints.Check(s"Generated Column", EqualNullSafe(column.expr, expr)) + Some(column) + } else { + Some(new Column(expr).alias(f.name)) + } + case _ => + if (topLevelOutputNames.contains(f.name) || + !data.sparkSession.conf.get(DeltaSQLConf.GENERATED_COLUMN_ALLOW_NULLABLE)) { + Some(SchemaUtils.fieldToColumn(f)) + } else { + // we only want to consider columns that are in the data's schema or are generated + // to allow DataFrame with null columns to be written. + // The actual check for nullability on data is done in the DeltaInvariantCheckerExec + getDefaultValueExprOrNullLit(f, nullAsDefault).map(new Column(_)) + } + } + } + val cdcSelectExprs = CDCReader.CDC_COLUMNS_IN_DATA.flatMap { cdcColumnName => + topLevelOutputNames.get(cdcColumnName).flatMap { cdcField => + if (metadataOutputNames.contains(cdcColumnName)) { + // The column is in the table schema. It's not a CDC auto generated column. Skip it since + // it's already in `selectExprs`. + None + } else { + // The column is not in the table schema, + // so it must be a column generated by CDC. Adding it back as it's not in `selectExprs`. + Some(SchemaUtils.fieldToColumn(cdcField).alias(cdcField.name)) + } + } + } + selectExprs = selectExprs ++ cdcSelectExprs + val newData = queryExecution match { + case incrementalExecution: IncrementalExecution => + selectFromStreamingDataFrame(incrementalExecution, data, selectExprs: _*) + case _ => data.select(selectExprs: _*) + } + recordDeltaEvent(deltaLog, "delta.generatedColumns.write") + (newData, constraints.toSeq, track.toSet) + } + + // Removes the default expressions properties from the schema. If `keepGeneratedColumns` is + // true, generated column expressions are kept. If `keepIdentityColumns` is true, IDENTITY column + // properties are kept. + def removeDefaultExpressions( + schema: StructType, + keepGeneratedColumns: Boolean = false, + keepIdentityColumns: Boolean = false): StructType = { + var updated = false + val updatedSchema = schema.map { field => + if (!keepGeneratedColumns && GeneratedColumn.isGeneratedColumn(field)) { + updated = true + val newMetadata = new MetadataBuilder() + .withMetadata(field.metadata) + .remove(DeltaSourceUtils.GENERATION_EXPRESSION_METADATA_KEY) + .build() + field.copy(metadata = newMetadata) + } else if (!keepIdentityColumns && isIdentityColumn(field)) { + updated = true + val newMetadata = new MetadataBuilder() + .withMetadata(field.metadata) + .remove(DeltaSourceUtils.IDENTITY_INFO_ALLOW_EXPLICIT_INSERT) + .remove(DeltaSourceUtils.IDENTITY_INFO_HIGHWATERMARK) + .remove(DeltaSourceUtils.IDENTITY_INFO_START) + .remove(DeltaSourceUtils.IDENTITY_INFO_STEP) + .build() + field.copy(metadata = newMetadata) + } else { + field + } + } + if (updated) { + StructType(updatedSchema) + } else { + schema + } + } + + /** + * Select `cols` from a micro batch DataFrame. Directly calling `select` won't work because it + * will create a `QueryExecution` rather than inheriting `IncrementalExecution` from + * the micro batch DataFrame. A streaming micro batch DataFrame to execute should use + * `IncrementalExecution`. 
+ */ + private def selectFromStreamingDataFrame( + incrementalExecution: IncrementalExecution, + df: DataFrame, + cols: Column*): DataFrame = { + val newMicroBatch = df.select(cols: _*) + val newIncrementalExecution = new IncrementalExecution( + newMicroBatch.sparkSession, + newMicroBatch.queryExecution.logical, + incrementalExecution.outputMode, + incrementalExecution.checkpointLocation, + incrementalExecution.queryId, + incrementalExecution.runId, + incrementalExecution.currentBatchId, + incrementalExecution.prevOffsetSeqMetadata, + incrementalExecution.offsetSeqMetadata, + incrementalExecution.watermarkPropagator + ) + newIncrementalExecution.executedPlan // Force the lazy generation of execution plan + + + // Use reflection to call the private constructor. + val constructor = + classOf[Dataset[_]].getConstructor(classOf[QueryExecution], classOf[Encoder[_]]) + constructor.newInstance( + newIncrementalExecution, + ExpressionEncoder(newIncrementalExecution.analyzed.schema)).asInstanceOf[DataFrame] + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/ConflictChecker.scala b/spark/src/main/scala/org/apache/spark/sql/delta/ConflictChecker.scala new file mode 100644 index 00000000000..85e75efcb0a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/ConflictChecker.scala @@ -0,0 +1,583 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.util.concurrent.TimeUnit + +import scala.collection.mutable + +import org.apache.spark.sql.delta.RowId.RowTrackingMetadataDomain +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.DeltaSparkPlanUtils.CheckDeterministicOptions +import org.apache.spark.sql.delta.util.FileNames +import org.apache.hadoop.fs.FileStatus + +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionSet, Or} +import org.apache.spark.sql.types.StructType + +/** + * A class representing different attributes of current transaction needed for conflict detection. 
+ * + * @param readPredicates predicates by which files have been queried by the transaction + * @param readFiles files that have been seen by the transaction + * @param readWholeTable whether the whole table was read during the transaction + * @param readAppIds appIds that have been seen by the transaction + * @param metadata table metadata for the transaction + * @param actions delta log actions that the transaction wants to commit + * @param readSnapshot read [[Snapshot]] used for the transaction + * @param commitInfo [[CommitInfo]] for the commit + */ +private[delta] case class CurrentTransactionInfo( + val txnId: String, + val readPredicates: Seq[DeltaTableReadPredicate], + val readFiles: Set[AddFile], + val readWholeTable: Boolean, + val readAppIds: Set[String], + val metadata: Metadata, + val protocol: Protocol, + val actions: Seq[Action], + val readSnapshot: Snapshot, + val commitInfo: Option[CommitInfo], + val readRowIdHighWatermark: Long, + val domainMetadata: Seq[DomainMetadata]) { + + /** + * Final actions to commit - including the [[CommitInfo]] which should always come first so we can + * extract it easily from a commit without having to parse an arbitrarily large file. + * + * TODO: We might want to cluster all non-file actions at the front, for similar reasons. + */ + lazy val finalActionsToCommit: Seq[Action] = commitInfo ++: actions + + /** Whether this transaction wants to make any [[Metadata]] update */ + lazy val metadataChanged: Boolean = actions.exists { + case _: Metadata => true + case _ => false + } + + + /** + * Partition schema corresponding to the read snapshot for this transaction. + * NOTE: In conflict detection, we should be careful around whether we want to use the new schema + * which this txn wants to update OR the old schema from the read snapshot. + * e.g. the ConcurrentAppend check makes sure that no new files have been added concurrently + * that this transaction should have read. So this should use the read snapshot partition schema + * and not the new partition schema which this txn is introducing. Using the new schema can cause + * issues. 
+ */ + val partitionSchemaAtReadTime: StructType = readSnapshot.metadata.partitionSchema + + def isConflict(winningTxn: SetTransaction): Boolean = readAppIds.contains(winningTxn.appId) +} + +/** + * Summary of the Winning commit against which we want to check the conflict + * @param actions - delta log actions committed by the winning commit + * @param commitVersion - winning commit version + */ +private[delta] class WinningCommitSummary(val actions: Seq[Action], val commitVersion: Long) { + + val metadataUpdates: Seq[Metadata] = actions.collect { case a: Metadata => a } + val appLevelTransactions: Seq[SetTransaction] = actions.collect { case a: SetTransaction => a } + val protocol: Option[Protocol] = actions.collectFirst { case a: Protocol => a } + val commitInfo: Option[CommitInfo] = actions.collectFirst { case a: CommitInfo => a }.map( + ci => ci.copy(version = Some(commitVersion))) + val removedFiles: Seq[RemoveFile] = actions.collect { case a: RemoveFile => a } + val addedFiles: Seq[AddFile] = actions.collect { case a: AddFile => a } + val isBlindAppendOption: Option[Boolean] = commitInfo.flatMap(_.isBlindAppend) + val blindAppendAddedFiles: Seq[AddFile] = if (isBlindAppendOption.getOrElse(false)) { + addedFiles + } else { + Seq() + } + val changedDataAddedFiles: Seq[AddFile] = if (isBlindAppendOption.getOrElse(false)) { + Seq() + } else { + addedFiles + } + val onlyAddFiles: Boolean = actions.collect { case f: FileAction => f } + .forall(_.isInstanceOf[AddFile]) + +} + +private[delta] class ConflictChecker( + spark: SparkSession, + initialCurrentTransactionInfo: CurrentTransactionInfo, + winningCommitFileStatus: FileStatus, + isolationLevel: IsolationLevel) extends DeltaLogging with ConflictCheckerPredicateElimination { + + protected val winningCommitVersion = FileNames.deltaVersion(winningCommitFileStatus) + protected val startTimeMs = System.currentTimeMillis() + protected val timingStats = mutable.HashMap[String, Long]() + protected val deltaLog = initialCurrentTransactionInfo.readSnapshot.deltaLog + + protected var currentTransactionInfo: CurrentTransactionInfo = initialCurrentTransactionInfo + + protected lazy val winningCommitSummary: WinningCommitSummary = createWinningCommitSummary() + + /** + * This function checks conflict of the `initialCurrentTransactionInfo` against the + * `winningCommitVersion` and returns an updated [[CurrentTransactionInfo]] that represents + * the transaction as if it had started while reading the `winningCommitVersion`. + */ + def checkConflicts(): CurrentTransactionInfo = { + // Check early the protocol and metadata compatibility that is required for subsequent + // file-level checks. + checkProtocolCompatibility() + checkNoMetadataUpdates() + checkIfDomainMetadataConflict() + + // Perform cheap check for transaction dependencies before we start checks files. + checkForUpdatedApplicationTransactionIdsThatCurrentTxnDependsOn() + + // Row Tracking reconciliation. We perform this before the file checks to ensure that + // no files have duplicate row IDs and avoid interacting with files that don't comply with + // the protocol. + reassignOverlappingRowIds() + reassignRowCommitVersions() + + // Data file checks. + checkForAddedFilesThatShouldHaveBeenReadByCurrentTxn() + checkForDeletedFilesAgainstCurrentTxnReadFiles() + checkForDeletedFilesAgainstCurrentTxnDeletedFiles() + + logMetrics() + currentTransactionInfo + } + + /** + * Initializes [[WinningCommitSummary]] for the already committed + * transaction (winning transaction). 
+ */ + protected def createWinningCommitSummary(): WinningCommitSummary = { + recordTime("initialize-old-commit") { + val winningCommitActions = deltaLog.store.read( + winningCommitFileStatus, + deltaLog.newDeltaHadoopConf() + ).map(Action.fromJson) + new WinningCommitSummary(winningCommitActions, winningCommitVersion) + } + } + + /** + * Asserts that the client is up to date with the protocol and is allowed to read and write + * against the protocol set by the committed transaction. + */ + protected def checkProtocolCompatibility(): Unit = { + if (winningCommitSummary.protocol.nonEmpty) { + winningCommitSummary.protocol.foreach { p => + deltaLog.protocolRead(p) + deltaLog.protocolWrite(p) + currentTransactionInfo = currentTransactionInfo.copy(protocol = p) + } + if (currentTransactionInfo.actions.exists(_.isInstanceOf[Protocol])) { + throw DeltaErrors.protocolChangedException(winningCommitSummary.commitInfo) + } + // When a protocol downgrade occurs all other interleaved txns abort. Note, that in the + // opposite scenario, when the current transaction is the protocol downgrade, we resolve + // the conflict and proceed with the downgrade. This is because a protocol downgrade would + // be hard to succeed in concurrent workloads. On the other hand, a protocol downgrade is + // a rare event and thus not that disruptive if other concurrent transactions fail. + val winningProtocol = winningCommitSummary.protocol.get + val readProtocol = currentTransactionInfo.readSnapshot.protocol + val isWinnerDroppingFeatures = TableFeature.isProtocolRemovingExplicitFeatures( + newProtocol = winningProtocol, + oldProtocol = readProtocol) + if (isWinnerDroppingFeatures) { + throw DeltaErrors.protocolChangedException(winningCommitSummary.commitInfo) + } + } + // When the winning transaction does not change the protocol but the losing txn is + // a protocol downgrade, we re-validate the invariants of the removed feature. + // TODO: only revalidate against the snapshot of the last interleaved txn. + val currentProtocol = currentTransactionInfo.protocol + val readProtocol = currentTransactionInfo.readSnapshot.protocol + if (TableFeature.isProtocolRemovingExplicitFeatures(currentProtocol, readProtocol)) { + val winningSnapshot = deltaLog.getSnapshotAt(winningCommitSummary.commitVersion) + val isDowngradeCommitValid = TableFeature.validateFeatureRemovalAtSnapshot( + newProtocol = currentProtocol, + oldProtocol = readProtocol, + snapshot = winningSnapshot) + if (!isDowngradeCommitValid) { + throw DeltaErrors.dropTableFeatureConflictRevalidationFailed( + winningCommitSummary.commitInfo) + } + } + } + + /** + * Check if the committed transaction has changed metadata. + */ + protected def checkNoMetadataUpdates(): Unit = { + // Fail if the metadata is different than what the txn read. + if (winningCommitSummary.metadataUpdates.nonEmpty) { + throw DeltaErrors.metadataChangedException(winningCommitSummary.commitInfo) + } + } + + /** + * Filters the [[files]] list with the partition predicates of the current transaction + * and returns the first file that is matching. + */ + protected def getFirstFileMatchingPartitionPredicates(files: Seq[AddFile]): Option[AddFile] = { + // Blind appends do not read the table. + if (currentTransactionInfo.commitInfo.flatMap(_.isBlindAppend).getOrElse(false)) { + assert(currentTransactionInfo.readPredicates.isEmpty) + return None + } + + // There is no reason to filter files if the table is not partitioned. 
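The code that follows implements a simple decision: blind appends conflict with nothing, a transaction that read the whole table (or an unpartitioned table) conflicts with any concurrently added file, and otherwise only files whose partition values match a read predicate count. A stripped-down sketch of that decision over plain collections, where FileEntry and the predicate function are illustrative stand-ins rather than Delta's AddFile and expression machinery:

// Illustrative stand-in for a file action carrying partition values.
case class FileEntry(path: String, partitionValues: Map[String, String])

object PartitionConflictSketch {
  def firstConflict(
      added: Seq[FileEntry],
      readPredicate: Map[String, String] => Boolean,
      isBlindAppend: Boolean,
      readWholeTableOrUnpartitioned: Boolean): Option[FileEntry] = {
    if (isBlindAppend) None // the transaction read nothing, so nothing conflicts
    else if (readWholeTableOrUnpartitioned) added.headOption
    else added.find(f => readPredicate(f.partitionValues))
  }
}

// A txn that only read date=2024-01-01 conflicts with files added to that partition:
// PartitionConflictSketch.firstConflict(
//   files, _.get("date").contains("2024-01-01"), isBlindAppend = false,
//   readWholeTableOrUnpartitioned = false)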
+ if (currentTransactionInfo.readWholeTable || + currentTransactionInfo.readSnapshot.metadata.partitionColumns.isEmpty) { + return files.headOption + } + + import org.apache.spark.sql.delta.implicits._ + val filesDf = files.toDF(spark) + + spark.conf.get(DeltaSQLConf.DELTA_CONFLICT_DETECTION_WIDEN_NONDETERMINISTIC_PREDICATES) match { + case DeltaSQLConf.NonDeterministicPredicateWidening.OFF => + getFirstFileMatchingPartitionPredicatesInternal( + filesDf, shouldWidenNonDeterministicPredicates = false, shouldWidenAllUdf = false) + case wideningMode => + val fileWithWidening = getFirstFileMatchingPartitionPredicatesInternal( + filesDf, shouldWidenNonDeterministicPredicates = true, shouldWidenAllUdf = true) + + fileWithWidening.flatMap { fileWithWidening => + val fileWithoutWidening = + getFirstFileMatchingPartitionPredicatesInternal( + filesDf, shouldWidenNonDeterministicPredicates = false, shouldWidenAllUdf = false) + if (fileWithoutWidening.isEmpty) { + // Conflict due to widening of non-deterministic predicate. + recordDeltaEvent(deltaLog, + opType = "delta.conflictDetection.partitionLevelConcurrency." + + "additionalConflictDueToWideningOfNonDeterministicPredicate", + data = Map( + "wideningMode" -> wideningMode, + "predicate" -> + currentTransactionInfo.readPredicates.map(_.partitionPredicate.toString), + "deterministicUDFs" -> containsDeterministicUDF( + currentTransactionInfo.readPredicates, partitionedOnly = true)) + ) + } + if (wideningMode == DeltaSQLConf.NonDeterministicPredicateWidening.ON) { + Some(fileWithWidening) + } else { + fileWithoutWidening + } + } + } + } + + private def getFirstFileMatchingPartitionPredicatesInternal( + filesDf: DataFrame, + shouldWidenNonDeterministicPredicates: Boolean, + shouldWidenAllUdf: Boolean): Option[AddFile] = { + + def rewritePredicateFn( + predicate: Expression, + shouldRewriteFilter: Boolean): DeltaTableReadPredicate = { + val rewrittenPredicate = if (shouldWidenNonDeterministicPredicates) { + val checkDeterministicOptions = + CheckDeterministicOptions(allowDeterministicUdf = !shouldWidenAllUdf) + eliminateNonDeterministicPredicates(Seq(predicate), checkDeterministicOptions).newPredicates + } else { + Seq(predicate) + } + DeltaTableReadPredicate( + partitionPredicates = rewrittenPredicate, + shouldRewriteFilter = shouldRewriteFilter) + } + + // we need to canonicalize the partition predicates per each group of rewrites vs. 
nonRewrites
+ val canonicalPredicates = currentTransactionInfo.readPredicates
+ .partition(_.shouldRewriteFilter) match {
+ case (rewrites, nonRewrites) =>
+ val canonicalRewrites =
+ ExpressionSet(rewrites.map(_.partitionPredicate)).map(
+ predicate => rewritePredicateFn(predicate, shouldRewriteFilter = true))
+ val canonicalNonRewrites =
+ ExpressionSet(nonRewrites.map(_.partitionPredicate)).map(
+ predicate => rewritePredicateFn(predicate, shouldRewriteFilter = false))
+ canonicalRewrites ++ canonicalNonRewrites
+ }
+
+ import org.apache.spark.sql.delta.implicits._
+ val filesMatchingPartitionPredicates = canonicalPredicates.iterator
+ .flatMap { readPredicate =>
+ DeltaLog.filterFileList(
+ partitionSchema = currentTransactionInfo.partitionSchemaAtReadTime,
+ files = filesDf,
+ partitionFilters = readPredicate.partitionPredicates,
+ shouldRewritePartitionFilters = readPredicate.shouldRewriteFilter
+ ).as[AddFile].head(1).headOption
+ }.take(1).toArray
+
+ filesMatchingPartitionPredicates.headOption
+ }
+
+ /**
+ * Check if the new files added by the already committed transactions should have been read by
+ * the current transaction.
+ */
+ protected def checkForAddedFilesThatShouldHaveBeenReadByCurrentTxn(): Unit = {
+ recordTime("checked-appends") {
+ // Fail if new files have been added that the txn should have read.
+ val addedFilesToCheckForConflicts = isolationLevel match {
+ case WriteSerializable if !currentTransactionInfo.metadataChanged =>
+ winningCommitSummary.changedDataAddedFiles // don't conflict with blind appends
+ case Serializable | WriteSerializable =>
+ winningCommitSummary.changedDataAddedFiles ++ winningCommitSummary.blindAppendAddedFiles
+ case SnapshotIsolation =>
+ Seq.empty
+ }
+
+ val fileMatchingPartitionReadPredicates =
+ getFirstFileMatchingPartitionPredicates(addedFilesToCheckForConflicts)
+
+ if (fileMatchingPartitionReadPredicates.nonEmpty) {
+ val isWriteSerializable = isolationLevel == WriteSerializable
+
+ val retryMsg = if (isWriteSerializable && winningCommitSummary.onlyAddFiles &&
+ winningCommitSummary.isBlindAppendOption.isEmpty) {
+ // The transaction was made by an older version which did not set the `isBlindAppend` flag.
+ // So even if it looks like an append, we don't know for sure if it was a blind append
+ // or not. So we suggest upgrading all their workloads to the latest version.
+ Some(
+ "Upgrading all your concurrent writers to use the latest Delta Lake may " +
+ "avoid this error. Please upgrade and then retry this operation again.")
+ } else None
+ throw DeltaErrors.concurrentAppendException(
+ winningCommitSummary.commitInfo,
+ getPrettyPartitionMessage(fileMatchingPartitionReadPredicates.get.partitionValues),
+ retryMsg)
+ }
+ }
+ }
+
+ /**
+ * Check if [[RemoveFile]] actions added by already committed transactions conflict with files
+ * read by the current transaction.
+ */
+ protected def checkForDeletedFilesAgainstCurrentTxnReadFiles(): Unit = {
+ recordTime("checked-deletes") {
+ // Fail if files have been deleted that the txn read.
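The delete/read check that follows reduces to a set-overlap test: any path the transaction read that the winning commit removed is a conflict, and a whole-table read conflicts with any removal at all. A minimal sketch over plain collections (not Delta's action types):

object DeleteReadConflictSketch {
  // Returns the first removed path the current transaction had read, treating a
  // whole-table read as having read every path.
  def firstConflict(
      readPaths: Set[String],
      removedPaths: Seq[String],
      readWholeTable: Boolean): Option[String] = {
    removedPaths.find(readPaths.contains).orElse {
      if (readWholeTable) removedPaths.headOption else None
    }
  }
}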
+ val readFilePaths = currentTransactionInfo.readFiles.map( + f => f.path -> f.partitionValues).toMap + val deleteReadOverlap = winningCommitSummary.removedFiles + .find(r => readFilePaths.contains(r.path)) + if (deleteReadOverlap.nonEmpty) { + val filePath = deleteReadOverlap.get.path + val partition = getPrettyPartitionMessage(readFilePaths(filePath)) + throw DeltaErrors.concurrentDeleteReadException( + winningCommitSummary.commitInfo, s"$filePath in $partition") + } + if (winningCommitSummary.removedFiles.nonEmpty && currentTransactionInfo.readWholeTable) { + val filePath = winningCommitSummary.removedFiles.head.path + throw DeltaErrors.concurrentDeleteReadException( + winningCommitSummary.commitInfo, s"$filePath") + } + } + } + + /** + * Check if [[RemoveFile]] actions added by already committed transactions conflicts with + * [[RemoveFile]] actions this transaction is trying to add. + */ + protected def checkForDeletedFilesAgainstCurrentTxnDeletedFiles(): Unit = { + recordTime("checked-2x-deletes") { + // Fail if a file is deleted twice. + val txnDeletes = currentTransactionInfo.actions + .collect { case r: RemoveFile => r } + .map(_.path).toSet + val deleteOverlap = winningCommitSummary.removedFiles.map(_.path).toSet intersect txnDeletes + if (deleteOverlap.nonEmpty) { + throw DeltaErrors.concurrentDeleteDeleteException( + winningCommitSummary.commitInfo, deleteOverlap.head) + } + } + } + + /** + * Checks if the winning transaction corresponds to some AppId on which current transaction + * also depends. + */ + protected def checkForUpdatedApplicationTransactionIdsThatCurrentTxnDependsOn(): Unit = { + // Fail if the appIds seen by the current transaction has been updated by the winning + // transaction i.e. the winning transaction have [[SetTransaction]] corresponding to + // some appId on which current transaction depends on. Example - This can happen when + // multiple instances of the same streaming query are running at the same time. + if (winningCommitSummary.appLevelTransactions.exists(currentTransactionInfo.isConflict(_))) { + throw DeltaErrors.concurrentTransactionException(winningCommitSummary.commitInfo) + } + } + + /** + * Checks [[DomainMetadata]] to capture whether the current transaction conflicts with the + * winning transaction at any domain. + * 1. Accept the current transaction if its set of metadata domains do not overlap with the + * winning transaction's set of metadata domains. + * 2. Otherwise, fail the current transaction unless each conflicting domain is associated + * with a table feature that defines a domain-specific way of resolving the conflict. + */ + private def checkIfDomainMetadataConflict(): Unit = { + if (!DomainMetadataUtils.domainMetadataSupported(currentTransactionInfo.protocol)) { + return + } + val winningDomainMetadataMap = + DomainMetadataUtils.extractDomainMetadatasMap(winningCommitSummary.actions) + + /** + * Any new well-known domains that need custom conflict resolution need to add new cases in + * below case match clause. E.g. + * case MonotonicCounter(value), Some(MonotonicCounter(conflictingValue)) => + * MonotonicCounter(Math.max(value, conflictingValue)) + */ + def resolveConflict(domainMetadataFromCurrentTransaction: DomainMetadata): DomainMetadata = + (domainMetadataFromCurrentTransaction, + winningDomainMetadataMap.get(domainMetadataFromCurrentTransaction.domain)) match { + // No-conflict case. 
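Domain metadata conflicts are resolved per domain: domains the winning commit did not touch pass through, domains with a feature-defined merge rule are merged, and anything else aborts the transaction. A simplified sketch of that shape, where DomainSketch and the monotonic-counter domain name are hypothetical examples rather than Delta's actual DomainMetadata handling:

// Hypothetical domain-metadata record with an opaque configuration string.
case class DomainSketch(domain: String, configuration: String)

object DomainConflictSketch {
  def resolve(current: DomainSketch, winning: Map[String, DomainSketch]): DomainSketch =
    winning.get(current.domain) match {
      case None => current // the winning commit did not touch this domain
      case Some(w) if current.domain == "example.monotonicCounter" =>
        // A domain with a custom merge rule: keep the larger counter value.
        DomainSketch(current.domain,
          math.max(current.configuration.toLong, w.configuration.toLong).toString)
      case Some(_) =>
        throw new IllegalStateException(
          s"Conflicting metadata domain ${current.domain} was concurrently modified")
    }
}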
+ case (domain, None) => domain + case (domain, _) if RowTrackingMetadataDomain.isRowTrackingDomain(domain) => domain + case (_, Some(_)) => + // Any conflict not specifically handled by a previous case must fail the transaction. + throw new io.delta.exceptions.ConcurrentTransactionException( + s"A conflicting metadata domain ${domainMetadataFromCurrentTransaction.domain} is " + + "added.") + } + + val mergedDomainMetadata = mutable.Buffer.empty[DomainMetadata] + // Resolve physical [[DomainMetadata]] conflicts (fail on logical conflict). + val updatedActions: Seq[Action] = currentTransactionInfo.actions.map { + case domainMetadata: DomainMetadata => + val mergedAction = resolveConflict(domainMetadata) + mergedDomainMetadata += mergedAction + mergedAction + case other => other + } + + currentTransactionInfo = currentTransactionInfo.copy( + domainMetadata = mergedDomainMetadata.toSeq, + actions = updatedActions) + } + + /** + * Checks whether the Row IDs assigned by the current transaction overlap with the Row IDs + * assigned by the winning transaction. I.e. this function checks whether both the winning and the + * current transaction assigned new Row IDs. If this the case, then this check assigns new Row IDs + * to the new files added by the current transaction so that they no longer overlap. + */ + private def reassignOverlappingRowIds(): Unit = { + // The current transaction should only assign Row Ids if they are supported. + if (!RowId.isSupported(currentTransactionInfo.protocol)) return + + val readHighWaterMark = currentTransactionInfo.readRowIdHighWatermark + + // The winning transaction might have bumped the high water mark or not in case it did + // not add new files to the table. + val winningHighWaterMark = winningCommitSummary.actions.collectFirst { + case RowTrackingMetadataDomain(domain) => domain.rowIdHighWaterMark + }.getOrElse(readHighWaterMark) + + var highWaterMark = winningHighWaterMark + val actionsWithReassignedRowIds = currentTransactionInfo.actions.flatMap { + // We should only set missing row IDs and update the row IDs that were assigned by this + // transaction, and not the row IDs that were assigned by an earlier transaction and merely + // copied over to a new AddFile as part of this transaction. I.e., we should only update the + // base row IDs that are larger than the read high watermark. + case a: AddFile if !a.baseRowId.exists(_ <= readHighWaterMark) => + val newBaseRowId = highWaterMark + 1L + highWaterMark += a.numPhysicalRecords.getOrElse { + throw DeltaErrors.rowIdAssignmentWithoutStats + } + Some(a.copy(baseRowId = Some(newBaseRowId))) + // The row ID high water mark will be replaced if it exists. + case d: DomainMetadata if RowTrackingMetadataDomain.isRowTrackingDomain(d) => None + case a => Some(a) + } + currentTransactionInfo = currentTransactionInfo.copy( + // Add row ID high water mark at the front for faster retrieval. + actions = RowTrackingMetadataDomain(highWaterMark).toDomainMetadata +: + actionsWithReassignedRowIds, + readRowIdHighWatermark = winningHighWaterMark) + } + + /** + * Reassigns default row commit versions to correctly handle the winning transaction. + * Concretely: + * 1. Reassigns all default row commit versions (of AddFiles in the current transaction) equal to + * the version of the winning transaction to the next commit version. + * 2. Assigns all unassigned default row commit versions that do not have one assigned yet + * to handle the row tracking feature being enabled by the winning transaction. 
+ */ + private def reassignRowCommitVersions(): Unit = { + if (!RowTracking.isSupported(currentTransactionInfo.protocol)) { + return + } + + val newActions = currentTransactionInfo.actions.map { + case a: AddFile if a.defaultRowCommitVersion.contains(winningCommitVersion) => + a.copy(defaultRowCommitVersion = Some(winningCommitVersion + 1L)) + + case a: AddFile if a.defaultRowCommitVersion.isEmpty => + // A concurrent transaction has turned on support for Row Tracking. + a.copy(defaultRowCommitVersion = Some(winningCommitVersion + 1L)) + + case a => a + } + + currentTransactionInfo = currentTransactionInfo.copy(actions = newActions) + } + + /** A helper function for pretty printing a specific partition directory. */ + protected def getPrettyPartitionMessage(partitionValues: Map[String, String]): String = { + val partitionColumns = currentTransactionInfo.partitionSchemaAtReadTime + if (partitionColumns.isEmpty) { + "the root of the table" + } else { + val partition = partitionColumns.map { field => + s"${field.name}=${partitionValues(DeltaColumnMapping.getPhysicalName(field))}" + }.mkString("[", ", ", "]") + s"partition ${partition}" + } + } + + protected def recordTime[T](phase: String)(f: => T): T = { + val startTimeNs = System.nanoTime() + val ret = f + val timeTakenMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNs) + timingStats += phase -> timeTakenMs + ret + } + + protected def logMetrics(): Unit = { + val totalTimeTakenMs = System.currentTimeMillis() - startTimeMs + val timingStr = timingStats.keys.toSeq.sorted.map(k => s"$k=${timingStats(k)}").mkString(",") + logInfo(s"[$logPrefix] Timing stats against $winningCommitVersion " + + s"[$timingStr, totalTimeTakenMs: $totalTimeTakenMs]") + } + + protected lazy val logPrefix: String = { + def truncate(uuid: String): String = uuid.split("-").head + s"[tableId=${truncate(initialCurrentTransactionInfo.readSnapshot.metadata.id)}," + + s"txnId=${truncate(initialCurrentTransactionInfo.txnId)}] " + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/ConflictCheckerPredicateElimination.scala b/spark/src/main/scala/org/apache/spark/sql/delta/ConflictCheckerPredicateElimination.scala new file mode 100644 index 00000000000..ad9b94330b0 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/ConflictCheckerPredicateElimination.scala @@ -0,0 +1,144 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql.delta
+
+import org.apache.spark.sql.delta.util.DeltaSparkPlanUtils
+import org.apache.spark.sql.delta.util.DeltaSparkPlanUtils.CheckDeterministicOptions
+
+import org.apache.spark.sql.catalyst.expressions.{And, EmptyRow, Expression, Literal, Or}
+import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+private[delta] trait ConflictCheckerPredicateElimination extends DeltaSparkPlanUtils {
+
+ /**
+ * This class represents the state of an expression tree transformation, whereby we try to
+ * eliminate predicates that are non-deterministic in a way that widens the set of rows to
+ * include any row that could be read by the original predicate.
+ *
+ * Example: `c1 = 5 AND c2 IN (SELECT c FROM <parquet table>)` would be widened to
+ * `c1 = 5 AND True`, eliminating the non-deterministic parquet table read by assuming it would
+ * have matched all c2 values.
+ *
+ * `c1 = 5 OR NOT some_udf(c2)` would be widened to `c1 = 5 OR True`, eliminating the
+ * non-deterministic `some_udf` by assuming `NOT some_udf(c2)` would have selected all rows.
+ *
+ * @param newPredicates
+ * The (potentially widened) list of predicates.
+ * @param eliminatedPredicates
+ * The predicates that were eliminated as non-deterministic.
+ */
+ protected case class PredicateElimination(
+ newPredicates: Seq[Expression],
+ eliminatedPredicates: Seq[String])
+ protected object PredicateElimination {
+ final val EMPTY: PredicateElimination = PredicateElimination(Seq.empty, Seq.empty)
+
+ def eliminate(p: Expression, eliminated: Option[String] = None): PredicateElimination =
+ PredicateElimination(
+ // Always eliminate with a `TrueLiteral`, implying that the eliminated expression would
+ // have read the entire table.
+ newPredicates = Seq(TrueLiteral),
+ eliminatedPredicates = Seq(eliminated.getOrElse(p.prettyName)))
+
+ def keep(p: Expression): PredicateElimination =
+ PredicateElimination(newPredicates = Seq(p), eliminatedPredicates = Seq.empty)
+
+ def recurse(
+ p: Expression,
+ recFun: Seq[Expression] => PredicateElimination): PredicateElimination = {
+ val eliminatedChildren = recFun(p.children)
+ if (eliminatedChildren.eliminatedPredicates.isEmpty) {
+ // All children were ok, so keep the current expression.
+ keep(p)
+ } else {
+ // Fold the new predicates after sub-expression widening.
+ val newPredicate = p.withNewChildren(eliminatedChildren.newPredicates) match {
+ case p if p.foldable => Literal.create(p.eval(EmptyRow), p.dataType)
+ case Or(TrueLiteral, _) => TrueLiteral
+ case Or(_, TrueLiteral) => TrueLiteral
+ case And(left, TrueLiteral) => left
+ case And(TrueLiteral, right) => right
+ case p => p
+ }
+ PredicateElimination(
+ newPredicates = Seq(newPredicate),
+ eliminatedPredicates = eliminatedChildren.eliminatedPredicates)
+ }
+ }
+ }
+
+ /**
+ * Replace non-deterministic expressions in a way that can only increase the number of selected
+ * files when these predicates are used for file skipping.
+ */ + protected def eliminateNonDeterministicPredicates( + predicates: Seq[Expression], + checkDeterministicOptions: CheckDeterministicOptions): PredicateElimination = { + eliminateUnsupportedPredicates(predicates) { + case p @ SubqueryExpression(plan) => + findFirstNonDeltaScan(plan) match { + case Some(plan) => PredicateElimination.eliminate(p, eliminated = Some(plan.nodeName)) + case None => + findFirstNonDeterministicNode(plan, checkDeterministicOptions) match { + case Some(node) => + PredicateElimination.eliminate(p, eliminated = Some(planOrExpressionName(node))) + case None => PredicateElimination.keep(p) + } + } + // And and Or can safely be recursed through. Replacing any non-deterministic sub-tree + // with `True` will lead us to at most select more files than necessary later. + case p: And => PredicateElimination.recurse(p, + p => eliminateNonDeterministicPredicates(p, checkDeterministicOptions)) + case p: Or => PredicateElimination.recurse(p, + p => eliminateNonDeterministicPredicates(p, checkDeterministicOptions)) + // All other expressions must either be completely deterministic, + // or must be replaced entirely, since replacing only their non-deterministic children + // may lead to files wrongly being deselected (e.g. `NOT True`). + case p => + // We always look for non-deterministic child nodes, whether or not `p` is actually + // deterministic. This gives us better feedback on what caused the non-determinism in + // cases where `p` itself it deterministic but `p.deterministic = false` due to correctly + // detected non-deterministic child nodes. + findFirstNonDeterministicChildNode(p.children, checkDeterministicOptions) match { + case Some(node) => + PredicateElimination.eliminate(p, eliminated = Some(planOrExpressionName(node))) + case None => if (p.deterministic) { + PredicateElimination.keep(p) + } else { + PredicateElimination.eliminate(p) + } + } + } + } + + private def eliminateUnsupportedPredicates(predicates: Seq[Expression])( + eliminatePredicates: Expression => PredicateElimination): PredicateElimination = { + predicates + .map(eliminatePredicates) + .foldLeft(PredicateElimination.EMPTY) { case (acc, predicates) => + acc.copy( + newPredicates = acc.newPredicates ++ predicates.newPredicates, + eliminatedPredicates = acc.eliminatedPredicates ++ predicates.eliminatedPredicates) + } + } + + private def planOrExpressionName(e: Either[LogicalPlan, Expression]): String = e match { + case scala.util.Left(plan) => plan.nodeName + case scala.util.Right(expression) => expression.prettyName + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DefaultRowCommitVersion.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DefaultRowCommitVersion.scala new file mode 100644 index 00000000000..0cba4a05c09 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DefaultRowCommitVersion.scala @@ -0,0 +1,37 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.actions._ + + +object DefaultRowCommitVersion { + def assignIfMissing( + protocol: Protocol, + actions: Iterator[Action], + version: Long): Iterator[Action] = { + if (!RowTracking.isSupported(protocol)) { + return actions + } + actions.map { + case a: AddFile if a.defaultRowCommitVersion.isEmpty => + a.copy(defaultRowCommitVersion = Some(version)) + case a => + a + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala new file mode 100644 index 00000000000..940c16d21ad --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala @@ -0,0 +1,1268 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import scala.collection.JavaConverters._ +import scala.collection.mutable +import scala.util.{Failure, Success, Try} +import scala.util.control.NonFatal + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.catalyst.TimeTravel +import org.apache.spark.sql.delta.DeltaErrors.{TemporallyUnstableInputException, TimestampEarlierThanCommitRetentionException} +import org.apache.spark.sql.delta.catalog.DeltaCatalog +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.catalog.IcebergTablePlaceHolder +import org.apache.spark.sql.delta.commands._ +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.constraints.{AddConstraint, DropConstraint} +import org.apache.spark.sql.delta.files.{TahoeFileIndex, TahoeLogFileIndex} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources._ +import org.apache.spark.sql.delta.util.AnalysisHelper +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{AnalysisException, Dataset, SaveMode, SparkSession} +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, HiveTableRelation} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.plans.logical.CloneTableStatement +import org.apache.spark.sql.catalyst.plans.logical.RestoreTableStatement +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.streaming.WriteToStream +import org.apache.spark.sql.catalyst.trees.TreeNodeTag +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttribute +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes +import 
org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ +import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.command.CreateTableLikeCommand +import org.apache.spark.sql.execution.command.RunnableCommand +import org.apache.spark.sql.execution.datasources.HadoopFsRelation +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.execution.streaming.StreamingRelation +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{ArrayType, DataType, IntegerType, MapType, StructField, StructType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** + * Analysis rules for Delta. Currently, these rules enable schema enforcement / evolution with + * INSERT INTO. + */ +class DeltaAnalysis(session: SparkSession) + extends Rule[LogicalPlan] with AnalysisHelper with DeltaLogging { + + type CastFunction = (Expression, DataType, String) => Expression + + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsDown { + // INSERT INTO by ordinal and df.insertInto() + case a @ AppendDelta(r, d) if !a.isByName && + needsSchemaAdjustmentByOrdinal(d.name(), a.query, r.schema) => + val projection = resolveQueryColumnsByOrdinal(a.query, r.output, d.name()) + if (projection != a.query) { + a.copy(query = projection) + } else { + a + } + + + // INSERT INTO by name + // AppendData.byName is also used for DataFrame append so we check for the SQL origin text + // since we only want to up-cast for SQL insert into by name + case a @ AppendDelta(r, d) if a.isByName && + a.origin.sqlText.nonEmpty && needsSchemaAdjustmentByName(a.query, r.output, d) => + val projection = resolveQueryColumnsByName(a.query, r.output, d) + if (projection != a.query) { + a.copy(query = projection) + } else { + a + } + + /** + * Handling create table like when a delta target (provider) + * is provided explicitly or when the source table is a delta table + */ + case EligibleCreateTableLikeCommand(ctl, src) => + val deltaTableIdentifier = DeltaTableIdentifier(session, ctl.targetTable) + + // Check if table is given by path + val isTableByPath = DeltaTableIdentifier.isDeltaPath(session, ctl.targetTable) + + // Check if targetTable is given by path + val targetTableIdentifier = + if (isTableByPath) { + TableIdentifier(deltaTableIdentifier.toString) + } else { + ctl.targetTable + } + + val newStorage = + if (ctl.fileFormat.inputFormat.isDefined) { + ctl.fileFormat + } else if (isTableByPath) { + src.storage.copy(locationUri = + Some(deltaTableIdentifier.get.getPath(session).toUri)) + } else { + src.storage.copy(locationUri = ctl.fileFormat.locationUri) + } + + // If the location is specified or target table is given + // by path, we create an external table. + // Otherwise create a managed table. 
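In SQL terms, this branch is what backs CREATE TABLE ... LIKE with a Delta provider: with no explicit LOCATION and a catalog-based target the result is a managed table, while a LOCATION (or a path-based target) yields an external one. A small usage sketch, assuming a Delta-enabled session and an existing src_table; the table names and path are placeholders:

import org.apache.spark.sql.SparkSession

object CreateTableLikeSketch {
  def run(spark: SparkSession): Unit = {
    // Managed Delta table that copies src_table's schema and properties, but no data.
    spark.sql("CREATE TABLE tgt_managed LIKE src_table USING DELTA")

    // With an explicit LOCATION the new Delta table is created as external.
    spark.sql("CREATE TABLE tgt_external LIKE src_table USING DELTA LOCATION '/tmp/tgt'")
  }
}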
+ val tblType = + if (newStorage.locationUri.isEmpty && !isTableByPath) { + CatalogTableType.MANAGED + } else { + CatalogTableType.EXTERNAL + } + + val catalogTableTarget = + // If source table is Delta format + if (src.provider.exists(DeltaSourceUtils.isDeltaDataSourceName)) { + val deltaLogSrc = DeltaTableV2(session, new Path(src.location)) + + // Column mapping and row tracking fields cannot be set externally. If the features are + // used on the source delta table, then the corresponding fields would be set for the + // sourceTable and needs to be removed from the targetTable's configuration. The fields + // will then be set in the targetTable's configuration internally after. + val sourceMetadata = deltaLogSrc.initialSnapshot.metadata + val config = + sourceMetadata.configuration.-("delta.columnMapping.maxColumnId") + .-(MaterializedRowId.MATERIALIZED_COLUMN_NAME_PROP) + .-(MaterializedRowCommitVersion.MATERIALIZED_COLUMN_NAME_PROP) + + new CatalogTable( + identifier = targetTableIdentifier, + tableType = tblType, + storage = newStorage, + schema = sourceMetadata.schema, + properties = config, + partitionColumnNames = sourceMetadata.partitionColumns, + provider = Some("delta"), + comment = Option(sourceMetadata.description) + ) + } else { // Source table is not delta format + new CatalogTable( + identifier = targetTableIdentifier, + tableType = tblType, + storage = newStorage, + schema = src.schema, + properties = src.properties, + partitionColumnNames = src.partitionColumnNames, + provider = Some("delta"), + comment = src.comment + ) + } + val saveMode = + if (ctl.ifNotExists) { + SaveMode.Ignore + } else { + SaveMode.ErrorIfExists + } + + val protocol = + if (src.provider.exists(DeltaSourceUtils.isDeltaDataSourceName)) { + Some(DeltaTableV2(session, new Path(src.location)).initialSnapshot.protocol) + } else { + None + } + val newDeltaCatalog = new DeltaCatalog() + val existingTableOpt = newDeltaCatalog.getExistingTableIfExists(catalogTableTarget.identifier) + val newTable = newDeltaCatalog + .verifyTableAndSolidify( + catalogTableTarget, + None + ) + CreateDeltaTableCommand( + table = newTable, + existingTableOpt = existingTableOpt, + mode = saveMode, + query = None, + output = ctl.output, + protocol = protocol, + tableByPath = isTableByPath) + + // INSERT OVERWRITE by ordinal and df.insertInto() + case o @ OverwriteDelta(r, d) if !o.isByName && + needsSchemaAdjustmentByOrdinal(d.name(), o.query, r.schema) => + val projection = resolveQueryColumnsByOrdinal(o.query, r.output, d.name()) + if (projection != o.query) { + val aliases = AttributeMap(o.query.output.zip(projection.output).collect { + case (l: AttributeReference, r: AttributeReference) if !l.sameRef(r) => (l, r) + }) + val newDeleteExpr = o.deleteExpr.transformUp { + case a: AttributeReference => aliases.getOrElse(a, a) + } + o.copy(deleteExpr = newDeleteExpr, query = projection) + } else { + o + } + + // INSERT OVERWRITE by name + // OverwriteDelta.byName is also used for DataFrame append so we check for the SQL origin text + // since we only want to up-cast for SQL insert into by name + case o @ OverwriteDelta(r, d) if o.isByName && + o.origin.sqlText.nonEmpty && needsSchemaAdjustmentByName(o.query, r.output, d) => + val projection = resolveQueryColumnsByName(o.query, r.output, d) + if (projection != o.query) { + val aliases = AttributeMap(o.query.output.zip(projection.output).collect { + case (l: AttributeReference, r: AttributeReference) if !l.sameRef(r) => (l, r) + }) + val newDeleteExpr = o.deleteExpr.transformUp { + 
case a: AttributeReference => aliases.getOrElse(a, a) + } + o.copy(deleteExpr = newDeleteExpr, query = projection) + } else { + o + } + + + // INSERT OVERWRITE with dynamic partition overwrite + case o @ DynamicPartitionOverwriteDelta(r, d) if o.resolved + => + val adjustedQuery = if (!o.isByName && + needsSchemaAdjustmentByOrdinal(d.name(), o.query, r.schema)) { + // INSERT OVERWRITE by ordinal and df.insertInto() + resolveQueryColumnsByOrdinal(o.query, r.output, d.name()) + } else if (o.isByName && o.origin.sqlText.nonEmpty && + needsSchemaAdjustmentByName(o.query, r.output, d)) { + // INSERT OVERWRITE by name + // OverwriteDelta.byName is also used for DataFrame append so we check for the SQL origin + // text since we only want to up-cast for SQL insert into by name + resolveQueryColumnsByName(o.query, r.output, d) + } else { + o.query + } + DeltaDynamicPartitionOverwriteCommand(r, d, adjustedQuery, o.writeOptions, o.isByName) + + // Pull out the partition filter that may be part of the FileIndex. This can happen when someone + // queries a Delta table such as spark.read.format("delta").load("/some/table/partition=2") + case l @ DeltaTable(index: TahoeLogFileIndex) if index.partitionFilters.nonEmpty => + Filter( + index.partitionFilters.reduce(And), + DeltaTableUtils.replaceFileIndex(l, index.copy(partitionFilters = Nil))) + + // SQL CDC table value functions "table_changes" and "table_changes_by_path" + case stmt: CDCStatementBase if stmt.functionArgs.forall(_.resolved) => + stmt.toTableChanges(session) + + case tc: TableChanges if tc.child.resolved => tc.toReadQuery + + + // Here we take advantage of CreateDeltaTableCommand which takes a LogicalPlan for CTAS in order + // to perform CLONE. We do this by passing the CloneTableCommand as the query in + // CreateDeltaTableCommand and let Create handle the creation + checks of creating a table in + // the metastore instead of duplicating that effort in CloneTableCommand. + case cloneStatement: CloneTableStatement => + // Get the info necessary to CreateDeltaTableCommand + EliminateSubqueryAliases(cloneStatement.source) match { + case DataSourceV2Relation(table: DeltaTableV2, _, _, _, _) => + resolveCloneCommand(cloneStatement.target, new CloneDeltaSource(table), cloneStatement) + + // Pass the traveled table if a previous version is to be cloned + case tt @ TimeTravel(DataSourceV2Relation(tbl: DeltaTableV2, _, _, _, _), _, _, _) + if tt.expressions.forall(_.resolved) => + val ttSpec = DeltaTimeTravelSpec(tt.timestamp, tt.version, tt.creationSource) + val traveledTable = tbl.copy(timeTravelOpt = Some(ttSpec)) + resolveCloneCommand( + cloneStatement.target, new CloneDeltaSource(traveledTable), cloneStatement) + + case DataSourceV2Relation(table: IcebergTablePlaceHolder, _, _, _, _) => + resolveCloneCommand( + cloneStatement.target, + CloneIcebergSource( + table.tableIdentifier, sparkTable = None, tableSchema = None, session), + cloneStatement) + + case DataSourceV2Relation(table, _, _, _, _) + if table.getClass.getName.endsWith("org.apache.iceberg.spark.source.SparkTable") => + val tableIdent = Try { + CatalystSqlParser.parseTableIdentifier(table.name()) + } match { + case Success(ident) => ident + case Failure(_: ParseException) => + // Fallback to 2-level identifier to make compatible with older Apache spark, + // this ident will NOT be used to look up the Iceberg tables later. 
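These branches back the SQL CLONE surface: a Delta, Iceberg, or plain Parquet source is wrapped in the matching clone source and handed to CreateDeltaTableCommand. A usage sketch, assuming the shallow-clone syntax supported by this codebase; the table names and paths are placeholders:

import org.apache.spark.sql.SparkSession

object CloneUsageSketch {
  def run(spark: SparkSession): Unit = {
    // Clone a plain Parquet directory into a new Delta table (metadata only, no data copy).
    spark.sql("CREATE TABLE events_delta SHALLOW CLONE parquet.`/tmp/events_parquet`")

    // Clone an existing Delta table as of an earlier version.
    spark.sql("CREATE TABLE events_v3 SHALLOW CLONE events_delta VERSION AS OF 3")
  }
}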
+ CatalystSqlParser.parseMultipartIdentifier(table.name()).tail.asTableIdentifier + case Failure(e) => throw e + } + resolveCloneCommand( + cloneStatement.target, + CloneIcebergSource(tableIdent, Some(table), tableSchema = None, session), + cloneStatement) + + case u: UnresolvedRelation => + u.tableNotFound(u.multipartIdentifier) + + case TimeTravel(u: UnresolvedRelation, _, _, _) => + u.tableNotFound(u.multipartIdentifier) + + case LogicalRelation( + HadoopFsRelation(location, _, _, _, _: ParquetFileFormat, _), _, catalogTable, _) => + val tableIdent = catalogTable.map(_.identifier) + .getOrElse(TableIdentifier(location.rootPaths.head.toString, Some("parquet"))) + val provider = if (catalogTable.isDefined) { + catalogTable.get.provider.getOrElse("Unknown") + } else { + "parquet" + } + // Only plain Parquet sources are eligible for CLONE, extensions like 'deltaSharing' are + // NOT supported. + if (!provider.equalsIgnoreCase("parquet")) { + throw DeltaErrors.cloneFromUnsupportedSource( + tableIdent.unquotedString, + provider) + } + + resolveCloneCommand( + cloneStatement.target, + CloneParquetSource(tableIdent, catalogTable, session), cloneStatement) + + case HiveTableRelation(catalogTable, _, _, _, _) => + if (!ConvertToDeltaCommand.isHiveStyleParquetTable(catalogTable)) { + throw DeltaErrors.cloneFromUnsupportedSource( + catalogTable.identifier.unquotedString, + catalogTable.storage.serde.getOrElse("Unknown")) + } + resolveCloneCommand( + cloneStatement.target, + CloneParquetSource(catalogTable.identifier, Some(catalogTable), session), + cloneStatement) + + case v: View => + throw DeltaErrors.cloneFromUnsupportedSource( + v.desc.identifier.unquotedString, "View") + + case l: LogicalPlan => + throw DeltaErrors.cloneFromUnsupportedSource( + l.toString, "Unknown") + } + + case restoreStatement @ RestoreTableStatement(target) => + EliminateSubqueryAliases(target) match { + // Pass the traveled table if a previous version is to be cloned + case tt @ TimeTravel(DataSourceV2Relation(tbl: DeltaTableV2, _, _, _, _), _, _, _) + if tt.expressions.forall(_.resolved) => + val ttSpec = DeltaTimeTravelSpec(tt.timestamp, tt.version, tt.creationSource) + val traveledTable = tbl.copy(timeTravelOpt = Some(ttSpec)) + // restoring to same version as latest should be a no-op. + val sourceSnapshot = try { + traveledTable.initialSnapshot + } catch { + case v: VersionNotFoundException => + throw DeltaErrors.restoreVersionNotExistException(v.userVersion, v.earliest, v.latest) + case tEarlier: TimestampEarlierThanCommitRetentionException => + throw DeltaErrors.restoreTimestampBeforeEarliestException( + tEarlier.userTimestamp.toString, + tEarlier.commitTs.toString + ) + case tUnstable: TemporallyUnstableInputException => + throw DeltaErrors.restoreTimestampGreaterThanLatestException( + tUnstable.userTimestamp.toString, + tUnstable.commitTs.toString + ) + } + // TODO: Fetch the table version from deltaLog.update().version to guarantee freshness. + // This can also be used by RestoreTableCommand + if (sourceSnapshot.version == traveledTable.deltaLog.unsafeVolatileSnapshot.version) { + return LocalRelation(restoreStatement.output) + } + + RestoreTableCommand(traveledTable) + + case u: UnresolvedRelation => + u.tableNotFound(u.multipartIdentifier) + + case TimeTravel(u: UnresolvedRelation, _, _, _) => + u.tableNotFound(u.multipartIdentifier) + + case _ => + throw DeltaErrors.notADeltaTableException("RESTORE") + } + + // Resolve as a resolved table if the path is for delta table. 
For non delta table, we keep the + // path and pass it along in a ResolvedPathBasedNonDeltaTable. This is needed as DESCRIBE DETAIL + // supports both delta and non delta paths. + case u: UnresolvedPathBasedTable => + val table = getPathBasedDeltaTable(u.path, u.options) + if (Try(table.tableExists).getOrElse(false)) { + // Resolve it as a path-based Delta table + val catalog = session.sessionState.catalogManager.currentCatalog.asTableCatalog + ResolvedTable.create( + catalog, Identifier.of(Array(DeltaSourceUtils.ALT_NAME), u.path), table) + } else { + // Resolve it as a placeholder, to identify it as a non-Delta table. + ResolvedPathBasedNonDeltaTable(u.path, u.options, u.commandName) + } + + case u: UnresolvedPathBasedDeltaTable => + val table = getPathBasedDeltaTable(u.path, u.options) + if (!table.tableExists) { + throw DeltaErrors.notADeltaTableException(u.commandName, u.deltaTableIdentifier) + } + val catalog = session.sessionState.catalogManager.currentCatalog.asTableCatalog + ResolvedTable.create(catalog, u.identifier, table) + + case u: UnresolvedPathBasedDeltaTableRelation => + val table = getPathBasedDeltaTable(u.path, u.options.asScala.toMap) + if (!table.tableExists) { + throw DeltaErrors.notADeltaTableException(u.deltaTableIdentifier) + } + DataSourceV2Relation.create(table, None, Some(u.identifier), u.options) + + case d: DescribeDeltaHistory if d.childrenResolved => d.toCommand + + // This rule falls back to V1 nodes, since we don't have a V2 reader for Delta right now + case dsv2 @ DataSourceV2Relation(d: DeltaTableV2, _, _, _, options) + if dsv2.getTagValue(DeltaRelation.KEEP_AS_V2_RELATION_TAG).isEmpty => + DeltaRelation.fromV2Relation(d, dsv2, options) + + case ResolvedTable(_, _, d: DeltaTableV2, _) if d.catalogTable.isEmpty && !d.tableExists => + // This is DDL on a path based table that doesn't exist. 
CREATE will not hit this path, most + // SHOW / DESC code paths will hit this + throw DeltaErrors.notADeltaTableException(DeltaTableIdentifier(path = Some(d.path.toString))) + + // DML - TODO: Remove these Delta-specific DML logical plans and use Spark's plans directly + + case d @ DeleteFromTable(table, condition) if d.childrenResolved => + // rewrites Delta from V2 to V1 + val newTarget = stripTempViewWrapper(table).transformUp { case DeltaRelation(lr) => lr } + val indices = newTarget.collect { + case DeltaFullTable(_, index) => index + } + if (indices.isEmpty) { + // Not a Delta table at all, do not transform + d + } else if (indices.size == 1 && indices(0).deltaLog.tableExists) { + // It is a well-defined Delta table with a schema + DeltaDelete(newTarget, Some(condition)) + } else { + // Not a well-defined Delta table + throw DeltaErrors.notADeltaSourceException("DELETE", Some(d)) + } + + case u @ UpdateTable(table, assignments, condition) if u.childrenResolved => + val (cols, expressions) = assignments.map(a => a.key -> a.value).unzip + // rewrites Delta from V2 to V1 + val newTable = stripTempViewWrapper(table).transformUp { case DeltaRelation(lr) => lr } + newTable.collectLeaves().headOption match { + case Some(DeltaFullTable(_, index)) => + DeltaUpdateTable(newTable, cols, expressions, condition) + case o => + // not a Delta table + u + } + + + case merge: MergeIntoTable if merge.childrenResolved => + val matchedActions = merge.matchedActions.map { + case update: UpdateAction => + DeltaMergeIntoMatchedUpdateClause( + update.condition, + DeltaMergeIntoClause.toActions(update.assignments)) + case update: UpdateStarAction => + DeltaMergeIntoMatchedUpdateClause(update.condition, DeltaMergeIntoClause.toActions(Nil)) + case delete: DeleteAction => + DeltaMergeIntoMatchedDeleteClause(delete.condition) + case other => + throw new IllegalArgumentException( + s"${other.prettyName} clauses cannot be part of the WHEN MATCHED clause in MERGE INTO.") + } + val notMatchedActions = merge.notMatchedActions.map { + case insert: InsertAction => + DeltaMergeIntoNotMatchedInsertClause( + insert.condition, + DeltaMergeIntoClause.toActions(insert.assignments)) + case insert: InsertStarAction => + DeltaMergeIntoNotMatchedInsertClause( + insert.condition, DeltaMergeIntoClause.toActions(Nil)) + case other => + throw new IllegalArgumentException( + s"${other.prettyName} clauses cannot be part of the WHEN NOT MATCHED clause in MERGE " + + "INTO.") + } + val notMatchedBySourceActions = merge.notMatchedBySourceActions.map { + case update: UpdateAction => + DeltaMergeIntoNotMatchedBySourceUpdateClause( + update.condition, + DeltaMergeIntoClause.toActions(update.assignments)) + case delete: DeleteAction => + DeltaMergeIntoNotMatchedBySourceDeleteClause(delete.condition) + case other => + throw new IllegalArgumentException( + s"${other.prettyName} clauses cannot be part of the WHEN NOT MATCHED BY SOURCE " + + "clause in MERGE INTO.") + } + // rewrites Delta from V2 to V1 + var isDelta = false + val newTarget = stripTempViewForMergeWrapper(merge.targetTable).transformUp { + case DeltaRelation(lr) => + isDelta = true + lr + } + + if (isDelta) { + // Even if we're merging into a non-Delta target, we will catch it later and throw an + // exception. 
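+        // Assemble the Delta-specific MERGE plan from the converted matched / not-matched /
+        // not-matched-by-source clauses; references and the target schema are resolved just
+        // below via resolveReferencesAndSchema.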
+ val deltaMerge = DeltaMergeInto( + newTarget, + merge.sourceTable, + merge.mergeCondition, + matchedActions ++ notMatchedActions ++ notMatchedBySourceActions + ) + + DeltaMergeInto.resolveReferencesAndSchema(deltaMerge, conf)( + tryResolveReferencesForExpressions(session)) + } else { + merge + } + + case merge: MergeIntoTable if merge.targetTable.exists(_.isInstanceOf[DataSourceV2Relation]) => + // When we hit here, it means the MERGE source is not resolved and we can't convert the MERGE + // command to the Delta variant. We need to add a special marker to the target table, so that + // this rule does not convert it to v1 relation too early, as we need to keep it as a v2 + // relation to bypass the OSS MERGE resolution code in the rule `ResolveReferences`. + merge.targetTable.foreach { + // TreeNodeTag is not very reliable, but it's OK to use it here, as we will use it very + // soon: when this rule transforms down the plan tree and hits the MERGE target table. + // There is no chance in this rule that we will drop this tag. At the end, This rule will + // turn MergeIntoTable into DeltaMergeInto, and convert all Delta relations inside it to + // v1 relations (no need to clean up this tag). + case r: DataSourceV2Relation => r.setTagValue(DeltaRelation.KEEP_AS_V2_RELATION_TAG, ()) + case _ => + } + merge + + case reorg @ DeltaReorgTable(resolved @ ResolvedTable(_, _, _: DeltaTableV2, _), spec) => + DeltaReorgTableCommand(resolved, spec)(reorg.predicates) + + case DeltaReorgTable(ResolvedTable(_, _, t, _), _) => + throw DeltaErrors.notADeltaTable(t.name()) + + case cmd @ ShowColumns(child @ ResolvedTable(_, _, table: DeltaTableV2, _), namespace, _) => + // Adapted from the rule in spark ResolveSessionCatalog.scala, which V2 tables don't trigger. + // NOTE: It's probably a spark bug to check head instead of tail, for 3-part identifiers. + val resolver = session.sessionState.analyzer.resolver + val v1TableName = child.identifier.asTableIdentifier + namespace.foreach { ns => + if (v1TableName.database.exists(!resolver(_, ns.head))) { + throw QueryCompilationErrors.showColumnsWithConflictDatabasesError(ns, v1TableName) + } + } + ShowDeltaTableColumnsCommand(child) + + case deltaMerge: DeltaMergeInto => + val d = if (deltaMerge.childrenResolved && !deltaMerge.resolved) { + DeltaMergeInto.resolveReferencesAndSchema(deltaMerge, conf)( + tryResolveReferencesForExpressions(session)) + } else deltaMerge + d.copy(target = stripTempViewForMergeWrapper(d.target)) + + case origStreamWrite: WriteToStream => + // The command could have Delta as source and/or sink. We need to look at both. + val streamWrite = origStreamWrite match { + case WriteToStream(_, _, sink @ DeltaSink(_, _, _, _, _, None), _, _, _, _, Some(ct)) => + // The command has a catalog table, but the DeltaSink does not. This happens because + // DeltaDataSource.createSink (Spark API) didn't have access to the catalog table when it + // created the DeltaSink. Fortunately we can fix it up here. + origStreamWrite.copy(sink = sink.copy(catalogTable = Some(ct))) + case _ => origStreamWrite + } + + // We also need to validate the source schema location, if the command has a Delta source. + verifyDeltaSourceSchemaLocation( + streamWrite.inputQuery, streamWrite.resolvedCheckpointLocation) + streamWrite + + } + + /** + * Creates a catalog table for CreateDeltaTableCommand. 
+ * + * @param targetPath Target path to clone to + * @param byPath Whether the target is a path based table + * @param tableIdent Table Identifier for the target table + * @param targetLocation User specified target location for the new table + * @param existingTable Existing table definition if we're going to be replacing the table + * @param srcTable The source table to clone + * @return the catalog table to pass to CreateDeltaTableCommand + */ + private def createCatalogTableForCloneCommand( + targetPath: Path, + byPath: Boolean, + tableIdent: TableIdentifier, + targetLocation: Option[String], + existingTable: Option[CatalogTable], + srcTable: CloneSource): CatalogTable = { + // If an external location is defined, then the table is an external table. + // If the table is a path-based table, we also say that the table is external even if no + // metastore table will be created. This is done because we are still explicitly providing a + // locationUri, which is behavior expected only of external tables. + // In the case of ifNotExists being true and a table existing at the target destination, create + // a managed table so we don't have to pass a fake path + val (tableType, storage) = if (targetLocation.isDefined || byPath) { + (CatalogTableType.EXTERNAL, + CatalogStorageFormat.empty.copy(locationUri = Some(targetPath.toUri))) + } else { + (CatalogTableType.MANAGED, CatalogStorageFormat.empty) + } + val properties = srcTable.metadata.configuration + + new CatalogTable( + identifier = tableIdent, + tableType = tableType, + storage = storage, + schema = srcTable.schema, + properties = properties, + provider = Some("delta"), + stats = existingTable.flatMap(_.stats) + ) + } + + private def getPathBasedDeltaTable(path: String, options: Map[String, String]): DeltaTableV2 = { + DeltaTableV2(session, new Path(path), options = options) + } + + /** + * Instantiates a CreateDeltaTableCommand with CloneTableCommand as the child query. + * + * @param targetPlan the target of Clone as passed in a LogicalPlan + * @param sourceTbl the CloneSource that was resolved as the source of the clone command + * @return the clone command resolved as the query in a CreateDeltaTableCommand.
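+   *
+   * For example, a SQL statement along the lines of `CREATE TABLE tgt SHALLOW CLONE src`
+   * (illustrative table names) is resolved here into a CreateDeltaTableCommand whose query is
+   * a CloneTableCommand.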
+ */ + private def resolveCloneCommand( + targetPlan: LogicalPlan, + sourceTbl: CloneSource, + statement: CloneTableStatement): LogicalPlan = { + val isReplace = statement.isReplaceCommand + val isCreate = statement.isCreateCommand + + import session.sessionState.analyzer.{NonSessionCatalogAndIdentifier, SessionCatalogAndIdentifier} + val targetLocation = statement.targetLocation + val saveMode = if (isReplace) { + SaveMode.Overwrite + } else if (statement.ifNotExists) { + SaveMode.Ignore + } else { + SaveMode.ErrorIfExists + } + + val tableCreationMode = if (isCreate && isReplace) { + TableCreationModes.CreateOrReplace + } else if (isCreate) { + TableCreationModes.Create + } else { + TableCreationModes.Replace + } + // We don't use information in the catalog if the table is time travelled + val sourceCatalogTable = if (sourceTbl.timeTravelOpt.isDefined) None else sourceTbl.catalogTable + + EliminateSubqueryAliases(targetPlan) match { + // Target is a path based table + case DataSourceV2Relation(targetTbl: DeltaTableV2, _, _, _, _) if !targetTbl.tableExists => + val path = targetTbl.path + val tblIdent = TableIdentifier(path.toString, Some("delta")) + if (!isCreate) { + throw DeltaErrors.cannotReplaceMissingTableException( + Identifier.of(Array("delta"), path.toString)) + } + // Trying to clone something on itself should be a no-op + if (sourceTbl == new CloneDeltaSource(targetTbl)) { + return LocalRelation() + } + // If this is a path based table and an external location is also defined throw an error + if (statement.targetLocation.exists(loc => new Path(loc).toString != path.toString)) { + throw DeltaErrors.cloneAmbiguousTarget(statement.targetLocation.get, tblIdent) + } + // We're creating a table by path and there won't be a place to store catalog stats + val catalog = createCatalogTableForCloneCommand( + path, byPath = true, tblIdent, targetLocation, sourceCatalogTable, sourceTbl) + CreateDeltaTableCommand( + catalog, + None, + saveMode, + Some(CloneTableCommand( + sourceTbl, + tblIdent, + statement.tablePropertyOverrides, + path)), + tableByPath = true, + output = CloneTableCommand.output) + + // Target is a metastore table + case UnresolvedRelation(SessionCatalogAndIdentifier(catalog, ident), _, _) => + if (!isCreate) { + throw DeltaErrors.cannotReplaceMissingTableException(ident) + } + val tblIdent = ident + .asTableIdentifier + val finalTarget = new Path(statement.targetLocation.getOrElse( + session.sessionState.catalog.defaultTablePath(tblIdent).toString)) + val catalogTable = createCatalogTableForCloneCommand( + finalTarget, byPath = false, tblIdent, targetLocation, sourceCatalogTable, sourceTbl) + val catalogTableWithPath = if (targetLocation.isEmpty) { + catalogTable.copy( + storage = CatalogStorageFormat.empty.copy(locationUri = Some(finalTarget.toUri))) + } else { + catalogTable + } + CreateDeltaTableCommand( + catalogTableWithPath, + None, + saveMode, + Some(CloneTableCommand( + sourceTbl, + tblIdent, + statement.tablePropertyOverrides, + finalTarget)), + operation = tableCreationMode, + output = CloneTableCommand.output) + + case UnresolvedRelation(NonSessionCatalogAndIdentifier(catalog: TableCatalog, ident), _, _) => + if (!isCreate) { + throw DeltaErrors.cannotReplaceMissingTableException(ident) + } + val partitions: Array[Transform] = sourceTbl.metadata.partitionColumns.map { col => + new IdentityTransform(new FieldReference(Seq(col))) + }.toArray + // HACK ALERT: since there is no DSV2 API for getting table path before creation, + // here we create a table to get the 
path, then overwrite it with the + // cloned table. + val sourceConfig = sourceTbl.metadata.configuration.asJava + val newTable = catalog.createTable(ident, sourceTbl.schema, partitions, sourceConfig) + try { + newTable match { + case targetTable: DeltaTableV2 => + val path = targetTable.path + val tblIdent = TableIdentifier(path.toString, Some("delta")) + val catalogTable = createCatalogTableForCloneCommand( + path, byPath = true, tblIdent, targetLocation, sourceCatalogTable, sourceTbl) + CreateDeltaTableCommand( + table = catalogTable, + existingTableOpt = None, + mode = SaveMode.Overwrite, + query = Some( + CloneTableCommand( + sourceTable = sourceTbl, + targetIdent = tblIdent, + tablePropertyOverrides = statement.tablePropertyOverrides, + targetPath = path)), + tableByPath = true, + operation = TableCreationModes.Replace, + output = CloneTableCommand.output) + case _ => + throw DeltaErrors.notADeltaSourceException("CREATE TABLE CLONE", Some(statement)) + } + } catch { + case NonFatal(e) => + catalog.dropTable(ident) + throw e + } + // Delta metastore table already exists at target + case DataSourceV2Relation(deltaTableV2: DeltaTableV2, _, _, _, _) => + val path = deltaTableV2.path + val existingTable = deltaTableV2.catalogTable + val tblIdent = existingTable match { + case Some(existingCatalog) => existingCatalog.identifier + case None => TableIdentifier(path.toString, Some("delta")) + } + // Reuse the existing schema so that the physical name of columns are consistent + val cloneSourceTable = sourceTbl match { + case source: CloneIcebergSource => + // Reuse the existing schema so that the physical name of columns are consistent + source.copy(tableSchema = Some(deltaTableV2.initialSnapshot.metadata.schema)) + case other => other + } + val catalogTable = createCatalogTableForCloneCommand( + path, + byPath = existingTable.isEmpty, + tblIdent, + targetLocation, + sourceCatalogTable, + cloneSourceTable) + + CreateDeltaTableCommand( + catalogTable, + existingTable, + saveMode, + Some(CloneTableCommand( + cloneSourceTable, + tblIdent, + statement.tablePropertyOverrides, + path)), + tableByPath = existingTable.isEmpty, + operation = tableCreationMode, + output = CloneTableCommand.output) + + // Non-delta metastore table already exists at target + case LogicalRelation(_, _, existingCatalogTable @ Some(catalogTable), _) => + val tblIdent = catalogTable.identifier + val path = new Path(catalogTable.location) + val newCatalogTable = createCatalogTableForCloneCommand( + path, byPath = false, tblIdent, targetLocation, sourceCatalogTable, sourceTbl) + CreateDeltaTableCommand( + newCatalogTable, + existingCatalogTable, + saveMode, + Some(CloneTableCommand( + sourceTbl, + tblIdent, + statement.tablePropertyOverrides, + path)), + operation = tableCreationMode, + output = CloneTableCommand.output) + + case _ => throw DeltaErrors.notADeltaTableException("CLONE") + } + } + + /** + * Performs the schema adjustment by adding UpCasts (which are safe) and Aliases so that we + * can check if the by-ordinal schema of the insert query matches our Delta table. + * The schema adjustment also include string length check if it's written into a char/varchar + * type column/field. + */ + private def resolveQueryColumnsByOrdinal( + query: LogicalPlan, targetAttrs: Seq[Attribute], tblName: String): LogicalPlan = { + // always add a Cast. it will be removed in the optimizer if it is unnecessary. 
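+    // Each query output column is matched to the target column at the same ordinal; any extra
+    // query columns beyond the table schema are passed through unchanged and left for schema
+    // evolution in WriteIntoDelta.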
+ val project = query.output.zipWithIndex.map { case (attr, i) => + if (i < targetAttrs.length) { + val targetAttr = targetAttrs(i) + addCastToColumn(attr, targetAttr, tblName) + } else { + attr + } + } + Project(project, query) + } + + /** + * Performs the schema adjustment by adding UpCasts (which are safe) so that we can insert into + * the Delta table when the input data types doesn't match the table schema. Unlike + * `resolveQueryColumnsByOrdinal` which ignores the names in `targetAttrs` and maps attributes + * directly to query output, this method will use the names in the query output to find the + * corresponding attribute to use. This method also allows users to not provide values for + * generated columns. If values of any columns are not in the query output, they must be generated + * columns. + */ + private def resolveQueryColumnsByName( + query: LogicalPlan, targetAttrs: Seq[Attribute], deltaTable: DeltaTableV2): LogicalPlan = { + insertIntoByNameMissingColumn(query, targetAttrs, deltaTable) + // Spark will resolve columns to make sure specified columns are in the table schema and don't + // have duplicates. This is just a sanity check. + assert( + query.output.length <= targetAttrs.length, + s"Too many specified columns ${query.output.map(_.name).mkString(", ")}. " + + s"Table columns: ${targetAttrs.map(_.name).mkString(", ")}") + + val project = query.output.map { attr => + val targetAttr = targetAttrs.find(t => session.sessionState.conf.resolver(t.name, attr.name)) + .getOrElse { + // This is a sanity check. Spark should have done the check. + throw DeltaErrors.missingColumn(attr, targetAttrs) + } + addCastToColumn(attr, targetAttr, deltaTable.name()) + } + Project(project, query) + } + + private def addCastToColumn( + attr: Attribute, + targetAttr: Attribute, + tblName: String): NamedExpression = { + val expr = (attr.dataType, targetAttr.dataType) match { + case (s, t) if s == t => + attr + case (s: StructType, t: StructType) if s != t => + addCastsToStructs(tblName, attr, s, t) + case (ArrayType(s: StructType, sNull: Boolean), ArrayType(t: StructType, tNull: Boolean)) + if s != t && sNull == tNull => + addCastsToArrayStructs(tblName, attr, s, t, sNull) + case _ => + getCastFunction(attr, targetAttr.dataType, targetAttr.name) + } + Alias(expr, targetAttr.name)(explicitMetadata = Option(targetAttr.metadata)) + } + + /** + * With Delta, we ACCEPT_ANY_SCHEMA, meaning that Spark doesn't automatically adjust the schema + * of INSERT INTO. This allows us to perform better schema enforcement/evolution. Since Spark + * skips this step, we see if we need to perform any schema adjustment here. + */ + private def needsSchemaAdjustmentByOrdinal( + tableName: String, + query: LogicalPlan, + schema: StructType): Boolean = { + val output = query.output + if (output.length < schema.length) { + throw DeltaErrors.notEnoughColumnsInInsert(tableName, output.length, schema.length) + } + // Now we should try our best to match everything that already exists, and leave the rest + // for schema evolution to WriteIntoDelta + val existingSchemaOutput = output.take(schema.length) + existingSchemaOutput.map(_.name) != schema.map(_.name) || + !SchemaUtils.isReadCompatible(schema.asNullable, existingSchemaOutput.toStructType) + } + + /** + * Checks for missing columns in a insert by name query and throws an exception if found. + * Delta does not require users to provide values for generated columns, so any columns missing + * from the query output must have a default expression. 
+ * See [[ColumnWithDefaultExprUtils.columnHasDefaultExpr]]. + */ + private def insertIntoByNameMissingColumn( + query: LogicalPlan, + targetAttrs: Seq[Attribute], + deltaTable: DeltaTableV2): Unit = { + if (query.output.length < targetAttrs.length) { + // Some columns are not specified. We don't allow schema evolution in INSERT INTO BY NAME, so + // we need to ensure the missing columns must be generated columns. + val userSpecifiedNames = if (session.sessionState.conf.caseSensitiveAnalysis) { + query.output.map(a => (a.name, a)).toMap + } else { + CaseInsensitiveMap(query.output.map(a => (a.name, a)).toMap) + } + val tableSchema = deltaTable.initialSnapshot.metadata.schema + if (tableSchema.length != targetAttrs.length) { + // The target attributes may contain the metadata columns by design. Throwing an exception + // here in case target attributes may have the metadata columns for Delta in future. + throw DeltaErrors.schemaNotConsistentWithTarget(s"$tableSchema", s"$targetAttrs") + } + val nullAsDefault = deltaTable.spark.sessionState.conf.useNullsForMissingDefaultColumnValues + deltaTable.initialSnapshot.metadata.schema.foreach { col => + if (!userSpecifiedNames.contains(col.name) && + !ColumnWithDefaultExprUtils.columnHasDefaultExpr( + deltaTable.initialSnapshot.protocol, col, nullAsDefault)) { + throw DeltaErrors.missingColumnsInInsertInto(col.name) + } + } + } + } + + /** + * With Delta, we ACCEPT_ANY_SCHEMA, meaning that Spark doesn't automatically adjust the schema + * of INSERT INTO. Here we check if we need to perform any schema adjustment for INSERT INTO by + * name queries. We also check that any columns not in the list of user-specified columns must + * have a default expression. + */ + private def needsSchemaAdjustmentByName(query: LogicalPlan, targetAttrs: Seq[Attribute], + deltaTable: DeltaTableV2): Boolean = { + insertIntoByNameMissingColumn(query, targetAttrs, deltaTable) + val userSpecifiedNames = if (session.sessionState.conf.caseSensitiveAnalysis) { + query.output.map(a => (a.name, a)).toMap + } else { + CaseInsensitiveMap(query.output.map(a => (a.name, a)).toMap) + } + val specifiedTargetAttrs = targetAttrs.filter(col => userSpecifiedNames.contains(col.name)) + !SchemaUtils.isReadCompatible( + specifiedTargetAttrs.toStructType.asNullable, query.output.toStructType) + } + + // Get cast operation for the level of strictness in the schema a user asked for + private def getCastFunction: CastFunction = { + val timeZone = conf.sessionLocalTimeZone + conf.storeAssignmentPolicy match { + case SQLConf.StoreAssignmentPolicy.LEGACY => + (input: Expression, dt: DataType, _) => + Cast(input, dt, Option(timeZone), ansiEnabled = false) + case SQLConf.StoreAssignmentPolicy.ANSI => + (input: Expression, dt: DataType, name: String) => { + val cast = Cast(input, dt, Option(timeZone), ansiEnabled = true) + cast.setTagValue(Cast.BY_TABLE_INSERTION, ()) + TableOutputResolver.checkCastOverflowInTableInsert(cast, name) + } + case SQLConf.StoreAssignmentPolicy.STRICT => + (input: Expression, dt: DataType, _) => + UpCast(input, dt) + } + } + + /** + * Recursively casts structs in case it contains null types. 
+ * TODO: Support other complex types like MapType and ArrayType + */ + private def addCastsToStructs( + tableName: String, + parent: NamedExpression, + source: StructType, + target: StructType): NamedExpression = { + if (source.length < target.length) { + throw DeltaErrors.notEnoughColumnsInInsert( + tableName, source.length, target.length, Some(parent.qualifiedName)) + } + val fields = source.zipWithIndex.map { + case (StructField(name, nested: StructType, _, metadata), i) if i < target.length => + target(i).dataType match { + case t: StructType => + val subField = Alias(GetStructField(parent, i, Option(name)), target(i).name)( + explicitMetadata = Option(metadata)) + addCastsToStructs(tableName, subField, nested, t) + case o => + val field = parent.qualifiedName + "." + name + val targetName = parent.qualifiedName + "." + target(i).name + throw DeltaErrors.cannotInsertIntoColumn(tableName, field, targetName, o.simpleString) + } + case (other, i) if i < target.length => + val targetAttr = target(i) + Alias( + getCastFunction(GetStructField(parent, i, Option(other.name)), + targetAttr.dataType, targetAttr.name), + targetAttr.name)(explicitMetadata = Option(targetAttr.metadata)) + + case (other, i) => + // This is a new column, so leave to schema evolution as is. Do not lose it's name so + // wrap with an alias + Alias( + GetStructField(parent, i, Option(other.name)), + other.name)(explicitMetadata = Option(other.metadata)) + } + Alias(CreateStruct(fields), parent.name)( + parent.exprId, parent.qualifier, Option(parent.metadata)) + } + + private def addCastsToArrayStructs( + tableName: String, + parent: NamedExpression, + source: StructType, + target: StructType, + sourceNullable: Boolean): Expression = { + val structConverter: (Expression, Expression) => Expression = (_, i) => + addCastsToStructs(tableName, Alias(GetArrayItem(parent, i), i.toString)(), source, target) + val transformLambdaFunc = { + val elementVar = NamedLambdaVariable("elementVar", source, sourceNullable) + val indexVar = NamedLambdaVariable("indexVar", IntegerType, false) + LambdaFunction(structConverter(elementVar, indexVar), Seq(elementVar, indexVar)) + } + ArrayTransform(parent, transformLambdaFunc) + } + + private def stripTempViewWrapper(plan: LogicalPlan): LogicalPlan = { + DeltaViewHelper.stripTempView(plan, conf) + } + + private def stripTempViewForMergeWrapper(plan: LogicalPlan): LogicalPlan = { + DeltaViewHelper.stripTempViewForMerge(plan, conf) + } + + /** + * Verify the input plan for a SINGLE streaming query with the following: + * 1. Schema location must be under checkpoint location, if not lifted by flag + * 2. No two duplicating delta source can share the same schema location + */ + private def verifyDeltaSourceSchemaLocation( + inputQuery: LogicalPlan, + checkpointLocation: String): Unit = { + // Maps StreamingRelation to schema location, similar to how MicroBatchExecution converts + // StreamingRelation to StreamingExecutionRelation. 
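+    // Every Delta streaming source that configures a schema tracking location is recorded in
+    // this map so that duplicated schema locations can be detected after the traversal.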
+ val schemaLocationMap = mutable.Map[StreamingRelation, String]() + val allowSchemaLocationOutsideOfCheckpoint = session.sessionState.conf.getConf( + DeltaSQLConf.DELTA_STREAMING_ALLOW_SCHEMA_LOCATION_OUTSIDE_CHECKPOINT_LOCATION) + inputQuery.foreach { + case streamingRelation @ StreamingRelation(dataSourceV1, sourceName, _) + if DeltaSourceUtils.isDeltaDataSourceName(sourceName) => + DeltaDataSource.extractSchemaTrackingLocationConfig( + session, dataSourceV1.options + ).foreach { rootSchemaTrackingLocation => + assert(dataSourceV1.options.contains("path"), "Path for Delta table must be defined") + val log = DeltaLog.forTable(session, new Path(dataSourceV1.options("path"))) + val sourceIdOpt = dataSourceV1.options.get(DeltaOptions.STREAMING_SOURCE_TRACKING_ID) + val schemaTrackingLocation = + DeltaSourceMetadataTrackingLog.fullMetadataTrackingLocation( + rootSchemaTrackingLocation, log.tableId, sourceIdOpt) + // Make sure schema location is under checkpoint + if (!allowSchemaLocationOutsideOfCheckpoint && + !(schemaTrackingLocation.stripPrefix("file:").stripSuffix("/") + "/") + .startsWith(checkpointLocation.stripPrefix("file:").stripSuffix("/") + "/")) { + throw DeltaErrors.schemaTrackingLocationNotUnderCheckpointLocation( + schemaTrackingLocation, checkpointLocation) + } + // Save schema location for this streaming relation + schemaLocationMap.put(streamingRelation, schemaTrackingLocation.stripSuffix("/")) + } + case _ => + } + + // Now verify all schema locations are distinct + val conflictSchemaOpt = schemaLocationMap + .keys + .groupBy { rel => schemaLocationMap(rel) } + .find(_._2.size > 1) + conflictSchemaOpt.foreach { case (schemaLocation, relations) => + val ds = relations.head.dataSource + // Pick one source that has conflict to make it more actionable for the user + val oneTableWithConflict = ds.catalogTable + .map(_.identifier.toString) + .getOrElse { + // `path` must exist + CaseInsensitiveMap(ds.options).get("path").get + } + throw DeltaErrors.sourcesWithConflictingSchemaTrackingLocation( + schemaLocation, oneTableWithConflict) + } + } + + object EligibleCreateTableLikeCommand { + def unapply(arg: LogicalPlan): Option[(CreateTableLikeCommand, CatalogTable)] = arg match { + case c: CreateTableLikeCommand => + val src = session.sessionState.catalog.getTempViewOrPermanentTableMetadata(c.sourceTable) + if (src.provider.contains("delta") || + c.provider.exists(DeltaSourceUtils.isDeltaDataSourceName)) { + Some(c, src) + } else { + None + } + case _ => + None + } + } +} + +/** Matchers for dealing with a Delta table. 
*/ +object DeltaRelation extends DeltaLogging { + val KEEP_AS_V2_RELATION_TAG = new TreeNodeTag[Unit]("__keep_as_v2_relation") + + def unapply(plan: LogicalPlan): Option[LogicalRelation] = plan match { + case dsv2 @ DataSourceV2Relation(d: DeltaTableV2, _, _, _, options) => + Some(fromV2Relation(d, dsv2, options)) + case lr @ DeltaTable(_) => Some(lr) + case _ => None + } + + def fromV2Relation( + d: DeltaTableV2, + v2Relation: DataSourceV2Relation, + options: CaseInsensitiveStringMap): LogicalRelation = { + recordFrameProfile("DeltaAnalysis", "fromV2Relation") { + val relation = d.withOptions(options.asScala.toMap).toBaseRelation + val output = if (CDCReader.isCDCRead(options)) { + // Handles CDC for the spark.read.options().table() code path + toAttributes(relation.schema) + } else { + v2Relation.output + } + LogicalRelation(relation, output, d.ttSafeCatalogTable, isStreaming = false) + } + } +} + +object AppendDelta { + def unapply(a: AppendData): Option[(DataSourceV2Relation, DeltaTableV2)] = { + if (a.query.resolved) { + a.table match { + case r: DataSourceV2Relation if r.table.isInstanceOf[DeltaTableV2] => + Some((r, r.table.asInstanceOf[DeltaTableV2])) + case _ => None + } + } else { + None + } + } +} + +object OverwriteDelta { + def unapply(o: OverwriteByExpression): Option[(DataSourceV2Relation, DeltaTableV2)] = { + if (o.query.resolved) { + o.table match { + case r: DataSourceV2Relation if r.table.isInstanceOf[DeltaTableV2] => + Some((r, r.table.asInstanceOf[DeltaTableV2])) + case _ => None + } + } else { + None + } + } +} + +object DynamicPartitionOverwriteDelta { + def unapply(o: OverwritePartitionsDynamic): Option[(DataSourceV2Relation, DeltaTableV2)] = { + if (o.query.resolved) { + o.table match { + case r: DataSourceV2Relation if r.table.isInstanceOf[DeltaTableV2] => + Some((r, r.table.asInstanceOf[DeltaTableV2])) + case _ => None + } + } else { + None + } + } +} + +/** + * A `RunnableCommand` that will execute dynamic partition overwrite using [[WriteIntoDelta]]. + * + * This is a workaround for Spark not supporting V1 fallback for dynamic partition overwrite. + * Note the following details: + * - Extends `V2WriteCommand` so that Spark can transform this plan in the same way as other + * commands like `AppendData`. + * - Exposes the query as a child so that the Spark optimizer can optimize it. + */ +case class DeltaDynamicPartitionOverwriteCommand( + table: NamedRelation, + deltaTable: DeltaTableV2, + query: LogicalPlan, + writeOptions: Map[String, String], + isByName: Boolean, + analyzedQuery: Option[LogicalPlan] = None) extends RunnableCommand with V2WriteCommand { + + override def child: LogicalPlan = query + + override def withNewQuery(newQuery: LogicalPlan): DeltaDynamicPartitionOverwriteCommand = { + copy(query = newQuery) + } + + override def withNewTable(newTable: NamedRelation): DeltaDynamicPartitionOverwriteCommand = { + copy(table = newTable) + } + + override def storeAnalyzedQuery(): Command = copy(analyzedQuery = Some(query)) + + override protected def withNewChildInternal( + newChild: LogicalPlan): DeltaDynamicPartitionOverwriteCommand = copy(query = newChild) + + override def run(sparkSession: SparkSession): Seq[Row] = { + val deltaOptions = new DeltaOptions( + CaseInsensitiveMap[String]( + deltaTable.options ++ + writeOptions ++ + Seq(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION -> + DeltaOptions.PARTITION_OVERWRITE_MODE_DYNAMIC)), + sparkSession.sessionState.conf) + + // TODO: The configuration can be fetched directly from WriteIntoDelta's txn.
Don't pass + // in the default snapshot's metadata config here. + WriteIntoDelta( + deltaTable.deltaLog, + SaveMode.Overwrite, + deltaOptions, + partitionColumns = Nil, + deltaTable.deltaLog.unsafeVolatileSnapshot.metadata.configuration, + Dataset.ofRows(sparkSession, query), + deltaTable.catalogTable + ).run(sparkSession) + } +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaColumnMapping.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaColumnMapping.scala new file mode 100644 index 00000000000..3fc69bb578c --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaColumnMapping.scala @@ -0,0 +1,821 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.util.{Locale, UUID} + +import scala.collection.mutable + +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.{SchemaMergingUtils, SchemaUtils} +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, Metadata => SparkMetadata, MetadataBuilder, StructField, StructType} + +trait DeltaColumnMappingBase extends DeltaLogging { + val PARQUET_FIELD_ID_METADATA_KEY = "parquet.field.id" + val PARQUET_FIELD_NESTED_IDS_METADATA_KEY = "parquet.field.nested.ids" + val COLUMN_MAPPING_METADATA_PREFIX = "delta.columnMapping." + val COLUMN_MAPPING_METADATA_ID_KEY = COLUMN_MAPPING_METADATA_PREFIX + "id" + val COLUMN_MAPPING_PHYSICAL_NAME_KEY = COLUMN_MAPPING_METADATA_PREFIX + "physicalName" + val COLUMN_MAPPING_METADATA_NESTED_IDS_KEY = COLUMN_MAPPING_METADATA_PREFIX + "nested.ids" + val PARQUET_LIST_ELEMENT_FIELD_NAME = "element" + val PARQUET_MAP_KEY_FIELD_NAME = "key" + val PARQUET_MAP_VALUE_FIELD_NAME = "value" + + /** + * This list of internal columns (and only this list) is allowed to have missing + * column mapping metadata such as field id and physical name because + * they might not be present in user's table schema. + * + * These fields, if materialized to parquet, will always be matched by their display name in the + * downstream parquet reader even under column mapping modes. + * + * For future developers who want to utilize additional internal columns without generating + * column mapping metadata, please add them here. + * + * This list is case-insensitive. 
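+   *
+   * For example, the CDC metadata columns produced during CDC reads (such as `_change_type`)
+   * fall into this category and are matched by their display name.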
+ */ + protected val DELTA_INTERNAL_COLUMNS: Set[String] = + (CDCReader.CDC_COLUMNS_IN_DATA ++ Seq( + CDCReader.CDC_COMMIT_VERSION, + CDCReader.CDC_COMMIT_TIMESTAMP, + /** + * Whenever `_metadata` column is selected, Spark adds the format generated metadata + * columns to `ParquetFileFormat`'s required output schema. Column `_metadata` contains + * constant value subfields metadata such as `file_path` and format specific custom metadata + * subfields such as `row_index` in Parquet. Spark creates the file format object with + * data schema plus additional custom metadata columns required from file format to fill up + * the `_metadata` column. + */ + ParquetFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME, + DeltaParquetFileFormat.IS_ROW_DELETED_COLUMN_NAME, + DeltaParquetFileFormat.ROW_INDEX_COLUMN_NAME) + ).map(_.toLowerCase(Locale.ROOT)).toSet + + val supportedModes: Set[DeltaColumnMappingMode] = + Set(IdMapping, NoMapping, NameMapping) + + def isInternalField(field: StructField): Boolean = DELTA_INTERNAL_COLUMNS + .contains(field.name.toLowerCase(Locale.ROOT)) + + def satisfiesColumnMappingProtocol(protocol: Protocol): Boolean = + protocol.isFeatureSupported(ColumnMappingTableFeature) + + /** + * The only allowed mode change is from NoMapping to NameMapping. Other changes + * would require re-writing Parquet files and are not supported right now. + */ + private def allowMappingModeChange( + oldMode: DeltaColumnMappingMode, + newMode: DeltaColumnMappingMode): Boolean = { + if (oldMode == newMode) true + else oldMode == NoMapping && newMode == NameMapping + } + + def isColumnMappingUpgrade( + oldMode: DeltaColumnMappingMode, + newMode: DeltaColumnMappingMode): Boolean = { + oldMode == NoMapping && newMode != NoMapping + } + + /** + * If the table is already on the column mapping protocol, we block: + * - changing column mapping config + * otherwise, we block + * - upgrading to the column mapping Protocol through configurations + */ + def verifyAndUpdateMetadataChange( + deltaLog: DeltaLog, + oldProtocol: Protocol, + oldMetadata: Metadata, + newMetadata: Metadata, + isCreatingNewTable: Boolean, + isOverwriteSchema: Boolean): Metadata = { + // field in new metadata should have been dropped + val oldMappingMode = oldMetadata.columnMappingMode + val newMappingMode = newMetadata.columnMappingMode + + if (!supportedModes.contains(newMappingMode)) { + throw DeltaErrors.unsupportedColumnMappingMode(newMappingMode.name) + } + + val isChangingModeOnExistingTable = oldMappingMode != newMappingMode && !isCreatingNewTable + if (isChangingModeOnExistingTable) { + if (!allowMappingModeChange(oldMappingMode, newMappingMode)) { + throw DeltaErrors.changeColumnMappingModeNotSupported( + oldMappingMode.name, newMappingMode.name) + } else { + // legal mode change, now check if protocol is upgraded before or part of this txn + val caseInsensitiveMap = CaseInsensitiveMap(newMetadata.configuration) + val minReaderVersion = caseInsensitiveMap + .get(Protocol.MIN_READER_VERSION_PROP).map(_.toInt) + .getOrElse(oldProtocol.minReaderVersion) + val minWriterVersion = caseInsensitiveMap + .get(Protocol.MIN_WRITER_VERSION_PROP).map(_.toInt) + .getOrElse(oldProtocol.minWriterVersion) + var newProtocol = Protocol(minReaderVersion, minWriterVersion) + val satisfiesWriterVersion = minWriterVersion >= ColumnMappingTableFeature.minWriterVersion + val satisfiesReaderVersion = minReaderVersion >= ColumnMappingTableFeature.minReaderVersion + // This is an OR check because `readerFeatures` and `writerFeatures` can independently + // 
support table features. + if ((newProtocol.supportsReaderFeatures && satisfiesWriterVersion) || + (newProtocol.supportsWriterFeatures && satisfiesReaderVersion)) { + newProtocol = newProtocol.withFeature(ColumnMappingTableFeature) + } + + if (!satisfiesColumnMappingProtocol(newProtocol)) { + throw DeltaErrors.changeColumnMappingModeOnOldProtocol(oldProtocol) + } + } + } + + val updatedMetadata = updateColumnMappingMetadata( + oldMetadata, newMetadata, isChangingModeOnExistingTable, isOverwriteSchema) + + // record column mapping table creation/upgrade + if (newMappingMode != NoMapping) { + if (isCreatingNewTable) { + recordDeltaEvent(deltaLog, "delta.columnMapping.createTable") + } else if (oldMappingMode != newMappingMode) { + recordDeltaEvent(deltaLog, "delta.columnMapping.upgradeTable") + } + } + + updatedMetadata + } + + def hasColumnId(field: StructField): Boolean = + field.metadata.contains(COLUMN_MAPPING_METADATA_ID_KEY) + + def getColumnId(field: StructField): Int = + field.metadata.getLong(COLUMN_MAPPING_METADATA_ID_KEY).toInt + + def hasNestedColumnIds(field: StructField): Boolean = + field.metadata.contains(COLUMN_MAPPING_METADATA_NESTED_IDS_KEY) + + def getNestedColumnIds(field: StructField): SparkMetadata = + field.metadata.getMetadata(COLUMN_MAPPING_METADATA_NESTED_IDS_KEY) + + def hasPhysicalName(field: StructField): Boolean = + field.metadata.contains(COLUMN_MAPPING_PHYSICAL_NAME_KEY) + + /** + * Gets the required column metadata for each column based on the column mapping mode. + */ + def getColumnMappingMetadata(field: StructField, mode: DeltaColumnMappingMode): SparkMetadata = { + mode match { + case NoMapping => + // drop all column mapping related fields + new MetadataBuilder() + .withMetadata(field.metadata) + .remove(COLUMN_MAPPING_METADATA_ID_KEY) + .remove(COLUMN_MAPPING_METADATA_NESTED_IDS_KEY) + .remove(PARQUET_FIELD_ID_METADATA_KEY) + .remove(PARQUET_FIELD_NESTED_IDS_METADATA_KEY) + .remove(COLUMN_MAPPING_PHYSICAL_NAME_KEY) + .build() + + case IdMapping | NameMapping => + if (!hasColumnId(field)) { + throw DeltaErrors.missingColumnId(mode, field.name) + } + if (!hasPhysicalName(field)) { + throw DeltaErrors.missingPhysicalName(mode, field.name) + } + // Delta spec requires writer to always write field_id in parquet schema for column mapping + // Reader strips PARQUET_FIELD_ID_METADATA_KEY in + // DeltaParquetFileFormat:prepareSchemaForRead + val builder = new MetadataBuilder() + .withMetadata(field.metadata) + .putLong(PARQUET_FIELD_ID_METADATA_KEY, getColumnId(field)) + + // Nested field IDs for the 'element' and 'key'/'value' fields of Arrays + // and Maps are written when Uniform with IcebergCompatV2 is enabled on a table. + if (hasNestedColumnIds(field)) { + builder.putMetadata(PARQUET_FIELD_NESTED_IDS_METADATA_KEY, getNestedColumnIds(field)) + } + + builder.build() + + case mode => + throw DeltaErrors.unsupportedColumnMappingMode(mode.name) + } + } + + /** Recursively renames columns in the given schema with their physical schema. 
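+   * For example (hypothetical metadata), a field with display name `id` and physical name
+   * `col-7a3f` comes back as a field named `col-7a3f`; nested fields are renamed the same way.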
*/ + def renameColumns(schema: StructType): StructType = { + SchemaMergingUtils.transformColumns(schema) { (_, field, _) => + field.copy(name = getPhysicalName(field)) + } + } + + def assignPhysicalName(field: StructField, physicalName: String): StructField = { + field.copy(metadata = new MetadataBuilder() + .withMetadata(field.metadata) + .putString(COLUMN_MAPPING_PHYSICAL_NAME_KEY, physicalName) + .build()) + } + + def assignPhysicalNames(schema: StructType): StructType = { + SchemaMergingUtils.transformColumns(schema) { (_, field, _) => + if (hasPhysicalName(field)) field else assignPhysicalName(field, generatePhysicalName) + } + } + + /** Set physical name based on field path, skip if field path not found in the map */ + def setPhysicalNames( + schema: StructType, + fieldPathToPhysicalName: Map[Seq[String], String]): StructType = { + if (fieldPathToPhysicalName.isEmpty) { + schema + } else { + SchemaMergingUtils.transformColumns(schema) { (parent, field, _) => + val path = parent :+ field.name + if (fieldPathToPhysicalName.contains(path)) { + assignPhysicalName(field, fieldPathToPhysicalName(path)) + } else { + field + } + } + } + } + + def generatePhysicalName: String = "col-" + UUID.randomUUID() + + def getPhysicalName(field: StructField): String = { + if (field.metadata.contains(COLUMN_MAPPING_PHYSICAL_NAME_KEY)) { + field.metadata.getString(COLUMN_MAPPING_PHYSICAL_NAME_KEY) + } else { + field.name + } + } + + private def updateColumnMappingMetadata( + oldMetadata: Metadata, + newMetadata: Metadata, + isChangingModeOnExistingTable: Boolean, + isOverwritingSchema: Boolean): Metadata = { + val newMappingMode = DeltaConfigs.COLUMN_MAPPING_MODE.fromMetaData(newMetadata) + newMappingMode match { + case IdMapping | NameMapping => + assignColumnIdAndPhysicalName( + newMetadata, oldMetadata, isChangingModeOnExistingTable, isOverwritingSchema) + case NoMapping => + newMetadata + case mode => + throw DeltaErrors.unsupportedColumnMappingMode(mode.name) + } + } + + def findMaxColumnId(schema: StructType): Long = { + var maxId: Long = 0 + SchemaMergingUtils.transformColumns(schema)((_, f, _) => { + if (hasColumnId(f)) { + maxId = maxId max getColumnId(f) + } + f + }) + maxId + } + + /** + * Verify the metadata for valid column mapping metadata assignment. This is triggered for every + * commit as a last defense. + * + * 1. Ensure column mapping metadata is set for the appropriate mode + * 2. Ensure no duplicate column id/physical names set + * 3. 
Ensure max column id is in a good state (set, and greater than all field ids available) + */ + def checkColumnIdAndPhysicalNameAssignments(metadata: Metadata): Unit = { + val schema = metadata.schema + val mode = metadata.columnMappingMode + + // physical name/column id -> full field path + val columnIds = mutable.Set[Int]() + val physicalNames = mutable.Set[String]() + // use id mapping to keep all column mapping metadata + // this method checks for missing physical name & column id already + val physicalSchema = createPhysicalSchema(schema, schema, IdMapping, checkSupportedMode = false) + + // Check id / physical name duplication + SchemaMergingUtils.transformColumns(physicalSchema) ((parentPhysicalPath, field, _) => { + // field.name is now physical name + // We also need to apply backticks to column paths with dots in them to prevent a possible + // false alarm in which a column `a.b` is duplicated with `a`.`b` + val curFullPhysicalPath = UnresolvedAttribute(parentPhysicalPath :+ field.name).name + val columnId = getColumnId(field) + if (columnIds.contains(columnId)) { + throw DeltaErrors.duplicatedColumnId(mode, columnId, schema) + } + columnIds.add(columnId) + + // We should check duplication by full physical name path, because nested fields + // such as `a.b.c` shouldn't conflict with `x.y.c` due to same column name. + if (physicalNames.contains(curFullPhysicalPath)) { + throw DeltaErrors.duplicatedPhysicalName(mode, curFullPhysicalPath, schema) + } + physicalNames.add(curFullPhysicalPath) + + field + }) + + // Check assignment of the max id property + if (SQLConf.get.getConf(DeltaSQLConf.DELTA_COLUMN_MAPPING_CHECK_MAX_COLUMN_ID)) { + if (!metadata.configuration.contains(DeltaConfigs.COLUMN_MAPPING_MAX_ID.key)) { + throw DeltaErrors.maxColumnIdNotSet + } + val fieldMaxId = DeltaColumnMapping.findMaxColumnId(schema) + if (metadata.columnMappingMaxId < DeltaColumnMapping.findMaxColumnId(schema)) { + throw DeltaErrors.maxColumnIdNotSetCorrectly(metadata.columnMappingMaxId, fieldMaxId) + } + } + } + + /** + * For each column/field in a Metadata's schema, assign id using the current maximum id + * as the basis and increment from there, and assign physical name using UUID + * @param newMetadata The new metadata to assign Ids and physical names + * @param oldMetadata The old metadata + * @param isChangingModeOnExistingTable whether this is part of a commit that changes the + * mapping mode on a existing table + * @return new metadata with Ids and physical names assigned + */ + def assignColumnIdAndPhysicalName( + newMetadata: Metadata, + oldMetadata: Metadata, + isChangingModeOnExistingTable: Boolean, + isOverwritingSchema: Boolean): Metadata = { + val rawSchema = newMetadata.schema + var maxId = DeltaConfigs.COLUMN_MAPPING_MAX_ID.fromMetaData(newMetadata) max + findMaxColumnId(rawSchema) + val startId = maxId + val newSchema = + SchemaMergingUtils.transformColumns(rawSchema)((path, field, _) => { + val builder = new MetadataBuilder().withMetadata(field.metadata) + + lazy val fullName = path :+ field.name + lazy val existingFieldOpt = + SchemaUtils.findNestedFieldIgnoreCase( + oldMetadata.schema, fullName, includeCollections = true) + lazy val canReuseColumnMappingMetadataDuringOverwrite = { + val canReuse = + isOverwritingSchema && + SparkSession.getActiveSession.exists( + _.conf.get(DeltaSQLConf.REUSE_COLUMN_MAPPING_METADATA_DURING_OVERWRITE)) && + existingFieldOpt.exists { existingField => + // Ensure data type & nullability are compatible + DataType.equalsIgnoreCompatibleNullability( + 
from = existingField.dataType, + to = field.dataType + ) + } + if (canReuse) { + require(!isChangingModeOnExistingTable, + "Cannot change column mapping mode while overwriting the table") + assert(hasColumnId(existingFieldOpt.get) && hasPhysicalName(existingFieldOpt.get)) + } + canReuse + } + + if (!hasColumnId(field)) { + val columnId = if (canReuseColumnMappingMetadataDuringOverwrite) { + getColumnId(existingFieldOpt.get) + } else { + maxId += 1 + maxId + } + + builder.putLong(COLUMN_MAPPING_METADATA_ID_KEY, columnId) + } + if (!hasPhysicalName(field)) { + val physicalName = if (isChangingModeOnExistingTable) { + if (existingFieldOpt.isEmpty) { + if (oldMetadata.schema.isEmpty) { + // We should relax the check for tables that have both an empty schema + // and no data. Assumption: no schema => no data + generatePhysicalName + } else throw DeltaErrors.schemaChangeDuringMappingModeChangeNotSupported( + oldMetadata.schema, newMetadata.schema) + } else { + // When changing from NoMapping to NameMapping mode, we directly use old display names + // as physical names. This is by design: 1) We don't need to rewrite the + // existing Parquet files, and 2) display names in no-mapping mode have all the + // properties required for physical names: unique, stable and compliant with Parquet + // column naming restrictions. + existingFieldOpt.get.name + } + } else if (canReuseColumnMappingMetadataDuringOverwrite) { + // Copy the physical name metadata over from the existing field if possible + getPhysicalName(existingFieldOpt.get) + } else { + generatePhysicalName + } + + builder.putString(COLUMN_MAPPING_PHYSICAL_NAME_KEY, physicalName) + } + field.copy(metadata = builder.build()) + }) + + val (finalSchema, newMaxId) = if (IcebergCompatV2.isEnabled(newMetadata)) { + rewriteFieldIdsForIceberg(newSchema, maxId) + } else { + (newSchema, maxId) + } + + newMetadata.copy( + schemaString = finalSchema.json, + configuration = newMetadata.configuration + ++ Map(DeltaConfigs.COLUMN_MAPPING_MAX_ID.key -> newMaxId.toString) + ) + } + + def dropColumnMappingMetadata(schema: StructType): StructType = { + SchemaMergingUtils.transformColumns(schema) { (_, field, _) => + field.copy( + metadata = new MetadataBuilder() + .withMetadata(field.metadata) + .remove(COLUMN_MAPPING_METADATA_ID_KEY) + .remove(COLUMN_MAPPING_METADATA_NESTED_IDS_KEY) + .remove(COLUMN_MAPPING_PHYSICAL_NAME_KEY) + .remove(PARQUET_FIELD_ID_METADATA_KEY) + .remove(PARQUET_FIELD_NESTED_IDS_METADATA_KEY) + .build() + ) + } + } + + def filterColumnMappingProperties(properties: Map[String, String]): Map[String, String] = { + properties.filterKeys(_ != DeltaConfigs.COLUMN_MAPPING_MAX_ID.key).toMap + } + + // Verify the values of internal column mapping properties are the same in two sets of config + // ONLY if the config is present in both sets of properties. + def verifyInternalProperties(one: Map[String, String], two: Map[String, String]): Boolean = { + val key = DeltaConfigs.COLUMN_MAPPING_MAX_ID.key + one.get(key).forall(value => value == two.getOrElse(key, value)) + } + + /** + * Create a physical schema for the given schema using the Delta table schema as a reference. 
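+   *
+   * A minimal usage sketch (assuming a `snapshot` of a table with column mapping enabled and a
+   * read-time `dataSchema`):
+   * {{{
+   *   val physicalSchema = DeltaColumnMapping.createPhysicalSchema(
+   *     dataSchema, snapshot.metadata.schema, snapshot.metadata.columnMappingMode)
+   * }}}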
+ * + * @param schema the given logical schema (potentially without any metadata) + * @param referenceSchema the schema from the delta log, which has all the metadata + * @param columnMappingMode column mapping mode of the delta table, which determines which + * metadata to fill in + * @param checkSupportedMode whether we should check of the column mapping mode is supported + */ + def createPhysicalSchema( + schema: StructType, + referenceSchema: StructType, + columnMappingMode: DeltaColumnMappingMode, + checkSupportedMode: Boolean = true): StructType = { + if (columnMappingMode == NoMapping) { + return schema + } + + // createPhysicalSchema is the narrow-waist for both read/write code path + // so we could check for mode support here + if (checkSupportedMode && !supportedModes.contains(columnMappingMode)) { + throw DeltaErrors.unsupportedColumnMappingMode(columnMappingMode.name) + } + + SchemaMergingUtils.transformColumns(schema) { (path, field, _) => + val fullName = path :+ field.name + val inSchema = SchemaUtils + .findNestedFieldIgnoreCase(referenceSchema, fullName, includeCollections = true) + inSchema.map { refField => + val sparkMetadata = getColumnMappingMetadata(refField, columnMappingMode) + field.copy(metadata = sparkMetadata, name = getPhysicalName(refField)) + }.getOrElse { + if (isInternalField(field)) { + field + } else { + throw DeltaErrors.columnNotFound(fullName, referenceSchema) + } + } + } + } + + /** + * Create a list of physical attributes for the given attributes using the table schema as a + * reference. + * + * @param output the list of attributes (potentially without any metadata) + * @param referenceSchema the table schema with all the metadata + * @param columnMappingMode column mapping mode of the delta table, which determines which + * metadata to fill in + */ + def createPhysicalAttributes( + output: Seq[Attribute], + referenceSchema: StructType, + columnMappingMode: DeltaColumnMappingMode): Seq[Attribute] = { + // Assign correct column mapping info to columns according to the schema + val struct = createPhysicalSchema(output.toStructType, referenceSchema, columnMappingMode) + output.zip(struct).map { case (attr, field) => + attr.withDataType(field.dataType) // for recursive column names and metadata + .withMetadata(field.metadata) + .withName(field.name) + } + } + + /** + * Returns a map of physicalNamePath -> field for the given `schema`, where + * physicalNamePath is the [$parentPhysicalName, ..., $fieldPhysicalName] list of physical names + * for every field (including nested) in the `schema`. + * + * Must be called after `checkColumnIdAndPhysicalNameAssignments`, so that we know the schema + * is valid. + */ + def getPhysicalNameFieldMap(schema: StructType): Map[Seq[String], StructField] = { + val physicalSchema = renameColumns(schema) + + val physicalSchemaFieldPaths = SchemaMergingUtils.explode(physicalSchema).map(_._1) + + val originalSchemaFields = SchemaMergingUtils.explode(schema).map(_._2) + + physicalSchemaFieldPaths.zip(originalSchemaFields).toMap + } + + /** + * Returns true if Column Mapping mode is enabled and the newMetadata's schema, when compared to + * the currentMetadata's schema, is indicative of a DROP COLUMN operation. + * + * We detect DROP COLUMNS by checking if any physical name in `currentSchema` is missing in + * `newSchema`. 
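+   *
+   * For example, if the current schema contains a field whose physical name is `col-123`
+   * (hypothetical) and no field in the new schema carries that physical name, the change is
+   * detected as a DROP COLUMN.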
+ */ + def isDropColumnOperation(newMetadata: Metadata, currentMetadata: Metadata): Boolean = { + + // We will need to compare the new schema's physical columns to the current schema's physical + // columns. So, they both must have column mapping enabled. + if (newMetadata.columnMappingMode == NoMapping || + currentMetadata.columnMappingMode == NoMapping) { + return false + } + + isDropColumnOperation(newSchema = newMetadata.schema, currentSchema = currentMetadata.schema) + } + + def isDropColumnOperation(newSchema: StructType, currentSchema: StructType): Boolean = { + val newPhysicalToLogicalMap = getPhysicalNameFieldMap(newSchema) + val currentPhysicalToLogicalMap = getPhysicalNameFieldMap(currentSchema) + + // are any of the current physical names missing in the new schema? + currentPhysicalToLogicalMap + .keys + .exists { k => !newPhysicalToLogicalMap.contains(k) } + } + + /** + * Returns true if Column Mapping mode is enabled and the newMetadata's schema, when compared to + * the currentMetadata's schema, is indicative of a RENAME COLUMN operation. + * + * We detect RENAME COLUMNS by checking if any two columns with the same physical name have + * different logical names + */ + def isRenameColumnOperation(newMetadata: Metadata, currentMetadata: Metadata): Boolean = { + + // We will need to compare the new schema's physical columns to the current schema's physical + // columns. So, they both must have column mapping enabled. + if (newMetadata.columnMappingMode == NoMapping || + currentMetadata.columnMappingMode == NoMapping) { + return false + } + + isRenameColumnOperation(newSchema = newMetadata.schema, currentSchema = currentMetadata.schema) + } + + def isRenameColumnOperation(newSchema: StructType, currentSchema: StructType): Boolean = { + val newPhysicalToLogicalMap = getPhysicalNameFieldMap(newSchema) + val currentPhysicalToLogicalMap = getPhysicalNameFieldMap(currentSchema) + + // do any two columns with the same physical name have different logical names? + currentPhysicalToLogicalMap + .exists { case (physicalPath, field) => + newPhysicalToLogicalMap.get(physicalPath).exists(_.name != field.name) + } + } + + /** + * Compare the old metadata's schema with new metadata's schema for column mapping schema changes. + * Also check for repartition because we need to fail fast when repartition detected. + * + * newMetadata's snapshot version must be >= oldMetadata's snapshot version so we could reliably + * detect the difference between ADD COLUMN and DROP COLUMN. + * + * As of now, `newMetadata` is column mapping read compatible with `oldMetadata` if + * no rename column or drop column has happened in-between. 
+   */
+  def hasNoColumnMappingSchemaChanges(newMetadata: Metadata, oldMetadata: Metadata,
+    allowUnsafeReadOnPartitionChanges: Boolean = false): Boolean = {
+    // Helper: returns true iff there is no column mapping schema change (no rename, no drop)
+    // and the partitioning is compatible between the two versions of the metadata.
+    def hasNoColMappingAndRepartitionSchemaChange(
+        newMetadata: Metadata, oldMetadata: Metadata): Boolean = {
+      !isRenameColumnOperation(newMetadata, oldMetadata) &&
+        !isDropColumnOperation(newMetadata, oldMetadata) &&
+        SchemaUtils.isPartitionCompatible(
+          // if unsafe row reads are allowed on partition changes, skip the partition check
+          if (allowUnsafeReadOnPartitionChanges) Seq.empty else newMetadata.partitionColumns,
+          if (allowUnsafeReadOnPartitionChanges) Seq.empty else oldMetadata.partitionColumns)
+    }
+
+    val (oldMode, newMode) = (oldMetadata.columnMappingMode, newMetadata.columnMappingMode)
+    if (oldMode != NoMapping && newMode != NoMapping) {
+      require(oldMode == newMode, "changing mode is not supported")
+      // Both metadata versions are post column mapping enablement
+      hasNoColMappingAndRepartitionSchemaChange(newMetadata, oldMetadata)
+    } else if (oldMode == NoMapping && newMode != NoMapping) {
+      // The old metadata does not have column mapping while the new metadata does, so we assume
+      // an upgrade has happened in between. We manually construct a post-upgrade schema for the
+      // old metadata and compare that with the new metadata. Since the upgrade uses the logical
+      // name as the physical name, any difference in the schema is captured by the same
+      // is{Drop,Rename}ColumnOperation utils.
+      var upgradedMetadata = assignColumnIdAndPhysicalName(
+        oldMetadata, oldMetadata, isChangingModeOnExistingTable = true, isOverwritingSchema = false
+      )
+      // also switch to a column mapping mode so the utils below can recognize the upgraded schema
+      upgradedMetadata = upgradedMetadata.copy(
+        configuration = upgradedMetadata.configuration ++
+          Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> newMetadata.columnMappingMode.name)
+      )
+      // use the same check
+      hasNoColMappingAndRepartitionSchemaChange(newMetadata, upgradedMetadata)
+    } else {
+      // Not column mapping, don't block
+      // TODO: support column mapping downgrade check once that's rolled out.
+      true
+    }
+  }
+
+  /**
+   * Adds the nested field IDs required by Iceberg.
+   *
+   * In parquet, list-type columns have a nested, implicitly defined [[element]] field and
+   * map-type columns have implicitly defined [[key]] and [[value]] fields. By default,
+   * Spark does not write field IDs for these fields in the parquet files. However, Iceberg
+   * requires these *nested* field IDs to be present. This method rewrites the specified
+   * Spark schema to add those nested field IDs.
+   *
+   * As list and map types are not [[StructField]]s themselves, nested field IDs are stored in
+   * a map as part of the metadata of the *nearest* parent [[StructField]]. For example, consider
+   * the following schema:
+   *
+   * col1 ARRAY(INT)
+   * col2 MAP(INT, INT)
+   * col3 STRUCT(a INT, b ARRAY(STRUCT(c INT, d MAP(INT, INT))))
+   *
+   * col1 is a list and so requires one nested field ID for the [[element]] field in parquet.
+   * This nested field ID will be stored in a map that is part of col1's [[StructField.metadata]].
+   * The same applies to the nested field IDs for col2's implicit [[key]] and [[value]] fields.
+   * col3 itself is a Struct, consisting of an integer field and a list field named 'b'. The
+   * nested field ID for the list of 'b' is stored in b's StructField metadata.
Finally, the + * list type itself is again a struct consisting of an integer field and a map field named 'd'. + * The nested field IDs for the map of 'd' are stored in d's StructField metadata. + * + * @param schema The schema to which nested field IDs should be added + * @param startId The first field ID to use for the nested field IDs + */ + def rewriteFieldIdsForIceberg(schema: StructType, startId: Long): (StructType, Long) = { + var currFieldId = startId + + def initNestedIdsMetadata(field: StructField): MetadataBuilder = { + if (hasNestedColumnIds(field)) { + new MetadataBuilder().withMetadata(getNestedColumnIds(field)) + } else { + new MetadataBuilder() + } + } + + /* + * Helper to add the next field ID to the specified [[MetadataBuilder]] under + * the specified key. This method first checks whether this is an existing nested + * field or a newly added nested field. New field IDs are only assigned to newly + * added nested fields. + */ + def updateFieldId(metadata: MetadataBuilder, key: String): Unit = { + if (!metadata.build().contains(key)) { + currFieldId += 1 + metadata.putLong(key, currFieldId) + } + } + + /* + * Recursively adds nested field IDs for the passed data type in pre-order, + * ensuring uniqueness of field IDs. + * + * @param dt The data type that should be transformed + * @param nestedIds A MetadataBuilder that keeps track of the nested field ID + * assignment. This metadata is added to the parent field. + * @param path The current field path relative to the parent field + */ + def transform[E <: DataType](dt: E, nestedIds: MetadataBuilder, path: Seq[String]): E = { + val newDt = dt match { + case StructType(fields) => + StructType(fields.map { field => + val newNestedIds = initNestedIdsMetadata(field) + val newDt = transform(field.dataType, newNestedIds, Seq(getPhysicalName(field))) + val newFieldMetadata = new MetadataBuilder().withMetadata(field.metadata).putMetadata( + COLUMN_MAPPING_METADATA_NESTED_IDS_KEY, newNestedIds.build()).build() + field.copy(dataType = newDt, metadata = newFieldMetadata) + }) + case ArrayType(elementType, containsNull) => + // update element type metadata and recurse into element type + val elemPath = path :+ PARQUET_LIST_ELEMENT_FIELD_NAME + updateFieldId(nestedIds, elemPath.mkString(".")) + val elementDt = transform(elementType, nestedIds, elemPath) + // return new array type with updated metadata + ArrayType(elementDt, containsNull) + case MapType(keyType, valType, valueContainsNull) => + // update key type metadata and recurse into key type + val keyPath = path :+ PARQUET_MAP_KEY_FIELD_NAME + updateFieldId(nestedIds, keyPath.mkString(".")) + val keyDt = transform(keyType, nestedIds, keyPath) + // update value type metadata and recurse into value type + val valPath = path :+ PARQUET_MAP_VALUE_FIELD_NAME + updateFieldId(nestedIds, valPath.mkString(".")) + val valDt = transform(valType, nestedIds, valPath) + // return new map type with updated metadata + MapType(keyDt, valDt, valueContainsNull) + case other => other + } + newDt.asInstanceOf[E] + } + + (transform(schema, new MetadataBuilder(), Seq.empty), currFieldId) + } +} + +object DeltaColumnMapping extends DeltaColumnMappingBase + +/** + * A trait for Delta column mapping modes. + */ +sealed trait DeltaColumnMappingMode { + def name: String +} + +/** + * No mapping mode uses a column's display name as its true identifier to + * read and write data. + * + * This is the default mode and is the same mode as Delta always has been. 
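+ *
+ * Illustrative table property values for the three modes defined in this file:
+ * {{{
+ *   'delta.columnMapping.mode' = 'none'   // this mode (the default)
+ *   'delta.columnMapping.mode' = 'id'     // [[IdMapping]]
+ *   'delta.columnMapping.mode' = 'name'   // [[NameMapping]]
+ * }}}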
+ */ +case object NoMapping extends DeltaColumnMappingMode { + val name = "none" +} + +/** + * Id Mapping uses column ID as the true identifier of a column. Column IDs are stored as + * StructField metadata in the schema and will be used when reading and writing Parquet files. + * The Parquet files in this mode will also have corresponding field Ids for each column in their + * file schema. + * + * This mode is used for tables converted from Iceberg. + */ +case object IdMapping extends DeltaColumnMappingMode { + val name = "id" +} + +/** + * Name Mapping uses the physical column name as the true identifier of a column. The physical name + * is stored as part of StructField metadata in the schema and will be used when reading and writing + * Parquet files. Even if id mapping can be used for reading the physical files, name mapping is + * used for reading statistics and partition values in the DeltaLog. + */ +case object NameMapping extends DeltaColumnMappingMode { + val name = "name" +} + +object DeltaColumnMappingMode { + def apply(name: String): DeltaColumnMappingMode = { + name.toLowerCase(Locale.ROOT) match { + case NoMapping.name => NoMapping + case IdMapping.name => IdMapping + case NameMapping.name => NameMapping + case mode => throw DeltaErrors.unsupportedColumnMappingMode(mode) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaConfig.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaConfig.scala new file mode 100644 index 00000000000..b30e09f7035 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaConfig.scala @@ -0,0 +1,728 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.util.{HashMap, Locale} + +import org.apache.spark.sql.delta.actions.{Action, Metadata, Protocol, TableFeatureProtocolUtils} +import org.apache.spark.sql.delta.hooks.AutoCompactType +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.{DataSkippingReader, StatisticsCollection} + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.util.{DateTimeConstants, IntervalUtils} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.util.Utils + +case class DeltaConfig[T]( + key: String, + defaultValue: String, + fromString: String => T, + validationFunction: T => Boolean, + helpMessage: String, + editable: Boolean = true, + alternateKeys: Seq[String] = Seq.empty) { + /** + * Recover the saved value of this configuration from `Metadata`. If undefined, fall back to + * alternate keys, returning defaultValue if none match. 
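+   *
+   * Sketch of the lookup order (values assume the defaults declared later in this file):
+   * {{{
+   *   // metadata.configuration = Map("delta.enableChangeDataFeed" -> "true")
+   *   // CHANGE_DATA_FEED.fromMetaData(metadata)    => true  (primary key found)
+   *   // CHECKPOINT_INTERVAL.fromMetaData(metadata) => 10    (no key found, default is used)
+   * }}}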
+ */ + def fromMetaData(metadata: Metadata): T = { + for (usedKey <- key +: alternateKeys) { + metadata.configuration.get(usedKey).map { value => return fromString(value) } + } + fromString(defaultValue) + } + + /** Validate the setting for this configuration */ + private def validate(value: String): Unit = { + if (!editable) { + throw DeltaErrors.cannotModifyTableProperty(key) + } + val onErrorMessage = s"$key $helpMessage" + try { + require(validationFunction(fromString(value)), onErrorMessage) + } catch { + case e: NumberFormatException => + throw new IllegalArgumentException(onErrorMessage, e) + } + } + + /** + * Validate this configuration and return the key - value pair to save into the metadata. + */ + def apply(value: String): (String, String) = { + validate(value) + key -> value + } + + /** + * SQL configuration to set for ensuring that all newly created tables have this table property. + */ + def defaultTablePropertyKey: String = DeltaConfigs.sqlConfPrefix + key.stripPrefix("delta.") +} + +/** + * Contains list of reservoir configs and validation checks. + */ +trait DeltaConfigsBase extends DeltaLogging { + + // Special properties stored in the Hive MetaStore that specifies which version last updated + // the entry in the MetaStore with the latest schema and table property information + val METASTORE_LAST_UPDATE_VERSION = "delta.lastUpdateVersion" + val METASTORE_LAST_COMMIT_TIMESTAMP = "delta.lastCommitTimestamp" + + /** + * Convert a string to [[CalendarInterval]]. This method is case-insensitive and will throw + * [[IllegalArgumentException]] when the input string is not a valid interval. + * + * TODO Remove this method and use `CalendarInterval.fromCaseInsensitiveString` instead when + * upgrading Spark. This is a fork version of `CalendarInterval.fromCaseInsensitiveString` which + * will be available in the next Spark release (See SPARK-27735). + * + * @throws IllegalArgumentException if the string is not a valid internal. + */ + def parseCalendarInterval(s: String): CalendarInterval = { + if (s == null || s.trim.isEmpty) { + throw DeltaErrors.emptyCalendarInterval + } + val sInLowerCase = s.trim.toLowerCase(Locale.ROOT) + val interval = + if (sInLowerCase.startsWith("interval ")) sInLowerCase else "interval " + sInLowerCase + val cal = IntervalUtils.safeStringToInterval(UTF8String.fromString(interval)) + if (cal == null) { + throw DeltaErrors.invalidInterval(s) + } + cal + } + + /** + * The prefix for a category of special configs for delta universal format to support the + * user facing config naming convention for different table formats: + * "delta.universalFormat.config.[iceberg/hudi].[config_name]" + * Note that config_name can be arbitrary. + */ + final val DELTA_UNIVERSAL_FORMAT_CONFIG_PREFIX = "delta.universalformat.config." + + final val DELTA_UNIVERSAL_FORMAT_ICEBERG_CONFIG_PREFIX = + s"${DELTA_UNIVERSAL_FORMAT_CONFIG_PREFIX}iceberg." + + /** + * A global default value set as a SQLConf will overwrite the default value of a DeltaConfig. + * For example, user can run: + * set spark.databricks.delta.properties.defaults.randomPrefixLength = 5 + * This setting will be populated to a Delta table during its creation time and overwrites + * the default value of delta.randomPrefixLength. + * + * We accept these SQLConfs as strings and only perform validation in DeltaConfig. All the + * DeltaConfigs set in SQLConf should adopt the same prefix. + */ + val sqlConfPrefix = "spark.databricks.delta.properties.defaults." 
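+
+  // Illustrative example of the mapping implied by this prefix (hypothetical value): the session
+  // default
+  //   SET spark.databricks.delta.properties.defaults.logRetentionDuration = "interval 60 days"
+  // is applied when a table is created and is persisted as the table property
+  //   delta.logRetentionDuration = "interval 60 days"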
+ + private val entries = new HashMap[String, DeltaConfig[_]] + + protected def buildConfig[T]( + key: String, + defaultValue: String, + fromString: String => T, + validationFunction: T => Boolean, + helpMessage: String, + userConfigurable: Boolean = true, + alternateConfs: Seq[DeltaConfig[T]] = Seq.empty): DeltaConfig[T] = { + + val deltaConfig = DeltaConfig(s"delta.$key", + defaultValue, + fromString, + validationFunction, + helpMessage, + userConfigurable, + alternateConfs.map(_.key)) + + entries.put(key.toLowerCase(Locale.ROOT), deltaConfig) + deltaConfig + } + + /** + * Validates specified configurations and returns the normalized key -> value map. + */ + def validateConfigurations(configurations: Map[String, String]): Map[String, String] = { + val allowArbitraryProperties = SparkSession.active.sessionState.conf + .getConf(DeltaSQLConf.ALLOW_ARBITRARY_TABLE_PROPERTIES) + + configurations.map { case kv @ (key, value) => + key.toLowerCase(Locale.ROOT) match { + case lKey if lKey.startsWith("delta.constraints.") => + // This is a CHECK constraint, we should allow it. + kv + case lKey if lKey.startsWith(TableFeatureProtocolUtils.FEATURE_PROP_PREFIX) => + // This is a table feature, we should allow it. + lKey -> value + case lKey if lKey.startsWith("delta.") => + Option(entries.get(lKey.stripPrefix("delta."))) match { + case Some(deltaConfig) => deltaConfig(value) // validate the value + case None if allowArbitraryProperties => + logConsole( + s"You are setting a property: $key that is not recognized by this " + + "version of Delta") + kv + case None => throw DeltaErrors.unknownConfigurationKeyException(key) + } + case _ => + if (entries.containsKey(key)) { + logConsole(s""" + |You are trying to set a property the key of which is the same as Delta config: $key. + |If you are trying to set a Delta config, prefix it with "delta.", e.g. 'delta.$key'. + """.stripMargin) + } + kv + } + } + } + + /** + * Table properties for new tables can be specified through SQL Configurations using the + * [[sqlConfPrefix]] and [[TableFeatureProtocolUtils.DEFAULT_FEATURE_PROP_PREFIX]]. This method + * checks to see if any of the configurations exist among the SQL configurations and merges them + * with the user provided configurations. User provided configs take precedence. + * + * When `ignoreProtocolConfsOpt` is `true` (or `false`), this method will not (or will) copy + * protocol-related configs. If `ignoreProtocolConfsOpt` is None, whether to copy + * protocol-related configs will be depending on the existence of + * [[DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS]] (`delta.ignoreProtocolDefaults`) in + * SQL or table configs. 
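+   *
+   * Merge-precedence sketch (hypothetical values):
+   * {{{
+   *   // session default: spark.databricks.delta.properties.defaults.checkpointInterval = "20"
+   *   // tableConf:       "delta.checkpointInterval" -> "30"
+   *   // merged result:   "delta.checkpointInterval" -> "30"  (user-provided config wins)
+   * }}}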
+ * + * "Protocol-related configs" includes `delta.minReaderVersion`, `delta.minWriterVersion`, + * `delta.ignoreProtocolDefaults`, and anything that starts with `delta.feature.` + */ + def mergeGlobalConfigs( + sqlConfs: SQLConf, + tableConf: Map[String, String], + ignoreProtocolConfsOpt: Option[Boolean] = None): Map[String, String] = { + import scala.collection.JavaConverters._ + + val ignoreProtocolConfs = + ignoreProtocolConfsOpt.getOrElse(ignoreProtocolDefaultsIsSet(sqlConfs, tableConf)) + + val shouldCopyFunc: (String => Boolean) = + !ignoreProtocolConfs || !TableFeatureProtocolUtils.isTableProtocolProperty(_) + + val globalConfs = entries.asScala + .filter { case (_, config) => shouldCopyFunc(config.key) } + .flatMap { case (_, config) => + val sqlConfKey = sqlConfPrefix + config.key.stripPrefix("delta.") + Option(sqlConfs.getConfString(sqlConfKey, null)).map(config(_)) + } + + // Table features configured in session must be merged manually because there's no + // ConfigEntry registered for table features in SQL configs or Table props. + val globalFeatureConfs = if (ignoreProtocolConfs) { + Map.empty[String, String] + } else { + sqlConfs.getAllConfs + .filterKeys(_.startsWith(TableFeatureProtocolUtils.DEFAULT_FEATURE_PROP_PREFIX)) + .map { case (key, value) => + val featureName = key.stripPrefix(TableFeatureProtocolUtils.DEFAULT_FEATURE_PROP_PREFIX) + val tableKey = TableFeatureProtocolUtils.FEATURE_PROP_PREFIX + featureName + tableKey -> value + } + } + + globalConfs.toMap ++ globalFeatureConfs.toMap ++ tableConf + } + + /** + * Whether [[DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS]] is set in Spark session + * configs or table properties. + */ + private[delta] def ignoreProtocolDefaultsIsSet( + sqlConfs: SQLConf, + tableConf: Map[String, String]): Boolean = { + tableConf + .getOrElse( + DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.key, + sqlConfs.getConfString( + DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.defaultTablePropertyKey, + "false")) + .toBoolean + } + + /** + * Normalize the specified property keys if the key is for a Delta config. + */ + def normalizeConfigKeys(propKeys: Seq[String]): Seq[String] = { + propKeys.map { key => + key.toLowerCase(Locale.ROOT) match { + case lKey if lKey.startsWith(TableFeatureProtocolUtils.FEATURE_PROP_PREFIX) => + lKey + case lKey if lKey.startsWith("delta.") => + Option(entries.get(lKey.stripPrefix("delta."))).map(_.key).getOrElse(key) + case _ => key + } + } + } + + /** + * Normalize the specified property key if the key is for a Delta config. + */ + def normalizeConfigKey(propKey: Option[String]): Option[String] = { + propKey.map { key => + key.toLowerCase(Locale.ROOT) match { + case lKey if lKey.startsWith(TableFeatureProtocolUtils.FEATURE_PROP_PREFIX) => + lKey + case lKey if lKey.startsWith("delta.") => + Option(entries.get(lKey.stripPrefix("delta."))).map(_.key).getOrElse(key) + case _ => key + } + } + } + + def getMilliSeconds(i: CalendarInterval): Long = { + getMicroSeconds(i) / 1000L + } + + private def getMicroSeconds(i: CalendarInterval): Long = { + assert(i.months == 0) + i.days * DateTimeConstants.MICROS_PER_DAY + i.microseconds + } + + /** + * For configs accepting an interval, we require the user specified string must obey: + * + * - Doesn't use months or years, since an internal like this is not deterministic. + * - The microseconds parsed from the string value must be a non-negative value. + * + * The method returns whether a [[CalendarInterval]] satisfies the requirements. 
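+   *
+   * A few illustrative cases (using [[parseCalendarInterval]] above):
+   * {{{
+   *   isValidIntervalConfigValue(parseCalendarInterval("2 weeks"))  // true
+   *   isValidIntervalConfigValue(parseCalendarInterval("-3 days"))  // false (negative duration)
+   *   isValidIntervalConfigValue(parseCalendarInterval("1 month"))  // false (months not allowed)
+   * }}}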
+ */ + def isValidIntervalConfigValue(i: CalendarInterval): Boolean = { + i.months == 0 && getMicroSeconds(i) >= 0 + } + + /** + * The protocol reader version modelled as a table property. This property is *not* stored as + * a table property in the `Metadata` action. It is stored as its own action. Having it modelled + * as a table property makes it easier to upgrade, and view the version. + */ + val MIN_READER_VERSION = buildConfig[Int]( + "minReaderVersion", + Action.supportedProtocolVersion().minReaderVersion.toString, + _.toInt, + v => Action.supportedReaderVersionNumbers.contains(v), + s"needs to be one of ${Action.supportedReaderVersionNumbers.toSeq.sorted.mkString(", ")}.") + + /** + * The protocol reader version modelled as a table property. This property is *not* stored as + * a table property in the `Metadata` action. It is stored as its own action. Having it modelled + * as a table property makes it easier to upgrade, and view the version. + */ + val MIN_WRITER_VERSION = buildConfig[Int]( + "minWriterVersion", + Action.supportedProtocolVersion().minWriterVersion.toString, + _.toInt, + v => Action.supportedWriterVersionNumbers.contains(v), + s"needs to be one of ${Action.supportedWriterVersionNumbers.toSeq.sorted.mkString(", ")}.") + + /** + * Ignore protocol-related configs set in SQL config. + * When set to true, CREATE TABLE and REPLACE TABLE commands will not consider default + * protocol versions and table features in the current Spark session. + */ + val CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS = buildConfig[Boolean]( + "ignoreProtocolDefaults", + defaultValue = "false", + fromString = _.toBoolean, + validationFunction = _ => true, + helpMessage = "needs to be a boolean.") + + + /** + * The shortest duration we have to keep delta files around before deleting them. We can only + * delete delta files that are before a compaction. We may keep files beyond this duration until + * the next calendar day. + */ + val LOG_RETENTION = buildConfig[CalendarInterval]( + "logRetentionDuration", + "interval 30 days", + parseCalendarInterval, + isValidIntervalConfigValue, + "needs to be provided as a calendar interval such as '2 weeks'. Months " + + "and years are not accepted. You may specify '365 days' for a year instead.") + + /** + * The shortest duration we have to keep delta sample files around before deleting them. + */ + val SAMPLE_RETENTION = buildConfig[CalendarInterval]( + "sampleRetentionDuration", + "interval 7 days", + parseCalendarInterval, + isValidIntervalConfigValue, + "needs to be provided as a calendar interval such as '2 weeks'. Months " + + "and years are not accepted. You may specify '365 days' for a year instead.") + + /** + * The shortest duration we have to keep checkpoint files around before deleting them. Note that + * we'll never delete the most recent checkpoint. We may keep checkpoint files beyond this + * duration until the next calendar day. + */ + val CHECKPOINT_RETENTION_DURATION = buildConfig[CalendarInterval]( + "checkpointRetentionDuration", + "interval 2 days", + parseCalendarInterval, + isValidIntervalConfigValue, + "needs to be provided as a calendar interval such as '2 weeks'. Months " + + "and years are not accepted. You may specify '365 days' for a year instead.") + + /** How often to checkpoint the delta log. */ + val CHECKPOINT_INTERVAL = buildConfig[Int]( + "checkpointInterval", + "10", + _.toInt, + _ > 0, + "needs to be a positive integer.") + + /** + * Enable auto compaction for a Delta table. 
When enabled, we will check if files already + * written to a Delta table can leverage compaction after a commit. If so, we run a post-commit + * hook to compact the files. + * It can be enabled by setting the property to `true` + * Note that the behavior from table property can be overridden by the config: + * [[org.apache.spark.sql.delta.sources.DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED]] + */ + val AUTO_COMPACT = buildConfig[Option[String]]( + "autoOptimize.autoCompact", + null, + v => Option(v).map(_.toLowerCase(Locale.ROOT)), + v => v.isEmpty || AutoCompactType.ALLOWED_VALUES.contains(v.get), + s""""needs to be one of: ${AutoCompactType.ALLOWED_VALUES.mkString(",")}""") + + /** Whether to clean up expired checkpoints and delta logs. */ + val ENABLE_EXPIRED_LOG_CLEANUP = buildConfig[Boolean]( + "enableExpiredLogCleanup", + "true", + _.toBoolean, + _ => true, + "needs to be a boolean.") + + /** + * If true, a delta table can be rolled back to any point within LOG_RETENTION. Leaving this on + * requires converting the oldest delta file we have into a checkpoint, which we do once a day. If + * doing that operation is too expensive, it can be turned off, but the table can only be rolled + * back CHECKPOINT_RETENTION_DURATION ago instead of LOG_RETENTION ago. + */ + val ENABLE_FULL_RETENTION_ROLLBACK = buildConfig[Boolean]( + "enableFullRetentionRollback", + "true", + _.toBoolean, + _ => true, + "needs to be a boolean." + ) + + /** + * The logRetention period to be used in DROP FEATURE ... TRUNCATE HISTORY command. + * The value should represent the expected duration of the longest running transaction. Setting + * this to a lower value than the longest running transaction may corrupt the table. + */ + val TABLE_FEATURE_DROP_TRUNCATE_HISTORY_LOG_RETENTION = buildConfig[CalendarInterval]( + "dropFeatureTruncateHistory.retentionDuration", + "interval 24 hours", + parseCalendarInterval, + isValidIntervalConfigValue, + "needs to be provided as a calendar interval such as '2 weeks'. Months " + + "and years are not accepted. You may specify '365 days' for a year instead.") + + /** + * The shortest duration we have to keep logically deleted data files around before deleting them + * physically. This is to prevent failures in stale readers after compactions or partition + * overwrites. + * + * Note: this value should be large enough: + * - It should be larger than the longest possible duration of a job if you decide to run "VACUUM" + * when there are concurrent readers or writers accessing the table. + * - If you are running a streaming query reading from the table, you should make sure the query + * doesn't stop longer than this value. Otherwise, the query may not be able to restart as it + * still needs to read old files. + */ + val TOMBSTONE_RETENTION = buildConfig[CalendarInterval]( + "deletedFileRetentionDuration", + "interval 1 week", + parseCalendarInterval, + isValidIntervalConfigValue, + "needs to be provided as a calendar interval such as '2 weeks'. Months " + + "and years are not accepted. You may specify '365 days' for a year instead.") + + /** + * Whether to use a random prefix in a file path instead of partition information. This is + * required for very high volume S3 calls to better be partitioned across S3 servers. + */ + val RANDOMIZE_FILE_PREFIXES = buildConfig[Boolean]( + "randomizeFilePrefixes", + "false", + _.toBoolean, + _ => true, + "needs to be a boolean.") + + /** + * Whether to use a random prefix in a file path instead of partition information. 
This is + * required for very high volume S3 calls to better be partitioned across S3 servers. + */ + val RANDOM_PREFIX_LENGTH = buildConfig[Int]( + "randomPrefixLength", + "2", + _.toInt, + a => a > 0, + "needs to be greater than 0.") + + /** + * Whether this Delta table is append-only. Files can't be deleted, or values can't be updated. + */ + val IS_APPEND_ONLY = buildConfig[Boolean]( + key = "appendOnly", + defaultValue = "false", + fromString = _.toBoolean, + validationFunction = _ => true, + helpMessage = "needs to be a boolean.") + + /** + * Whether commands modifying this Delta table are allowed to create new deletion vectors. + */ + val ENABLE_DELETION_VECTORS_CREATION = buildConfig[Boolean]( + key = "enableDeletionVectors", + defaultValue = "false", + fromString = _.toBoolean, + validationFunction = _ => true, + helpMessage = "needs to be a boolean.") + + /** + * Whether this table will automatically optimize the layout of files during writes. + */ + val AUTO_OPTIMIZE = buildConfig[Option[Boolean]]( + "autoOptimize", + null, + v => Option(v).map(_.toBoolean), + _ => true, + "needs to be a boolean.") + + /** + * The number of columns to collect stats on for data skipping. A value of -1 means collecting + * stats for all columns. Updating this conf does not trigger stats re-collection, but redefines + * the stats schema of table, i.e., it will change the behavior of future stats collection + * (e.g., in append and OPTIMIZE) as well as data skipping (e.g., the column stats beyond this + * number will be ignored even when they exist). + */ + val DATA_SKIPPING_NUM_INDEXED_COLS = buildConfig[Int]( + "dataSkippingNumIndexedCols", + DataSkippingReader.DATA_SKIPPING_NUM_INDEXED_COLS_DEFAULT_VALUE.toString, + _.toInt, + a => a >= -1, + "needs to be larger than or equal to -1.") + + /** + * The names of specific columns to collect stats on for data skipping. If present, it takes + * precedences over dataSkippingNumIndexedCols config, and the system will only collect stats for + * columns that exactly match those specified. If a nested column is specified, the system will + * collect stats for all leaf fields of that column. If a non-existent column is specified, it + * will be ignored. Updating this conf does not trigger stats re-collection, but redefines the + * stats schema of table, i.e., it will change the behavior of future stats collection (e.g., in + * append and OPTIMIZE) as well as data skipping (e.g., the column stats not mentioned by this + * config will be ignored even if they exist). + */ + val DATA_SKIPPING_STATS_COLUMNS = buildConfig[Option[String]]( + "dataSkippingStatsColumns", + null, + v => Option(v), + vOpt => vOpt.forall(v => StatisticsCollection.parseDeltaStatsColumnNames(v).isDefined), + """ + |The dataSkippingStatsColumns parameter is a comma-separated list of case-insensitive column + |identifiers. Each column identifier can consist of letters, digits, and underscores. + |Multiple column identifiers can be listed, separated by commas. + | + |If a column identifier includes special characters such as !@#$%^&*()_+-={}|[]:";'<>,.?/, + |the column name should be enclosed in backticks (`) to escape the special characters. + | + |A column identifier can refer to one of the following: the name of a non-struct column, the + |leaf field's name of a struct column, or the name of a struct column. When a struct column's + |name is specified in dataSkippingStatsColumns, statistics for all its leaf fields will be + |collected. 
+ |""".stripMargin) + + val SYMLINK_FORMAT_MANIFEST_ENABLED = buildConfig[Boolean]( + s"${hooks.GenerateSymlinkManifest.CONFIG_NAME_ROOT}.enabled", + "false", + _.toBoolean, + _ => true, + "needs to be a boolean.") + + /** + * When enabled, we will write file statistics in the checkpoint in JSON format as the "stats" + * column. + */ + val CHECKPOINT_WRITE_STATS_AS_JSON = buildConfig[Boolean]( + "checkpoint.writeStatsAsJson", + "true", + _.toBoolean, + _ => true, + "needs to be a boolean.") + + /** + * When enabled, we will write file statistics in the checkpoint in the struct format in the + * "stats_parsed" column. We will also write partition values as a struct as + * "partitionValues_parsed". + */ + val CHECKPOINT_WRITE_STATS_AS_STRUCT = buildConfig[Boolean]( + "checkpoint.writeStatsAsStruct", + "true", + _.toBoolean, + _ => true, + "needs to be a boolean.") + + /** + * Deprecated in favor of CHANGE_DATA_FEED. + */ + private val CHANGE_DATA_FEED_LEGACY = buildConfig[Boolean]( + "enableChangeDataCapture", + "false", + _.toBoolean, + _ => true, + "needs to be a boolean.") + + /** + * Enable change data feed output. + * When enabled, DELETE, UPDATE, and MERGE INTO operations will need to do additional work to + * output their change data in an efficiently readable format. + */ + val CHANGE_DATA_FEED = buildConfig[Boolean]( + "enableChangeDataFeed", + "false", + _.toBoolean, + _ => true, + "needs to be a boolean.", + alternateConfs = Seq(CHANGE_DATA_FEED_LEGACY)) + + val COLUMN_MAPPING_MODE = buildConfig[DeltaColumnMappingMode]( + "columnMapping.mode", + "none", + DeltaColumnMappingMode(_), + _ => true, + "") + + /** + * Maximum columnId used in the schema so far for column mapping. Internal property that cannot + * be set by users. + */ + val COLUMN_MAPPING_MAX_ID = buildConfig[Long]( + "columnMapping.maxColumnId", + "0", + _.toLong, + _ => true, + "", + userConfigurable = false) + + + /** + * The shortest duration within which new [[Snapshot]]s will retain transaction identifiers (i.e. + * [[SetTransaction]]s). When a new [[Snapshot]] sees a transaction identifier older than or equal + * to the specified TRANSACTION_ID_RETENTION_DURATION, it considers it expired and ignores it. + */ + val TRANSACTION_ID_RETENTION_DURATION = buildConfig[Option[CalendarInterval]]( + "setTransactionRetentionDuration", + null, + v => if (v == null) None else Some(parseCalendarInterval(v)), + opt => opt.forall(isValidIntervalConfigValue), + "needs to be provided as a calendar interval such as '2 weeks'. Months " + + "and years are not accepted. You may specify '365 days' for a year instead.") + + /** + * The isolation level of a table defines the degree to which a transaction must be isolated from + * modifications made by concurrent transactions. Delta currently supports one isolation level: + * Serializable. + */ + val ISOLATION_LEVEL = buildConfig[IsolationLevel]( + "isolationLevel", + Serializable.toString, + IsolationLevel.fromString(_), + _ == Serializable, + "must be Serializable" + ) + + /** Policy to decide what kind of checkpoint to write to a table. 
*/ + val CHECKPOINT_POLICY = buildConfig[CheckpointPolicy.Policy]( + key = "checkpointPolicy", + defaultValue = CheckpointPolicy.Classic.name, + fromString = str => CheckpointPolicy.fromName(str), + validationFunction = (v => CheckpointPolicy.ALL.exists(_.name == v.name)), + helpMessage = s"can be one of the " + + s"following: ${CheckpointPolicy.Classic.name}, ${CheckpointPolicy.V2.name}") + + /** + * Indicates whether Row Tracking is enabled on the table. When this flag is turned on, all rows + * are guaranteed to have Row IDs and Row Commit Versions assigned to them, and writers are + * expected to preserve them by materializing them to hidden columns in the data files. + */ + val ROW_TRACKING_ENABLED = buildConfig[Boolean]( + key = "enableRowTracking", + defaultValue = false.toString, + fromString = _.toBoolean, + validationFunction = _ => true, + helpMessage = "needs to be a boolean.") + + /** + * Convert the table's metadata into other storage formats after each Delta commit. + * Only Iceberg is supported for now + */ + val UNIVERSAL_FORMAT_ENABLED_FORMATS = buildConfig[Seq[String]]( + "universalFormat.enabledFormats", + "", + fromString = str => + if (str == null || str.isEmpty) Nil + else str.split(","), + validationFunction = seq => + if (seq.distinct.length != seq.length) false + else seq.toSet.subsetOf(UniversalFormat.SUPPORTED_FORMATS), + s"Must be a comma-separated list of formats from the list: " + + s"${UniversalFormat.SUPPORTED_FORMATS.mkString("{", ",", "}")}." + ) + + val ICEBERG_COMPAT_V1_ENABLED = buildConfig[Option[Boolean]]( + "enableIcebergCompatV1", + null, + v => Option(v).map(_.toBoolean), + _ => true, + "needs to be a boolean." + ) + + val ICEBERG_COMPAT_V2_ENABLED = buildConfig[Option[Boolean]]( + key = "enableIcebergCompatV2", + defaultValue = null, + fromString = v => Option(v).map(_.toBoolean), + validationFunction = _ => true, + helpMessage = "needs to be a boolean." + ) + + /** + * Enable optimized writes into a Delta table. Optimized writes adds an adaptive shuffle before + * the write to write compacted files into a Delta table during a write. + */ + val OPTIMIZE_WRITE = buildConfig[Option[Boolean]]( + "autoOptimize.optimizeWrite", + null, + v => Option(v).map(_.toBoolean), + _ => true, + "needs to be a boolean." + ) +} + +object DeltaConfigs extends DeltaConfigsBase diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaErrors.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaErrors.scala new file mode 100644 index 00000000000..aead53e03b7 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaErrors.scala @@ -0,0 +1,3615 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.{FileNotFoundException, IOException} +import java.nio.file.FileAlreadyExistsException +import java.util.ConcurrentModificationException + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.actions.{CommitInfo, Metadata, Protocol, TableFeatureProtocolUtils} +import org.apache.spark.sql.delta.catalog.DeltaCatalog +import org.apache.spark.sql.delta.commands.AlterTableDropFeatureDeltaCommand +import org.apache.spark.sql.delta.constraints.Constraints +import org.apache.spark.sql.delta.hooks.AutoCompactType +import org.apache.spark.sql.delta.hooks.PostCommitHook +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.{DeltaInvariantViolationException, InvariantViolationException, SchemaUtils, UnsupportedDataTypeInfo} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.JsonUtils +import io.delta.sql.DeltaSparkSessionExtension +import org.apache.hadoop.fs.{ChecksumException, Path} +import org.json4s.JValue + +import org.apache.spark.{SparkConf, SparkEnv, SparkException} +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.ExtendedAnalysisException +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ +import org.apache.spark.sql.connector.catalog.Identifier +import org.apache.spark.sql.errors.QueryErrorsBase +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{DataType, StructField, StructType} + + +trait DocsPath { + /** + * The URL for the base path of Delta's docs. When changing this path, ensure that the new path + * works with the error messages below. + */ + protected def baseDocsPath(conf: SparkConf): String = "https://docs.delta.io/latest" + + def assertValidCallingFunction(): Unit = { + val callingMethods = Thread.currentThread.getStackTrace + callingMethods.foreach { method => + if (errorsWithDocsLinks.contains(method.getMethodName)) { + return + } + } + assert(assertion = false, "The method throwing the error which contains a doc link must be a " + + s"part of DocsPath.errorsWithDocsLinks") + } + + /** + * Get the link to the docs for the given relativePath. Validates that the error generating the + * link is added to docsLinks. + * + * @param relativePath the relative path after the base url to access. + * @param skipValidation whether to validate that the function generating the link is + * in the allowlist. + * @return The entire URL of the documentation link + */ + def generateDocsLink( + conf: SparkConf, + relativePath: String, + skipValidation: Boolean = false): String = { + if (!skipValidation) assertValidCallingFunction() + baseDocsPath(conf) + relativePath + } + + /** + * List of error function names for all errors that have URLs. 
When adding your error to this list + * remember to also add it to the list of errors in DeltaErrorsSuite + * + * @note add your error to DeltaErrorsSuiteBase after adding it to this list so that the url can + * be tested + */ + def errorsWithDocsLinks: Seq[String] = Seq( + "createExternalTableWithoutLogException", + "createExternalTableWithoutSchemaException", + "createManagedTableWithoutSchemaException", + "multipleSourceRowMatchingTargetRowInMergeException", + "ignoreStreamingUpdatesAndDeletesWarning", + "concurrentModificationExceptionMsg", + "incorrectLogStoreImplementationException", + "sourceNotDeterministicInMergeException", + "columnMappingAdviceMessage", + "icebergClassMissing", + "tableFeatureReadRequiresWriteException", + "tableFeatureRequiresHigherReaderProtocolVersion", + "tableFeatureRequiresHigherWriterProtocolVersion", + "blockStreamingReadsWithIncompatibleColumnMappingSchemaChanges" + ) +} + +/** + * A holder object for Delta errors. + * + * + * IMPORTANT: Any time you add a test that references the docs, add to the Seq defined in + * DeltaErrorsSuite so that the doc links that are generated can be verified to work in + * docs.delta.io + */ +trait DeltaErrorsBase + extends DocsPath + with DeltaLogging + with QueryErrorsBase { + + def baseDocsPath(spark: SparkSession): String = baseDocsPath(spark.sparkContext.getConf) + + val faqRelativePath: String = "/delta-intro.html#frequently-asked-questions" + + val EmptyCheckpointErrorMessage = + s""" + |Attempted to write an empty checkpoint without any actions. This checkpoint will not be + |useful in recomputing the state of the table. However this might cause other checkpoints to + |get deleted based on retention settings. + """.stripMargin + + // scalastyle:off + def assertionFailedError(msg: String): Throwable = new AssertionError(msg) + // scalastyle:on + + def deltaSourceIgnoreDeleteError( + version: Long, + removedFile: String, + dataPath: String): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_SOURCE_IGNORE_DELETE", + messageParameters = Array(removedFile, version.toString, dataPath) + ) + } + + def deltaSourceIgnoreChangesError( + version: Long, + removedFile: String, + dataPath: String): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_SOURCE_TABLE_IGNORE_CHANGES", + messageParameters = Array(removedFile, version.toString, dataPath) + ) + } + + def unknownReadLimit(limit: String): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_UNKNOWN_READ_LIMIT", + messageParameters = Array(limit) + ) + } + + def unknownPrivilege(privilege: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_UNKNOWN_PRIVILEGE", + messageParameters = Array(privilege) + ) + } + + def columnNotFound(path: Seq[String], schema: StructType): Throwable = { + val name = UnresolvedAttribute(path).name + cannotResolveColumn(name, schema) + } + + def failedMergeSchemaFile(file: String, schema: String, cause: Throwable): Throwable = { + new DeltaSparkException( + errorClass = "DELTA_FAILED_MERGE_SCHEMA_FILE", + messageParameters = Array(file, schema), + cause = cause) + } + + def failOnCheckpointRename(src: Path, dest: Path): DeltaIllegalStateException = { + new DeltaIllegalStateException( + errorClass = "DELTA_CANNOT_RENAME_PATH", + messageParameters = Array(s"${src.toString}", s"${dest.toString}")) + } + + def checkpointMismatchWithSnapshot : Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_CHECKPOINT_SNAPSHOT_MISMATCH", + 
messageParameters = Array.empty + ) + } + + /** + * Thrown when main table data contains columns that are reserved for CDF, such as `_change_type`. + */ + def cdcColumnsInData(columns: Seq[String]): Throwable = { + new DeltaIllegalStateException( + errorClass = "RESERVED_CDC_COLUMNS_ON_WRITE", + messageParameters = Array(columns.mkString("[", ",", "]"), DeltaConfigs.CHANGE_DATA_FEED.key) + ) + } + + /** + * Thrown when main table data already contains columns that are reserved for CDF, such as + * `_change_type`, but CDF is not yet enabled on that table. + */ + def tableAlreadyContainsCDCColumns(columns: Seq[String]): Throwable = { + new DeltaIllegalStateException(errorClass = "DELTA_TABLE_ALREADY_CONTAINS_CDC_COLUMNS", + messageParameters = Array(columns.mkString("[", ",", "]"))) + } + + /** + * Thrown when a CDC query contains conflict 'starting' or 'ending' options, e.g. when both + * starting version and starting timestamp are specified. + * + * @param position Specifies which option was duplicated in the read. Values are "starting" or + * "ending" + */ + def multipleCDCBoundaryException(position: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_MULTIPLE_CDC_BOUNDARY", + messageParameters = Array(position, position, position) + ) + } + + def formatColumn(colName: String): String = s"`$colName`" + + def formatColumnList(colNames: Seq[String]): String = + colNames.map(formatColumn).mkString("[", ", ", "]") + + def formatSchema(schema: StructType): String = schema.treeString + + def analysisException( + msg: String, + line: Option[Int] = None, + startPosition: Option[Int] = None, + plan: Option[LogicalPlan] = None, + cause: Option[Throwable] = None): AnalysisException = { + new ExtendedAnalysisException(msg, line, startPosition, plan, cause) + } + + def notNullColumnMissingException(constraint: Constraints.NotNull): Throwable = { + new DeltaInvariantViolationException( + errorClass = "DELTA_MISSING_NOT_NULL_COLUMN_VALUE", + messageParameters = Array(s"${UnresolvedAttribute(constraint.column).name}")) + } + + def nestedNotNullConstraint( + parent: String, nested: DataType, nestType: String): AnalysisException = { + new DeltaAnalysisException( + errorClass = "DELTA_NESTED_NOT_NULL_CONSTRAINT", + messageParameters = Array( + s"$nestType", + s"$parent", + s"${DeltaSQLConf.ALLOW_UNENFORCED_NOT_NULL_CONSTRAINTS.key}", + s"$nestType", + s"${nested.prettyJson}" + ) + ) + } + + def nullableParentWithNotNullNestedField : Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_NOT_NULL_NESTED_FIELD", + messageParameters = Array.empty + ) + } + + def constraintAlreadyExists(name: String, oldExpr: String): AnalysisException = { + new DeltaAnalysisException( + errorClass = "DELTA_CONSTRAINT_ALREADY_EXISTS", + messageParameters = Array(name, oldExpr) + ) + } + + def invalidConstraintName(name: String): AnalysisException = { + new AnalysisException(s"Cannot use '$name' as the name of a CHECK constraint.") + } + + def nonexistentConstraint(constraintName: String, tableName: String): AnalysisException = { + new DeltaAnalysisException( + errorClass = "DELTA_CONSTRAINT_DOES_NOT_EXIST", + messageParameters = Array( + constraintName, + tableName, + DeltaSQLConf.DELTA_ASSUMES_DROP_CONSTRAINT_IF_EXISTS.key, + "true")) + } + + def checkConstraintNotBoolean(name: String, expr: String): AnalysisException = { + new DeltaAnalysisException( + errorClass = "DELTA_NON_BOOLEAN_CHECK_CONSTRAINT", + messageParameters = Array(name, expr) + ) + } + + def newCheckConstraintViolated(num: 
Long, tableName: String, expr: String): AnalysisException = { + new DeltaAnalysisException( + errorClass = "DELTA_NEW_CHECK_CONSTRAINT_VIOLATION", + messageParameters = Array(s"$num", tableName, expr) + ) + } + + def newNotNullViolated( + num: Long, tableName: String, col: UnresolvedAttribute): AnalysisException = { + new DeltaAnalysisException( + errorClass = "DELTA_NEW_NOT_NULL_VIOLATION", + messageParameters = Array(s"$num", tableName, col.name) + ) + } + + def useAddConstraints: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_ADD_CONSTRAINTS", + messageParameters = Array.empty) + } + + def incorrectLogStoreImplementationException( + sparkConf: SparkConf, + cause: Throwable): Throwable = { + new DeltaIOException( + errorClass = "DELTA_INCORRECT_LOG_STORE_IMPLEMENTATION", + messageParameters = Array(generateDocsLink(sparkConf, "/delta-storage.html")), + cause = cause) + } + + def failOnDataLossException(expectedVersion: Long, seenVersion: Long): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_MISSING_FILES_UNEXPECTED_VERSION", + messageParameters = Array(s"$expectedVersion", s"$seenVersion", + s"${DeltaOptions.FAIL_ON_DATA_LOSS_OPTION}") + ) + } + + def staticPartitionsNotSupportedException: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_STATIC_PARTITIONS", + messageParameters = Array.empty + ) + } + + def zOrderingOnPartitionColumnException(colName: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_ZORDERING_ON_PARTITION_COLUMN", + messageParameters = Array(colName) + ) + } + + def zOrderingOnColumnWithNoStatsException( + colNames: Seq[String], + spark: SparkSession): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_ZORDERING_ON_COLUMN_WITHOUT_STATS", + messageParameters = Array(colNames.mkString("[", ", ", "]"), + DeltaSQLConf.DELTA_OPTIMIZE_ZORDER_COL_STAT_CHECK.key) + ) + } + + def zOrderingColumnDoesNotExistException(colName: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_ZORDERING_COLUMN_DOES_NOT_EXIST", + messageParameters = Array(colName)) + } + + /** + * Throwable used when CDC options contain no 'start'. + */ + def noStartVersionForCDC(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_NO_START_FOR_CDC_READ", + messageParameters = Array.empty + ) + } + + /** + * Throwable used when CDC is not enabled according to table metadata. + */ + def changeDataNotRecordedException(version: Long, start: Long, end: Long): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_MISSING_CHANGE_DATA", + messageParameters = Array(start.toString, end.toString, version.toString, + DeltaConfigs.CHANGE_DATA_FEED.key)) + } + + /** + * Throwable used for invalid CDC 'start' and 'end' options, where end < start + */ + def endBeforeStartVersionInCDC(start: Long, end: Long): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_INVALID_CDC_RANGE", + messageParameters = Array(start.toString, end.toString) + ) + } + + /** + * Throwable used for invalid CDC 'start' and 'latest' options, where latest < start + */ + def startVersionAfterLatestVersion(start: Long, latest: Long): Throwable = { + new IllegalArgumentException( + s"Provided Start version($start) for reading change data is invalid. 
" + + s"Start version cannot be greater than the latest version of the table($latest).") + } + + def setTransactionVersionConflict(appId: String, version1: Long, version2: Long): Throwable = { + new IllegalArgumentException( + s"Two SetTransaction actions within the same transaction have the same appId ${appId} but " + + s"different versions ${version1} and ${version2}.") + } + + def unexpectedChangeFilesFound(changeFiles: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_UNEXPECTED_CHANGE_FILES_FOUND", + messageParameters = Array(changeFiles)) + } + + def addColumnAtIndexLessThanZeroException(pos: String, col: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_ADD_COLUMN_AT_INDEX_LESS_THAN_ZERO", + messageParameters = Array(pos, col)) + } + + def dropColumnAtIndexLessThanZeroException(pos: Int): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_DROP_COLUMN_AT_INDEX_LESS_THAN_ZERO", + messageParameters = Array(s"$pos") + ) + } + + def columnNameNotFoundException(colName: String, scheme: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_COLUMN_NOT_FOUND", + messageParameters = Array(colName, scheme)) + } + + def foundDuplicateColumnsException(colType: String, duplicateCols: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_DUPLICATE_COLUMNS_FOUND", + messageParameters = Array(colType, duplicateCols)) + } + + def addColumnStructNotFoundException(pos: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_ADD_COLUMN_STRUCT_NOT_FOUND", + messageParameters = Array(pos)) + } + + def addColumnParentNotStructException(column: StructField, other: DataType): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_ADD_COLUMN_PARENT_NOT_STRUCT", + messageParameters = Array(s"${column.name}", s"$other")) + } + + def operationNotSupportedException( + operation: String, tableIdentifier: TableIdentifier): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_OPERATION_NOT_ALLOWED_DETAIL", + messageParameters = Array(operation, tableIdentifier.toString)) + } + + def operationNotSupportedException(operation: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_OPERATION_NOT_ALLOWED", + messageParameters = Array(operation)) + } + + def emptyDataException: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_EMPTY_DATA", messageParameters = Array.empty) + } + + def fileNotFoundException(path: String): Throwable = { + new DeltaFileNotFoundException( + errorClass = "DELTA_FILE_NOT_FOUND", + messageParameters = Array(path)) + } + + def fileOrDirectoryNotFoundException(path: String): Throwable = { + new DeltaFileNotFoundException( + errorClass = "DELTA_FILE_OR_DIR_NOT_FOUND", + messageParameters = Array(path)) + } + + def excludeRegexOptionException(regexOption: String, cause: Throwable = null): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_REGEX_OPT_SYNTAX_ERROR", + messageParameters = Array(regexOption), + cause = cause) + } + + def notADeltaTableException(deltaTableIdentifier: DeltaTableIdentifier): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_MISSING_DELTA_TABLE", + messageParameters = Array(s"$deltaTableIdentifier")) + } + + def notADeltaTableException( + operation: String, deltaTableIdentifier: DeltaTableIdentifier): Throwable = { + notADeltaTableException(operation, deltaTableIdentifier.toString) + } + + def notADeltaTableException(operation: String, tableName: 
String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_TABLE_ONLY_OPERATION", + messageParameters = Array(tableName, operation)) + } + + def notADeltaTableException(operation: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_ONLY_OPERATION", + messageParameters = Array(operation) + ) + } + + def notADeltaSourceException(command: String, plan: Option[LogicalPlan] = None): Throwable = { + val planName = if (plan.isDefined) plan.toString else "" + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_SOURCE", + messageParameters = Array(command, s"$planName") + ) + } + + def partitionColumnCastFailed( + columnValue: String, + dataType: String, + columnName: String): Throwable = { + new DeltaRuntimeException( + errorClass = "DELTA_PARTITION_COLUMN_CAST_FAILED", + messageParameters = Array(columnValue, dataType, columnName)) + } + + def schemaChangedSinceAnalysis( + atAnalysis: StructType, + latestSchema: StructType, + mentionLegacyFlag: Boolean = false): Throwable = { + val schemaDiff = SchemaUtils.reportDifferences(atAnalysis, latestSchema) + .map(_.replace("Specified", "Latest")) + val legacyFlagMessage = if (mentionLegacyFlag) { + s""" + |This check can be turned off by setting the session configuration key + |${DeltaSQLConf.DELTA_SCHEMA_ON_READ_CHECK_ENABLED.key} to false.""".stripMargin + } else { + "" + } + new DeltaAnalysisException( + errorClass = "DELTA_SCHEMA_CHANGE_SINCE_ANALYSIS", + messageParameters = Array(schemaDiff.mkString("\n"), legacyFlagMessage) + ) + } + + def incorrectArrayAccess(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INCORRECT_ARRAY_ACCESS", + messageParameters = Array.empty) + } + def invalidColumnName(name: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INVALID_CHARACTERS_IN_COLUMN_NAME", + messageParameters = Array(name)) + } + + def invalidIsolationLevelException(s: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_INVALID_ISOLATION_LEVEL", + messageParameters = Array(s)) + } + + def invalidPartitionColumn(col: String, tbl: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INVALID_PARTITION_COLUMN", + messageParameters = Array(col, tbl)) + } + + def invalidPartitionColumn(e: AnalysisException): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INVALID_PARTITION_COLUMN_NAME", + messageParameters = Array.empty, + cause = Option(e)) + } + + def invalidTimestampFormat( + ts: String, + format: String, + cause: Option[Throwable] = None): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INVALID_TIMESTAMP_FORMAT", + messageParameters = Array(ts, format), + cause = cause) + } + + def missingTableIdentifierException(operationName: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_OPERATION_MISSING_PATH", + messageParameters = Array(operationName) + ) + } + + def viewInDescribeDetailException(view: TableIdentifier): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_DESCRIBE_DETAIL_VIEW", + messageParameters = Array(s"$view") + ) + } + + def alterTableChangeColumnException(oldColumns: String, newColumns: String): Throwable = { + new AnalysisException( + "ALTER TABLE CHANGE COLUMN is not supported for changing column " + oldColumns + " to " + + newColumns) + } + + def cannotWriteIntoView(table: TableIdentifier): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_WRITE_INTO_VIEW", + messageParameters = 
Array(s"$table") + ) + } + + def castingCauseOverflowErrorInTableWrite( + from: DataType, + to: DataType, + columnName: String): ArithmeticException = { + new DeltaArithmeticException( + errorClass = "DELTA_CAST_OVERFLOW_IN_TABLE_WRITE", + messageParameters = Array( + toSQLType(from), // sourceType + toSQLType(to), // targetType + toSQLId(columnName), // columnName + SQLConf.STORE_ASSIGNMENT_POLICY.key, // storeAssignmentPolicyFlag + // updateAndMergeCastingFollowsAnsiEnabledFlag + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key, + SQLConf.ANSI_ENABLED.key // ansiEnabledFlag + ) + ) + } + + def notADeltaTable(table: String): Throwable = { + new DeltaAnalysisException(errorClass = "DELTA_NOT_A_DELTA_TABLE", + messageParameters = Array(table)) + } + + def unsupportedWriteStagedTable(tableName: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_WRITES_STAGED_TABLE", + messageParameters = Array(tableName) + ) + } + + def notEnoughColumnsInInsert( + table: String, + query: Int, + target: Int, + nestedField: Option[String] = None): Throwable = { + val nestedFieldStr = nestedField.map(f => s"not enough nested fields in $f") + .getOrElse("not enough data columns") + new DeltaAnalysisException( + errorClass = "DELTA_INSERT_COLUMN_ARITY_MISMATCH", + messageParameters = Array(table, nestedFieldStr, target.toString, query.toString)) + } + + def notFoundFileToBeRewritten(absolutePath: String, candidates: Iterable[String]): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_FILE_TO_OVERWRITE_NOT_FOUND", + messageParameters = Array(absolutePath, candidates.mkString("\n"))) + } + + def cannotFindSourceVersionException(json: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_CANNOT_FIND_VERSION", + messageParameters = Array(json)) + } + + def cannotInsertIntoColumn( + tableName: String, + source: String, + target: String, + targetType: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_COLUMN_STRUCT_TYPE_MISMATCH", + messageParameters = Array(source, targetType, target, tableName)) + } + + def alterTableReplaceColumnsException( + oldSchema: StructType, + newSchema: StructType, + reason: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_ALTER_TABLE_REPLACE_COL_OP", + messageParameters = Array(reason, formatSchema(oldSchema), formatSchema(newSchema)) + ) + } + + def ambiguousPartitionColumnException( + columnName: String, colMatches: Seq[StructField]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_AMBIGUOUS_PARTITION_COLUMN", + messageParameters = Array(formatColumn(columnName).toString, + formatColumnList(colMatches.map(_.name))) + ) + } + + def tableNotSupportedException(operation: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_TABLE_NOT_SUPPORTED_IN_OP", + messageParameters = Array(operation) + ) + } + + def vacuumBasePathMissingException(baseDeltaPath: Path): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_VACUUM_SPECIFIC_PARTITION", + messageParameters = Array(s"$baseDeltaPath") + ) + } + + def unexpectedDataChangeException(op: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_DATA_CHANGE_FALSE", + messageParameters = Array(op) + ) + } + + def unknownConfigurationKeyException(confKey: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNKNOWN_CONFIGURATION", + messageParameters = Array(confKey, 
DeltaSQLConf.ALLOW_ARBITRARY_TABLE_PROPERTIES.key)) + } + + def cdcNotAllowedInThisVersion(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CDC_NOT_ALLOWED_IN_THIS_VERSION", + messageParameters = Array.empty + ) + } + + def cdcWriteNotAllowedInThisVersion(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CHANGE_TABLE_FEED_DISABLED", + messageParameters = Array.empty + ) + } + + def pathNotSpecifiedException: Throwable = { + new IllegalArgumentException("'path' is not specified") + } + + def pathNotExistsException(path: String): Throwable = { + new DeltaAnalysisException(errorClass = "DELTA_PATH_DOES_NOT_EXIST", + messageParameters = Array(path)) + } + + def directoryNotFoundException(path: String): Throwable = { + new FileNotFoundException(s"$path doesn't exist") + } + + def pathAlreadyExistsException(path: Path): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_PATH_EXISTS", + messageParameters = Array(s"$path") + ) + } + + def logFileNotFoundException( + path: Path, + version: Long, + metadata: Metadata): Throwable = { + val logRetention = DeltaConfigs.LOG_RETENTION.fromMetaData(metadata) + val checkpointRetention = DeltaConfigs.CHECKPOINT_RETENTION_DURATION.fromMetaData(metadata) + new DeltaFileNotFoundException( + errorClass = "DELTA_TRUNCATED_TRANSACTION_LOG", + messageParameters = Array( + path.toString, + version.toString, + DeltaConfigs.LOG_RETENTION.key, + logRetention.toString, + DeltaConfigs.CHECKPOINT_RETENTION_DURATION.key, + checkpointRetention.toString) + ) + } + + def logFileNotFoundExceptionForStreamingSource(e: FileNotFoundException): Throwable = { + new DeltaFileNotFoundException( + errorClass = "DELTA_LOG_FILE_NOT_FOUND_FOR_STREAMING_SOURCE", + messageParameters = Array.empty + ).initCause(e) + } + + def logFailedIntegrityCheck(version: Long, mismatchOption: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_TXN_LOG_FAILED_INTEGRITY", + messageParameters = Array(version.toString, mismatchOption) + ) + } + + def checkpointNonExistTable(path: Path): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_CHECKPOINT_NON_EXIST_TABLE", + messageParameters = Array(s"$path")) + } + + def multipleLoadPathsException(paths: Seq[String]): Throwable = { + new DeltaAnalysisException( + errorClass = "MULTIPLE_LOAD_PATH", + messageParameters = Array(paths.mkString("[", ",", "]"))) + } + + def partitionColumnNotFoundException(colName: String, schema: Seq[Attribute]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_PARTITION_COLUMN_NOT_FOUND", + messageParameters = Array( + s"${formatColumn(colName)}", + s"${schema.map(_.name).mkString(", ")}" + ) + ) + } + + def partitionPathParseException(fragment: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INVALID_PARTITION_PATH", + messageParameters = Array(fragment)) + } + + def partitionPathInvolvesNonPartitionColumnException( + badColumns: Seq[String], fragment: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_NON_PARTITION_COLUMN_SPECIFIED", + messageParameters = Array(formatColumnList(badColumns), fragment) + ) + } + + def nonPartitionColumnAbsentException(colsDropped: Boolean): Throwable = { + val msg = if (colsDropped) { + " Columns which are of NullType have been dropped." 
+ } else { + "" + } + new DeltaAnalysisException( + errorClass = "DELTA_NON_PARTITION_COLUMN_ABSENT", + messageParameters = Array(msg) + ) + } + + def replaceWhereMismatchException( + replaceWhere: String, + invariantViolation: InvariantViolationException): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_REPLACE_WHERE_MISMATCH", + messageParameters = Array(replaceWhere, invariantViolation.getMessage), + cause = Some(invariantViolation)) + } + + def replaceWhereMismatchException(replaceWhere: String, badPartitions: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_REPLACE_WHERE_MISMATCH", + messageParameters = Array(replaceWhere, + s"Invalid data would be written to partitions $badPartitions.")) + } + + def illegalFilesFound(file: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_ILLEGAL_FILE_FOUND", + messageParameters = Array(file)) + } + + def illegalDeltaOptionException(name: String, input: String, explain: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_ILLEGAL_OPTION", + messageParameters = Array(input, name, explain)) + } + + def invalidIdempotentWritesOptionsException(explain: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_INVALID_IDEMPOTENT_WRITES_OPTIONS", + messageParameters = Array(explain)) + } + + def invalidInterval(interval: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_INVALID_INTERVAL", + messageParameters = Array(interval) + ) + } + + def invalidTableValueFunction(function: String) : Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INVALID_TABLE_VALUE_FUNCTION", + messageParameters = Array(function) + ) + } + + def startingVersionAndTimestampBothSetException( + versionOptKey: String, + timestampOptKey: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_STARTING_VERSION_AND_TIMESTAMP_BOTH_SET", + messageParameters = Array(versionOptKey, timestampOptKey)) + } + + def unrecognizedLogFile(path: Path): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_UNRECOGNIZED_LOGFILE", + messageParameters = Array(s"$path") + ) + } + + def modifyAppendOnlyTableException(tableName: String): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_CANNOT_MODIFY_APPEND_ONLY", + // `tableName` could be null here, so convert to string first. 
+ messageParameters = Array(s"$tableName", DeltaConfigs.IS_APPEND_ONLY.key) + ) + } + + def missingPartFilesException(version: Long, ae: Exception): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_MISSING_PART_FILES", + messageParameters = Array(s"$version"), + cause = ae + ) + } + + def deltaVersionsNotContiguousException( + spark: SparkSession, deltaVersions: Seq[Long]): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_VERSIONS_NOT_CONTIGUOUS", + messageParameters = Array(deltaVersions.mkString(", ")) + ) + } + + def actionNotFoundException(action: String, version: Long): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_STATE_RECOVER_ERROR", + messageParameters = Array(action, version.toString)) + } + + + def schemaChangedException( + readSchema: StructType, + dataSchema: StructType, + retryable: Boolean, + version: Option[Long], + includeStartingVersionOrTimestampMessage: Boolean): Throwable = { + def newException(errorClass: String, messageParameters: Array[String]): Throwable = { + new DeltaIllegalStateException(errorClass, messageParameters) + } + + if (version.isEmpty) { + newException("DELTA_SCHEMA_CHANGED", Array( + formatSchema(readSchema), + formatSchema(dataSchema) + )) + } else if (!includeStartingVersionOrTimestampMessage) { + newException("DELTA_SCHEMA_CHANGED_WITH_VERSION", Array( + version.get.toString, + formatSchema(readSchema), + formatSchema(dataSchema) + )) + } else { + newException("DELTA_SCHEMA_CHANGED_WITH_STARTING_OPTIONS", Array( + version.get.toString, + formatSchema(readSchema), + formatSchema(dataSchema), + version.get.toString + )) + } + } + + def streamWriteNullTypeException: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_NULL_SCHEMA_IN_STREAMING_WRITE", + messageParameters = Array.empty + ) + } + + def schemaNotSetException: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_SCHEMA_NOT_SET", + messageParameters = Array.empty + ) + } + + def specifySchemaAtReadTimeException: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_SCHEMA_DURING_READ", + messageParameters = Array.empty + ) + } + + def schemaNotProvidedException: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_SCHEMA_NOT_PROVIDED", + messageParameters = Array.empty) + } + + def outputModeNotSupportedException(dataSource: String, outputMode: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_OUTPUT_MODE", + messageParameters = Array(dataSource, outputMode) + ) + } + + def updateSetColumnNotFoundException(col: String, colList: Seq[String]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_MISSING_SET_COLUMN", + messageParameters = Array(formatColumn(col), formatColumnList(colList))) + } + + def updateSetConflictException(cols: Seq[String]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CONFLICT_SET_COLUMN", + messageParameters = Array(formatColumnList(cols))) + } + + def updateNonStructTypeFieldNotSupportedException(col: String, s: DataType): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_FIELD_UPDATE_NON_STRUCT", + messageParameters = Array(s"${formatColumn(col)}", s"$s") + ) + } + + def truncateTablePartitionNotSupportedException: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_TRUNCATE_TABLE_PARTITION_NOT_SUPPORTED", messageParameters = Array.empty + ) + } + + def bloomFilterOnPartitionColumnNotSupportedException(name: String): Throwable 
= { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_PARTITION_COLUMN_IN_BLOOM_FILTER", + messageParameters = Array(name)) + } + + def bloomFilterOnNestedColumnNotSupportedException(name: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_NESTED_COLUMN_IN_BLOOM_FILTER", + messageParameters = Array(name)) + } + + def bloomFilterOnColumnTypeNotSupportedException(name: String, dataType: DataType): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_COLUMN_TYPE_IN_BLOOM_FILTER", + messageParameters = Array(s"${dataType.catalogString}", name)) + } + + def bloomFilterMultipleConfForSingleColumnException(name: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_MULTIPLE_CONF_FOR_SINGLE_COLUMN_IN_BLOOM_FILTER", + messageParameters = Array(name)) + } + + def bloomFilterCreateOnNonExistingColumnsException(unknownColumns: Seq[String]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_CREATE_BLOOM_FILTER_NON_EXISTING_COL", + messageParameters = Array(unknownColumns.mkString(", "))) + } + + def bloomFilterInvalidParameterValueException(message: String): Throwable = { + new AnalysisException( + s"Cannot create bloom filter index, invalid parameter value: $message") + } + + def bloomFilterDropOnNonIndexedColumnException(name: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_DROP_BLOOM_FILTER_ON_NON_INDEXED_COLUMN", + messageParameters = Array(name)) + } + + def bloomFilterDropOnNonExistingColumnsException(unknownColumns: Seq[String]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_BLOOM_FILTER_DROP_ON_NON_EXISTING_COLUMNS", + messageParameters = Array(unknownColumns.mkString(", ")) + ) + } + + + def cannotRenamePath(tempPath: String, path: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_CANNOT_RENAME_PATH", messageParameters = Array(tempPath, path)) + } + + def cannotSpecifyBothFileListAndPatternString(): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_FILE_LIST_AND_PATTERN_STRING_CONFLICT", + messageParameters = null) + } + + def cannotUpdateArrayField(table: String, field: String): Throwable = { + new DeltaAnalysisException(errorClass = "DELTA_CANNOT_UPDATE_ARRAY_FIELD", + messageParameters = Array(table, field)) + } + + def cannotUpdateMapField(table: String, field: String): Throwable = { + new DeltaAnalysisException(errorClass = "DELTA_CANNOT_UPDATE_MAP_FIELD", + messageParameters = Array(table, field)) + } + + def cannotUpdateStructField(table: String, field: String): Throwable = { + new DeltaAnalysisException(errorClass = "DELTA_CANNOT_UPDATE_STRUCT_FIELD", + messageParameters = Array(table, field)) + } + + def cannotUpdateOtherField(tableName: String, dataType: DataType): Throwable = { + new DeltaAnalysisException(errorClass = "DELTA_CANNOT_UPDATE_OTHER_FIELD", + messageParameters = Array(tableName, s"$dataType")) + } + + def cannotUseDataTypeForPartitionColumnError(field: StructField): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INVALID_PARTITION_COLUMN_TYPE", + messageParameters = Array(s"${field.name}", s"${field.dataType}") + ) + } + + def unexpectedPartitionSchemaFromUserException( + catalogPartitionSchema: StructType, userPartitionSchema: StructType): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNEXPECTED_PARTITION_SCHEMA_FROM_USER", + messageParameters = Array( + formatSchema(catalogPartitionSchema), 
formatSchema(userPartitionSchema)) + ) + } + + def multipleSourceRowMatchingTargetRowInMergeException(spark: SparkSession): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_MULTIPLE_SOURCE_ROW_MATCHING_TARGET_ROW_IN_MERGE", + messageParameters = Array(generateDocsLink(spark.sparkContext.getConf, + "/delta-update.html#upsert-into-a-table-using-merge")) + ) + } + + def sourceMaterializationFailedRepeatedlyInMerge: Throwable = + new DeltaRuntimeException(errorClass = "DELTA_MERGE_MATERIALIZE_SOURCE_FAILED_REPEATEDLY") + + def sourceNotDeterministicInMergeException(spark: SparkSession): Throwable = { + new UnsupportedOperationException( + s"""Cannot perform Merge because the source dataset is not deterministic. Please refer to + |${generateDocsLink(spark.sparkContext.getConf, + "/delta-update.html#operation-semantics")} for more information.""".stripMargin + ) + } + + def columnOfTargetTableNotFoundInMergeException(targetCol: String, + colNames: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_COLUMN_NOT_FOUND_IN_MERGE", + messageParameters = Array(targetCol, colNames) + ) + } + + def subqueryNotSupportedException(op: String, cond: Expression): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_SUBQUERY", + messageParameters = Array(op, cond.sql) + ) + } + + def multiColumnInPredicateNotSupportedException(operation: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_MULTI_COL_IN_PREDICATE", + messageParameters = Array(operation) + ) + } + + def nestedFieldNotSupported(operation: String, field: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_NESTED_FIELD_IN_OPERATION", + messageParameters = Array(operation, field) + ) + } + + def nestedFieldsNeedRename(columns : Set[String], baseSchema : StructType): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_NESTED_FIELDS_NEED_RENAME", + messageParameters = Array(columns.mkString("[", ", ", "]"), formatSchema(baseSchema)) + ) + } + + def inSubqueryNotSupportedException(operation: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_IN_SUBQUERY", + messageParameters = Array(operation)) + } + + def convertMetastoreMetadataMismatchException( + tableProperties: Map[String, String], + deltaConfiguration: Map[String, String]): Throwable = { + def prettyMap(m: Map[String, String]): String = { + m.map(e => s"${e._1}=${e._2}").mkString("[", ", ", "]") + } + new AnalysisException( + s"""You are trying to convert a table which already has a delta log where the table + |properties in the catalog don't match the configuration in the delta log. + |Table properties in catalog: ${prettyMap(tableProperties)} + |Delta configuration: ${prettyMap{deltaConfiguration}} + |If you would like to merge the configurations (update existing fields and insert new + |ones), set the SQL configuration + |spark.databricks.delta.convert.metadataCheck.enabled to false. 
+ """.stripMargin) + } + + def createExternalTableWithoutLogException( + path: Path, tableName: String, spark: SparkSession): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CREATE_EXTERNAL_TABLE_WITHOUT_TXN_LOG", + messageParameters = Array(tableName, path.toString, + generateDocsLink(spark.sparkContext.getConf, "/index.html"))) + } + + def createExternalTableWithoutSchemaException( + path: Path, tableName: String, spark: SparkSession): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CREATE_EXTERNAL_TABLE_WITHOUT_SCHEMA", + messageParameters = Array(tableName, path.toString, + generateDocsLink(spark.sparkContext.getConf, "/index.html"))) + } + + def createManagedTableWithoutSchemaException( + tableName: String, spark: SparkSession): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INVALID_MANAGED_TABLE_SYNTAX_NO_SCHEMA", + messageParameters = Array(tableName, s"""${generateDocsLink(spark.sparkContext.getConf, + "/index.html")}""".stripMargin) + ) + } + + def readTableWithoutSchemaException(identifier: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_READ_TABLE_WITHOUT_COLUMNS", + messageParameters = Array(identifier)) + } + + def createTableWithDifferentSchemaException( + path: Path, + specifiedSchema: StructType, + existingSchema: StructType, + diffs: Seq[String]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CREATE_TABLE_SCHEME_MISMATCH", + messageParameters = Array(path.toString, + specifiedSchema.treeString, existingSchema.treeString, + diffs.map("\n".r.replaceAllIn(_, "\n ")).mkString("- ", "\n- ", ""))) + } + + def createTableWithDifferentPartitioningException( + path: Path, + specifiedColumns: Seq[String], + existingColumns: Seq[String]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CREATE_TABLE_WITH_DIFFERENT_PARTITIONING", + messageParameters = Array( + path.toString, + specifiedColumns.mkString(", "), + existingColumns.mkString(", ") + ) + ) + } + + def createTableWithDifferentPropertiesException( + path: Path, + specifiedProperties: Map[String, String], + existingProperties: Map[String, String]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CREATE_TABLE_WITH_DIFFERENT_PROPERTY", + messageParameters = Array(path.toString, + specifiedProperties.toSeq.sorted.map { case (k, v) => s"$k=$v" }.mkString("\n"), + existingProperties.toSeq.sorted.map { case (k, v) => s"$k=$v" }.mkString("\n")) + ) + } + + def aggsNotSupportedException(op: String, cond: Expression): Throwable = { + val condStr = s"(condition = ${cond.sql})." + new DeltaAnalysisException( + errorClass = "DELTA_AGGREGATION_NOT_SUPPORTED", + messageParameters = Array(op, condStr) + ) + } + + def targetTableFinalSchemaEmptyException(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_TARGET_TABLE_FINAL_SCHEMA_EMPTY", + messageParameters = Array.empty) + } + + def nonDeterministicNotSupportedException(op: String, cond: Expression): Throwable = { + val condStr = s"(condition = ${cond.sql})." 
+ new DeltaAnalysisException( + errorClass = "DELTA_NON_DETERMINISTIC_FUNCTION_NOT_SUPPORTED", + messageParameters = Array(op, s"$condStr") + ) + } + + def noHistoryFound(logPath: Path): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_NO_COMMITS_FOUND", + messageParameters = Array(logPath.toString)) + } + + def noRecreatableHistoryFound(logPath: Path): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_NO_RECREATABLE_HISTORY_FOUND", + messageParameters = Array(s"$logPath")) + } + + def unsupportedAbsPathAddFile(str: String): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_UNSUPPORTED_ABS_PATH_ADD_FILE", + messageParameters = Array(str) + ) + } + + case class TimestampEarlierThanCommitRetentionException( + userTimestamp: java.sql.Timestamp, + commitTs: java.sql.Timestamp, + timestampString: String) extends AnalysisException( + s"""The provided timestamp ($userTimestamp) is before the earliest version available to this + |table ($commitTs). Please use a timestamp after $timestampString. + """.stripMargin) + + def timestampGreaterThanLatestCommit( + userTimestamp: java.sql.Timestamp, + commitTs: java.sql.Timestamp, + timestampString: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_TIMESTAMP_GREATER_THAN_COMMIT", + messageParameters = Array(s"$userTimestamp", s"$commitTs", timestampString) + ) + } + + def timestampInvalid(expr: Expression): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_TIMESTAMP_INVALID", + messageParameters = Array(s"${expr.sql}") + ) + } + + case class TemporallyUnstableInputException( + userTimestamp: java.sql.Timestamp, + commitTs: java.sql.Timestamp, + timestampString: String, + commitVersion: Long) extends AnalysisException( + s"""The provided timestamp: $userTimestamp is after the latest commit timestamp of + |$commitTs. If you wish to query this version of the table, please either provide + |the version with "VERSION AS OF $commitVersion" or use the exact timestamp + |of the last commit: "TIMESTAMP AS OF '$timestampString'". + """.stripMargin) + + def restoreVersionNotExistException( + userVersion: Long, + earliest: Long, + latest: Long): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_RESTORE_TABLE_VERSION", + messageParameters = Array(userVersion.toString, earliest.toString, latest.toString)) + } + + def restoreTimestampGreaterThanLatestException( + userTimestamp: String, + latestTimestamp: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_RESTORE_TIMESTAMP_GREATER", + messageParameters = Array(userTimestamp, latestTimestamp) + ) + } + + def restoreTimestampBeforeEarliestException( + userTimestamp: String, + earliestTimestamp: String): Throwable = { + new AnalysisException( + s"Cannot restore table to timestamp ($userTimestamp) as it is before the earliest version " + + s"available. 
Please use a timestamp after ($earliestTimestamp)" + ) + } + + def timeTravelNotSupportedException: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_TIME_TRAVEL_VIEWS", + messageParameters = Array.empty + ) + } + + def multipleTimeTravelSyntaxUsed: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_TIME_TRAVEL_MULTIPLE_FORMATS", + messageParameters = Array.empty + ) + } + + def nonExistentDeltaTable(tableId: DeltaTableIdentifier): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_TABLE_NOT_FOUND", + messageParameters = Array(s"$tableId")) + } + + def differentDeltaTableReadByStreamingSource( + newTableId: String, oldTableId: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DIFFERENT_DELTA_TABLE_READ_BY_STREAMING_SOURCE", + messageParameters = Array(newTableId, oldTableId)) + } + + def nonExistentColumnInSchema(column: String, schema: String): Throwable = { + new DeltaAnalysisException("DELTA_COLUMN_NOT_FOUND_IN_SCHEMA", + Array(column, schema)) + } + + def noRelationTable(tableIdent: Identifier): Throwable = { + new DeltaNoSuchTableException( + errorClass = "DELTA_NO_RELATION_TABLE", + messageParameters = Array(s"${tableIdent.quoted}")) + } + + def provideOneOfInTimeTravel: Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_ONEOF_IN_TIMETRAVEL", messageParameters = null) + } + + def emptyCalendarInterval: Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_INVALID_CALENDAR_INTERVAL_EMPTY", + messageParameters = Array.empty + ) + } + + def unexpectedPartialScan(path: Path): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNEXPECTED_PARTIAL_SCAN", + messageParameters = Array(s"$path") + ) + } + + def deltaLogAlreadyExistsException(path: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_LOG_ALREADY_EXISTS", + messageParameters = Array(path) + ) + } + + def missingProviderForConvertException(path: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_MISSING_PROVIDER_FOR_CONVERT", + messageParameters = Array(path)) + } + + def convertNonParquetTablesException(ident: TableIdentifier, sourceName: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CONVERT_NON_PARQUET_TABLE", + messageParameters = Array(sourceName, ident.toString) + ) + } + + def unexpectedPartitionColumnFromFileNameException( + path: String, parsedCol: String, expectedCol: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNEXPECTED_PARTITION_COLUMN_FROM_FILE_NAME", + messageParameters = Array( + formatColumn(expectedCol), + formatColumn(parsedCol), + path) + ) + } + + def unexpectedNumPartitionColumnsFromFileNameException( + path: String, parsedCols: Seq[String], expectedCols: Seq[String]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNEXPECTED_NUM_PARTITION_COLUMNS_FROM_FILE_NAME", + messageParameters = Array( + expectedCols.size.toString, + formatColumnList(expectedCols), + parsedCols.size.toString, + formatColumnList(parsedCols), + path) + ) + } + + def castPartitionValueException(partitionValue: String, dataType: DataType): Throwable = { + new DeltaRuntimeException( + errorClass = "DELTA_FAILED_CAST_PARTITION_VALUE", + messageParameters = Array(partitionValue, dataType.toString)) + } + + def emptyDirectoryException(directory: String): Throwable = { + new DeltaFileNotFoundException( + errorClass = "DELTA_EMPTY_DIRECTORY", + messageParameters = 
Array(directory) + ) + } + + def alterTableSetLocationSchemaMismatchException( + original: StructType, destination: StructType): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_SET_LOCATION_SCHEMA_MISMATCH", + messageParameters = Array(formatSchema(original), formatSchema(destination), + DeltaSQLConf.DELTA_ALTER_LOCATION_BYPASS_SCHEMA_CHECK.key)) + } + + def sparkSessionNotSetException(): Throwable = { + new DeltaIllegalStateException(errorClass = "DELTA_SPARK_SESSION_NOT_SET") + } + + def setLocationNotSupportedOnPathIdentifiers(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_SET_LOCATION_ON_PATH_IDENTIFIER", + messageParameters = Array.empty) + } + + def useSetLocation(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_CHANGE_LOCATION", + messageParameters = Array.empty + ) + } + + def cannotSetLocationMultipleTimes(locations : Seq[String]) : Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_CANNOT_SET_LOCATION_MULTIPLE_TIMES", + messageParameters = Array(s"${locations}") + ) + } + + def cannotReplaceMissingTableException(itableIdentifier: Identifier): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_REPLACE_MISSING_TABLE", + messageParameters = Array(itableIdentifier.toString)) + } + + def cannotCreateLogPathException(logPath: String): Throwable = { + new DeltaIOException( + errorClass = "DELTA_CANNOT_CREATE_LOG_PATH", + messageParameters = Array(logPath)) + } + + def cannotChangeProvider(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_CHANGE_PROVIDER", + messageParameters = Array.empty + ) + } + + def describeViewHistory: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_DESCRIBE_VIEW_HISTORY", + messageParameters = Array.empty + ) + } + + def viewNotSupported(operationName: String): Throwable = { + new AnalysisException(s"Operation $operationName can not be performed on a view") + } + + def postCommitHookFailedException( + failedHook: PostCommitHook, + failedOnCommitVersion: Long, + extraErrorMessage: String, + error: Throwable): Throwable = { + var errorMessage = "" + if (extraErrorMessage != null && extraErrorMessage.nonEmpty) { + errorMessage += s": $extraErrorMessage" + } + val ex = new DeltaRuntimeException( + errorClass = "DELTA_POST_COMMIT_HOOK_FAILED", + messageParameters = Array(s"$failedOnCommitVersion", failedHook.name, errorMessage) + ) + ex.initCause(error) + ex + } + + def unsupportedGenerateModeException(modeName: String): Throwable = { + import org.apache.spark.sql.delta.commands.DeltaGenerateCommand + val supportedModes = DeltaGenerateCommand.modeNameToGenerationFunc.keys.toSeq.mkString(", ") + new DeltaIllegalArgumentException( + errorClass = "DELTA_MODE_NOT_SUPPORTED", + messageParameters = Array(modeName, supportedModes)) + } + + def illegalUsageException(option: String, operation: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_ILLEGAL_USAGE", + messageParameters = Array(option, operation)) + } + + def foundMapTypeColumnException(key: String, value: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_FOUND_MAP_TYPE_COLUMN", + messageParameters = Array(key, value) + ) + } + def columnNotInSchemaException(column: String, schema: StructType): Throwable = { + nonExistentColumnInSchema(column, schema.treeString) + } + + def metadataAbsentException(): Throwable = { + new DeltaIllegalStateException(errorClass = "DELTA_METADATA_ABSENT", + 
messageParameters = Array(DeltaSQLConf.DELTA_COMMIT_VALIDATION_ENABLED.key)) + } + + def metadataAbsentForExistingCatalogTable(tableName: String, tablePath: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_METADATA_ABSENT_EXISTING_CATALOG_TABLE", + messageParameters = Array(tableName, tablePath, tableName)) + } + + def updateSchemaMismatchExpression(from: StructType, to: StructType): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UPDATE_SCHEMA_MISMATCH_EXPRESSION", + messageParameters = Array(from.catalogString, to.catalogString) + ) + } + + def extractReferencesFieldNotFound(field: String, exception: Throwable): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_EXTRACT_REFERENCES_FIELD_NOT_FOUND", + messageParameters = Array(field), + cause = exception) + } + + def addFilePartitioningMismatchException( + addFilePartitions: Seq[String], + metadataPartitions: Seq[String]): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_INVALID_PARTITIONING_SCHEMA", + messageParameters = Array(s"${DeltaErrors.formatColumnList(metadataPartitions)}", + s"${DeltaErrors.formatColumnList(addFilePartitions)}", + s"${DeltaSQLConf.DELTA_COMMIT_VALIDATION_ENABLED.key}") + ) + } + + def concurrentModificationExceptionMsg( + sparkConf: SparkConf, + baseMessage: String, + commit: Option[CommitInfo]): String = { + baseMessage + + commit.map(ci => s"\nConflicting commit: ${JsonUtils.toJson(ci)}").getOrElse("") + + s"\nRefer to " + + s"${DeltaErrors.generateDocsLink(sparkConf, "/concurrency-control.html")} " + + "for more details." + } + + def ignoreStreamingUpdatesAndDeletesWarning(spark: SparkSession): String = { + val docPage = DeltaErrors.generateDocsLink( + spark.sparkContext.getConf, + "/delta-streaming.html#ignoring-updates-and-deletes") + s"""WARNING: The 'ignoreFileDeletion' option is deprecated. Switch to using one of + |'ignoreDeletes' or 'ignoreChanges'. Refer to $docPage for details. 
+ """.stripMargin + } + + def configureSparkSessionWithExtensionAndCatalog( + originalException: Option[Throwable]): Throwable = { + val catalogImplConfig = SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key + new DeltaAnalysisException( + errorClass = "DELTA_CONFIGURE_SPARK_SESSION_WITH_EXTENSION_AND_CATALOG", + messageParameters = Array(classOf[DeltaSparkSessionExtension].getName, + catalogImplConfig, classOf[DeltaCatalog].getName, + classOf[DeltaSparkSessionExtension].getName, + catalogImplConfig, classOf[DeltaCatalog].getName), + cause = originalException) + } + + def duplicateColumnsOnUpdateTable(originalException: Throwable): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_DUPLICATE_COLUMNS_ON_UPDATE_TABLE", + messageParameters = Array(originalException.getMessage), + cause = Some(originalException)) + } + + def maxCommitRetriesExceededException( + attemptNumber: Int, + attemptVersion: Long, + initAttemptVersion: Long, + numActions: Int, + totalCommitAttemptTime: Long): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_MAX_COMMIT_RETRIES_EXCEEDED", + messageParameters = Array(s"$attemptNumber", s"$initAttemptVersion", s"$attemptVersion", + s"$numActions", s"$totalCommitAttemptTime")) + } + + def generatedColumnsReferToWrongColumns(e: AnalysisException): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INVALID_GENERATED_COLUMN_REFERENCES", Array.empty, cause = Some(e)) + } + + def generatedColumnsUpdateColumnType(current: StructField, update: StructField): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_GENERATED_COLUMN_UPDATE_TYPE_MISMATCH", + messageParameters = Array( + s"${current.name}", + s"${current.dataType.sql}", + s"${update.dataType.sql}" + ) + ) + } + + def generatedColumnsUDF(expr: Expression): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UDF_IN_GENERATED_COLUMN", + messageParameters = Array(s"${expr.sql}")) + } + + def generatedColumnsNonDeterministicExpression(expr: Expression): Throwable = { + new AnalysisException( + s"Found ${expr.sql}. 
A generated column cannot use a non deterministic expression") + } + + def generatedColumnsAggregateExpression(expr: Expression): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_AGGREGATE_IN_GENERATED_COLUMN", + messageParameters = Array(expr.sql.toString) + ) + } + + def generatedColumnsUnsupportedExpression(expr: Expression): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_EXPRESSION_GENERATED_COLUMN", + messageParameters = Array(s"${expr.sql}") + ) + } + + def generatedColumnsTypeMismatch( + column: String, + columnType: DataType, + exprType: DataType): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_GENERATED_COLUMNS_EXPR_TYPE_MISMATCH", + messageParameters = Array(column, exprType.sql, columnType.sql) + ) + } + + def expressionsNotFoundInGeneratedColumn(column: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_EXPRESSIONS_NOT_FOUND_IN_GENERATED_COLUMN", + messageParameters = Array(column) + ) + } + + def cannotChangeDataType(msg: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_CHANGE_DATA_TYPE", + messageParameters = Array(msg) + ) + } + + def ambiguousDataTypeChange(column: String, from: StructType, to: StructType): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_AMBIGUOUS_DATA_TYPE_CHANGE", + messageParameters = Array(column, from.toDDL, to.toDDL) + ) + } + + def unsupportedDataTypes( + unsupportedDataType: UnsupportedDataTypeInfo, + moreUnsupportedDataTypes: UnsupportedDataTypeInfo*): Throwable = { + val prettyMessage = (unsupportedDataType +: moreUnsupportedDataTypes) + .map(dt => s"${dt.column}: ${dt.dataType}") + .mkString("[", ", ", "]") + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_DATA_TYPES", + messageParameters = Array(prettyMessage, DeltaSQLConf.DELTA_SCHEMA_TYPE_CHECK.key) + ) + } + + def tableAlreadyExists(table: CatalogTable): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_TABLE_ALREADY_EXISTS", + messageParameters = Array(s"${table.identifier.quotedString}") + ) + } + + def tableLocationMismatch(table: CatalogTable, existingTable: CatalogTable): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_TABLE_LOCATION_MISMATCH", + messageParameters = Array( + s"${table.identifier.quotedString}", + s"`${existingTable.location}`", + s"`${table.location}`") + ) + } + + def nonSinglePartNamespaceForCatalog(ident: String): Throwable = { + new DeltaNoSuchTableException( + errorClass = "DELTA_NON_SINGLE_PART_NAMESPACE_FOR_CATALOG", + messageParameters = Array(ident)) + } + + def indexLargerThanStruct(pos: Int, column: StructField, len: Int): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INDEX_LARGER_THAN_STRUCT", + messageParameters = Array(s"$pos", s"$column", s"$len") + ) + } + + def indexLargerOrEqualThanStruct(pos: Int, len: Int): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INDEX_LARGER_OR_EQUAL_THAN_STRUCT", + messageParameters = Array(s"$pos", s"$len") + ) + } + + def invalidV1TableCall(callVersion: String, tableVersion: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_INVALID_V1_TABLE_CALL", + messageParameters = Array(callVersion, tableVersion) + ) + } + + def cannotGenerateUpdateExpressions(): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_CANNOT_GENERATE_UPDATE_EXPRESSIONS", + messageParameters = Array.empty + ) + } + + def unrecognizedInvariant(): Throwable = { + new 
DeltaUnsupportedOperationException( + errorClass = "DELTA_UNRECOGNIZED_INVARIANT", + messageParameters = Array.empty + ) + } + + def unrecognizedColumnChange(otherClass: String) : Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_UNRECOGNIZED_COLUMN_CHANGE", + messageParameters = Array(otherClass) + ) + } + + def notNullColumnNotFoundInStruct(struct: String): Throwable = { + new DeltaIndexOutOfBoundsException( + errorClass = "DELTA_NOT_NULL_COLUMN_NOT_FOUND_IN_STRUCT", + messageParameters = Array(struct) + ) + } + + def unSupportedInvariantNonStructType: Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_UNSUPPORTED_INVARIANT_NON_STRUCT", + messageParameters = Array.empty + ) + } + + def cannotResolveColumn(fieldName: String, schema: StructType): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_RESOLVE_COLUMN", + messageParameters = Array(fieldName, schema.treeString) + ) + } + + def unsupportedTruncateSampleTables: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_TRUNCATE_SAMPLE_TABLES", + messageParameters = Array.empty + ) + } + + def unrecognizedFileAction(otherAction: String, otherClass: String) : Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_UNRECOGNIZED_FILE_ACTION", + messageParameters = Array(otherAction, otherClass) + ) + } + + def operationOnTempViewWithGenerateColsNotSupported(op: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_OPERATION_ON_TEMP_VIEW_WITH_GENERATED_COLS_NOT_SUPPORTED", + messageParameters = Array(op, op)) + } + + def cannotModifyTableProperty(prop: String): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_CANNOT_MODIFY_TABLE_PROPERTY", + messageParameters = Array(prop)) + } + + /** + * We have plans to support more column mapping modes, but they are not implemented yet, + * so we error for now to be forward compatible with tables created in the future. + */ + def unsupportedColumnMappingMode(mode: String): Throwable = + new ColumnMappingUnsupportedException(s"The column mapping mode `$mode` is " + + s"not supported for this Delta version. 
Please upgrade if you want to use this mode.") + + def missingColumnId(mode: DeltaColumnMappingMode, field: String): Throwable = { + ColumnMappingException(s"Missing column ID in column mapping mode `${mode.name}`" + + s" in the field: $field", mode) + } + + def missingPhysicalName(mode: DeltaColumnMappingMode, field: String): Throwable = + ColumnMappingException(s"Missing physical name in column mapping mode `${mode.name}`" + + s" in the field: $field", mode) + + def duplicatedColumnId( + mode: DeltaColumnMappingMode, + id: Long, + schema: StructType): Throwable = { + ColumnMappingException( + s"Found duplicated column id `$id` in column mapping mode `${mode.name}` \n" + + s"schema: \n ${schema.prettyJson}", mode + ) + } + + def duplicatedPhysicalName( + mode: DeltaColumnMappingMode, + physicalName: String, + schema: StructType): Throwable = { + ColumnMappingException( + s"Found duplicated physical name `$physicalName` in column mapping mode `${mode.name}` \n\t" + + s"schema: \n ${schema.prettyJson}", mode + ) + } + + def maxColumnIdNotSet: Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_COLUMN_MAPPING_MAX_COLUMN_ID_NOT_SET", + messageParameters = Array(DeltaConfigs.COLUMN_MAPPING_MAX_ID.key) + ) + } + + def maxColumnIdNotSetCorrectly(tableMax: Long, fieldMax: Long): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_COLUMN_MAPPING_MAX_COLUMN_ID_NOT_SET_CORRECTLY", + messageParameters = Array( + DeltaConfigs.COLUMN_MAPPING_MAX_ID.key, tableMax.toString, fieldMax.toString) + ) + } + + def changeColumnMappingModeNotSupported(oldMode: String, newMode: String): Throwable = { + new DeltaColumnMappingUnsupportedException( + errorClass = "DELTA_UNSUPPORTED_COLUMN_MAPPING_MODE_CHANGE", + messageParameters = Array(oldMode, newMode)) + } + + def generateManifestWithColumnMappingNotSupported: Throwable = { + new DeltaColumnMappingUnsupportedException( + errorClass = "DELTA_UNSUPPORTED_MANIFEST_GENERATION_WITH_COLUMN_MAPPING") + } + + def convertToDeltaNoPartitionFound(tableName: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CONVERSION_NO_PARTITION_FOUND", + messageParameters = Array(tableName) + ) + } + + def convertToDeltaWithColumnMappingNotSupported(mode: DeltaColumnMappingMode): Throwable = { + new DeltaColumnMappingUnsupportedException( + errorClass = "DELTA_CONVERSION_UNSUPPORTED_COLUMN_MAPPING", + messageParameters = Array( + DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey, + mode.name)) + } + + def changeColumnMappingModeOnOldProtocol(oldProtocol: Protocol): Throwable = { + val requiredProtocol = { + if (oldProtocol.supportsReaderFeatures || oldProtocol.supportsWriterFeatures) { + Protocol( + TableFeatureProtocolUtils.TABLE_FEATURES_MIN_READER_VERSION, + TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(ColumnMappingTableFeature) + } else { + ColumnMappingTableFeature.minProtocolVersion + } + } + + new DeltaColumnMappingUnsupportedException( + errorClass = "DELTA_UNSUPPORTED_COLUMN_MAPPING_PROTOCOL", + messageParameters = Array( + s"${DeltaConfigs.COLUMN_MAPPING_MODE.key}", + s"$requiredProtocol", + s"$oldProtocol", + columnMappingAdviceMessage(requiredProtocol))) + } + + private def columnMappingAdviceMessage( + requiredProtocol: Protocol = ColumnMappingTableFeature.minProtocolVersion): String = { + s""" + |Please enable Column Mapping on your Delta table with mapping mode 'name'. + |You can use one of the following commands. 
+ | + |If your table is already on the required protocol version: + |ALTER TABLE table_name SET TBLPROPERTIES ('delta.columnMapping.mode' = 'name') + | + |If your table is not on the required protocol version and requires a protocol upgrade: + |ALTER TABLE table_name SET TBLPROPERTIES ( + | 'delta.columnMapping.mode' = 'name', + | 'delta.minReaderVersion' = '${requiredProtocol.minReaderVersion}', + | 'delta.minWriterVersion' = '${requiredProtocol.minWriterVersion}') + |""".stripMargin + } + + def columnRenameNotSupported: Throwable = { + val adviceMsg = columnMappingAdviceMessage() + new DeltaAnalysisException("DELTA_UNSUPPORTED_RENAME_COLUMN", Array(adviceMsg)) + } + + def dropColumnNotSupported(suggestUpgrade: Boolean): Throwable = { + val adviceMsg = if (suggestUpgrade) columnMappingAdviceMessage() else "" + new DeltaAnalysisException("DELTA_UNSUPPORTED_DROP_COLUMN", Array(adviceMsg)) + } + + def dropNestedColumnsFromNonStructTypeException(struct : DataType) : Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_DROP_NESTED_COLUMN_FROM_NON_STRUCT_TYPE", + messageParameters = Array(s"$struct") + ) + } + + def dropPartitionColumnNotSupported(droppingPartCols: Seq[String]): Throwable = { + new DeltaAnalysisException("DELTA_UNSUPPORTED_DROP_PARTITION_COLUMN", + Array(droppingPartCols.mkString(","))) + } + + def schemaChangeDuringMappingModeChangeNotSupported( + oldSchema: StructType, + newSchema: StructType): Throwable = + new DeltaColumnMappingUnsupportedException( + errorClass = "DELTA_UNSUPPORTED_COLUMN_MAPPING_SCHEMA_CHANGE", + messageParameters = Array( + formatSchema(oldSchema), + formatSchema(newSchema))) + + def foundInvalidCharsInColumnNames(cause: Throwable): Throwable = + new DeltaAnalysisException( + errorClass = "DELTA_INVALID_CHARACTERS_IN_COLUMN_NAMES", + messageParameters = Array.empty, + cause = Some(cause)) + + def foundViolatingConstraintsForColumnChange( + operation: String, + columnName: String, + constraints: Map[String, String]): Throwable = { + val plural = if (constraints.size > 1) "s" else "" + new AnalysisException( + s""" + |Cannot $operation column $columnName because this column is referenced by the following + | check constraint$plural:\n\t${constraints.mkString("\n\t")} + |""".stripMargin) + } + + def foundViolatingGeneratedColumnsForColumnChange( + operation: String, + columnName: String, + fields: Seq[StructField]): Throwable = { + val plural = if (fields.size > 1) "s" else "" + new AnalysisException( + s""" + |Cannot $operation column $columnName because this column is referenced by the following + | generated column$plural:\n\t${fields.map(_.name).mkString("\n\t")} + |""".stripMargin) + } + + def missingColumnsInInsertInto(column: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INSERT_COLUMN_MISMATCH", + messageParameters = Array(column)) + } + + def schemaNotConsistentWithTarget(tableSchema: String, targetAttr: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_SCHEMA_NOT_CONSISTENT_WITH_TARGET", + messageParameters = Array(tableSchema, targetAttr) + ) + } + + def logStoreConfConflicts(classConf: Seq[(String, String)], + schemeConf: Seq[(String, String)]): Throwable = { + val classConfStr = classConf.map(_._1).mkString(", ") + val schemeConfStr = schemeConf.map(_._1).mkString(", ") + new DeltaAnalysisException( + errorClass = "DELTA_INVALID_LOGSTORE_CONF", + messageParameters = Array(classConfStr, schemeConfStr) + ) + } + + def inconsistentLogStoreConfs(setKeys: Seq[(String, 
String)]): Throwable = { + val setKeyStr = setKeys.map(_.productIterator.mkString(" = ")).mkString(", ") + new DeltaIllegalArgumentException( + errorClass = "DELTA_INCONSISTENT_LOGSTORE_CONFS", + messageParameters = Array(setKeyStr) + ) + } + + def ambiguousPathsInCreateTableException(identifier: String, location: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_AMBIGUOUS_PATHS_IN_CREATE_TABLE", + messageParameters = Array(identifier, location, + DeltaSQLConf.DELTA_LEGACY_ALLOW_AMBIGUOUS_PATHS.key)) + } + + def concurrentWriteException( + conflictingCommit: Option[CommitInfo]): io.delta.exceptions.ConcurrentWriteException = { + val message = DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + s"A concurrent transaction has written new data since the current transaction " + + s"read the table. Please try the operation again.", + conflictingCommit) + new io.delta.exceptions.ConcurrentWriteException(message) + } + + def metadataChangedException( + conflictingCommit: Option[CommitInfo]): io.delta.exceptions.MetadataChangedException = { + val message = DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + "The metadata of the Delta table has been changed by a concurrent update. " + + "Please try the operation again.", + conflictingCommit) + new io.delta.exceptions.MetadataChangedException(message) + } + + def protocolPropNotIntException(key: String, value: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_PROTOCOL_PROPERTY_NOT_INT", + Array(key, value)) + } + + def protocolChangedException( + conflictingCommit: Option[CommitInfo]): io.delta.exceptions.ProtocolChangedException = { + val additionalInfo = conflictingCommit.map { v => + if (v.version.getOrElse(-1) == 0) { + "This happens when multiple writers are writing to an empty directory. " + + "Creating the table ahead of time will avoid this conflict. " + } else { + "" + } + }.getOrElse("") + val message = DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + "The protocol version of the Delta table has been changed by a concurrent update. 
" + + additionalInfo + "Please try the operation again.", + conflictingCommit) + new io.delta.exceptions.ProtocolChangedException(message) + } + + def unsupportedReaderTableFeaturesInTableException( + tableNameOrPath: String, + unsupported: Iterable[String]): DeltaUnsupportedTableFeatureException = { + new DeltaUnsupportedTableFeatureException( + errorClass = "DELTA_UNSUPPORTED_FEATURES_FOR_READ", + tableNameOrPath = tableNameOrPath, + unsupported = unsupported) + } + + def unsupportedWriterTableFeaturesInTableException( + tableNameOrPath: String, + unsupported: Iterable[String]): DeltaUnsupportedTableFeatureException = { + new DeltaUnsupportedTableFeatureException( + errorClass = "DELTA_UNSUPPORTED_FEATURES_FOR_WRITE", + tableNameOrPath = tableNameOrPath, + unsupported = unsupported) + } + + def unsupportedTableFeatureConfigsException( + configs: Iterable[String]): DeltaTableFeatureException = { + new DeltaTableFeatureException( + errorClass = "DELTA_UNSUPPORTED_FEATURES_IN_CONFIG", + messageParameters = Array(configs.mkString(", "))) + } + + def unsupportedTableFeatureStatusException( + feature: String, + status: String): DeltaTableFeatureException = { + new DeltaTableFeatureException( + errorClass = "DELTA_UNSUPPORTED_FEATURE_STATUS", + messageParameters = Array(feature, status)) + } + + def tableFeatureReadRequiresWriteException( + requiredWriterVersion: Int): DeltaTableFeatureException = { + new DeltaTableFeatureException( + errorClass = "DELTA_READ_FEATURE_PROTOCOL_REQUIRES_WRITE", + messageParameters = Array( + requiredWriterVersion.toString, + generateDocsLink(SparkSession.active.sparkContext.getConf, "/index.html"))) + } + + def tableFeatureRequiresHigherReaderProtocolVersion( + feature: String, + currentVersion: Int, + requiredVersion: Int): DeltaTableFeatureException = { + new DeltaTableFeatureException( + errorClass = "DELTA_FEATURE_REQUIRES_HIGHER_READER_VERSION", + messageParameters = Array( + feature, + currentVersion.toString, + requiredVersion.toString, + generateDocsLink(SparkSession.active.sparkContext.getConf, "/index.html"))) + } + + def tableFeatureRequiresHigherWriterProtocolVersion( + feature: String, + currentVersion: Int, + requiredVersion: Int): DeltaTableFeatureException = { + new DeltaTableFeatureException( + errorClass = "DELTA_FEATURE_REQUIRES_HIGHER_WRITER_VERSION", + messageParameters = Array( + feature, + currentVersion.toString, + requiredVersion.toString, + generateDocsLink(SparkSession.active.sparkContext.getConf, "/index.html"))) + } + + def tableFeatureMismatchException(features: Iterable[String]): DeltaTableFeatureException = { + new DeltaTableFeatureException( + errorClass = "DELTA_FEATURES_PROTOCOL_METADATA_MISMATCH", + messageParameters = Array(features.mkString(", "))) + } + + def tableFeaturesRequireManualEnablementException( + unsupportedFeatures: Iterable[TableFeature], + supportedFeatures: Iterable[TableFeature]): Throwable = { + new DeltaTableFeatureException( + errorClass = "DELTA_FEATURES_REQUIRE_MANUAL_ENABLEMENT", + messageParameters = Array( + unsupportedFeatures.map(_.name).toSeq.sorted.mkString(", "), + supportedFeatures.map(_.name).toSeq.sorted.mkString(", "))) + } + + case class LogRetentionConfig(key: String, value: String, truncateHistoryRetention: String) + + private def logRetentionConfig(metadata: Metadata): LogRetentionConfig = { + val logRetention = DeltaConfigs.LOG_RETENTION + val truncateHistoryRetention = DeltaConfigs.TABLE_FEATURE_DROP_TRUNCATE_HISTORY_LOG_RETENTION + LogRetentionConfig( + logRetention.key, + 
logRetention.fromMetaData(metadata).toString, + truncateHistoryRetention.fromMetaData(metadata).toString) + } + + def dropTableFeatureHistoricalVersionsExist( + feature: String, + metadata: Metadata): DeltaTableFeatureException = { + val config = logRetentionConfig(metadata) + new DeltaTableFeatureException( + errorClass = "DELTA_FEATURE_DROP_HISTORICAL_VERSIONS_EXIST", + messageParameters = Array(feature, config.key, config.value, config.truncateHistoryRetention) + ) + } + + def dropTableFeatureWaitForRetentionPeriod( + feature: String, + metadata: Metadata): DeltaTableFeatureException = { + val config = logRetentionConfig(metadata) + new DeltaTableFeatureException( + errorClass = "DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD", + messageParameters = Array(feature, config.key, config.value, config.truncateHistoryRetention) + ) + } + + def tableFeatureDropHistoryTruncationNotAllowed(): DeltaTableFeatureException = { + new DeltaTableFeatureException( + errorClass = "DELTA_FEATURE_DROP_HISTORY_TRUNCATION_NOT_ALLOWED", + messageParameters = Array.empty) + } + + def dropTableFeatureNonRemovableFeature(feature: String): DeltaTableFeatureException = { + new DeltaTableFeatureException( + errorClass = "DELTA_FEATURE_DROP_NONREMOVABLE_FEATURE", + messageParameters = Array(feature)) + } + + def dropTableFeatureConflictRevalidationFailed( + conflictingCommit: Option[CommitInfo] = None): DeltaTableFeatureException = { + val concurrentCommit = DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, "", conflictingCommit) + new DeltaTableFeatureException( + errorClass = "DELTA_FEATURE_DROP_CONFLICT_REVALIDATION_FAIL", + messageParameters = Array(concurrentCommit)) + } + + def dropTableFeatureFeatureNotSupportedByClient( + feature: String): DeltaTableFeatureException = { + new DeltaTableFeatureException( + errorClass = "DELTA_FEATURE_DROP_UNSUPPORTED_CLIENT_FEATURE", + messageParameters = Array(feature)) + } + + def dropTableFeatureFeatureNotSupportedByProtocol( + feature: String): DeltaTableFeatureException = { + new DeltaTableFeatureException( + errorClass = "DELTA_FEATURE_DROP_FEATURE_NOT_PRESENT", + messageParameters = Array(feature)) + } + + def dropTableFeatureNotDeltaTableException(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_ONLY_OPERATION", + messageParameters = Array("ALTER TABLE DROP FEATURE") + ) + } + + def concurrentAppendException( + conflictingCommit: Option[CommitInfo], + partition: String, + customRetryMsg: Option[String] = None): io.delta.exceptions.ConcurrentAppendException = { + val message = DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + s"Files were added to $partition by a concurrent update. " + + customRetryMsg.getOrElse("Please try the operation again."), + conflictingCommit) + new io.delta.exceptions.ConcurrentAppendException(message) + } + + def concurrentDeleteReadException( + conflictingCommit: Option[CommitInfo], + file: String): io.delta.exceptions.ConcurrentDeleteReadException = { + val message = DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + "This transaction attempted to read one or more files that were deleted" + + s" (for example $file) by a concurrent update. 
Please try the operation again.", + conflictingCommit) + new io.delta.exceptions.ConcurrentDeleteReadException(message) + } + + def concurrentDeleteDeleteException( + conflictingCommit: Option[CommitInfo], + file: String): io.delta.exceptions.ConcurrentDeleteDeleteException = { + val message = DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + "This transaction attempted to delete one or more files that were deleted " + + s"(for example $file) by a concurrent update. Please try the operation again.", + conflictingCommit) + new io.delta.exceptions.ConcurrentDeleteDeleteException(message) + } + + + def concurrentTransactionException( + conflictingCommit: Option[CommitInfo]): io.delta.exceptions.ConcurrentTransactionException = { + val message = DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + s"This error occurs when multiple streaming queries are using the same checkpoint to write " + + "into this table. Did you run multiple instances of the same streaming query" + + " at the same time?", + conflictingCommit) + new io.delta.exceptions.ConcurrentTransactionException(message) + } + + def restoreMissedDataFilesError(missedFiles: Array[String], version: Long): Throwable = + new IllegalArgumentException( + s"""Not all files from version $version are available in file system. + | Missed files (top 100 files): ${missedFiles.mkString(",")}. + | Please use more recent version or timestamp for restoring. + | To disable check update option ${SQLConf.IGNORE_MISSING_FILES.key}""" + .stripMargin + ) + + def unexpectedAlias(alias : String) : Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_UNEXPECTED_ALIAS", + messageParameters = Array(alias) + ) + } + + def unexpectedProject(project : String) : Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_UNEXPECTED_PROJECT", + messageParameters = Array(project) + ) + } + + def unexpectedAttributeReference(ref: String): Throwable = { + new DeltaIllegalStateException(errorClass = "DELTA_UNEXPECTED_ATTRIBUTE_REFERENCE", + messageParameters = Array(ref)) + } + + def unsetNonExistentProperty(key: String, table: String): Throwable = { + new DeltaAnalysisException(errorClass = "DELTA_UNSET_NON_EXISTENT_PROPERTY", Array(key, table)) + } + + def identityColumnInconsistentMetadata( + colName: String, + hasStart: Boolean, + hasStep: Boolean, + hasInsert: Boolean): Throwable = { + new AnalysisException(s"Inconsistent IDENTITY metadata for column $colName " + + s"detected: $hasStart, $hasStep, $hasInsert") + } + + def activeSparkSessionNotFound(): Throwable = { + new DeltaIllegalArgumentException(errorClass = "DELTA_ACTIVE_SPARK_SESSION_NOT_FOUND") + } + + def sparkTaskThreadNotFound: Throwable = { + new DeltaIllegalStateException(errorClass = "DELTA_SPARK_THREAD_NOT_FOUND") + } + + def iteratorAlreadyClosed(): Throwable = { + new DeltaIllegalStateException(errorClass = "DELTA_ITERATOR_ALREADY_CLOSED") + } + + def activeTransactionAlreadySet(): Throwable = { + new DeltaIllegalStateException(errorClass = "DELTA_ACTIVE_TRANSACTION_ALREADY_SET") + } + + def deltaStatsCollectionColumnNotFound(statsType: String, columnPath: String): Throwable = { + new DeltaRuntimeException( + errorClass = "DELTA_STATS_COLLECTION_COLUMN_NOT_FOUND", + messageParameters = Array(statsType, columnPath) + ) + } + + def convertToDeltaRowTrackingEnabledWithoutStatsCollection: Throwable = { + val statisticsCollectionPropertyKey = DeltaSQLConf.DELTA_COLLECT_STATS.key + val rowTrackingTableFeatureDefaultKey = + 
TableFeatureProtocolUtils.defaultPropertyKey(RowTrackingFeature) + val rowTrackingDefaultPropertyKey = DeltaConfigs.ROW_TRACKING_ENABLED.defaultTablePropertyKey + new DeltaIllegalStateException( + errorClass = "DELTA_CONVERT_TO_DELTA_ROW_TRACKING_WITHOUT_STATS", + messageParameters = Array( + statisticsCollectionPropertyKey, + rowTrackingTableFeatureDefaultKey, + rowTrackingDefaultPropertyKey)) + } + + /** This is a method only used for testing Py4J exception handling. */ + def throwDeltaIllegalArgumentException(): Throwable = { + new DeltaIllegalArgumentException(errorClass = "DELTA_UNRECOGNIZED_INVARIANT") + } + + def invalidSourceVersion(version: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_INVALID_SOURCE_VERSION", + messageParameters = Array(version) + ) + } + + def invalidSourceOffsetFormat(): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_INVALID_SOURCE_OFFSET_FORMAT" + ) + } + + def invalidCommittedVersion(attemptVersion: Long, currentVersion: Long): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_INVALID_COMMITTED_VERSION", + messageParameters = Array(attemptVersion.toString, currentVersion.toString) + ) + } + + def nonPartitionColumnReference(colName: String, partitionColumns: Seq[String]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_NON_PARTITION_COLUMN_REFERENCE", + messageParameters = Array(colName, partitionColumns.mkString(", ")) + ) + } + + def missingColumn(attr: Attribute, targetAttrs: Seq[Attribute]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_MISSING_COLUMN", + messageParameters = Array(attr.name, targetAttrs.map(_.name).mkString(", ")) + ) + } + + def missingPartitionColumn(col: String, schemaCatalog: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_MISSING_PARTITION_COLUMN", + messageParameters = Array(col, schemaCatalog) + ) + } + + def noNewAttributeId(oldAttr: AttributeReference): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_NO_NEW_ATTRIBUTE_ID", + messageParameters = Array(oldAttr.qualifiedName) + ) + } + + def nonGeneratedColumnMissingUpdateExpression(column: Attribute): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_NON_GENERATED_COLUMN_MISSING_UPDATE_EXPR", + messageParameters = Array(column.toString) + ) + } + + def failedInferSchema: Throwable = { + new DeltaRuntimeException("DELTA_FAILED_INFER_SCHEMA") + } + + def failedReadFileFooter(file: String, e: Throwable): Throwable = { + new DeltaIOException( + errorClass = "DELTA_FAILED_READ_FILE_FOOTER", + messageParameters = Array(file), + cause = e + ) + } + + def failedScanWithHistoricalVersion(historicalVersion: Long): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_FAILED_SCAN_WITH_HISTORICAL_VERSION", + messageParameters = Array(historicalVersion.toString) + ) + } + + def failedRecognizePredicate(predicate: String, cause: Throwable): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_FAILED_RECOGNIZE_PREDICATE", messageParameters = Array(predicate), + cause = Some(cause) + ) + } + + def failedFindAttributeInOutputColumns(newAttrName: String, targetColNames: String): Throwable = + { + new DeltaAnalysisException( + errorClass = "DELTA_FAILED_FIND_ATTRIBUTE_IN_OUTPUT_COLUMNS", + messageParameters = Array(newAttrName, targetColNames) + ) + } + + def failedFindPartitionColumnInOutputPlan(partitionColumn: String): Throwable = { + new DeltaIllegalStateException( + errorClass = 
"DELTA_FAILED_FIND_PARTITION_COLUMN_IN_OUTPUT_PLAN", + messageParameters = Array(partitionColumn)) + } + + def deltaTableFoundInExecutor(): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_TABLE_FOUND_IN_EXECUTOR", + messageParameters = Array.empty + ) + } + + def unsupportSubqueryInPartitionPredicates(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_SUBQUERY_IN_PARTITION_PREDICATES", + messageParameters = Array.empty + ) + } + + def fileAlreadyExists(file: String): Throwable = { + new DeltaFileAlreadyExistsException( + errorClass = "DELTA_FILE_ALREADY_EXISTS", + messageParameters = Array(file) + ) + } + + def replaceWhereUsedWithDynamicPartitionOverwrite(): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_REPLACE_WHERE_WITH_DYNAMIC_PARTITION_OVERWRITE" + ) + } + + def overwriteSchemaUsedWithDynamicPartitionOverwrite(): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_OVERWRITE_SCHEMA_WITH_DYNAMIC_PARTITION_OVERWRITE" + ) + } + + def replaceWhereUsedInOverwrite(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_REPLACE_WHERE_IN_OVERWRITE", messageParameters = Array.empty + ) + } + + def deltaDynamicPartitionOverwriteDisabled(): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_DYNAMIC_PARTITION_OVERWRITE_DISABLED" + ) + } + + def incorrectArrayAccessByName(rightName: String, wrongName: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_INCORRECT_ARRAY_ACCESS_BY_NAME", + messageParameters = Array(rightName, wrongName) + ) + } + + def columnPathNotNested(columnPath: String, other: DataType, column: Seq[String]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_COLUMN_PATH_NOT_NESTED", + messageParameters = Array( + s"$columnPath", + s"$other", + s"${SchemaUtils.prettyFieldName(column)}" + ) + ) + } + + def showPartitionInNotPartitionedTable(tableName: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_SHOW_PARTITION_IN_NON_PARTITIONED_TABLE", + messageParameters = Array(tableName) + ) + } + + def showPartitionInNotPartitionedColumn(badColumns: Set[String]): Throwable = { + val badCols = badColumns.mkString("[", ", ", "]") + new DeltaAnalysisException( + errorClass = "DELTA_SHOW_PARTITION_IN_NON_PARTITIONED_COLUMN", + messageParameters = Array(badCols) + ) + } + + def duplicateColumnOnInsert(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_DUPLICATE_COLUMNS_ON_INSERT", + messageParameters = Array.empty + ) + } + + def timeTravelInvalidBeginValue(timeTravelKey: String, cause: Throwable): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_TIME_TRAVEL_INVALID_BEGIN_VALUE", + messageParameters = Array(timeTravelKey), + cause = cause + ) + } + + def removeFileCDCMissingExtendedMetadata(fileName: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_REMOVE_FILE_CDC_MISSING_EXTENDED_METADATA", + messageParameters = Array(fileName) + ) + } + + def failRelativizePath(pathName: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_FAIL_RELATIVIZE_PATH", messageParameters = Array( + pathName, + DeltaSQLConf.DELTA_VACUUM_RELATIVIZE_IGNORE_ERROR.key) + ) + } + + def invalidFormatFromSourceVersion(wrongVersion: Long, expectedVersion: Integer): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_INVALID_FORMAT_FROM_SOURCE_VERSION", + messageParameters = Array(expectedVersion.toString, 
wrongVersion.toString) + ) + } + + def createTableWithNonEmptyLocation(tableId: String, tableLocation: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CREATE_TABLE_WITH_NON_EMPTY_LOCATION", + messageParameters = Array(tableId, tableLocation) + ) + } + + def maxArraySizeExceeded(): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_MAX_ARRAY_SIZE_EXCEEDED", messageParameters = Array.empty + ) + } + + def replaceWhereWithFilterDataChangeUnset(dataFilters: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_REPLACE_WHERE_WITH_FILTER_DATA_CHANGE_UNSET", + messageParameters = Array(dataFilters) + ) + } + + def blockColumnMappingAndCdcOperation(op: DeltaOperations.Operation): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_BLOCK_COLUMN_MAPPING_AND_CDC_OPERATION", + messageParameters = Array(op.name) + ) + } + + def missingDeltaStorageJar(e: NoClassDefFoundError): Throwable = { + // scalastyle:off line.size.limit + new NoClassDefFoundError( + s"""${e.getMessage} + |Please ensure that the delta-storage dependency is included. + | + |If using Python, please ensure you call `configure_spark_with_delta_pip` or use + |`--packages io.delta:delta-spark_:`. + |See https://docs.delta.io/latest/quick-start.html#python. + | + |More information about this dependency and how to include it can be found here: + |https://docs.delta.io/latest/porting.html#delta-lake-1-1-or-below-to-delta-lake-1-2-or-above. + |""".stripMargin) + // scalastyle:on line.size.limit + } + + /** + * If `isSchemaChange` is false, this means the `incompatVersion` actually refers to a data schema + * instead of a schema change. This happens when we could not find any read-incompatible schema + * changes within the querying range, but the read schema is still NOT compatible with the data + * files being queried, which could happen if user falls back to `legacy` mode and read past data + * using some diverged latest schema or time-travelled schema. In this uncommon case, we should + * tell the user to try setting it back to endVersion, OR ask us to give them the flag to force + * unblock. 
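+ *
+ * A hedged sketch of the non-schema-change case (the version numbers and the `snapshot` handle
+ * are illustrative, not part of this change): a batch CDF read over versions 0 to 10 whose read
+ * schema no longer matches the data files written at version 7 would be blocked roughly as
+ * {{{
+ *   throw DeltaErrors.blockBatchCdfReadWithIncompatibleSchemaChange(
+ *     start = 0L, end = 10L, readSchema = snapshot.schema, readVersion = 10L,
+ *     incompatVersion = 7L, isSchemaChange = false)
+ * }}}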
+ */ + def blockBatchCdfReadWithIncompatibleSchemaChange( + start: Long, + end: Long, + readSchema: StructType, + readVersion: Long, + incompatVersion: Long, + isSchemaChange: Boolean = true): Throwable = { + new DeltaUnsupportedOperationException( + if (isSchemaChange) { + "DELTA_CHANGE_DATA_FEED_INCOMPATIBLE_SCHEMA_CHANGE" + } else { + "DELTA_CHANGE_DATA_FEED_INCOMPATIBLE_DATA_SCHEMA" + }, + messageParameters = Array( + start.toString, end.toString, + readSchema.json, readVersion.toString, incompatVersion.toString) ++ { + if (isSchemaChange) { + Array(start.toString, incompatVersion.toString, incompatVersion.toString, end.toString) + } else { + Array(DeltaSQLConf.DELTA_CDF_DEFAULT_SCHEMA_MODE_FOR_COLUMN_MAPPING_TABLE.key) + } + } + ) + } + + def blockStreamingReadsWithIncompatibleColumnMappingSchemaChanges( + spark: SparkSession, + readSchema: StructType, + incompatibleSchema: StructType, + detectedDuringStreaming: Boolean): Throwable = { + val docLink = "/versioning.html#column-mapping" + val enableNonAdditiveSchemaEvolution = spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_STREAMING_ENABLE_SCHEMA_TRACKING) + new DeltaStreamingColumnMappingSchemaIncompatibleException( + readSchema, + incompatibleSchema, + generateDocsLink(spark.sparkContext.getConf, docLink), + enableNonAdditiveSchemaEvolution, + additionalProperties = Map( + "detectedDuringStreaming" -> detectedDuringStreaming.toString + )) + } + + def failedToGetSnapshotDuringColumnMappingStreamingReadCheck(cause: Throwable): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_STREAMING_CHECK_COLUMN_MAPPING_NO_SNAPSHOT", + messageParameters = Array(DeltaSQLConf + .DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_COLUMN_MAPPING_SCHEMA_CHANGES.key), + cause = Some(cause)) + } + + def unsupportedDeltaTableForPathHadoopConf(unsupportedOptions: Map[String, String]): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_TABLE_FOR_PATH_UNSUPPORTED_HADOOP_CONF", + messageParameters = Array( + DeltaTableUtils.validDeltaTableHadoopPrefixes.mkString("[", ",", "]"), + unsupportedOptions.mkString(",")) + ) + } + + def cloneOnRelativePath(path: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_INVALID_CLONE_PATH", + messageParameters = Array(path)) + } + + def cloneAmbiguousTarget(externalLocation: String, targetIdent: TableIdentifier): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_CLONE_AMBIGUOUS_TARGET", + messageParameters = Array(externalLocation, s"$targetIdent") + ) + } + + def cloneFromUnsupportedSource(name: String, format: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CLONE_UNSUPPORTED_SOURCE", + messageParameters = Array(name, format) + ) + } + + def cloneReplaceUnsupported(tableIdentifier: TableIdentifier): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_UNSUPPORTED_CLONE_REPLACE_SAME_TABLE", + messageParameters = Array(s"$tableIdentifier") + ) + } + + def cloneReplaceNonEmptyTable: Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_UNSUPPORTED_NON_EMPTY_CLONE" + ) + } + + def partitionSchemaInIcebergTables: Throwable = { + new DeltaIllegalArgumentException(errorClass = "DELTA_PARTITION_SCHEMA_IN_ICEBERG_TABLES") + } + + def icebergClassMissing(sparkConf: SparkConf, cause: Throwable): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_MISSING_ICEBERG_CLASS", + messageParameters = Array( + generateDocsLink( + sparkConf, 
"/delta-utility.html#convert-a-parquet-table-to-a-delta-table")), + cause = cause) + } + + def streamingMetadataEvolutionException( + newSchema: StructType, + newConfigs: Map[String, String], + newProtocol: Protocol): Throwable = { + new DeltaRuntimeException( + errorClass = "DELTA_STREAMING_METADATA_EVOLUTION", + messageParameters = Array( + formatSchema(newSchema), + newConfigs.map { case (k, v) => + s"$k:$v" + }.mkString(", "), + newProtocol.simpleString + )) + } + + def streamingMetadataLogInitFailedIncompatibleMetadataException( + startVersion: Long, + endVersion: Long): Throwable = { + new DeltaRuntimeException( + errorClass = "DELTA_STREAMING_SCHEMA_LOG_INIT_FAILED_INCOMPATIBLE_METADATA", + messageParameters = Array(startVersion.toString, endVersion.toString) + ) + } + + def failToDeserializeSchemaLog(location: String): Throwable = { + new DeltaRuntimeException( + errorClass = "DELTA_STREAMING_SCHEMA_LOG_DESERIALIZE_FAILED", + messageParameters = Array(location) + ) + } + + def failToParseSchemaLog: Throwable = { + new DeltaRuntimeException(errorClass = "DELTA_STREAMING_SCHEMA_LOG_PARSE_SCHEMA_FAILED") + } + + def sourcesWithConflictingSchemaTrackingLocation( + schemaTrackingLocatiob: String, + tableOrPath: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_STREAMING_SCHEMA_LOCATION_CONFLICT", + messageParameters = Array(schemaTrackingLocatiob, tableOrPath)) + } + + def incompatibleSchemaLogPartitionSchema( + persistedPartitionSchema: StructType, + tablePartitionSchema: StructType): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_STREAMING_SCHEMA_LOG_INCOMPATIBLE_PARTITION_SCHEMA", + messageParameters = Array(persistedPartitionSchema.json, tablePartitionSchema.json)) + } + + def incompatibleSchemaLogDeltaTable( + persistedTableId: String, + tableId: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_STREAMING_SCHEMA_LOG_INCOMPATIBLE_DELTA_TABLE_ID", + messageParameters = Array(persistedTableId, tableId)) + } + + def schemaTrackingLocationNotUnderCheckpointLocation( + schemaTrackingLocation: String, + checkpointLocation: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_STREAMING_SCHEMA_LOCATION_NOT_UNDER_CHECKPOINT", + messageParameters = Array(schemaTrackingLocation, checkpointLocation)) + } + + def cannotContinueStreamingPostSchemaEvolution( + nonAdditiveSchemaChangeOpType: String, + previousSchemaChangeVersion: Long, + currentSchemaChangeVersion: Long, + checkpointHash: Int, + allowAllMode: String, + opTypeSpecificAllowMode: String): Throwable = { + val allowAllSqlConfKey = s"${DeltaSQLConf.SQL_CONF_PREFIX}.streaming.$allowAllMode" + new DeltaRuntimeException( + errorClass = "DELTA_STREAMING_CANNOT_CONTINUE_PROCESSING_POST_SCHEMA_EVOLUTION", + messageParameters = Array( + nonAdditiveSchemaChangeOpType, + previousSchemaChangeVersion.toString, + currentSchemaChangeVersion.toString, + currentSchemaChangeVersion.toString, + // Allow this stream to pass for this particular version + s"$allowAllSqlConfKey.ckpt_$checkpointHash", + currentSchemaChangeVersion.toString, + // Allow this stream to pass + s"$allowAllSqlConfKey.ckpt_$checkpointHash", + "always", + // Allow all streams to pass + allowAllSqlConfKey, + "always", + allowAllMode, + opTypeSpecificAllowMode + ) + ) + } + + def cannotReconstructPathFromURI(uri: String): Throwable = + new DeltaRuntimeException( + errorClass = "DELTA_CANNOT_RECONSTRUCT_PATH_FROM_URI", + messageParameters = Array(uri)) + + def deletionVectorCardinalityMismatch(): 
Throwable = { + new DeltaChecksumException( + errorClass = "DELTA_DELETION_VECTOR_CARDINALITY_MISMATCH", + messageParameters = Array.empty, + pos = 0 + ) + } + + def deletionVectorSizeMismatch(): Throwable = { + new DeltaChecksumException( + errorClass = "DELTA_DELETION_VECTOR_SIZE_MISMATCH", + messageParameters = Array.empty, + pos = 0) + } + + def deletionVectorInvalidRowIndex(): Throwable = { + new DeltaChecksumException( + errorClass = "DELTA_DELETION_VECTOR_INVALID_ROW_INDEX", + messageParameters = Array.empty, + pos = 0) + } + + def deletionVectorChecksumMismatch(): Throwable = { + new DeltaChecksumException( + errorClass = "DELTA_DELETION_VECTOR_CHECKSUM_MISMATCH", + messageParameters = Array.empty, + pos = 0) + } + + def statsRecomputeNotSupportedOnDvTables(): Throwable = { + new DeltaCommandUnsupportedWithDeletionVectorsException( + errorClass = "DELTA_UNSUPPORTED_STATS_RECOMPUTE_WITH_DELETION_VECTORS", + messageParameters = Array.empty + ) + } + + def addFileWithDVsAndTightBoundsException(): Throwable = + new DeltaIllegalStateException( + errorClass = "DELTA_ADDING_DELETION_VECTORS_WITH_TIGHT_BOUNDS_DISALLOWED") + + def addFileWithDVsMissingNumRecordsException: Throwable = + new DeltaRuntimeException(errorClass = "DELTA_DELETION_VECTOR_MISSING_NUM_RECORDS") + + def generateNotSupportedWithDeletionVectors(): Throwable = + new DeltaCommandUnsupportedWithDeletionVectorsException( + errorClass = "DELTA_UNSUPPORTED_GENERATE_WITH_DELETION_VECTORS") + + def addingDeletionVectorsDisallowedException(): Throwable = + new DeltaCommandUnsupportedWithDeletionVectorsException( + errorClass = "DELTA_ADDING_DELETION_VECTORS_DISALLOWED") + + def unsupportedExpression( + causedBy: String, + expType: DataType, + supportedTypes: Seq[String]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_UNSUPPORTED_EXPRESSION", + messageParameters = Array(s"$expType", causedBy, supportedTypes.mkString(",")) + ) + } + + def rowIdAssignmentWithoutStats: Throwable = { + new DeltaIllegalStateException(errorClass = "DELTA_ROW_ID_ASSIGNMENT_WITHOUT_STATS") + } + + def addingColumnWithInternalNameFailed(colName: String): Throwable = { + new DeltaRuntimeException( + errorClass = "DELTA_ADDING_COLUMN_WITH_INTERNAL_NAME_FAILED", + messageParameters = Array(colName) + ) + } + + def materializedRowIdMetadataMissing(tableName: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_MATERIALIZED_ROW_TRACKING_COLUMN_NAME_MISSING", + messageParameters = Array("Row ID", tableName) + ) + } + + def materializedRowCommitVersionMetadataMissing(tableName: String): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_MATERIALIZED_ROW_TRACKING_COLUMN_NAME_MISSING", + messageParameters = Array("Row Commit Version", tableName) + ) + } + + def domainMetadataDuplicate(domainName: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_DUPLICATE_DOMAIN_METADATA_INTERNAL_ERROR", + messageParameters = Array(domainName) + ) + } + + def domainMetadataTableFeatureNotSupported(domainNames: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_DOMAIN_METADATA_NOT_SUPPORTED", + messageParameters = Array(domainNames) + ) + } + + def uniFormIcebergRequiresIcebergCompat(): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_UNIVERSAL_FORMAT_VIOLATION", + messageParameters = Array( + UniversalFormat.ICEBERG_FORMAT, + "Requires IcebergCompat to be explicitly enabled in order for Universal Format (Iceberg) " + + "to be 
enabled on an existing table. To enable IcebergCompatV2, set the table property " + + "'delta.enableIcebergCompatV2' = 'true'." + ) + ) + } + + def icebergCompatVersionMutualExclusive(version: Int): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.VERSION_MUTUAL_EXCLUSIVE", + messageParameters = Array(version.toString) + ) + } + + def icebergCompatChangeVersionNeedRewrite(version: Int, newVersion: Int): Throwable = { + val newVersionString = newVersion.toString + new DeltaUnsupportedOperationException( + errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.CHANGE_VERSION_NEED_REWRITE", + messageParameters = Array(newVersionString, newVersionString, newVersionString, + newVersionString) + ) + } + + def icebergCompatVersionNotSupportedException( + currVersion: Int, + maxVersion: Int): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.COMPAT_VERSION_NOT_SUPPORTED", + messageParameters = Array( + currVersion.toString, + currVersion.toString, + maxVersion.toString + ) + ) + } + + def icebergCompatReorgAddFileTagsMissingException( + tableVersion: Long, + icebergCompatVersion: Int, + addFilesCount: Long, + addFilesWithTagsCount: Long): Throwable = { + new DeltaIllegalStateException( + errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.FILES_NOT_ICEBERG_COMPAT", + messageParameters = Array( + icebergCompatVersion.toString, + icebergCompatVersion.toString, + addFilesCount.toString, + tableVersion.toString, + (addFilesCount - addFilesWithTagsCount).toString, + icebergCompatVersion.toString + ) + ) + } + + def icebergCompatDataFileRewriteFailedException( + icebergCompatVersion: Int, + cause: Throwable): Throwable = { + new DeltaIllegalStateException( + errorClass = "", + messageParameters = Array( + icebergCompatVersion.toString, + icebergCompatVersion.toString + ), + cause + ) + } + + def icebergCompatReplacePartitionedTableException( + version: Int, + prevPartitionCols: Seq[String], + newPartitionCols: Seq[String]): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.REPLACE_TABLE_CHANGE_PARTITION_NAMES", + messageParameters = Array( + version.toString, + version.toString, + prevPartitionCols.mkString("(", ",", ")"), + newPartitionCols.mkString("(", ",", ")") + ) + ) + } + + def icebergCompatUnsupportedDataTypeException( + version: Int, dataType: DataType, schema: StructType): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.UNSUPPORTED_DATA_TYPE", + messageParameters = Array(version.toString, version.toString, + dataType.typeName, schema.treeString) + ) + } + + def icebergCompatMissingRequiredTableFeatureException( + version: Int, tf: TableFeature): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.MISSING_REQUIRED_TABLE_FEATURE", + messageParameters = Array(version.toString, version.toString, tf.toString) + ) + } + + def icebergCompatDisablingRequiredTableFeatureException( + version: Int, tf: TableFeature): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.DISABLING_REQUIRED_TABLE_FEATURE", + messageParameters = Array(version.toString, version.toString, tf.toString, version.toString) + ) + } + + def icebergCompatIncompatibleTableFeatureException( + version: Int, tf: TableFeature): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = 
"DELTA_ICEBERG_COMPAT_VIOLATION.INCOMPATIBLE_TABLE_FEATURE", + messageParameters = Array(version.toString, version.toString, tf.toString) + ) + } + + def icebergCompatDeletionVectorsShouldBeDisabledException(version: Int): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.DELETION_VECTORS_SHOULD_BE_DISABLED", + messageParameters = Array(version.toString, version.toString) + ) + } + + def icebergCompatDeletionVectorsNotPurgedException(version: Int): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.DELETION_VECTORS_NOT_PURGED", + messageParameters = Array(version.toString, version.toString) + ) + } + + def icebergCompatWrongRequiredTablePropertyException( + version: Int, + key: String, + actualValue: String, + requiredValue: String): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.WRONG_REQUIRED_TABLE_PROPERTY", + messageParameters = Array(version.toString, version.toString, key, requiredValue, actualValue) + ) + } + + def invalidAutoCompactType(value: String): Throwable = { + new DeltaIllegalArgumentException( + errorClass = "DELTA_INVALID_AUTO_COMPACT_TYPE", + messageParameters = Array(value, AutoCompactType.ALLOWED_VALUES.mkString("(", ",", ")")) + ) + } + + def clusterByInvalidNumColumnsException( + numColumnsLimit: Int, + actualNumColumns: Int): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CLUSTER_BY_INVALID_NUM_COLUMNS", + messageParameters = Array(numColumnsLimit.toString, actualNumColumns.toString) + ) + } + + def clusteringColumnMissingStats( + clusteringColumnWithoutStats: String, + statsSchema: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CLUSTERING_COLUMN_MISSING_STATS", + messageParameters = Array(clusteringColumnWithoutStats, statsSchema) + ) + } + + def clusteringColumnsMismatchException( + providedClusteringColumns: String, + existingClusteringColumns: String): Throwable = { + new DeltaAnalysisException( + "DELTA_CLUSTERING_COLUMNS_MISMATCH", + Array(providedClusteringColumns, existingClusteringColumns) + ) + } + + def dropClusteringColumnNotSupported(droppingClusteringCols: Seq[String]): Throwable = { + new DeltaAnalysisException( + "DELTA_UNSUPPORTED_DROP_CLUSTERING_COLUMN", + Array(droppingClusteringCols.mkString(","))) + } + + def replacingClusteredTableWithPartitionedTableNotAllowed(): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CLUSTERING_REPLACE_TABLE_WITH_PARTITIONED_TABLE", + messageParameters = Array.empty) + } + + def clusteringWithPartitionPredicatesException(predicates: Seq[String]): Throwable = { + new DeltaUnsupportedOperationException( + errorClass = "DELTA_CLUSTERING_WITH_PARTITION_PREDICATE", + messageParameters = Array(s"${predicates.mkString(" ")}")) + } + + def clusteringWithZOrderByException(zOrderBy: Seq[UnresolvedAttribute]): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CLUSTERING_WITH_ZORDER_BY", + messageParameters = Array(s"${zOrderBy.map(_.name).mkString(", ")}")) + } + + def clusteringTablePreviewDisabledException(): Throwable = { + val msg = s""" + |A clustered table is currently in preview and is disabled by default. Please set + |${DeltaSQLConf.DELTA_CLUSTERING_TABLE_PREVIEW_ENABLED.key} to true to enable it. + |Note that a clustered table is not recommended for production use (e.g., unsupported + |incremental clustering). 
+ |""".stripMargin.replace("\n", " ") + new UnsupportedOperationException(msg) + } + + def alterTableSetClusteringTableFeatureException(tableFeature: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_ALTER_TABLE_SET_CLUSTERING_TABLE_FEATURE_NOT_ALLOWED", + messageParameters = Array(tableFeature)) + } + + def createTableSetClusteringTableFeatureException(tableFeature: String): Throwable = { + new DeltaAnalysisException( + errorClass = "DELTA_CREATE_TABLE_SET_CLUSTERING_TABLE_FEATURE_NOT_ALLOWED", + messageParameters = Array(tableFeature)) + } +} + +object DeltaErrors extends DeltaErrorsBase +/** The basic class for all Tahoe commit conflict exceptions. */ +abstract class DeltaConcurrentModificationException(message: String) + extends ConcurrentModificationException(message) { + + /** + * Type of the commit conflict. + */ + def conflictType: String = this.getClass.getSimpleName.stripSuffix("Exception") +} + +/** + * This class is kept for backward compatibility. + * Use [[io.delta.exceptions.ConcurrentWriteException]] instead. + */ +class ConcurrentWriteException(message: String) + extends io.delta.exceptions.DeltaConcurrentModificationException(message) { + def this(conflictingCommit: Option[CommitInfo]) = this( + DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + s"A concurrent transaction has written new data since the current transaction " + + s"read the table. Please try the operation again.", + conflictingCommit)) +} + +/** + * Thrown when time travelling to a version that does not exist in the Delta Log. + * @param userVersion - the version time travelling to + * @param earliest - earliest version available in the Delta Log + * @param latest - The latest version available in the Delta Log + */ +case class VersionNotFoundException( + userVersion: Long, + earliest: Long, + latest: Long) extends AnalysisException( + s"Cannot time travel Delta table to version $userVersion. " + + s"Available versions: [$earliest, $latest]." + ) + +/** + * This class is kept for backward compatibility. + * Use [[io.delta.exceptions.MetadataChangedException]] instead. + */ +class MetadataChangedException(message: String) + extends io.delta.exceptions.DeltaConcurrentModificationException(message) { + def this(conflictingCommit: Option[CommitInfo]) = this( + DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + "The metadata of the Delta table has been changed by a concurrent update. " + + "Please try the operation again.", + conflictingCommit)) +} + +/** + * This class is kept for backward compatibility. + * Use [[io.delta.exceptions.ProtocolChangedException]] instead. + */ +class ProtocolChangedException(message: String) + extends io.delta.exceptions.DeltaConcurrentModificationException(message) { + def this(conflictingCommit: Option[CommitInfo]) = this( + DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + "The protocol version of the Delta table has been changed by a concurrent update. " + + "Please try the operation again.", + conflictingCommit)) +} + +/** + * This class is kept for backward compatibility. + * Use [[io.delta.exceptions.ConcurrentAppendException]] instead. 
+ */ +class ConcurrentAppendException(message: String) + extends io.delta.exceptions.DeltaConcurrentModificationException(message) { + def this( + conflictingCommit: Option[CommitInfo], + partition: String, + customRetryMsg: Option[String] = None) = this( + DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + s"Files were added to $partition by a concurrent update. " + + customRetryMsg.getOrElse("Please try the operation again."), + conflictingCommit)) +} + +/** + * This class is kept for backward compatibility. + * Use [[io.delta.exceptions.ConcurrentDeleteReadException]] instead. + */ +class ConcurrentDeleteReadException(message: String) + extends io.delta.exceptions.DeltaConcurrentModificationException(message) { + def this(conflictingCommit: Option[CommitInfo], file: String) = this( + DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + "This transaction attempted to read one or more files that were deleted" + + s" (for example $file) by a concurrent update. Please try the operation again.", + conflictingCommit)) +} + +/** + * This class is kept for backward compatibility. + * Use [[io.delta.exceptions.ConcurrentDeleteDeleteException]] instead. + */ +class ConcurrentDeleteDeleteException(message: String) + extends io.delta.exceptions.DeltaConcurrentModificationException(message) { + def this(conflictingCommit: Option[CommitInfo], file: String) = this( + DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + "This transaction attempted to delete one or more files that were deleted " + + s"(for example $file) by a concurrent update. Please try the operation again.", + conflictingCommit)) +} + +/** + * This class is kept for backward compatibility. + * Use [[io.delta.exceptions.ConcurrentTransactionException]] instead. + */ +class ConcurrentTransactionException(message: String) + extends io.delta.exceptions.DeltaConcurrentModificationException(message) { + def this(conflictingCommit: Option[CommitInfo]) = this( + DeltaErrors.concurrentModificationExceptionMsg( + SparkEnv.get.conf, + s"This error occurs when multiple streaming queries are using the same checkpoint to write " + + "into this table. Did you run multiple instances of the same streaming query" + + " at the same time?", + conflictingCommit)) +} + +/** A helper class in building a helpful error message in case of metadata mismatches. */ +class MetadataMismatchErrorBuilder { + private var bits: Seq[String] = Nil + + def addSchemaMismatch(original: StructType, data: StructType, id: String): Unit = { + bits ++= + s"""A schema mismatch detected when writing to the Delta table (Table ID: $id). + |To enable schema migration using DataFrameWriter or DataStreamWriter, please set: + |'.option("${DeltaOptions.MERGE_SCHEMA_OPTION}", "true")'. + |For other operations, set the session configuration + |${DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key} to "true". See the documentation + |specific to the operation for details. + | + |Table schema: + |${DeltaErrors.formatSchema(original)} + | + |Data schema: + |${DeltaErrors.formatSchema(data)} + """.stripMargin :: Nil + } + + def addPartitioningMismatch(original: Seq[String], provided: Seq[String]): Unit = { + bits ++= + s"""Partition columns do not match the partition columns of the table. 
+ |Given: ${DeltaErrors.formatColumnList(provided)} + |Table: ${DeltaErrors.formatColumnList(original)} + """.stripMargin :: Nil + } + + def addOverwriteBit(): Unit = { + bits ++= + s"""To overwrite your schema or change partitioning, please set: + |'.option("${DeltaOptions.OVERWRITE_SCHEMA_OPTION}", "true")'. + | + |Note that the schema can't be overwritten when using + |'${DeltaOptions.REPLACE_WHERE_OPTION}'. + """.stripMargin :: Nil + } + + def finalizeAndThrow(conf: SQLConf): Unit = { + throw new AnalysisException(bits.mkString("\n")) + } +} + +class DeltaColumnMappingUnsupportedException( + errorClass: String, + messageParameters: Array[String] = Array.empty) + extends ColumnMappingUnsupportedException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters)) + with DeltaThrowable { + override def getErrorClass: String = errorClass +} + +class DeltaFileNotFoundException( + errorClass: String, + messageParameters: Array[String] = Array.empty) + extends FileNotFoundException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters)) + with DeltaThrowable { + override def getErrorClass: String = errorClass +} + +class DeltaFileAlreadyExistsException( + errorClass: String, + messageParameters: Array[String] = Array.empty) + extends FileAlreadyExistsException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters)) + with DeltaThrowable { + override def getErrorClass: String = errorClass +} + +class DeltaIOException( + errorClass: String, + messageParameters: Array[String] = Array.empty, + cause: Throwable = null) + extends IOException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters), cause) + with DeltaThrowable { + override def getErrorClass: String = errorClass +} + +class DeltaIllegalStateException( + errorClass: String, + messageParameters: Array[String] = Array.empty, + cause: Throwable = null) + extends IllegalStateException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters), cause) + with DeltaThrowable { + override def getErrorClass: String = errorClass + + override def getMessageParameters: java.util.Map[String, String] = { + DeltaThrowableHelper.getParameterNames(errorClass, null) + .zip(messageParameters).toMap.asJava + } +} + +class DeltaIndexOutOfBoundsException( + errorClass: String, + messageParameters: Array[String] = Array.empty) + extends IndexOutOfBoundsException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters)) + with DeltaThrowable { + override def getErrorClass: String = errorClass +} + +/** Thrown when the protocol version of a table is greater than supported by this client. 
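+ *
+ * A construction sketch with made-up version numbers (illustrative only):
+ * {{{
+ *   throw InvalidProtocolVersionException(
+ *     tableNameOrPath = "/tmp/delta/events",
+ *     readerRequiredVersion = 4,
+ *     writerRequiredVersion = 8,
+ *     supportedReaderVersions = Seq(1, 2, 3),
+ *     supportedWriterVersions = Seq(1, 2, 3, 4, 5, 6, 7))
+ * }}}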
*/ +case class InvalidProtocolVersionException( + tableNameOrPath: String, + readerRequiredVersion: Int, + writerRequiredVersion: Int, + supportedReaderVersions: Seq[Int], + supportedWriterVersions: Seq[Int]) + extends RuntimeException(DeltaThrowableHelper.getMessage( + errorClass = "DELTA_INVALID_PROTOCOL_VERSION", + messageParameters = Array( + tableNameOrPath, + readerRequiredVersion.toString, + writerRequiredVersion.toString, + io.delta.VERSION, + supportedReaderVersions.sorted.mkString(", "), + supportedWriterVersions.sorted.mkString(", ")))) + with DeltaThrowable { + override def getErrorClass: String = "DELTA_INVALID_PROTOCOL_VERSION" +} + +class ProtocolDowngradeException(oldProtocol: Protocol, newProtocol: Protocol) + extends RuntimeException(DeltaThrowableHelper.getMessage( + errorClass = "DELTA_INVALID_PROTOCOL_DOWNGRADE", + messageParameters = Array(s"(${oldProtocol.simpleString})", s"(${newProtocol.simpleString})") + )) with DeltaThrowable { + override def getErrorClass: String = "DELTA_INVALID_PROTOCOL_DOWNGRADE" +} + +class DeltaTableFeatureException( + errorClass: String, + messageParameters: Array[String] = Array.empty) + extends DeltaRuntimeException(errorClass, messageParameters) + +case class DeltaUnsupportedTableFeatureException( + errorClass: String, + tableNameOrPath: String, + unsupported: Iterable[String]) + extends DeltaTableFeatureException( + errorClass, + Array(tableNameOrPath, io.delta.VERSION, unsupported.mkString(", "))) + +class DeltaRuntimeException( + errorClass: String, + val messageParameters: Array[String] = Array.empty) + extends RuntimeException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters)) + with DeltaThrowable { + override def getErrorClass: String = errorClass + + override def getMessageParameters: java.util.Map[String, String] = + DeltaThrowableHelper.getParameterNames(errorClass, null) + .zip(messageParameters).toMap.asJava +} + +class DeltaSparkException( + errorClass: String, + messageParameters: Array[String] = Array.empty, + cause: Throwable = null) + extends SparkException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters), cause) + with DeltaThrowable { + override def getErrorClass: String = errorClass +} + +class DeltaNoSuchTableException( + errorClass: String, + messageParameters: Array[String] = Array.empty) + extends AnalysisException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters)) + with DeltaThrowable { + override def getErrorClass: String = errorClass +} + +class DeltaCommandUnsupportedWithDeletionVectorsException( + errorClass: String, + messageParameters: Array[String] = Array.empty) + extends UnsupportedOperationException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters)) + with DeltaThrowable { + override def getErrorClass: String = errorClass +} + +sealed trait DeltaTablePropertyValidationFailedSubClass { + def tag: String + /** Can be overridden in case subclasses need the table name as well. 
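+ * (See `ExistingDeletionVectorsWithIncrementalManifestGeneration` below for an override that
+ * passes the table name twice.)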
*/ + def messageParameters(table: String): Array[String] = Array(table) +} + +final object DeltaTablePropertyValidationFailedSubClass { + final case object PersistentDeletionVectorsWithIncrementalManifestGeneration + extends DeltaTablePropertyValidationFailedSubClass { + override val tag = "PERSISTENT_DELETION_VECTORS_WITH_INCREMENTAL_MANIFEST_GENERATION" + } + final case object ExistingDeletionVectorsWithIncrementalManifestGeneration + extends DeltaTablePropertyValidationFailedSubClass { + override val tag = "EXISTING_DELETION_VECTORS_WITH_INCREMENTAL_MANIFEST_GENERATION" + /** This subclass needs the table parameters in two places. */ + override def messageParameters(table: String): Array[String] = Array(table, table) + } + final case object PersistentDeletionVectorsInNonParquetTable + extends DeltaTablePropertyValidationFailedSubClass { + override val tag = "PERSISTENT_DELETION_VECTORS_IN_NON_PARQUET_TABLE" + } +} + +class DeltaTablePropertyValidationFailedException( + table: String, + subClass: DeltaTablePropertyValidationFailedSubClass) + extends RuntimeException(DeltaThrowableHelper.getMessage( + errorClass = "DELTA_VIOLATE_TABLE_PROPERTY_VALIDATION_FAILED" + "." + subClass.tag, + messageParameters = subClass.messageParameters(table))) + with DeltaThrowable { + + override def getMessageParameters: java.util.Map[String, String] = { + DeltaThrowableHelper.getParameterNames( + "DELTA_VIOLATE_TABLE_PROPERTY_VALIDATION_FAILED", + subClass.tag).zip(subClass.messageParameters(table)).toMap.asJava + } + + override def getErrorClass: String = + "DELTA_VIOLATE_TABLE_PROPERTY_VALIDATION_FAILED." + subClass.tag +} + +/** Errors thrown around column mapping. */ +class ColumnMappingUnsupportedException(msg: String) + extends UnsupportedOperationException(msg) +case class ColumnMappingException(msg: String, mode: DeltaColumnMappingMode) + extends AnalysisException(msg) + +class DeltaChecksumException( + errorClass: String, + messageParameters: Array[String] = Array.empty, + pos: Long) + extends ChecksumException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters), pos) + with DeltaThrowable { + override def getErrorClass: String = errorClass +} + +/** + * Errors thrown when an operation is not supported with column mapping schema changes + * (rename / drop column). + * + * To make compatible with existing behavior for those who accidentally has already used this + * operation, user should always be able to use `escapeConfigName` to fall back at own risk. + */ +class DeltaStreamingColumnMappingSchemaIncompatibleException( + val readSchema: StructType, + val incompatibleSchema: StructType, + val docLink: String, + val enableNonAdditiveSchemaEvolution: Boolean = false, + val additionalProperties: Map[String, String] = Map.empty) + extends DeltaUnsupportedOperationException( + errorClass = if (enableNonAdditiveSchemaEvolution) { + "DELTA_STREAMING_INCOMPATIBLE_SCHEMA_CHANGE_USE_SCHEMA_LOG" + } else { + "DELTA_STREAMING_INCOMPATIBLE_SCHEMA_CHANGE" + }, + messageParameters = Array( + docLink, + readSchema.json, + incompatibleSchema.json) + ) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaFileFormat.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaFileFormat.scala new file mode 100644 index 00000000000..cc52a5d1637 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaFileFormat.scala @@ -0,0 +1,38 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.datasources.FileFormat + +trait DeltaFileFormat { + // TODO: Add support for column mapping + /** Return the current Spark session used. */ + protected def spark: SparkSession + + /** + * Build the underlying Spark `FileFormat` of the Delta table with specified metadata. + * + * With column mapping, some properties of the underlying file format might change during + * transaction, so if possible, we should always pass in the latest transaction's metadata + * instead of one from a past snapshot. + */ + def fileFormat(protocol: Protocol, metadata: Metadata): FileFormat = + new DeltaParquetFileFormat(protocol, metadata) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaFileProviderUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaFileProviderUtils.scala new file mode 100644 index 00000000000..cb14d3fb7f9 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaFileProviderUtils.scala @@ -0,0 +1,92 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.actions.Action +import org.apache.spark.sql.delta.storage.ClosableIterator +import org.apache.spark.sql.delta.util.FileNames.DeltaFile +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileStatus + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.JsonToStructs +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.StructType +import org.apache.spark.unsafe.types.UTF8String + +object DeltaFileProviderUtils { + + protected def readThreadPool = SnapshotManagement.deltaLogAsyncUpdateThreadPool + + /** Put any future parsing options here. 
*/ + val jsonStatsParseOption = Map.empty[String, String] + + private[delta] def createJsonStatsParser(schemaToUse: StructType): String => InternalRow = { + val parser = JsonToStructs( + schema = schemaToUse, + options = jsonStatsParseOption, + child = null, + timeZoneId = Some(SQLConf.get.sessionLocalTimeZone) + ) + (json: String) => { + val utf8json = UTF8String.fromString(json) + parser.nullSafeEval(utf8json).asInstanceOf[InternalRow] + } + } + + /** + * Get the Delta json files present in the delta log in the range [startVersion, endVersion]. + * Returns the files in sorted order, and throws if any in the range are missing. + */ + def getDeltaFilesInVersionRange( + spark: SparkSession, + deltaLog: DeltaLog, + startVersion: Long, + endVersion: Long): Seq[FileStatus] = { + val result = deltaLog + .listFrom(startVersion) + .collect { case DeltaFile(fs, v) if v <= endVersion => (fs, v) } + .toSeq + // Verify that we got the entire range requested + if (result.size.toLong != endVersion - startVersion + 1) { + throw DeltaErrors.deltaVersionsNotContiguousException(spark, result.map(_._2)) + } + result.map(_._1) + } + + /** Helper method to read and parse the delta files parallelly into [[Action]]s. */ + def parallelReadAndParseDeltaFilesAsIterator( + deltaLog: DeltaLog, + spark: SparkSession, + files: Seq[FileStatus]): Seq[ClosableIterator[String]] = { + val hadoopConf = deltaLog.newDeltaHadoopConf() + parallelReadDeltaFilesBase(spark, files, hadoopConf, { file: FileStatus => + deltaLog.store.readAsIterator(file, hadoopConf) + }) + } + + protected def parallelReadDeltaFilesBase[A]( + spark: SparkSession, + files: Seq[FileStatus], + hadoopConf: Configuration, + f: FileStatus => A): Seq[A] = { + readThreadPool.parallelMap(spark, files) { file => + f(file) + }.toSeq + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaHistoryManager.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaHistoryManager.scala new file mode 100644 index 00000000000..3542ea05cf9 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaHistoryManager.scala @@ -0,0 +1,619 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.FileNotFoundException +import java.sql.Timestamp + +import scala.collection.mutable + +import org.apache.spark.sql.delta.actions.{Action, CommitInfo, CommitMarker, JobInfo, NotebookInfo} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.storage.LogStore +import org.apache.spark.sql.delta.util.{DateTimeUtils, FileNames, TimestampFormatter} +import org.apache.spark.sql.delta.util.FileNames._ +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.SparkEnv +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.SerializableConfiguration + +/** + * This class keeps track of the versions of commits and their timestamps for a Delta table to + * help with operations like describing the history of a table. + * + * @param deltaLog The transaction log of this table + * @param maxKeysPerList How many commits to list when performing a parallel search. Exposed for + * tests. Currently set to `1000`, which is the maximum number of keys returned by S3 + * per list call. Azure can return `5000`, so we use the lower S3 limit. + */ +class DeltaHistoryManager( + deltaLog: DeltaLog, + maxKeysPerList: Int = 1000) extends DeltaLogging { + + private def spark: SparkSession = SparkSession.active + + private def getSerializableHadoopConf: SerializableConfiguration = { + new SerializableConfiguration(deltaLog.newDeltaHadoopConf()) + } + + import DeltaHistoryManager._ + + /** + * Returns the information of the latest `limit` commits made to this table in reverse + * chronological order. + */ + def getHistory(limitOpt: Option[Int]): Seq[DeltaHistory] = { + val listStart = limitOpt.map { limit => + math.max(deltaLog.update().version - limit + 1, 0) + }.getOrElse(getEarliestDeltaFile(deltaLog)) + getHistory(listStart) + } + + /** + * Get the commit information of the Delta table for commits in the range `[start, end)`. If + * `end` is `None`, we return all commits from start to now. + */ + def getHistory( + start: Long, + end: Option[Long] = None): Seq[DeltaHistory] = { + import org.apache.spark.sql.delta.implicits._ + val conf = getSerializableHadoopConf + val logPath = deltaLog.logPath.toString + // We assume that commits are contiguous, therefore we try to load all of them in order + val info = spark.range(start, end.getOrElse(deltaLog.update().version) + 1) + .mapPartitions { versions => + val logStore = LogStore(SparkEnv.get.conf, conf.value) + val basePath = new Path(logPath) + val fs = basePath.getFileSystem(conf.value) + versions.flatMap { commit => + try { + val ci = DeltaHistoryManager.getCommitInfo(logStore, basePath, commit, conf.value) + val metadata = fs.getFileStatus(FileNames.deltaFile(basePath, commit)) + Some(ci.withTimestamp(metadata.getModificationTime)) + } catch { + case _: FileNotFoundException => + // We have a race condition where files can be deleted while reading. It's fine to + // skip those files. + None + } + }.map(DeltaHistory.fromCommitInfo) + } + // Spark should return the commits in increasing order as well + monotonizeCommitTimestamps(info.collect()).reverse + } + + /** + * Returns the latest commit that happened at or before `time`. 
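+ *
+ * A minimal usage sketch (illustrative; it assumes a `deltaLog` handle is in scope and reuses
+ * the constructor defined above):
+ * {{{
+ *   val commit = new DeltaHistoryManager(deltaLog)
+ *     .getActiveCommitAtTime(Timestamp.valueOf("2023-06-01 00:00:00"), canReturnLastCommit = true)
+ *   // commit.version can then be fed into time travel (e.g. VERSION AS OF commit.version)
+ * }}}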
+ * @param timestamp The timestamp to search for + * @param canReturnLastCommit Whether we can return the latest version of the table if the + * provided timestamp is after the latest commit + * @param mustBeRecreatable Whether the state at the given commit should be recreatable + * @param canReturnEarliestCommit Whether we can return the earliest commit if no such commit + * exists. + */ + def getActiveCommitAtTime( + timestamp: Timestamp, + canReturnLastCommit: Boolean, + mustBeRecreatable: Boolean = true, + canReturnEarliestCommit: Boolean = false): Commit = { + val time = timestamp.getTime + val earliest = if (mustBeRecreatable) { + getEarliestRecreatableCommit + } else { + getEarliestDeltaFile(deltaLog) + } + val latestVersion = deltaLog.update().version + + // Search for the commit + val commit = if (latestVersion - earliest > 2 * maxKeysPerList) { + parallelSearch(time, earliest, latestVersion + 1) + } else { + val commits = getCommits( + deltaLog.store, + deltaLog.logPath, + earliest, + Some(latestVersion + 1), + deltaLog.newDeltaHadoopConf()) + // If it returns empty, we will fail below with `timestampEarlierThanCommitRetention`. + lastCommitBeforeTimestamp(commits, time).getOrElse(commits.head) + } + + // Error handling + val commitTs = new Timestamp(commit.timestamp) + val timestampFormatter = TimestampFormatter( + DateTimeUtils.getTimeZone(SQLConf.get.sessionLocalTimeZone)) + val tsString = DateTimeUtils.timestampToString( + timestampFormatter, DateTimeUtils.fromJavaTimestamp(commitTs)) + if (commit.timestamp > time && !canReturnEarliestCommit) { + throw DeltaErrors.TimestampEarlierThanCommitRetentionException(timestamp, commitTs, tsString) + } else if (commit.version == latestVersion && !canReturnLastCommit) { + if (commit.timestamp < time) { + throw DeltaErrors.TemporallyUnstableInputException( + timestamp, commitTs, tsString, commit.version) + } + } + commit + } + + /** + * Check whether the given version exists. + * @param mustBeRecreatable whether the snapshot of this version needs to be recreated. + * @param allowOutOfRange whether to allow the version is exceeding the latest snapshot version. + */ + def checkVersionExists( + version: Long, + mustBeRecreatable: Boolean = true, + allowOutOfRange: Boolean = false): Unit = { + val earliest = if (mustBeRecreatable) { + getEarliestRecreatableCommit + } else { + getEarliestDeltaFile(deltaLog) + } + val latest = deltaLog.update().version + if (version < earliest || ((version > latest) && !allowOutOfRange)) { + throw VersionNotFoundException(version, earliest, latest) + } + } + + /** + * Searches for the latest commit with the timestamp, which has happened at or before `time` in + * the range `[start, end)`. + */ + private def parallelSearch( + time: Long, + start: Long, + end: Long): Commit = { + parallelSearch0( + spark, + getSerializableHadoopConf, + deltaLog.logPath.toString, + time, + start, + end, + maxKeysPerList) + } + + /** + * Get the earliest commit, which we can recreate. Note that this version isn't guaranteed to + * exist when performing an action as a concurrent operation can delete the file during cleanup. + * This value must be used as a lower bound. + * + * We search for the earliest checkpoint we have, or whether we have the 0th delta file, because + * that way we can reconstruct the entire history of the table. This method assumes that the + * commits are contiguous. 
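+ *
+ * As an illustration (file names are hypothetical): if versions 0-9 have been cleaned up and the
+ * log only holds `00000000000000000010.checkpoint.parquet` plus the deltas for versions 10, 11
+ * and 12, the earliest recreatable commit is 10: the checkpoint stands in for the deleted
+ * deltas, and later versions can be replayed on top of it, while nothing before 10 can be
+ * rebuilt.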
+ */ + private[delta] def getEarliestRecreatableCommit: Long = { + val files = deltaLog.store.listFrom( + FileNames.listingPrefix(deltaLog.logPath, 0), + deltaLog.newDeltaHadoopConf()) + .filter(f => FileNames.isDeltaFile(f) || FileNames.isCheckpointFile(f)) + + // A map of checkpoint version and number of parts, to number of parts observed + val checkpointMap = new scala.collection.mutable.HashMap[(Long, Int), Int]() + var smallestDeltaVersion = Long.MaxValue + var lastCompleteCheckpoint: Option[Long] = None + + // Iterate through the log files - this will be in order starting from the lowest version. + // Checkpoint files come before deltas, so when we see a checkpoint, we remember it and + // return it once we detect that we've seen a smaller or equal delta version. + while (files.hasNext) { + val nextFilePath = files.next().getPath + if (FileNames.isDeltaFile(nextFilePath)) { + val version = FileNames.deltaVersion(nextFilePath) + if (version == 0L) return version + smallestDeltaVersion = math.min(version, smallestDeltaVersion) + + // Note that we also check this condition at the end of the function - we check it + // here too to try and avoid more file listing when it's unnecessary. + if (lastCompleteCheckpoint.exists(_ >= smallestDeltaVersion - 1)) { + return lastCompleteCheckpoint.get + } + } else if (FileNames.isCheckpointFile(nextFilePath)) { + val checkpointVersion = FileNames.checkpointVersion(nextFilePath) + val parts = FileNames.numCheckpointParts(nextFilePath) + if (parts.isEmpty) { + lastCompleteCheckpoint = Some(checkpointVersion) + } else { + // if we have a multi-part checkpoint, we need to check that all parts exist + val numParts = parts.getOrElse(1) + val preCount = checkpointMap.getOrElse(checkpointVersion -> numParts, 0) + if (numParts == preCount + 1) { + lastCompleteCheckpoint = Some(checkpointVersion) + } + checkpointMap.put(checkpointVersion -> numParts, preCount + 1) + } + } + } + + if (lastCompleteCheckpoint.exists(_ >= smallestDeltaVersion)) { + return lastCompleteCheckpoint.get + } else if (smallestDeltaVersion < Long.MaxValue) { + throw DeltaErrors.noRecreatableHistoryFound(deltaLog.logPath) + } else { + throw DeltaErrors.noHistoryFound(deltaLog.logPath) + } + } +} + +/** Contains many utility methods that can also be executed on Spark executors. */ +object DeltaHistoryManager extends DeltaLogging { + /** Get the persisted commit info for the given delta file. */ + private def getCommitInfo( + logStore: LogStore, + basePath: Path, + version: Long, + hadoopConf: Configuration): CommitInfo = { + val logs = logStore.readAsIterator(FileNames.deltaFile(basePath, version), hadoopConf) + try { + val info = logs.map(Action.fromJson).collectFirst { case c: CommitInfo => c } + if (info.isEmpty) { + CommitInfo.empty(Some(version)) + } else { + info.head.copy(version = Some(version)) + } + } finally { + logs.close() + } + } + + /** + * Get the earliest commit available for this table. Note that this version isn't guaranteed to + * exist when performing an action as a concurrent operation can delete the file during cleanup. + * This value must be used as a lower bound. 
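
The multi-part checkpoint bookkeeping in `getEarliestRecreatableCommit` above (a map keyed by `(version, numParts)` counting observed parts) can be demonstrated in isolation. A minimal sketch with illustrative inputs; single-file checkpoints are modeled as `None` parts, mirroring the `parts.isEmpty` branch:

```scala
object CheckpointCompletenessSketch {
  /**
   * Given checkpoint files observed so far as (version, numParts) pairs, return the versions
   * for which every part has been seen and which are therefore usable as recreation anchors.
   */
  def completeCheckpoints(observed: Seq[(Long, Option[Int])]): Set[Long] = {
    val partCounts = scala.collection.mutable.HashMap.empty[(Long, Int), Int]
    val complete = scala.collection.mutable.Set.empty[Long]
    observed.foreach {
      case (version, None) =>
        complete += version                       // single-file checkpoints are always complete
      case (version, Some(numParts)) =>
        val seen = partCounts.getOrElse(version -> numParts, 0) + 1
        partCounts(version -> numParts) = seen
        if (seen == numParts) complete += version // all parts of a multi-part checkpoint observed
    }
    complete.toSet
  }

  def main(args: Array[String]): Unit = {
    // Version 10 is a 3-part checkpoint with only 2 parts listed so far; version 20 is single-file.
    val observed = Seq(10L -> Some(3), 10L -> Some(3), 20L -> None)
    assert(completeCheckpoints(observed) == Set(20L))
    // Once the third part of version 10 shows up, it becomes usable too.
    assert(completeCheckpoints(observed :+ (10L -> Some(3))) == Set(10L, 20L))
  }
}
```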
+ */ + def getEarliestDeltaFile(deltaLog: DeltaLog): Long = { + deltaLog.store + .listFrom( + path = FileNames.listingPrefix(deltaLog.logPath, 0), + hadoopConf = deltaLog.newDeltaHadoopConf()) + .collectFirst { case DeltaFile(_, version) => version } + .getOrElse { + throw DeltaErrors.noHistoryFound(deltaLog.logPath) + } + } + + /** + * When calling getCommits, the initial few timestamp values may be wrong because they are not + * properly monotonized. Callers should pass a start value at least + * this far behind the first timestamp they care about if they need correct values. + */ + private[delta] val POTENTIALLY_UNMONOTONIZED_TIMESTAMPS = 100 + + /** + * Returns the commit version and timestamps of all commits in `[start, end)`. If `end` is not + * specified, will return all commits that exist after `start`. Will guarantee that the commits + * returned will have both monotonically increasing versions as well as timestamps. + * Exposed for tests. + */ + private[delta] def getCommits( + logStore: LogStore, + logPath: Path, + start: Long, + end: Option[Long], + hadoopConf: Configuration): Array[Commit] = { + val until = end.getOrElse(Long.MaxValue) + val commits = + logStore + .listFrom(listingPrefix(logPath, start), hadoopConf) + .collect { case DeltaFile(file, version) => Commit(version, file.getModificationTime) } + .takeWhile(_.version < until) + + monotonizeCommitTimestamps(commits.toArray) + } + + /** + * Makes sure that the commit timestamps are monotonically increasing with respect to commit + * versions. Requires the input commits to be sorted by the commit version. + */ + private def monotonizeCommitTimestamps[T <: CommitMarker](commits: Array[T]): Array[T] = { + var i = 0 + val length = commits.length + while (i < length - 1) { + val prevTimestamp = commits(i).getTimestamp + assert(commits(i).getVersion < commits(i + 1).getVersion, "Unordered commits provided.") + if (prevTimestamp >= commits(i + 1).getTimestamp) { + logWarning(s"Found Delta commit ${commits(i).getVersion} with a timestamp $prevTimestamp " + + s"which is greater than the next commit timestamp ${commits(i + 1).getTimestamp}.") + commits(i + 1) = commits(i + 1).withTimestamp(prevTimestamp + 1).asInstanceOf[T] + } + i += 1 + } + commits + } + + /** + * Searches for the latest commit with the timestamp, which has happened at or before `time` in + * the range `[start, end)`. The algorithm works as follows: + * 1. We use Spark to list our commit history in parallel `maxKeysPerList` at a time. + * 2. We then perform our search in each fragment of commits containing at most `maxKeysPerList` + * elements. + * 3. All fragments that are before `time` will return the last commit in the fragment. + * 4. All fragments that are after `time` will exit early and return the first commit in the + * fragment. + * 5. The fragment that contains the version we are looking for will return the version we are + * looking for. + * 6. Once all the results are returned from Spark, we make sure that the commit timestamps are + * monotonically increasing across the fragments, because we couldn't adjust for the + * boundaries when working in parallel. + * 7. We then return the version we are looking for in this smaller list on the Driver. + * We will return the first available commit if the condition cannot be met. This method works + * even for boundary commits, and can be best demonstrated through an example: + * Imagine we have commits 999, 1000, 1001, 1002. t_999 < t_1000 but t_1000 > t_1001 and + * t_1001 < t_1002. 
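
The adjustment rule implemented by `monotonizeCommitTimestamps` above (bump a commit's timestamp to its predecessor's plus one millisecond whenever it does not strictly increase) is easiest to see on plain pairs. A self-contained sketch, using illustrative values in which two wall-clock timestamps regressed:

```scala
object MonotonizeTimestampsSketch {
  /** Same adjustment rule as `monotonizeCommitTimestamps`, over plain (version, timestamp) pairs. */
  def monotonize(commits: Seq[(Long, Long)]): Seq[(Long, Long)] =
    commits.scanLeft((-1L, Long.MinValue)) { case ((_, prevTs), (version, ts)) =>
      (version, if (ts <= prevTs) prevTs + 1 else ts)
    }.drop(1)

  def main(args: Array[String]): Unit = {
    // Versions 3 and 4 have wall-clock timestamps (7 and 8) that went backwards after 10.
    val raw      = Seq(0L -> 0L, 1L -> 5L, 2L -> 10L, 3L -> 7L, 4L -> 8L, 5L -> 14L)
    val adjusted = Seq(0L -> 0L, 1L -> 5L, 2L -> 10L, 3L -> 11L, 4L -> 12L, 5L -> 14L)
    assert(monotonize(raw) == adjusted)
  }
}
```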
So at the the boundary, we will need to eventually adjust t_1001. Assume the + * result needs to be t_1001 after the adjustment as t_search < t_1002 and t_search > t_1000. + * What will happen is that the first fragment will return t_1000, and the second fragment will + * return t_1001. On the Driver, we will adjust t_1001 = t_1000 + 1 milliseconds, and our linear + * search will return t_1001. + * + * Placed in the static object to avoid serializability issues. + * + * @param spark The active SparkSession + * @param conf The session specific Hadoop Configuration + * @param logPath The path of the DeltaLog + * @param time The timestamp to search for in milliseconds + * @param start Earliest available commit version (approximate is acceptable) + * @param end Latest available commit version (approximate is acceptable) + * @param step The number with which to chunk each linear search across commits. Provide the + * max number of keys returned by the underlying FileSystem for in a single RPC for + * best results. + */ + private def parallelSearch0( + spark: SparkSession, + conf: SerializableConfiguration, + logPath: String, + time: Long, + start: Long, + end: Long, + step: Long): Commit = { + import org.apache.spark.sql.delta.implicits._ + val possibleCommits = spark.range(start, end, step).mapPartitions { startVersions => + val logStore = LogStore(SparkEnv.get.conf, conf.value) + val basePath = new Path(logPath) + startVersions.map { startVersion => + val commits = getCommits( + logStore, + basePath, + startVersion, + Some(math.min(startVersion + step, end)), + conf.value) + lastCommitBeforeTimestamp(commits, time).getOrElse(commits.head) + } + }.collect() + + // Spark should return the commits in increasing order as well + val commitList = monotonizeCommitTimestamps(possibleCommits) + lastCommitBeforeTimestamp(commitList, time).getOrElse(commitList.head) + } + + /** Returns the latest commit that happened at or before `time`. */ + private def lastCommitBeforeTimestamp(commits: Seq[Commit], time: Long): Option[Commit] = { + val i = commits.lastIndexWhere(_.timestamp <= time) + if (i < 0) None else Some(commits(i)) + } + + /** A helper class to represent the timestamp and version of a commit. */ + case class Commit(version: Long, timestamp: Long) extends CommitMarker { + override def withTimestamp(timestamp: Long): Commit = this.copy(timestamp = timestamp) + + override def getTimestamp: Long = timestamp + + override def getVersion: Long = version + } + + /** + * An iterator that helps select old log files for deletion. It takes the input iterator of log + * files from the earliest file, and returns should-be-deleted files until the given maxTimestamp + * or maxVersion to delete is reached. Note that this iterator may stop deleting files earlier + * than maxTimestamp or maxVersion if it finds that files that need to be preserved for adjusting + * the timestamps of subsequent files. Let's go through an example. Assume the following commit + * history: + * + * +---------+-----------+--------------------+ + * | Version | Timestamp | Adjusted Timestamp | + * +---------+-----------+--------------------+ + * | 0 | 0 | 0 | + * | 1 | 5 | 5 | + * | 2 | 10 | 10 | + * | 3 | 7 | 11 | + * | 4 | 8 | 12 | + * | 5 | 14 | 14 | + * +---------+-----------+--------------------+ + * + * As you can see from the example, we require timestamps to be monotonically increasing with + * respect to the version of the commit, and each commit to have a unique timestamp. 
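
The fragment-then-adjust scheme of `parallelSearch0` can be reproduced locally without Spark. A sketch of the same steps on the boundary example above (commits 999..1002, where t_1000 > t_1001); the grouping stands in for the per-partition work:

```scala
object ParallelSearchFragmentSketch {
  final case class Commit(version: Long, timestamp: Long)

  /** The per-fragment step: last commit at or before `time`, otherwise the fragment's first. */
  def candidateForFragment(fragment: Seq[Commit], time: Long): Commit =
    fragment.filter(_.timestamp <= time).lastOption.getOrElse(fragment.head)

  def main(args: Array[String]): Unit = {
    val commits = Seq(
      Commit(999, 1000L), Commit(1000, 2000L), Commit(1001, 1500L), Commit(1002, 2500L))
    val step = 2
    val time = 2200L // t_1000 < time < t_1002, so the answer should be version 1001

    // Each "executor" fragment handles `step` consecutive versions and returns one candidate.
    val candidates = commits.grouped(step).map(candidateForFragment(_, time)).toSeq
    // Driver side: monotonize across fragment boundaries, then run the same linear search.
    val adjusted = candidates.scanLeft(Commit(-1, Long.MinValue)) { (prev, c) =>
      if (c.timestamp <= prev.timestamp) c.copy(timestamp = prev.timestamp + 1) else c
    }.drop(1)
    val answer = adjusted.filter(_.timestamp <= time).lastOption.getOrElse(adjusted.head)
    assert(answer.version == 1001)
  }
}
```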
If we have + * a commit which doesn't obey one of these two requirements, we adjust the timestamp of that + * commit to be one millisecond greater than the previous commit. + * + * Given the above commit history, the behavior of this iterator will be as follows: + * - For maxVersion = 1 and maxTimestamp = 9, we can delete versions 0 and 1 + * - Until we receive maxVersion >= 4 and maxTimestamp >= 12, we can't delete versions 2 and 3. + * This is because version 2 is used to adjust the timestamps of commits up to version 4. + * - For maxVersion >= 5 and maxTimestamp >= 14 we can delete everything + * The semantics of time travel guarantee that for a given timestamp, the user will ALWAYS get the + * same version. Consider a user asks to get the version at timestamp 11. If all files are there, + * we would return version 3 (timestamp 11) for this query. If we delete versions 0-2, the + * original timestamp of version 3 (7) will not have an anchor to adjust on, and if the time + * travel query is re-executed we would return version 4. This is the motivation behind this + * iterator implementation. + * + * The implementation maintains an internal "maybeDelete" buffer of files that we are unsure of + * deleting because they may be necessary to adjust time of future files. For each file we get + * from the underlying iterator, we check whether it needs time adjustment or not. If it does need + * time adjustment, then we cannot immediately decide whether it is safe to delete that file or + * not and therefore we put it in each the buffer. Then we iteratively peek ahead at the future + * files and accordingly decide whether to delete all the buffered files or retain them. + * + * @param underlying The iterator which gives the list of files in ascending version order + * @param maxTimestamp The timestamp until which we can delete (inclusive). + * @param maxVersion The version until which we can delete (inclusive). + * @param versionGetter A method to get the commit version from the file path. + */ + class BufferingLogDeletionIterator( + underlying: Iterator[FileStatus], + maxTimestamp: Long, + maxVersion: Long, + versionGetter: Path => Long) extends Iterator[FileStatus] { + /** + * Our output iterator + */ + private val filesToDelete = new mutable.Queue[FileStatus]() + /** + * Our intermediate buffer which will buffer files as long as the last file requires a timestamp + * adjustment. + */ + private val maybeDeleteFiles = new mutable.ArrayBuffer[FileStatus]() + private var lastFile: FileStatus = _ + private var hasNextCalled: Boolean = false + + private def init(): Unit = { + if (underlying.hasNext) { + lastFile = underlying.next() + maybeDeleteFiles.append(lastFile) + } + } + + init() + + /** Whether the given file can be deleted based on the version and retention timestamp input. */ + private def shouldDeleteFile(file: FileStatus): Boolean = { + file.getModificationTime <= maxTimestamp && versionGetter(file.getPath) <= maxVersion + } + + /** + * Files need a time adjustment if their timestamp isn't later than the lastFile. + */ + private def needsTimeAdjustment(file: FileStatus): Boolean = { + versionGetter(lastFile.getPath) < versionGetter(file.getPath) && + lastFile.getModificationTime >= file.getModificationTime + } + + /** + * Enqueue the files in the buffer if the last file is safe to delete. Clears the buffer. 
+ */ + private def flushBuffer(): Unit = { + if (maybeDeleteFiles.lastOption.exists(shouldDeleteFile)) { + filesToDelete ++= maybeDeleteFiles + } + maybeDeleteFiles.clear() + } + + /** + * Peeks at the next file in the iterator. Based on the next file we can have three + * possible outcomes: + * - The underlying iterator returned a file, which doesn't require timestamp adjustment. If + * the file in the buffer has expired, flush the buffer to our output queue. + * - The underlying iterator returned a file, which requires timestamp adjustment. In this case, + * we add this file to the buffer and fetch the next file + * - The underlying iterator is empty. In this case, we check the last file in the buffer. If + * it has expired, then flush the buffer to the output queue. + * Once this method returns, the buffer is expected to have 1 file (last file of the + * underlying iterator) unless the underlying iterator is fully consumed. + */ + private def queueFilesInBuffer(): Unit = { + var continueBuffering = true + while (continueBuffering) { + if (!underlying.hasNext) { + flushBuffer() + return + } + + var currentFile = underlying.next() + require(currentFile != null, "FileStatus iterator returned null") + if (needsTimeAdjustment(currentFile)) { + currentFile = new FileStatus( + currentFile.getLen, currentFile.isDirectory, currentFile.getReplication, + currentFile.getBlockSize, lastFile.getModificationTime + 1, currentFile.getPath) + maybeDeleteFiles.append(currentFile) + } else { + flushBuffer() + maybeDeleteFiles.append(currentFile) + continueBuffering = false + } + lastFile = currentFile + } + } + + override def hasNext: Boolean = { + hasNextCalled = true + if (filesToDelete.isEmpty) queueFilesInBuffer() + filesToDelete.nonEmpty + } + + override def next(): FileStatus = { + if (!hasNextCalled) throw new NoSuchElementException() + hasNextCalled = false + filesToDelete.dequeue() + } + } +} + +/** + * class describing the output schema of + * [[org.apache.spark.sql.delta.commands.DescribeDeltaHistoryCommand]] + */ +case class DeltaHistory( + version: Option[Long], + timestamp: Timestamp, + userId: Option[String], + userName: Option[String], + operation: String, + operationParameters: Map[String, String], + job: Option[JobInfo], + notebook: Option[NotebookInfo], + clusterId: Option[String], + readVersion: Option[Long], + isolationLevel: Option[String], + isBlindAppend: Option[Boolean], + operationMetrics: Option[Map[String, String]], + userMetadata: Option[String], + engineInfo: Option[String]) extends CommitMarker { + + override def withTimestamp(timestamp: Long): DeltaHistory = { + this.copy(timestamp = new Timestamp(timestamp)) + } + + override def getTimestamp: Long = timestamp.getTime + + override def getVersion: Long = version.get +} + +object DeltaHistory { + /** Create an instance of [[DeltaHistory]] from [[CommitInfo]] */ + def fromCommitInfo(ci: CommitInfo): DeltaHistory = { + DeltaHistory( + version = ci.version, + timestamp = ci.timestamp, + userId = ci.userId, + userName = ci.userName, + operation = ci.operation, + operationParameters = ci.operationParameters, + job = ci.job, + notebook = ci.notebook, + clusterId = ci.clusterId, + readVersion = ci.readVersion, + isolationLevel = ci.isolationLevel, + isBlindAppend = ci.isBlindAppend, + operationMetrics = ci.operationMetrics, + userMetadata = ci.userMetadata, + engineInfo = ci.engineInfo) + } +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLog.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLog.scala 
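
Before moving on to `DeltaLog.scala`, a usage sketch of `BufferingLogDeletionIterator` on the commit history from its class comment. The `versionOf` helper and file naming are illustrative (the production caller uses `FileNames`); the `FileStatus` constructor is the same one used inside the iterator:

```scala
import org.apache.hadoop.fs.{FileStatus, Path}
import org.apache.spark.sql.delta.DeltaHistoryManager

object LogDeletionIteratorSketch {
  // Hypothetical helper: derive the commit version from a name like "00000000000000000002.json".
  private def versionOf(path: Path): Long =
    path.getName.stripSuffix(".json").toLong

  private def delta(version: Long, modTime: Long): FileStatus =
    new FileStatus(1L, false, 1, 1L, modTime, new Path(f"$version%020d.json"))

  def main(args: Array[String]): Unit = {
    // Versions 0-5 with timestamps 0, 5, 10, 7, 8, 14: versions 3 and 4 need adjustment anchors.
    val files = Iterator(
      delta(0, 0L), delta(1, 5L), delta(2, 10L), delta(3, 7L), delta(4, 8L), delta(5, 14L))
    val it = new DeltaHistoryManager.BufferingLogDeletionIterator(
      files, maxTimestamp = 9L, maxVersion = 1L, versionOf)
    // Only versions 0 and 1 are surfaced for deletion; 2-4 are retained as timestamp anchors.
    assert(it.map(f => versionOf(f.getPath)).toList == List(0L, 1L))
  }
}
```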
new file mode 100644 index 00000000000..8324b3e1dd6 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLog.scala @@ -0,0 +1,964 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.lang.ref.WeakReference +import java.net.URI +import java.util.concurrent.TimeUnit + +import scala.collection.JavaConverters._ +import scala.collection.mutable +import scala.util.Try +import scala.util.control.NonFatal + +import com.databricks.spark.util.TagDefinitions._ +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.commands.WriteIntoDelta +import org.apache.spark.sql.delta.files.{TahoeBatchFileIndex, TahoeLogFileIndex} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.{SchemaMergingUtils, SchemaUtils} +import org.apache.spark.sql.delta.sources._ +import org.apache.spark.sql.delta.storage.LogStoreProvider +import org.apache.spark.sql.delta.util.FileNames +import com.google.common.cache.{CacheBuilder, RemovalNotification} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.{FileSourceOptions, TableIdentifier} +import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStatistics, CatalogTable} +import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Cast, Expression, Literal} +import org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper +import org.apache.spark.sql.catalyst.util.FailFastMode +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.expressions.UserDefinedFunction +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.{BaseRelation, InsertableRelation} +import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.util._ + +/** + * Used to query the current state of the log as well as modify it by adding + * new atomic collections of actions. + * + * Internally, this class implements an optimistic concurrency control + * algorithm to handle multiple readers or writers. Any single read + * is guaranteed to see a consistent snapshot of the table. + * + * @param logPath Path of the Delta log JSONs. + * @param dataPath Path of the data files. + * @param options Filesystem options filtered from `allOptions`. + * @param allOptions All options provided by the user, for example via `df.write.option()`. This + * includes but not limited to filesystem and table properties. + * @param clock Clock to be used when starting a new transaction. 
+ */ +class DeltaLog private( + val logPath: Path, + val dataPath: Path, + val options: Map[String, String], + val allOptions: Map[String, String], + val clock: Clock + ) extends Checkpoints + with MetadataCleanup + with LogStoreProvider + with SnapshotManagement + with DeltaFileFormat + with ProvidesUniFormConverters + with ReadChecksum { + + import org.apache.spark.sql.delta.files.TahoeFileIndex + import org.apache.spark.sql.delta.util.FileNames._ + + /** + * Path to sidecar directory. + * This is intentionally kept `lazy val` as otherwise any other constructor codepaths in DeltaLog + * (e.g. SnapshotManagement etc) will see it as null as they are executed before this line is + * called. + */ + lazy val sidecarDirPath: Path = FileNames.sidecarDirPath(logPath) + + + protected def spark = SparkSession.active + + checkRequiredConfigurations() + + /** + * Keep a reference to `SparkContext` used to create `DeltaLog`. `DeltaLog` cannot be used when + * `SparkContext` is stopped. We keep the reference so that we can check whether the cache is + * still valid and drop invalid `DeltaLog`` objects. + */ + private val sparkContext = new WeakReference(spark.sparkContext) + + /** + * Returns the Hadoop [[Configuration]] object which can be used to access the file system. All + * Delta code should use this method to create the Hadoop [[Configuration]] object, so that the + * hadoop file system configurations specified in DataFrame options will come into effect. + */ + // scalastyle:off deltahadoopconfiguration + final def newDeltaHadoopConf(): Configuration = + spark.sessionState.newHadoopConfWithOptions(options) + // scalastyle:on deltahadoopconfiguration + + /** Used to read and write physical log files and checkpoints. */ + lazy val store = createLogStore(spark) + + /** Delta History Manager containing version and commit history. */ + lazy val history = new DeltaHistoryManager( + this, spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_HISTORY_PAR_SEARCH_THRESHOLD)) + + /* --------------- * + | Configuration | + * --------------- */ + + /** + * The max lineage length of a Snapshot before Delta forces to build a Snapshot from scratch. + * Delta will build a Snapshot on top of the previous one if it doesn't see a checkpoint. + * However, there is a race condition that when two writers are writing at the same time, + * a writer may fail to pick up checkpoints written by another one, and the lineage will grow + * and finally cause StackOverflowError. Hence we have to force to build a Snapshot from scratch + * when the lineage length is too large to avoid hitting StackOverflowError. + */ + def maxSnapshotLineageLength: Int = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_MAX_SNAPSHOT_LINEAGE_LENGTH) + + /** The unique identifier for this table. */ + def tableId: String = unsafeVolatileMetadata.id // safe because table id never changes + + /** + * Combines the tableId with the path of the table to ensure uniqueness. Normally `tableId` + * should be globally unique, but nothing stops users from copying a Delta table directly to + * a separate location, where the transaction log is copied directly, causing the tableIds to + * match. When users mutate the copied table, and then try to perform some checks joining the + * two tables, optimizations that depend on `tableId` alone may not be correct. Hence we use a + * composite id. 
+ */ + private[delta] def compositeId: (String, Path) = tableId -> dataPath + + /** + * Creates a [[LogicalRelation]] for a given [[DeltaLogFileIndex]], with all necessary file source + * options taken from the Delta Log. All reads of Delta metadata files should use this method. + */ + def indexToRelation( + index: DeltaLogFileIndex, + schema: StructType = Action.logSchema): LogicalRelation = { + DeltaLog.indexToRelation(spark, index, options, schema) + } + + /** + * Load the data using the FileIndex. This allows us to skip many checks that add overhead, e.g. + * file existence checks, partitioning schema inference. + */ + def loadIndex( + index: DeltaLogFileIndex, + schema: StructType = Action.logSchema): DataFrame = { + Dataset.ofRows(spark, indexToRelation(index, schema)) + } + + /* ------------------ * + | Delta Management | + * ------------------ */ + + /** + * Returns a new [[OptimisticTransaction]] that can be used to read the current state of the log + * and then commit updates. The reads and updates will be checked for logical conflicts with any + * concurrent writes to the log, and post-commit hooks can be used to notify the table's catalog + * of schema changes, etc. + * + * Note that all reads in a transaction must go through the returned transaction object, and not + * directly to the [[DeltaLog]] otherwise they will not be checked for conflicts. + * + * @param catalogTableOpt The [[CatalogTable]] for the table this transaction updates. Passing + * None asserts this is a path-based table with no catalog entry. + * + * @param snapshotOpt THe [[Snapshot]] this transaction should use, if not latest. + */ + def startTransaction( + catalogTableOpt: Option[CatalogTable], + snapshotOpt: Option[Snapshot] = None): OptimisticTransaction = { + new OptimisticTransaction(this, catalogTableOpt, snapshotOpt) + } + + /** Legacy/compat overload that does not require catalog table information. Avoid prod use. */ + @deprecated("Please use the CatalogTable overload instead", "3.0") + def startTransaction(): OptimisticTransaction = { + startTransaction(catalogTableOpt = None, snapshotOpt = None) + } + + /** + * Execute a piece of code within a new [[OptimisticTransaction]]. Reads/write sets will + * be recorded for this table, and all other tables will be read + * at a snapshot that is pinned on the first access. + * + * @param catalogTableOpt The [[CatalogTable]] for the table this transaction updates. Passing + * None asserts this is a path-based table with no catalog entry. + * + * @param snapshotOpt THe [[Snapshot]] this transaction should use, if not latest. + * @note This uses thread-local variable to make the active transaction visible. So do not use + * multi-threaded code in the provided thunk. + */ + def withNewTransaction[T]( + catalogTableOpt: Option[CatalogTable], + snapshotOpt: Option[Snapshot] = None)( + thunk: OptimisticTransaction => T): T = { + try { + val txn = startTransaction(catalogTableOpt, snapshotOpt) + OptimisticTransaction.setActive(txn) + thunk(txn) + } finally { + OptimisticTransaction.clearActive() + } + } + + /** Legacy/compat overload that does not require catalog table information. Avoid prod use. 
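
A hedged usage sketch of the transaction API described above, for a path-based table with no catalog entry. The table path is illustrative, and a real writer would pass the `AddFile`/`RemoveFile` actions produced while writing data files rather than an empty action list; `txn.commit` and `DeltaOperations.Write` are used as they appear elsewhere in this patch:

```scala
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.sql.delta.{DeltaLog, DeltaOperations}

object WithNewTransactionSketch {
  def touchTable(spark: SparkSession, path: String): Unit = {
    val deltaLog = DeltaLog.forTable(spark, path)
    deltaLog.withNewTransaction(catalogTableOpt = None, snapshotOpt = None) { txn =>
      // All reads must go through `txn` so they participate in conflict detection.
      val currentSchema = txn.metadata.schema
      // Commit with an illustrative operation; the action list is empty only for this sketch.
      txn.commit(Seq.empty, DeltaOperations.Write(SaveMode.Append))
      println(s"Committed against a schema with ${currentSchema.fields.length} columns")
    }
  }
}
```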
*/ + @deprecated("Please use the CatalogTable overload instead", "3.0") + def withNewTransaction[T](thunk: OptimisticTransaction => T): T = { + try { + val txn = startTransaction() + OptimisticTransaction.setActive(txn) + thunk(txn) + } finally { + OptimisticTransaction.clearActive() + } + } + + + /** + * Upgrade the table's protocol version, by default to the maximum recognized reader and writer + * versions in this Delta release. This method only upgrades protocol version, and will fail if + * the new protocol version is not a superset of the original one used by the snapshot. + */ + def upgradeProtocol( + catalogTable: Option[CatalogTable], + snapshot: Snapshot, + newVersion: Protocol): Unit = { + val currentVersion = snapshot.protocol + if (newVersion == currentVersion) { + logConsole(s"Table $dataPath is already at protocol version $newVersion.") + return + } + if (!currentVersion.canUpgradeTo(newVersion)) { + throw new ProtocolDowngradeException(currentVersion, newVersion) + } + + val txn = startTransaction(catalogTable, Some(snapshot)) + try { + SchemaMergingUtils.checkColumnNameDuplication(txn.metadata.schema, "in the table schema") + } catch { + case e: AnalysisException => + throw DeltaErrors.duplicateColumnsOnUpdateTable(e) + } + txn.commit(Seq(newVersion), DeltaOperations.UpgradeProtocol(newVersion)) + logConsole(s"Upgraded table at $dataPath to $newVersion.") + } + + /** + * Get all actions starting from "startVersion" (inclusive). If `startVersion` doesn't exist, + * return an empty Iterator. + */ + def getChanges( + startVersion: Long, + failOnDataLoss: Boolean = false): Iterator[(Long, Seq[Action])] = { + val hadoopConf = newDeltaHadoopConf() + val deltasWithVersion = store.listFrom(listingPrefix(logPath, startVersion), hadoopConf) + .flatMap(DeltaFile.unapply(_)) + // Subtract 1 to ensure that we have the same check for the inclusive startVersion + var lastSeenVersion = startVersion - 1 + deltasWithVersion.map { case (status, version) => + val p = status.getPath + if (failOnDataLoss && version > lastSeenVersion + 1) { + throw DeltaErrors.failOnDataLossException(lastSeenVersion + 1, version) + } + lastSeenVersion = version + (version, store.read(status, hadoopConf).map(Action.fromJson)) + } + } + + /** + * Get access to all actions starting from "startVersion" (inclusive) via [[FileStatus]]. + * If `startVersion` doesn't exist, return an empty Iterator. + */ + def getChangeLogFiles( + startVersion: Long, + failOnDataLoss: Boolean = false): Iterator[(Long, FileStatus)] = { + val deltasWithVersion = store + .listFrom(listingPrefix(logPath, startVersion), newDeltaHadoopConf()) + .flatMap(DeltaFile.unapply(_)) + // Subtract 1 to ensure that we have the same check for the inclusive startVersion + var lastSeenVersion = startVersion - 1 + deltasWithVersion.map { case (status, version) => + if (failOnDataLoss && version > lastSeenVersion + 1) { + throw DeltaErrors.failOnDataLossException(lastSeenVersion + 1, version) + } + lastSeenVersion = version + (version, status) + } + } + + /* --------------------- * + | Protocol validation | + * --------------------- */ + + /** + * Asserts the highest protocol supported by this client is not less than what required by the + * table for performing read or write operations. This ensures the client to support a + * greater-or-equal protocol versions and recognizes/supports all features enabled by the table. + * + * The operation type to be checked is passed as a string in `readOrWrite`. Valid values are + * `read` and `write`. 
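
A small consumer sketch for `getChanges`, assuming the `AddFile` action exposes its `path` field (as it does in the actions package imported above); the table path is illustrative. With `failOnDataLoss = true`, a gap in the version sequence (for example, commits removed by metadata cleanup) raises an error instead of being silently skipped:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.delta.DeltaLog
import org.apache.spark.sql.delta.actions.AddFile

object GetChangesSketch {
  /** Print how many files each commit at or after `startVersion` added. */
  def printAddedFiles(spark: SparkSession, tablePath: String, startVersion: Long): Unit = {
    val deltaLog = DeltaLog.forTable(spark, tablePath)
    deltaLog.getChanges(startVersion, failOnDataLoss = true).foreach { case (version, actions) =>
      val added = actions.collect { case a: AddFile => a.path }
      println(s"commit $version added ${added.size} files")
    }
  }
}
```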
+ */ + private def protocolCheck(tableProtocol: Protocol, readOrWrite: String): Unit = { + val clientSupportedProtocol = Action.supportedProtocolVersion() + // Depending on the operation, pull related protocol versions out of Protocol objects. + // `getEnabledFeatures` is a pointer to pull reader/writer features out of a Protocol. + val (clientSupportedVersions, tableRequiredVersion, getEnabledFeatures) = readOrWrite match { + case "read" => ( + Action.supportedReaderVersionNumbers, + tableProtocol.minReaderVersion, + (f: Protocol) => f.readerFeatureNames) + case "write" => ( + Action.supportedWriterVersionNumbers, + tableProtocol.minWriterVersion, + (f: Protocol) => f.writerFeatureNames) + case _ => + throw new IllegalArgumentException("Table operation must be either `read` or `write`.") + } + + // Check is complete when both the protocol version and all referenced features are supported. + val clientSupportedFeatureNames = getEnabledFeatures(clientSupportedProtocol) + val tableEnabledFeatureNames = getEnabledFeatures(tableProtocol) + if (tableEnabledFeatureNames.subsetOf(clientSupportedFeatureNames) && + clientSupportedVersions.contains(tableRequiredVersion)) { + return + } + + // Otherwise, either the protocol version, or few features referenced by the table, is + // unsupported. + val clientUnsupportedFeatureNames = + tableEnabledFeatureNames.diff(clientSupportedFeatureNames) + // Prepare event log constants and the appropriate error message handler. + val (opType, versionKey, unsupportedFeaturesException) = readOrWrite match { + case "read" => ( + "delta.protocol.failure.read", + "minReaderVersion", + DeltaErrors.unsupportedReaderTableFeaturesInTableException _) + case "write" => ( + "delta.protocol.failure.write", + "minWriterVersion", + DeltaErrors.unsupportedWriterTableFeaturesInTableException _) + } + recordDeltaEvent( + this, + opType, + data = Map( + "clientVersion" -> clientSupportedVersions.max, + versionKey -> tableRequiredVersion, + "clientFeatures" -> clientSupportedFeatureNames.mkString(","), + "clientUnsupportedFeatures" -> clientUnsupportedFeatureNames.mkString(","))) + if (!clientSupportedVersions.contains(tableRequiredVersion)) { + throw new InvalidProtocolVersionException( + dataPath.toString(), + tableProtocol.minReaderVersion, + tableProtocol.minWriterVersion, + Action.supportedReaderVersionNumbers.toSeq, + Action.supportedWriterVersionNumbers.toSeq) + } else { + throw unsupportedFeaturesException(dataPath.toString(), clientUnsupportedFeatureNames) + } + } + + /** + * Asserts that the table's protocol enabled all features that are active in the metadata. + * + * A mismatch shouldn't happen when the table has gone through a proper write process because we + * require all active features during writes. However, other clients may void this guarantee. 
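
The two conditions enforced by `protocolCheck` reduce to a version membership test plus a feature-set difference. A sketch over plain values (the feature names are illustrative strings, not an authoritative list), with the same precedence as above: an unsupported version wins over unsupported features:

```scala
object ProtocolCheckSketch {
  /**
   * Returns Left for an unsupported protocol version, otherwise Right with the table features
   * this client does not recognize (an empty set means the check passes).
   */
  def unsupportedFeatures(
      clientSupportedVersions: Set[Int],
      clientSupportedFeatures: Set[String],
      tableRequiredVersion: Int,
      tableEnabledFeatures: Set[String]): Either[String, Set[String]] = {
    if (!clientSupportedVersions.contains(tableRequiredVersion)) {
      Left(s"unsupported protocol version $tableRequiredVersion")
    } else {
      Right(tableEnabledFeatures.diff(clientSupportedFeatures))
    }
  }

  def main(args: Array[String]): Unit = {
    assert(unsupportedFeatures(
      clientSupportedVersions = Set(1, 2, 3),
      clientSupportedFeatures = Set("appendOnly", "columnMapping"),
      tableRequiredVersion = 2,
      tableEnabledFeatures = Set("appendOnly")) == Right(Set.empty[String]))

    assert(unsupportedFeatures(
      clientSupportedVersions = Set(1, 2, 3),
      clientSupportedFeatures = Set("appendOnly"),
      tableRequiredVersion = 3,
      tableEnabledFeatures = Set("appendOnly", "deletionVectors")) == Right(Set("deletionVectors")))
  }
}
```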
+ */ + def assertTableFeaturesMatchMetadata( + targetProtocol: Protocol, + targetMetadata: Metadata): Unit = { + if (!targetProtocol.supportsReaderFeatures && !targetProtocol.supportsWriterFeatures) return + + val protocolEnabledFeatures = targetProtocol.writerFeatureNames + .flatMap(TableFeature.featureNameToFeature) + val activeFeatures = + Protocol.extractAutomaticallyEnabledFeatures(spark, targetMetadata, Some(targetProtocol)) + val activeButNotEnabled = activeFeatures.diff(protocolEnabledFeatures) + if (activeButNotEnabled.nonEmpty) { + throw DeltaErrors.tableFeatureMismatchException(activeButNotEnabled.map(_.name)) + } + } + + /** + * Asserts that the client is up to date with the protocol and allowed to read the table that is + * using the given `protocol`. + */ + def protocolRead(protocol: Protocol): Unit = { + protocolCheck(protocol, "read") + } + + /** + * Asserts that the client is up to date with the protocol and allowed to write to the table + * that is using the given `protocol`. + */ + def protocolWrite(protocol: Protocol): Unit = { + protocolCheck(protocol, "write") + } + + /* ---------------------------------------- * + | Log Directory Management and Retention | + * ---------------------------------------- */ + + /** + * Whether a Delta table exists at this directory. + * It is okay to use the cached volatile snapshot here, since the worst case is that the table + * has recently started existing which hasn't been picked up here. If so, any subsequent command + * that updates the table will see the right value. + */ + def tableExists: Boolean = unsafeVolatileSnapshot.version >= 0 + + def isSameLogAs(otherLog: DeltaLog): Boolean = this.compositeId == otherLog.compositeId + + /** Creates the log directory if it does not exist. */ + def ensureLogDirectoryExist(): Unit = { + val fs = logPath.getFileSystem(newDeltaHadoopConf()) + def createDirIfNotExists(path: Path): Unit = { + if (!fs.exists(path)) { + if (!fs.mkdirs(path)) { + throw DeltaErrors.cannotCreateLogPathException(logPath.toString) + } + } + } + createDirIfNotExists(logPath) + } + + /** + * Create the log directory. Unlike `ensureLogDirectoryExist`, this method doesn't check whether + * the log directory exists and it will ignore the return value of `mkdirs`. + */ + def createLogDirectory(): Unit = { + logPath.getFileSystem(newDeltaHadoopConf()).mkdirs(logPath) + } + + /* ------------ * + | Integration | + * ------------ */ + + /** + * Returns a [[org.apache.spark.sql.DataFrame]] containing the new files within the specified + * version range. + */ + def createDataFrame( + snapshot: SnapshotDescriptor, + addFiles: Seq[AddFile], + isStreaming: Boolean = false, + actionTypeOpt: Option[String] = None): DataFrame = { + val actionType = actionTypeOpt.getOrElse(if (isStreaming) "streaming" else "batch") + // It's ok to not pass down the partitionSchema to TahoeBatchFileIndex. Schema evolution will + // ensure any partitionSchema changes will be captured, and upon restart, the new snapshot will + // be initialized with the correct partition schema again. + val fileIndex = new TahoeBatchFileIndex(spark, actionType, addFiles, this, dataPath, snapshot) + val relation = buildHadoopFsRelationWithFileIndex(snapshot, fileIndex, bucketSpec = None) + Dataset.ofRows(spark, LogicalRelation(relation, isStreaming = isStreaming)) + } + + /** + * Returns a [[BaseRelation]] that contains all of the data present + * in the table. This relation will be continually updated + * as files are added or removed from the table. 
However, new [[BaseRelation]] + * must be requested in order to see changes to the schema. + */ + def createRelation( + partitionFilters: Seq[Expression] = Nil, + snapshotToUseOpt: Option[Snapshot] = None, + catalogTableOpt: Option[CatalogTable] = None, + isTimeTravelQuery: Boolean = false): BaseRelation = { + + /** Used to link the files present in the table into the query planner. */ + // TODO: If snapshotToUse is unspecified, get the correct snapshot from update() + val snapshotToUse = snapshotToUseOpt.getOrElse(unsafeVolatileSnapshot) + if (snapshotToUse.version < 0) { + // A negative version here means the dataPath is an empty directory. Read query should error + // out in this case. + throw DeltaErrors.pathNotExistsException(dataPath.toString) + } + + val fileIndex = TahoeLogFileIndex( + spark, this, dataPath, snapshotToUse, partitionFilters, isTimeTravelQuery) + var bucketSpec: Option[BucketSpec] = None + + val r = buildHadoopFsRelationWithFileIndex(snapshotToUse, fileIndex, bucketSpec = bucketSpec) + new HadoopFsRelation( + r.location, + r.partitionSchema, + r.dataSchema, + r.bucketSpec, + r.fileFormat, + r.options + )(spark) with InsertableRelation { + def insert(data: DataFrame, overwrite: Boolean): Unit = { + val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append + WriteIntoDelta( + deltaLog = DeltaLog.this, + mode = mode, + new DeltaOptions(Map.empty[String, String], spark.sessionState.conf), + partitionColumns = Seq.empty, + configuration = Map.empty, + data = data, + catalogTableOpt = catalogTableOpt).run(spark) + } + } + } + + def buildHadoopFsRelationWithFileIndex(snapshot: SnapshotDescriptor, fileIndex: TahoeFileIndex, + bucketSpec: Option[BucketSpec]): HadoopFsRelation = { + HadoopFsRelation( + fileIndex, + partitionSchema = DeltaColumnMapping.dropColumnMappingMetadata( + snapshot.metadata.partitionSchema), + // We pass all table columns as `dataSchema` so that Spark will preserve the partition + // column locations. Otherwise, for any partition columns not in `dataSchema`, Spark would + // just append them to the end of `dataSchema`. + dataSchema = DeltaColumnMapping.dropColumnMappingMetadata( + DeltaTableUtils.removeInternalMetadata(spark, + SchemaUtils.dropNullTypeColumns(snapshot.metadata.schema))), + bucketSpec = bucketSpec, + fileFormat(snapshot.protocol, snapshot.metadata), + // `metadata.format.options` is not set today. Even if we support it in future, we shouldn't + // store any file system options since they may contain credentials. Hence, it will never + // conflict with `DeltaLog.options`. + snapshot.metadata.format.options ++ options)(spark) + } + + /** + * Verify the required Spark conf for delta + * Throw `DeltaErrors.configureSparkSessionWithExtensionAndCatalog` exception if + * `spark.sql.catalog.spark_catalog` config is missing. We do not check for + * `spark.sql.extensions` because DeltaSparkSessionExtension can alternatively + * be activated using the `.withExtension()` API. This check can be disabled + * by setting DELTA_CHECK_REQUIRED_SPARK_CONF to false. + */ + protected def checkRequiredConfigurations(): Unit = { + if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_REQUIRED_SPARK_CONFS_CHECK)) { + if (spark.conf.getOption( + SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key).isEmpty) { + throw DeltaErrors.configureSparkSessionWithExtensionAndCatalog(None) + } + } + } + + /** + * Returns a proper path canonicalization function for the current Delta log. 
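
A session set up the way `checkRequiredConfigurations` expects looks roughly like the sketch below. The catalog override is the configuration the check actually requires; the SQL extension is typically set alongside it (the class names are the ones published in the Delta documentation, and the app name and master are illustrative):

```scala
import org.apache.spark.sql.SparkSession

object DeltaSessionSetupSketch {
  def build(): SparkSession = SparkSession.builder()
    .appName("delta-session-sketch")
    .master("local[*]")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
      "spark.sql.catalog.spark_catalog",
      "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
}
```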
+ * + * If `runsOnExecutors` is true, the returned method will use a broadcast Hadoop Configuration + * so that the method is suitable for execution on executors. Otherwise, the returned method + * will use a local Hadoop Configuration and the method can only be executed on the driver. + */ + private[delta] def getCanonicalPathFunction(runsOnExecutors: Boolean): String => String = { + val hadoopConf = newDeltaHadoopConf() + // Wrap `hadoopConf` with a method to delay the evaluation to run on executors. + val getHadoopConf = if (runsOnExecutors) { + val broadcastHadoopConf = + spark.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) + () => broadcastHadoopConf.value.value + } else { + () => hadoopConf + } + + new DeltaLog.CanonicalPathFunction(getHadoopConf) + } + + /** + * Returns a proper path canonicalization UDF for the current Delta log. + * + * If `runsOnExecutors` is true, the returned UDF will use a broadcast Hadoop Configuration. + * Otherwise, the returned UDF will use a local Hadoop Configuration and the UDF can + * only be executed on the driver. + */ + private[delta] def getCanonicalPathUdf(runsOnExecutors: Boolean = true): UserDefinedFunction = { + DeltaUDF.stringFromString(getCanonicalPathFunction(runsOnExecutors)) + } +} + +object DeltaLog extends DeltaLogging { + + /** + * The key type of `DeltaLog` cache. It's a pair of the canonicalized table path and the file + * system options (options starting with "fs." or "dfs." prefix) passed into + * `DataFrameReader/Writer` + */ + private type DeltaLogCacheKey = (Path, Map[String, String]) + + /** The name of the subdirectory that holds Delta metadata files */ + private val LOG_DIR_NAME = "_delta_log" + + private[delta] def logPathFor(dataPath: String): Path = logPathFor(new Path(dataPath)) + private[delta] def logPathFor(dataPath: Path): Path = + DeltaTableUtils.safeConcatPaths(dataPath, LOG_DIR_NAME) + + /** + * We create only a single [[DeltaLog]] for any given `DeltaLogCacheKey` to avoid wasted work + * in reconstructing the log. + */ + private val deltaLogCache = { + val builder = CacheBuilder.newBuilder() + .expireAfterAccess(60, TimeUnit.MINUTES) + .removalListener((removalNotification: RemovalNotification[DeltaLogCacheKey, DeltaLog]) => { + val log = removalNotification.getValue + // TODO: We should use ref-counting to uncache snapshots instead of a manual timed op + try log.unsafeVolatileSnapshot.uncache() catch { + case _: java.lang.NullPointerException => + // Various layers will throw null pointer if the RDD is already gone. + } + }) + sys.props.get("delta.log.cacheSize") + .flatMap(v => Try(v.toLong).toOption) + .foreach(builder.maximumSize) + builder.build[DeltaLogCacheKey, DeltaLog]() + } + + + /** + * Creates a [[LogicalRelation]] for a given [[DeltaLogFileIndex]], with all necessary file source + * options taken from the Delta Log. All reads of Delta metadata files should use this method. + */ + def indexToRelation( + spark: SparkSession, + index: DeltaLogFileIndex, + additionalOptions: Map[String, String], + schema: StructType = Action.logSchema): LogicalRelation = { + val formatSpecificOptions: Map[String, String] = index.format match { + case DeltaLogFileIndex.COMMIT_FILE_FORMAT => + jsonCommitParseOption + case _ => Map.empty + } + // Delta should NEVER ignore missing or corrupt metadata files, because doing so can render the + // entire table unusable. 
Hard-wire that into the file source options so the user can't override + // it by setting spark.sql.files.ignoreCorruptFiles or spark.sql.files.ignoreMissingFiles. + val allOptions = additionalOptions ++ formatSpecificOptions ++ Map( + FileSourceOptions.IGNORE_CORRUPT_FILES -> "false", + FileSourceOptions.IGNORE_MISSING_FILES -> "false" + ) + val fsRelation = HadoopFsRelation( + index, index.partitionSchema, schema, None, index.format, allOptions)(spark) + LogicalRelation(fsRelation) + } + + // Don't tolerate malformed JSON when parsing Delta log actions (default is PERMISSIVE) + val jsonCommitParseOption = Map("mode" -> FailFastMode.name) + + /** Helper for creating a log when it stored at the root of the data. */ + def forTable(spark: SparkSession, dataPath: String): DeltaLog = { + apply(spark, logPathFor(dataPath), Map.empty, new SystemClock) + } + + /** Helper for creating a log when it stored at the root of the data. */ + def forTable(spark: SparkSession, dataPath: Path): DeltaLog = { + apply(spark, logPathFor(dataPath), new SystemClock) + } + + /** Helper for creating a log when it stored at the root of the data. */ + def forTable(spark: SparkSession, dataPath: Path, options: Map[String, String]): DeltaLog = { + apply(spark, logPathFor(dataPath), options, new SystemClock) + } + + /** Helper for creating a log when it stored at the root of the data. */ + def forTable(spark: SparkSession, dataPath: Path, clock: Clock): DeltaLog = { + apply(spark, logPathFor(dataPath), clock) + } + + /** Helper for creating a log for the table. */ + def forTable(spark: SparkSession, tableName: TableIdentifier): DeltaLog = { + forTable(spark, tableName, new SystemClock) + } + + /** Helper for creating a log for the table. */ + def forTable(spark: SparkSession, table: CatalogTable): DeltaLog = { + forTable(spark, table, new SystemClock) + } + + /** Helper for creating a log for the table. */ + def forTable(spark: SparkSession, tableName: TableIdentifier, clock: Clock): DeltaLog = { + if (DeltaTableIdentifier.isDeltaPath(spark, tableName)) { + forTable(spark, new Path(tableName.table)) + } else { + forTable(spark, spark.sessionState.catalog.getTableMetadata(tableName), clock) + } + } + + /** Helper for creating a log for the table. 
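
A brief sketch of the `forTable` entry points above, for a path-based and a catalog-based table (both names are illustrative). `update()` refreshes and returns the latest snapshot, whose `version` is the current table version:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.delta.DeltaLog

object ForTableSketch {
  def latestVersions(spark: SparkSession): Unit = {
    // Path-based table: the _delta_log directory is resolved under the data path.
    val byPath = DeltaLog.forTable(spark, "/tmp/events")
    // Catalog table: resolved through the session catalog unless the identifier is itself a path.
    val byName = DeltaLog.forTable(spark, TableIdentifier("events"))
    println(byPath.update().version)
    println(byName.update().version)
  }
}
```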
*/ + def forTable(spark: SparkSession, table: CatalogTable, clock: Clock): DeltaLog = { + apply(spark, logPathFor(new Path(table.location)), clock) + } + + private def apply(spark: SparkSession, rawPath: Path, clock: Clock = new SystemClock): DeltaLog = + apply(spark, rawPath, Map.empty, clock) + + + /** Helper for getting a log, as well as the latest snapshot, of the table */ + def forTableWithSnapshot(spark: SparkSession, dataPath: String): (DeltaLog, Snapshot) = + withFreshSnapshot { forTable(spark, new Path(dataPath), _) } + + /** Helper for getting a log, as well as the latest snapshot, of the table */ + def forTableWithSnapshot(spark: SparkSession, dataPath: Path): (DeltaLog, Snapshot) = + withFreshSnapshot { forTable(spark, dataPath, _) } + + /** Helper for getting a log, as well as the latest snapshot, of the table */ + def forTableWithSnapshot( + spark: SparkSession, + tableName: TableIdentifier): (DeltaLog, Snapshot) = + withFreshSnapshot { forTable(spark, tableName, _) } + + /** Helper for getting a log, as well as the latest snapshot, of the table */ + def forTableWithSnapshot( + spark: SparkSession, + dataPath: Path, + options: Map[String, String]): (DeltaLog, Snapshot) = + withFreshSnapshot { apply(spark, logPathFor(dataPath), options, _) } + + /** + * Helper function to be used with the forTableWithSnapshot calls. Thunk is a + * partially applied DeltaLog.forTable call, which we can then wrap around with a + * snapshot update. We use the system clock to avoid back-to-back updates. + */ + private[delta] def withFreshSnapshot(thunk: Clock => DeltaLog): (DeltaLog, Snapshot) = { + val clock = new SystemClock + val ts = clock.getTimeMillis() + val deltaLog = thunk(clock) + val snapshot = deltaLog.update(checkIfUpdatedSinceTs = Some(ts)) + (deltaLog, snapshot) + } + + private def apply( + spark: SparkSession, + rawPath: Path, + options: Map[String, String], + clock: Clock + ): DeltaLog = { + val fileSystemOptions: Map[String, String] = + if (spark.sessionState.conf.getConf( + DeltaSQLConf.LOAD_FILE_SYSTEM_CONFIGS_FROM_DATAFRAME_OPTIONS)) { + // We pick up only file system options so that we don't pass any parquet or json options to + // the code that reads Delta transaction logs. + options.filterKeys { k => + DeltaTableUtils.validDeltaTableHadoopPrefixes.exists(k.startsWith) + }.toMap + } else { + Map.empty + } + // scalastyle:off deltahadoopconfiguration + val hadoopConf = spark.sessionState.newHadoopConfWithOptions(fileSystemOptions) + // scalastyle:on deltahadoopconfiguration + val fs = rawPath.getFileSystem(hadoopConf) + val path = fs.makeQualified(rawPath) + def createDeltaLog(): DeltaLog = recordDeltaOperation( + null, + "delta.log.create", + Map(TAG_TAHOE_PATH -> path.getParent.toString)) { + AnalysisHelper.allowInvokingTransformsInAnalyzer { + new DeltaLog( + logPath = path, + dataPath = path.getParent, + options = fileSystemOptions, + allOptions = options, + clock = clock + ) + } + } + def getDeltaLogFromCache(): DeltaLog = { + // The following cases will still create a new ActionLog even if there is a cached + // ActionLog using a different format path: + // - Different `scheme` + // - Different `authority` (e.g., different user tokens in the path) + // - Different mount point. 
+ try { + deltaLogCache.get(path -> fileSystemOptions, () => { + createDeltaLog() + } + ) + } catch { + case e: com.google.common.util.concurrent.UncheckedExecutionException => throw e.getCause + case e: java.util.concurrent.ExecutionException => throw e.getCause + } + } + + val deltaLog = getDeltaLogFromCache() + if (Option(deltaLog.sparkContext.get).map(_.isStopped).getOrElse(true)) { + // Invalid the cached `DeltaLog` and create a new one because the `SparkContext` of the cached + // `DeltaLog` has been stopped. + deltaLogCache.invalidate(path -> fileSystemOptions) + getDeltaLogFromCache() + } else { + deltaLog + } + } + + /** Invalidate the cached DeltaLog object for the given `dataPath`. */ + def invalidateCache(spark: SparkSession, dataPath: Path): Unit = { + try { + val rawPath = logPathFor(dataPath) + // scalastyle:off deltahadoopconfiguration + // This method cannot be called from DataFrameReader/Writer so it's safe to assume the user + // has set the correct file system configurations in the session configs. + val fs = rawPath.getFileSystem(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + val path = fs.makeQualified(rawPath) + + if (spark.sessionState.conf.getConf( + DeltaSQLConf.LOAD_FILE_SYSTEM_CONFIGS_FROM_DATAFRAME_OPTIONS)) { + // We rely on the fact that accessing the key set doesn't modify the entry access time. See + // `CacheBuilder.expireAfterAccess`. + val keysToBeRemoved = mutable.ArrayBuffer[DeltaLogCacheKey]() + val iter = deltaLogCache.asMap().keySet().iterator() + while (iter.hasNext) { + val key = iter.next() + if (key._1 == path) { + keysToBeRemoved += key + } + } + deltaLogCache.invalidateAll(keysToBeRemoved.asJava) + } else { + deltaLogCache.invalidate(path -> Map.empty) + } + } catch { + case NonFatal(e) => logWarning(e.getMessage, e) + } + } + + def clearCache(): Unit = { + deltaLogCache.invalidateAll() + } + + /** Return the number of cached `DeltaLog`s. Exposing for testing */ + private[delta] def cacheSize: Long = { + deltaLogCache.size() + } + + /** + * Filters the given [[Dataset]] by the given `partitionFilters`, returning those that match. + * @param files The active files in the DeltaLog state, which contains the partition value + * information + * @param partitionFilters Filters on the partition columns + * @param partitionColumnPrefixes The path to the `partitionValues` column, if it's nested + * @param shouldRewritePartitionFilters Whether to rewrite `partitionFilters` to be over the + * [[AddFile]] schema + */ + def filterFileList( + partitionSchema: StructType, + files: DataFrame, + partitionFilters: Seq[Expression], + partitionColumnPrefixes: Seq[String] = Nil, + shouldRewritePartitionFilters: Boolean = true): DataFrame = { + + val rewrittenFilters = if (shouldRewritePartitionFilters) { + rewritePartitionFilters( + partitionSchema, + files.sparkSession.sessionState.conf.resolver, + partitionFilters, + partitionColumnPrefixes) + } else { + partitionFilters + } + val expr = rewrittenFilters.reduceLeftOption(And).getOrElse(Literal.TrueLiteral) + val columnFilter = new Column(expr) + files.filter(columnFilter) + } + + /** + * Rewrite the given `partitionFilters` to be used for filtering partition values. + * We need to explicitly resolve the partitioning columns here because the partition columns + * are stored as keys of a Map type instead of attributes in the AddFile schema (below) and thus + * cannot be resolved automatically. 
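
A usage sketch for `filterFileList`, assuming `filesDf` carries AddFile-shaped rows (in state reconstruction this is the `add` column of the snapshot) and that the table is partitioned by a string column named `date`; both assumptions are illustrative:

```scala
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.delta.DeltaLog
import org.apache.spark.sql.functions.expr
import org.apache.spark.sql.types.{StringType, StructType}

object FilterFileListSketch {
  /** Keep only the AddFile rows whose `partitionValues` match date = '2024-01-01'. */
  def onlyOneDate(filesDf: DataFrame): DataFrame = {
    val partitionSchema = new StructType().add("date", StringType)
    DeltaLog.filterFileList(
      partitionSchema,
      filesDf,
      partitionFilters = Seq(expr("date = '2024-01-01'").expr))
  }
}
```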
+ * + * @param partitionFilters Filters on the partition columns + * @param partitionColumnPrefixes The path to the `partitionValues` column, if it's nested + */ + def rewritePartitionFilters( + partitionSchema: StructType, + resolver: Resolver, + partitionFilters: Seq[Expression], + partitionColumnPrefixes: Seq[String] = Nil): Seq[Expression] = { + partitionFilters + .map(_.transformUp { + case a: Attribute => + // If we have a special column name, e.g. `a.a`, then an UnresolvedAttribute returns + // the column name as '`a.a`' instead of 'a.a', therefore we need to strip the backticks. + val unquoted = a.name.stripPrefix("`").stripSuffix("`") + val partitionCol = partitionSchema.find { field => resolver(field.name, unquoted) } + partitionCol match { + case Some(f: StructField) => + val name = DeltaColumnMapping.getPhysicalName(f) + Cast( + UnresolvedAttribute(partitionColumnPrefixes ++ Seq("partitionValues", name)), + f.dataType) + case None => + // This should not be able to happen, but the case was present in the original code so + // we kept it to be safe. + log.error(s"Partition filter referenced column ${a.name} not in the partition schema") + UnresolvedAttribute(partitionColumnPrefixes ++ Seq("partitionValues", a.name)) + } + }) + } + + + /** + * Checks whether this table only accepts appends. If so it will throw an error in operations that + * can remove data such as DELETE/UPDATE/MERGE. + */ + def assertRemovable(snapshot: Snapshot): Unit = { + val metadata = snapshot.metadata + if (DeltaConfigs.IS_APPEND_ONLY.fromMetaData(metadata)) { + throw DeltaErrors.modifyAppendOnlyTableException(metadata.name) + } + } + + /** How long to keep around SetTransaction actions before physically deleting them. */ + def minSetTransactionRetentionInterval(metadata: Metadata): Option[Long] = { + DeltaConfigs.TRANSACTION_ID_RETENTION_DURATION + .fromMetaData(metadata) + .map(DeltaConfigs.getMilliSeconds) + } + /** How long to keep around logically deleted files before physically deleting them. */ + def tombstoneRetentionMillis(metadata: Metadata): Long = { + DeltaConfigs.getMilliSeconds(DeltaConfigs.TOMBSTONE_RETENTION.fromMetaData(metadata)) + } + + /** Get a function that canonicalizes a given `path`. */ + private[delta] class CanonicalPathFunction(getHadoopConf: () => Configuration) + extends Function[String, String] with Serializable { + // Mark it `@transient lazy val` so that de-serialization happens only once on every executor. + @transient + private lazy val fs = { + // scalastyle:off FileSystemGet + FileSystem.get(getHadoopConf()) + // scalastyle:on FileSystemGet + } + + override def apply(path: String): String = { + // scalastyle:off pathfromuri + val hadoopPath = new Path(new URI(path)) + // scalastyle:on pathfromuri + if (hadoopPath.isAbsoluteAndSchemeAuthorityNull) { + fs.makeQualified(hadoopPath).toUri.toString + } else { + // return untouched if it is a relative path or is already fully qualified + hadoopPath.toUri.toString + } + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLogFileIndex.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLogFileIndex.scala new file mode 100644 index 00000000000..de111aadd3b --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLogFileIndex.scala @@ -0,0 +1,105 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.util.FileNames +import org.apache.hadoop.fs._ + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.execution.datasources.{FileFormat, FileIndex, PartitionDirectory} +import org.apache.spark.sql.execution.datasources.json.JsonFileFormat +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.types.{LongType, StructField, StructType} + +/** + * A specialized file index for files found in the _delta_log directory. By using this file index, + * we avoid any additional file listing, partitioning inference, and file existence checks when + * computing the state of a Delta table. + * + * @param format The file format of the log files. Currently "parquet" or "json" + * @param files The files to read + */ +case class DeltaLogFileIndex private ( + format: FileFormat, + files: Array[FileStatus]) + extends FileIndex + with Logging { + + import DeltaLogFileIndex._ + + override lazy val rootPaths: Seq[Path] = files.map(_.getPath) + + def listAllFiles(): Seq[PartitionDirectory] = { + files + .groupBy(f => FileNames.getFileVersionOpt(f.getPath).getOrElse(-1L)) + .map { case (version, files) => PartitionDirectory(InternalRow(version), files) } + .toSeq + } + + override def listFiles( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): Seq[PartitionDirectory] = { + if (partitionFilters.isEmpty) { + listAllFiles() + } else { + val predicate = partitionFilters.reduce(And) + val boundPredicate = predicate.transform { + case a: AttributeReference => + val index = partitionSchema.indexWhere(a.name == _.name) + BoundReference(index, partitionSchema(index).dataType, partitionSchema(index).nullable) + } + val predicateEvaluator = Predicate.create(boundPredicate, Nil) + listAllFiles().filter(d => predicateEvaluator.eval(d.values)) + } + } + + override val inputFiles: Array[String] = files.map(_.getPath.toString) + + override def refresh(): Unit = {} + + override val sizeInBytes: Long = files.map(_.getLen).sum + + override val partitionSchema: StructType = + new StructType().add(COMMIT_VERSION_COLUMN, LongType, nullable = false) + + override def toString: String = + s"DeltaLogFileIndex($format, numFilesInSegment: ${files.size}, totalFileSize: $sizeInBytes)" + + logInfo(s"Created $this") +} + +object DeltaLogFileIndex { + val COMMIT_VERSION_COLUMN = "version" + + lazy val COMMIT_FILE_FORMAT = new JsonFileFormat + lazy val CHECKPOINT_FILE_FORMAT_PARQUET = new ParquetFileFormat + lazy val CHECKPOINT_FILE_FORMAT_JSON = new JsonFileFormat + + def apply(format: FileFormat, fs: FileSystem, paths: Seq[Path]): DeltaLogFileIndex = { + DeltaLogFileIndex(format, paths.map(fs.getFileStatus).toArray) + } + + def apply(format: FileFormat, files: Seq[FileStatus]): Option[DeltaLogFileIndex] = { + if (files.isEmpty) None else Some(DeltaLogFileIndex(format, files.toArray)) + } + + def apply(format: FileFormat, filesOpt: Option[Seq[FileStatus]]): 
Option[DeltaLogFileIndex] = { + filesOpt.flatMap(DeltaLogFileIndex(format, _)) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaOperations.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaOperations.scala new file mode 100644 index 00000000000..5887e052712 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaOperations.scala @@ -0,0 +1,839 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaOperationMetrics.MetricsTransformer +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} +import org.apache.spark.sql.delta.constraints.Constraint +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.JsonUtils + +import org.apache.spark.sql.{SaveMode, SparkSession} +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.DeltaMergeIntoClause +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.OutputMode +import org.apache.spark.sql.types.{StructField, StructType} + +/** + * Exhaustive list of operations that can be performed on a Delta table. These operations are + * tracked as the first line in delta logs, and power `DESCRIBE HISTORY` for Delta tables. + */ +object DeltaOperations { + + /** + * An operation that can be performed on a Delta table. + * @param name The name of the operation. + */ + sealed abstract class Operation(val name: String) { + def parameters: Map[String, Any] + + lazy val jsonEncodedValues: Map[String, String] = + parameters.mapValues(JsonUtils.toJson(_)).toMap + + val operationMetrics: Set[String] = Set() + + def transformMetrics(metrics: Map[String, SQLMetric]): Map[String, String] = { + metrics.filterKeys( s => + operationMetrics.contains(s) + ).mapValues(_.value.toString).toMap + } + + val userMetadata: Option[String] = None + + /** Whether this operation changes data */ + def changesData: Boolean = false + + /** + * Manually transform the deletion vector metrics, because they are not part of + * `operationMetrics` and are filtered out by the super.transformMetrics() call. 
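+     *
+     * For illustration (hypothetical values): with the default
+     * [[DeltaOperationMetrics.DELETION_VECTORS]] transformers, a command reporting
+     * numDeletionVectorsAdded = 1 and numDeletionVectorsUpdated = 1 is logged with
+     * "numDeletionVectorsAdded" -> "2", because an updated vector counts as both an add
+     * and a remove.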
+ */ + def transformDeletionVectorMetrics( + allMetrics: Map[String, SQLMetric], + dvMetrics: Map[String, MetricsTransformer] = DeltaOperationMetrics.DELETION_VECTORS) + : Map[String, String] = { + dvMetrics.flatMap { case (metric, transformer) => + transformer.transformToString(metric, allMetrics) + } + } + } + + abstract class OperationWithPredicates(name: String, val predicates: Seq[Expression]) + extends Operation(name) { + private val predicateString = JsonUtils.toJson(predicatesToString(predicates)) + override def parameters: Map[String, Any] = Map("predicate" -> predicateString) + } + + /** Recorded during batch inserts. Predicates can be provided for overwrites. */ + case class Write( + mode: SaveMode, + partitionBy: Option[Seq[String]] = None, + predicate: Option[String] = None, + override val userMetadata: Option[String] = None + ) extends Operation("WRITE") { + override val parameters: Map[String, Any] = Map("mode" -> mode.name() + ) ++ + partitionBy.map("partitionBy" -> JsonUtils.toJson(_)) ++ + predicate.map("predicate" -> _) + + val replaceWhereMetricsEnabled = SparkSession.active.conf.get( + DeltaSQLConf.REPLACEWHERE_METRICS_ENABLED) + + override def transformMetrics(metrics: Map[String, SQLMetric]): Map[String, String] = { + // Need special handling for replaceWhere as it is implemented as a Write + Delete. + if (predicate.nonEmpty && replaceWhereMetricsEnabled) { + var strMetrics = super.transformMetrics(metrics) + // find the case where deletedRows are not captured + if (strMetrics.get("numDeletedRows").exists(_ == "0") && + strMetrics.get("numRemovedFiles").exists(_ != "0")) { + // identify when row level metrics are unavailable. This will happen when the entire + // table or partition are deleted. + strMetrics -= "numDeletedRows" + strMetrics -= "numCopiedRows" + strMetrics -= "numAddedFiles" + } + + // in the case when stats are not collected we need to remove all row based metrics + // If the DF provided to replaceWhere is an empty DataFrame and we don't have stats + // we won't return row level metrics. + if (strMetrics.get("numOutputRows").exists(_ == "0") && + strMetrics.get("numFiles").exists(_ != 0)) { + strMetrics -= "numDeletedRows" + strMetrics -= "numOutputRows" + strMetrics -= "numCopiedRows" + } + + strMetrics + } else { + super.transformMetrics(metrics) + } + } + + override val operationMetrics: Set[String] = if (predicate.isEmpty || + !replaceWhereMetricsEnabled) { + DeltaOperationMetrics.WRITE + } else { + // Need special handling for replaceWhere as rows/files are deleted as well. + DeltaOperationMetrics.WRITE_REPLACE_WHERE + } + override def changesData: Boolean = true + } + /** Recorded during streaming inserts. */ + case class StreamingUpdate( + outputMode: OutputMode, + queryId: String, + epochId: Long, + override val userMetadata: Option[String] = None + ) extends Operation("STREAMING UPDATE") { + override val parameters: Map[String, Any] = + Map("outputMode" -> outputMode.toString, "queryId" -> queryId, "epochId" -> epochId.toString + ) + override val operationMetrics: Set[String] = DeltaOperationMetrics.STREAMING_UPDATE + override def changesData: Boolean = true + } + /** Recorded while deleting certain partitions. 
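+   * More generally, recorded for any `DELETE FROM` command, e.g.
+   * `DELETE FROM tbl WHERE date = '2024-01-01'` (illustrative).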
*/ + case class Delete(predicate: Seq[Expression]) + extends OperationWithPredicates("DELETE", predicate) { + override val operationMetrics: Set[String] = DeltaOperationMetrics.DELETE + + override def transformMetrics(metrics: Map[String, SQLMetric]): Map[String, String] = { + var strMetrics = super.transformMetrics(metrics) + // find the case where deletedRows are not captured + if (strMetrics("numDeletedRows") == "0" && strMetrics("numRemovedFiles") != "0") { + // identify when row level metrics are unavailable. This will happen when the entire + // table or partition are deleted. + strMetrics -= "numDeletedRows" + strMetrics -= "numCopiedRows" + strMetrics -= "numAddedFiles" + } + + val dvMetrics = transformDeletionVectorMetrics(metrics) + strMetrics ++ dvMetrics + } + override def changesData: Boolean = true + } + /** Recorded when truncating the table. */ + case class Truncate() extends Operation("TRUNCATE") { + override val parameters: Map[String, Any] = Map.empty + override val operationMetrics: Set[String] = DeltaOperationMetrics.TRUNCATE + override def changesData: Boolean = true + } + + /** Recorded when converting a table into a Delta table. */ + case class Convert( + numFiles: Long, + partitionBy: Seq[String], + collectStats: Boolean, + catalogTable: Option[String], + sourceFormat: Option[String]) extends Operation("CONVERT") { + override val parameters: Map[String, Any] = Map( + "numFiles" -> numFiles, + "partitionedBy" -> JsonUtils.toJson(partitionBy), + "collectStats" -> collectStats) ++ + catalogTable.map("catalogTable" -> _) ++ + sourceFormat.map("sourceFormat" -> _) + override val operationMetrics: Set[String] = DeltaOperationMetrics.CONVERT + override def changesData: Boolean = true + } + + /** Represents the predicates and action type (insert, update, delete) for a Merge clause */ + case class MergePredicate( + predicate: Option[String], + actionType: String) + + object MergePredicate { + def apply(mergeClause: DeltaMergeIntoClause): MergePredicate = { + MergePredicate( + predicate = mergeClause.condition.map(_.simpleString(SQLConf.get.maxToStringFields)), + mergeClause.clauseType.toLowerCase()) + } + } + + /** + * Recorded when a merge operation is committed to the table. + * + * `updatePredicate`, `deletePredicate`, and `insertPredicate` are DEPRECATED. + * Only use `predicate`, `matchedPredicates`, `notMatchedPredicates` and + * `notMatchedBySourcePredicates` to record the merge. 
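+   *
+   * A sketch of recording a merge via the companion `apply`; `mergeCondition` stands in for an
+   * illustrative Catalyst [[Expression]]:
+   * {{{
+   *   DeltaOperations.Merge(
+   *     predicate = Some(mergeCondition),
+   *     matchedPredicates = Seq(MergePredicate(Some("t.id = s.id"), "update")),
+   *     notMatchedPredicates = Seq(MergePredicate(None, "insert")),
+   *     notMatchedBySourcePredicates = Nil)
+   * }}}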
+ */ + val OP_MERGE = "MERGE" + case class Merge( + predicate: Option[Expression], + updatePredicate: Option[String], + deletePredicate: Option[String], + insertPredicate: Option[String], + matchedPredicates: Seq[MergePredicate], + notMatchedPredicates: Seq[MergePredicate], + notMatchedBySourcePredicates: Seq[MergePredicate] + ) + extends OperationWithPredicates(OP_MERGE, predicate.toSeq) { + + override val parameters: Map[String, Any] = { + super.parameters ++ + updatePredicate.map("updatePredicate" -> _).toMap ++ + deletePredicate.map("deletePredicate" -> _).toMap ++ + insertPredicate.map("insertPredicate" -> _).toMap + + ("matchedPredicates" -> JsonUtils.toJson(matchedPredicates)) + + ("notMatchedPredicates" -> JsonUtils.toJson(notMatchedPredicates)) + + ("notMatchedBySourcePredicates" -> JsonUtils.toJson(notMatchedBySourcePredicates)) + } + override val operationMetrics: Set[String] = DeltaOperationMetrics.MERGE + + override def transformMetrics(metrics: Map[String, SQLMetric]): Map[String, String] = { + + var strMetrics = super.transformMetrics(metrics) + + // We have to recalculate "numOutputRows" to avoid counting CDC rows + if (metrics.contains("numTargetRowsInserted") && + metrics.contains("numTargetRowsUpdated") && + metrics.contains("numTargetRowsCopied")) { + val actualNumOutputRows = metrics("numTargetRowsInserted").value + + metrics("numTargetRowsUpdated").value + + metrics("numTargetRowsCopied").value + strMetrics += "numOutputRows" -> actualNumOutputRows.toString + } + + val dvMetrics = transformDeletionVectorMetrics( + metrics, dvMetrics = DeltaOperationMetrics.MERGE_DELETION_VECTORS) + strMetrics ++= dvMetrics + + strMetrics + } + + override def changesData: Boolean = true + } + + object Merge { + /** constructor to provide default values for deprecated fields */ + def apply( + predicate: Option[Expression], + matchedPredicates: Seq[MergePredicate], + notMatchedPredicates: Seq[MergePredicate], + notMatchedBySourcePredicates: Seq[MergePredicate] + ): Merge = Merge( + predicate, + updatePredicate = None, + deletePredicate = None, + insertPredicate = None, + matchedPredicates, + notMatchedPredicates, + notMatchedBySourcePredicates + ) + } + + /** Recorded when an update operation is committed to the table. */ + case class Update(predicate: Option[Expression]) + extends OperationWithPredicates("UPDATE", predicate.toSeq) { + override val operationMetrics: Set[String] = DeltaOperationMetrics.UPDATE + + override def changesData: Boolean = true + + override def transformMetrics(metrics: Map[String, SQLMetric]): Map[String, String] = { + val dvMetrics = transformDeletionVectorMetrics(metrics) + super.transformMetrics(metrics) ++ dvMetrics + } + } + /** Recorded when the table is created. */ + case class CreateTable( + metadata: Metadata, + isManaged: Boolean, + asSelect: Boolean = false + ) extends Operation("CREATE TABLE" + s"${if (asSelect) " AS SELECT" else ""}") { + override val parameters: Map[String, Any] = Map( + "isManaged" -> isManaged.toString, + "description" -> Option(metadata.description), + "partitionBy" -> JsonUtils.toJson(metadata.partitionColumns), + "properties" -> JsonUtils.toJson(metadata.configuration) + ) + override val operationMetrics: Set[String] = if (!asSelect) { + Set() + } else { + DeltaOperationMetrics.WRITE + } + override def changesData: Boolean = asSelect + } + /** Recorded when the table is replaced. 
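+   * (e.g. `REPLACE TABLE t ... USING delta`, or `CREATE OR REPLACE TABLE t ...` when
+   * `orCreate` is set; illustrative SQL).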
*/ + case class ReplaceTable( + metadata: Metadata, + isManaged: Boolean, + orCreate: Boolean, + asSelect: Boolean = false, + override val userMetadata: Option[String] = None + ) extends Operation(s"${if (orCreate) "CREATE OR " else ""}REPLACE TABLE" + + s"${if (asSelect) " AS SELECT" else ""}") { + override val parameters: Map[String, Any] = Map( + "isManaged" -> isManaged.toString, + "description" -> Option(metadata.description), + "partitionBy" -> JsonUtils.toJson(metadata.partitionColumns), + "properties" -> JsonUtils.toJson(metadata.configuration) + ) + override val operationMetrics: Set[String] = if (!asSelect) { + Set() + } else { + DeltaOperationMetrics.WRITE + } + override def changesData: Boolean = true + } + /** Recorded when the table properties are set. */ + val OP_SET_TBLPROPERTIES = "SET TBLPROPERTIES" + case class SetTableProperties( + properties: Map[String, String]) extends Operation(OP_SET_TBLPROPERTIES) { + override val parameters: Map[String, Any] = Map("properties" -> JsonUtils.toJson(properties)) + } + /** Recorded when the table properties are unset. */ + case class UnsetTableProperties( + propKeys: Seq[String], + ifExists: Boolean) extends Operation("UNSET TBLPROPERTIES") { + override val parameters: Map[String, Any] = Map( + "properties" -> JsonUtils.toJson(propKeys), + "ifExists" -> ifExists) + } + /** Recorded when dropping a table feature. */ + case class DropTableFeature( + featureName: String, + truncateHistory: Boolean) extends Operation("DROP FEATURE") { + override val parameters: Map[String, Any] = Map( + "featureName" -> featureName, + "truncateHistory" -> truncateHistory) + } + /** Recorded when columns are added. */ + case class AddColumns( + colsToAdd: Seq[QualifiedColTypeWithPositionForLog]) extends Operation("ADD COLUMNS") { + + override val parameters: Map[String, Any] = Map( + "columns" -> JsonUtils.toJson(colsToAdd.map { + case QualifiedColTypeWithPositionForLog(columnPath, column, colPosition) => + Map( + "column" -> structFieldToMap(columnPath, column) + ) ++ colPosition.map("position" -> _.toString) + })) + } + + /** Recorded when columns are dropped. */ + val OP_DROP_COLUMN = "DROP COLUMNS" + case class DropColumns( + colsToDrop: Seq[Seq[String]]) extends Operation(OP_DROP_COLUMN) { + + override val parameters: Map[String, Any] = Map( + "columns" -> JsonUtils.toJson(colsToDrop.map(UnresolvedAttribute(_).name))) + } + + /** Recorded when column is renamed */ + val OP_RENAME_COLUMN = "RENAME COLUMN" + case class RenameColumn(oldColumnPath: Seq[String], newColumnPath: Seq[String]) + extends Operation(OP_RENAME_COLUMN) { + override val parameters: Map[String, Any] = Map( + "oldColumnPath" -> UnresolvedAttribute(oldColumnPath).name, + "newColumnPath" -> UnresolvedAttribute(newColumnPath).name + ) + } + + /** Recorded when columns are changed. */ + case class ChangeColumn( + columnPath: Seq[String], + columnName: String, + newColumn: StructField, + colPosition: Option[String]) extends Operation("CHANGE COLUMN") { + + override val parameters: Map[String, Any] = Map( + "column" -> JsonUtils.toJson(structFieldToMap(columnPath, newColumn)) + ) ++ colPosition.map("position" -> _) + } + /** Recorded when columns are replaced. 
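+   * (e.g. `ALTER TABLE t REPLACE COLUMNS (id BIGINT, name STRING)`; illustrative SQL).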
*/ + case class ReplaceColumns( + columns: Seq[StructField]) extends Operation("REPLACE COLUMNS") { + + override val parameters: Map[String, Any] = Map( + "columns" -> JsonUtils.toJson(columns.map(structFieldToMap(Seq.empty, _)))) + } + + case class UpgradeProtocol(newProtocol: Protocol) extends Operation("UPGRADE PROTOCOL") { + override val parameters: Map[String, Any] = Map("newProtocol" -> JsonUtils.toJson(Map( + "minReaderVersion" -> newProtocol.minReaderVersion, + "minWriterVersion" -> newProtocol.minWriterVersion, + "readerFeatures" -> newProtocol.readerFeatures, + "writerFeatures" -> newProtocol.writerFeatures + ))) + } + + object ManualUpdate extends Operation("Manual Update") { + override val parameters: Map[String, Any] = Map.empty + } + + /** A commit without any actions. Could be used to force creation of new checkpoints. */ + object EmptyCommit extends Operation("Empty Commit") { + override val parameters: Map[String, Any] = Map.empty + } + + case class UpdateColumnMetadata( + operationName: String, + columns: Seq[(Seq[String], StructField)]) + extends Operation(operationName) { + override val parameters: Map[String, Any] = { + Map("columns" -> JsonUtils.toJson(columns.map { + case (path, field) => structFieldToMap(path, field) + })) + } + } + + case class UpdateSchema(oldSchema: StructType, newSchema: StructType) + extends Operation("UPDATE SCHEMA") { + override val parameters: Map[String, Any] = Map( + "oldSchema" -> JsonUtils.toJson(oldSchema), + "newSchema" -> JsonUtils.toJson(newSchema)) + } + + case class AddConstraint( + constraintName: String, expr: String) extends Operation("ADD CONSTRAINT") { + override val parameters: Map[String, Any] = Map("name" -> constraintName, "expr" -> expr) + } + + case class DropConstraint( + constraintName: String, expr: Option[String]) extends Operation("DROP CONSTRAINT") { + override val parameters: Map[String, Any] = { + expr.map { e => + Map("name" -> constraintName, "expr" -> e, "existed" -> "true") + }.getOrElse { + Map("name" -> constraintName, "existed" -> "false") + } + } + } + + /** Recorded when recomputing stats on the table. */ + case class ComputeStats(predicate: Seq[Expression]) + extends OperationWithPredicates("COMPUTE STATS", predicate) + + /** Recorded when restoring a Delta table to an older version. */ + val OP_RESTORE = "RESTORE" + case class Restore( + version: Option[Long], + timestamp: Option[String]) extends Operation(OP_RESTORE) { + override val parameters: Map[String, Any] = Map( + "version" -> version, + "timestamp" -> timestamp) + override def changesData: Boolean = true + + override val operationMetrics: Set[String] = DeltaOperationMetrics.RESTORE + } + + sealed abstract class OptimizeOrReorg(override val name: String, predicates: Seq[Expression]) + extends OperationWithPredicates(name, predicates) + /** parameter key to indicate whether it's an Auto Compaction */ + val AUTO_COMPACTION_PARAMETER_KEY = "auto" + + /** operation name for REORG command */ + val REORG_OPERATION_NAME = "REORG" + /** operation name for OPTIMIZE command */ + val OPTIMIZE_OPERATION_NAME = "OPTIMIZE" + /** parameter key to indicate which columns to z-order by */ + val ZORDER_PARAMETER_KEY = "zOrderBy" + + /** Recorded when optimizing the table. 
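+   * For example, `OPTIMIZE tbl WHERE part >= '2024-01-01' ZORDER BY (col)` (illustrative) would
+   * be logged with the partition predicate, `zOrderBy = ["col"]` and `auto = false`.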
*/ + case class Optimize( + predicate: Seq[Expression], + zOrderBy: Seq[String] = Seq.empty, + auto: Boolean = false + ) extends OptimizeOrReorg(OPTIMIZE_OPERATION_NAME, predicate) { + override val parameters: Map[String, Any] = super.parameters ++ Map( + ZORDER_PARAMETER_KEY -> JsonUtils.toJson(zOrderBy), + AUTO_COMPACTION_PARAMETER_KEY -> auto + ) + + override val operationMetrics: Set[String] = DeltaOperationMetrics.OPTIMIZE + } + + /** Recorded when cloning a Delta table into a new location. */ + val OP_CLONE = "CLONE" + case class Clone( + source: String, + sourceVersion: Long + ) extends Operation(OP_CLONE) { + override val parameters: Map[String, Any] = Map( + "source" -> source, + "sourceVersion" -> sourceVersion + ) + override def changesData: Boolean = true + override val operationMetrics: Set[String] = DeltaOperationMetrics.CLONE + } + + /** + * @param retentionCheckEnabled - whether retention check was enabled for this run of vacuum. + * @param specifiedRetentionMillis - specified retention interval + * @param defaultRetentionMillis - default retention period for the table + */ + case class VacuumStart( + retentionCheckEnabled: Boolean, + specifiedRetentionMillis: Option[Long], + defaultRetentionMillis: Long) extends Operation("VACUUM START") { + override val parameters: Map[String, Any] = Map( + "retentionCheckEnabled" -> retentionCheckEnabled, + "defaultRetentionMillis" -> defaultRetentionMillis + ) ++ specifiedRetentionMillis.map("specifiedRetentionMillis" -> _) + + override val operationMetrics: Set[String] = DeltaOperationMetrics.VACUUM_START + } + + /** + * @param status - whether the vacuum operation was successful; either "COMPLETED" or "FAILED" + */ + case class VacuumEnd(status: String) extends Operation(s"VACUUM END") { + override val parameters: Map[String, Any] = Map( + "status" -> status + ) + + override val operationMetrics: Set[String] = DeltaOperationMetrics.VACUUM_END + } + + /** Recorded when running REORG on the table. */ + case class Reorg( + predicate: Seq[Expression], + applyPurge: Boolean = true) extends OptimizeOrReorg(REORG_OPERATION_NAME, predicate) { + override val parameters: Map[String, Any] = super.parameters ++ Map( + "applyPurge" -> applyPurge + ) + + override val operationMetrics: Set[String] = DeltaOperationMetrics.OPTIMIZE + } + + + private def structFieldToMap(colPath: Seq[String], field: StructField): Map[String, Any] = { + Map( + "name" -> UnresolvedAttribute(colPath :+ field.name).name, + "type" -> field.dataType.typeName, + "nullable" -> field.nullable, + "metadata" -> JsonUtils.mapper.readValue[Map[String, Any]](field.metadata.json) + ) + } + + /** + * Qualified column type with position. We define a copy of the type here to avoid depending on + * the parser output classes in our logging. + */ + case class QualifiedColTypeWithPositionForLog( + columnPath: Seq[String], + column: StructField, + colPosition: Option[String]) + + /** Dummy operation only for testing with arbitrary operation names */ + case class TestOperation(operationName: String = "TEST") extends Operation(operationName) { + override val parameters: Map[String, Any] = Map.empty + } + + /** + * Helper method to convert a sequence of command predicates in the form of an + * [[Expression]]s to a sequence of Strings so be stored in the commit info. 
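+   * {{{
+   *   // illustrative: uses Expression.simpleString, so the output resembles Seq("(id#12L > 10)")
+   *   DeltaOperations.predicatesToString(Seq(deleteCondition))
+   * }}}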
+ */ + def predicatesToString(predicates: Seq[Expression]): Seq[String] = { + val maxToStringFields = SQLConf.get.maxToStringFields + predicates.map(_.simpleString(maxToStringFields)) + } + + /** Recorded when the table properties are set. */ + private val OP_UPGRADE_UNIFORM_BY_REORG = "REORG TABLE UPGRADE UNIFORM" + + /** + * recorded when upgrading a table set uniform properties by REORG TABLE ... UPGRADE UNIFORM + */ + case class UpgradeUniformProperties(properties: Map[String, String]) extends Operation( + OP_UPGRADE_UNIFORM_BY_REORG) { + override val parameters: Map[String, Any] = Map("properties" -> JsonUtils.toJson(properties)) + } +} + +private[delta] object DeltaOperationMetrics { + val WRITE = Set( + "numFiles", // number of files written + "numOutputBytes", // size in bytes of the written contents + "numOutputRows" // number of rows written + ) + + val STREAMING_UPDATE = Set( + "numAddedFiles", // number of files added + "numRemovedFiles", // number of files removed + "numOutputRows", // number of rows written + "numOutputBytes" // number of output writes + ) + + val DELETE = Set( + "numAddedFiles", // number of files added + "numRemovedFiles", // number of files removed + "numDeletionVectorsAdded", // number of deletion vectors added + "numDeletionVectorsRemoved", // number of deletion vectors removed + "numDeletionVectorsUpdated", // number of deletion vectors updated + "numAddedChangeFiles", // number of CDC files + "numDeletedRows", // number of rows removed + "numCopiedRows", // number of rows copied in the process of deleting files + "executionTimeMs", // time taken to execute the entire operation + "scanTimeMs", // time taken to scan the files for matches + "rewriteTimeMs", // time taken to rewrite the matched files + "numRemovedBytes", // number of bytes removed + "numAddedBytes" // number of bytes added + ) + + val WRITE_REPLACE_WHERE = Set( + "numFiles", // number of files written + "numOutputBytes", // size in bytes of the written + "numOutputRows", // number of rows written + "numRemovedFiles", // number of files removed + "numAddedChangeFiles", // number of CDC files + "numDeletedRows", // number of rows removed + "numCopiedRows", // number of rows copied in the process of deleting files + "numRemovedBytes" // number of bytes removed + ) + + val WRITE_REPLACE_WHERE_PARTITIONS = Set( + "numFiles", // number of files written + "numOutputBytes", // size in bytes of the written contents + "numOutputRows", // number of rows written + "numAddedChangeFiles", // number of CDC files + "numRemovedFiles", // number of files removed + // Records below only exist when DELTA_DML_METRICS_FROM_METADATA is enabled + "numCopiedRows", // number of rows copied + "numDeletedRows", // number of rows deleted + "numRemovedBytes" // number of bytes removed + ) + + /** + * Deleting the entire table or partition will record row level metrics when + * DELTA_DML_METRICS_FROM_METADATA is enabled + * * DELETE_PARTITIONS is used only in test to verify specific delete cases. 
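+   * (e.g. `DELETE FROM tbl WHERE part = 1` where `part` is a partition column; illustrative).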
+ */ + val DELETE_PARTITIONS = Set( + "numRemovedFiles", // number of files removed + "numAddedChangeFiles", // number of CDC files generated - generally 0 in this case + "numDeletionVectorsAdded", // number of deletion vectors added + "numDeletionVectorsRemoved", // number of deletion vectors removed + "numDeletionVectorsUpdated", // number of deletion vectors updated + "executionTimeMs", // time taken to execute the entire operation + "scanTimeMs", // time taken to scan the files for matches + "rewriteTimeMs", // time taken to rewrite the matched files + // Records below only exist when DELTA_DML_METRICS_FROM_METADATA is enabled + "numCopiedRows", // number of rows copied + "numDeletedRows", // number of rows deleted + "numAddedFiles", // number of files added + "numRemovedBytes", // number of bytes removed + "numAddedBytes" // number of bytes added + ) + + + trait MetricsTransformer { + /** + * Produce the output metric `metricName`, given all available metrics. + * + * If one or more input metrics are missing, the output metrics may be skipped by + * returning `None`. + */ + def transform( + metricName: String, + allMetrics: Map[String, SQLMetric]): Option[(String, Long)] + + def transformToString( + metricName: String, + allMetrics: Map[String, SQLMetric]): Option[(String, String)] = { + this.transform(metricName, allMetrics).map { case (name, metric) => + name -> metric.toString + } + } + } + + /** Pass metric on unaltered. */ + final object PassMetric extends MetricsTransformer { + override def transform( + metricName: String, + allMetrics: Map[String, SQLMetric]): Option[(String, Long)] = + allMetrics.get(metricName).map(metric => metricName -> metric.value) + } + + /** + * Produce a new metric by summing up the values of `inputMetrics`. + * + * Treats missing metrics at 0. + */ + final case class SumMetrics(inputMetrics: String*) + extends MetricsTransformer { + + override def transform( + metricName: String, + allMetrics: Map[String, SQLMetric]): Option[(String, Long)] = { + var atLeastOneMetricExists = false + val total = inputMetrics.map { name => + val metricValueOpt = allMetrics.get(name) + atLeastOneMetricExists |= metricValueOpt.isDefined + metricValueOpt.map(_.value).getOrElse(0L) + }.sum + if (atLeastOneMetricExists) { + Some(metricName -> total) + } else { + None + } + } + } + + val DELETION_VECTORS: Map[String, MetricsTransformer] = Map( + // Adding "numDeletionVectorsUpdated" here makes the values line up with how + // "numFilesAdded"/"numFilesRemoved" behave. + "numDeletionVectorsAdded" -> SumMetrics("numDeletionVectorsAdded", "numDeletionVectorsUpdated"), + "numDeletionVectorsRemoved" -> + SumMetrics("numDeletionVectorsRemoved", "numDeletionVectorsUpdated") + ) + + // The same as [[DELETION_VECTORS]] but with the "Target" prefix that is used by MERGE. + val MERGE_DELETION_VECTORS = Map( + // Adding "numDeletionVectorsUpdated" here makes the values line up with how + // "numFilesAdded"/"numFilesRemoved" behave. + "numTargetDeletionVectorsAdded" -> + SumMetrics("numTargetDeletionVectorsAdded", "numTargetDeletionVectorsUpdated"), + "numTargetDeletionVectorsRemoved" -> + SumMetrics("numTargetDeletionVectorsRemoved", "numTargetDeletionVectorsUpdated") + ) + + val TRUNCATE = Set( + "numRemovedFiles", // number of files removed + "executionTimeMs" // time taken to execute the entire operation + ) + + val CONVERT = Set( + "numConvertedFiles" // number of parquet files that have been converted. 
+ ) + + val MERGE = Set( + "numSourceRows", // number of rows in the source dataframe + "numTargetRowsInserted", // number of rows inserted into the target table. + "numTargetRowsUpdated", // number of rows updated in the target table. + "numTargetRowsMatchedUpdated", // number of rows updated by a matched clause. + // number of rows updated by a not matched by source clause. + "numTargetRowsNotMatchedBySourceUpdated", + "numTargetRowsDeleted", // number of rows deleted in the target table. + "numTargetRowsMatchedDeleted", // number of rows deleted by a matched clause. + // number of rows deleted by a not matched by source clause. + "numTargetRowsNotMatchedBySourceDeleted", + "numTargetRowsCopied", // number of target rows copied + "numTargetBytesAdded", // number of target bytes added + "numTargetBytesRemoved", // number of target bytes removed + "numOutputRows", // total number of rows written out + "numTargetFilesAdded", // num files added to the sink(target) + "numTargetFilesRemoved", // number of files removed from the sink(target) + "numTargetChangeFilesAdded", // number of CDC files + "executionTimeMs", // time taken to execute the entire operation + "scanTimeMs", // time taken to scan the files for matches + "rewriteTimeMs", // time taken to rewrite the matched files + "numTargetDeletionVectorsAdded", // number of deletion vectors added + "numTargetDeletionVectorsRemoved", // number of deletion vectors removed + "numTargetDeletionVectorsUpdated" // number of deletion vectors updated + ) + + val UPDATE = Set( + "numAddedFiles", // number of files added + "numRemovedFiles", // number of files removed + "numAddedChangeFiles", // number of CDC files + "numDeletionVectorsAdded", // number of deletion vectors added + "numDeletionVectorsRemoved", // number of deletion vectors removed + "numDeletionVectorsUpdated", // number of deletion vectors updated + "numUpdatedRows", // number of rows updated + "numCopiedRows", // number of rows just copied over in the process of updating files. 
+ "executionTimeMs", // time taken to execute the entire operation + "scanTimeMs", // time taken to scan the files for matches + "rewriteTimeMs", // time taken to rewrite the matched files + "numRemovedBytes", // number of bytes removed + "numAddedBytes" // number of bytes added + ) + + val OPTIMIZE = Set( + "numAddedFiles", // number of data files added + "numRemovedFiles", // number of data files removed + "numAddedBytes", // number of data bytes added by optimize + "numRemovedBytes", // number of data bytes removed by optimize + "minFileSize", // the size of the smallest file + "p25FileSize", // the size of the 25th percentile file + "p50FileSize", // the median file size + "p75FileSize", // the 75th percentile of the file sizes + "maxFileSize", // the size of the largest file + "numDeletionVectorsRemoved" // number of deletion vectors removed by optimize + ) + + val RESTORE = Set( + "tableSizeAfterRestore", // table size in bytes after restore + "numOfFilesAfterRestore", // number of files in the table after restore + "numRemovedFiles", // number of files removed by the restore operation + "numRestoredFiles", // number of files that were added as a result of the restore + "removedFilesSize", // size in bytes of files removed by the restore + "restoredFilesSize" // size in bytes of files added by the restore + ) + + val CLONE = Set( + "sourceTableSize", // size in bytes of source table at version + "sourceNumOfFiles", // number of files in source table at version + "numRemovedFiles", // number of files removed from target table if delta table was replaced + "numCopiedFiles", // number of files that were cloned - 0 for shallow tables + "removedFilesSize", // size in bytes of files removed from an existing Delta table if one exists + "copiedFilesSize" // size of files copied - 0 for shallow tables + ) + + val VACUUM_START = Set( + "numFilesToDelete", // number of files that will be deleted by vacuum + "sizeOfDataToDelete" // total size in bytes of files that will be deleted by vacuum + ) + + val VACUUM_END = Set( + "numDeletedFiles", // number of files deleted by vacuum + "numVacuumedDirectories" // number of directories vacuumed + ) + +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaOptions.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaOptions.scala new file mode 100644 index 00000000000..dd01fc0ba2f --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaOptions.scala @@ -0,0 +1,398 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.util.Locale +import java.util.regex.PatternSyntaxException + +import scala.util.Try +import scala.util.matching.Regex + +import org.apache.spark.sql.delta.DeltaOptions.{DATA_CHANGE_OPTION, MERGE_SCHEMA_OPTION, OVERWRITE_SCHEMA_OPTION, PARTITION_OVERWRITE_MODE_OPTION} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.network.util.{ByteUnit, JavaUtils} +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.internal.SQLConf + + +trait DeltaOptionParser { + protected def sqlConf: SQLConf + protected def options: CaseInsensitiveMap[String] + + def toBoolean(input: String, name: String): Boolean = { + Try(input.toBoolean).toOption.getOrElse { + throw DeltaErrors.illegalDeltaOptionException(name, input, "must be 'true' or 'false'") + } + } +} + +trait DeltaWriteOptions + extends DeltaWriteOptionsImpl + with DeltaOptionParser { + + import DeltaOptions._ + + val replaceWhere: Option[String] = options.get(REPLACE_WHERE_OPTION) + val userMetadata: Option[String] = options.get(USER_METADATA_OPTION) + + /** + * Whether to add an adaptive shuffle before writing out the files to break skew, and coalesce + * data into chunkier files. + */ + val optimizeWrite: Option[Boolean] = options.get(OPTIMIZE_WRITE_OPTION) + .map(toBoolean(_, OPTIMIZE_WRITE_OPTION)) + +} + +trait DeltaWriteOptionsImpl extends DeltaOptionParser { + import DeltaOptions._ + + /** + * Whether the user has enabled auto schema merging in writes using either a DataFrame option + * or SQL Session configuration. Automerging is off when table ACLs are enabled. + * We always respect the DataFrame writer configuration over the session config. + */ + def canMergeSchema: Boolean = { + options.get(MERGE_SCHEMA_OPTION) + .map(toBoolean(_, MERGE_SCHEMA_OPTION)) + .getOrElse(sqlConf.getConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE)) + } + + /** + * Whether to allow overwriting the schema of a Delta table in an overwrite mode operation. If + * ACLs are enabled, we can't change the schema of an operation through a write, which requires + * MODIFY permissions, when schema changes require OWN permissions. + */ + def canOverwriteSchema: Boolean = { + options.get(OVERWRITE_SCHEMA_OPTION).exists(toBoolean(_, OVERWRITE_SCHEMA_OPTION)) + } + + /** + * Whether to write new data to the table or just rearrange data that is already + * part of the table. This option declares that the data being written by this job + * does not change any data in the table and merely rearranges existing data. + * This makes sure streaming queries reading from this table will not see any new changes + */ + def rearrangeOnly: Boolean = { + options.get(DATA_CHANGE_OPTION).exists(!toBoolean(_, DATA_CHANGE_OPTION)) + } + + val txnVersion = options.get(TXN_VERSION).map { str => + Try(str.toLong).toOption.filter(_ >= 0).getOrElse { + throw DeltaErrors.illegalDeltaOptionException( + TXN_VERSION, str, "must be a non-negative integer") + } + } + + val txnAppId = options.get(TXN_APP_ID) + + private def validateIdempotentWriteOptions(): Unit = { + // Either both txnVersion and txnAppId must be specified to get idempotent writes or + // neither must be given. In all other cases, throw an exception. 
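+    // Illustrative DataFrameWriter usage (both options must be supplied together):
+    //   df.write.format("delta")
+    //     .option(DeltaOptions.TXN_APP_ID, "myEtlJob")
+    //     .option(DeltaOptions.TXN_VERSION, 42L)
+    //     .save(path)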
+ val numOptions = txnVersion.size + txnAppId.size + if (numOptions != 0 && numOptions != 2) { + throw DeltaErrors.invalidIdempotentWritesOptionsException("Both txnVersion and txnAppId " + + "must be specified for idempotent data frame writes") + } + } + + validateIdempotentWriteOptions() + + /** Whether partitionOverwriteMode is provided as a DataFrameWriter option. */ + val partitionOverwriteModeInOptions: Boolean = + options.contains(PARTITION_OVERWRITE_MODE_OPTION) + + /** Whether to only overwrite partitions that have data written into it at runtime. */ + def isDynamicPartitionOverwriteMode: Boolean = { + val mode = options.get(PARTITION_OVERWRITE_MODE_OPTION) + .getOrElse(sqlConf.getConf(SQLConf.PARTITION_OVERWRITE_MODE)) + val modeIsDynamic = mode.equalsIgnoreCase(PARTITION_OVERWRITE_MODE_DYNAMIC) + if (!sqlConf.getConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED)) { + // Raise an exception when DYNAMIC_PARTITION_OVERWRITE_ENABLED=false + // but users explicitly request dynamic partition overwrite. + if (modeIsDynamic) { + throw DeltaErrors.deltaDynamicPartitionOverwriteDisabled() + } + // If dynamic partition overwrite mode is disabled, fallback to the default behavior + false + } else { + if (!DeltaOptions.PARTITION_OVERWRITE_MODE_VALUES.exists(mode.equalsIgnoreCase(_))) { + val acceptableStr = + DeltaOptions.PARTITION_OVERWRITE_MODE_VALUES.map("'" + _ + "'").mkString(" or ") + throw DeltaErrors.illegalDeltaOptionException( + PARTITION_OVERWRITE_MODE_OPTION, mode, s"must be ${acceptableStr}" + ) + } + modeIsDynamic + } + } +} + +trait DeltaReadOptions extends DeltaOptionParser { + import DeltaOptions._ + + val maxFilesPerTrigger = options.get(MAX_FILES_PER_TRIGGER_OPTION).map { str => + Try(str.toInt).toOption.filter(_ > 0).getOrElse { + throw DeltaErrors.illegalDeltaOptionException( + MAX_FILES_PER_TRIGGER_OPTION, str, "must be a positive integer") + } + } + + val maxBytesPerTrigger = options.get(MAX_BYTES_PER_TRIGGER_OPTION).map { str => + Try(JavaUtils.byteStringAs(str, ByteUnit.BYTE)).toOption.filter(_ > 0).getOrElse { + throw DeltaErrors.illegalDeltaOptionException( + MAX_BYTES_PER_TRIGGER_OPTION, str, "must be a size configuration such as '10g'") + } + } + + val ignoreFileDeletion = options.get(IGNORE_FILE_DELETION_OPTION) + .exists(toBoolean(_, IGNORE_FILE_DELETION_OPTION)) + + val ignoreChanges = options.get(IGNORE_CHANGES_OPTION).exists(toBoolean(_, IGNORE_CHANGES_OPTION)) + + val ignoreDeletes = options.get(IGNORE_DELETES_OPTION).exists(toBoolean(_, IGNORE_DELETES_OPTION)) + + val skipChangeCommits = options.get(SKIP_CHANGE_COMMITS_OPTION) + .exists(toBoolean(_, SKIP_CHANGE_COMMITS_OPTION)) + + val failOnDataLoss = options.get(FAIL_ON_DATA_LOSS_OPTION) + .forall(toBoolean(_, FAIL_ON_DATA_LOSS_OPTION)) // thanks to forall: by default true + + val readChangeFeed = options.get(CDC_READ_OPTION).exists(toBoolean(_, CDC_READ_OPTION)) || + options.get(CDC_READ_OPTION_LEGACY).exists(toBoolean(_, CDC_READ_OPTION_LEGACY)) + + + val excludeRegex: Option[Regex] = try options.get(EXCLUDE_REGEX_OPTION).map(_.r) catch { + case e: PatternSyntaxException => + throw DeltaErrors.excludeRegexOptionException(EXCLUDE_REGEX_OPTION, e) + } + + val startingVersion: Option[DeltaStartingVersion] = options.get(STARTING_VERSION_OPTION).map { + case "latest" => StartingVersionLatest + case str => + Try(str.toLong).toOption.filter(_ >= 0).map(StartingVersion).getOrElse{ + throw DeltaErrors.illegalDeltaOptionException( + STARTING_VERSION_OPTION, str, "must be greater than or equal to zero") + } + 
} + + val startingTimestamp = options.get(STARTING_TIMESTAMP_OPTION) + + private def provideOneStartingOption(): Unit = { + if (startingTimestamp.isDefined && startingVersion.isDefined) { + throw DeltaErrors.startingVersionAndTimestampBothSetException( + STARTING_VERSION_OPTION, + STARTING_TIMESTAMP_OPTION) + } + } + + def containsStartingVersionOrTimestamp: Boolean = { + options.contains(STARTING_VERSION_OPTION) || options.contains(STARTING_TIMESTAMP_OPTION) + } + + provideOneStartingOption() + + val schemaTrackingLocation = options.get(SCHEMA_TRACKING_LOCATION) + + val sourceTrackingId = options.get(STREAMING_SOURCE_TRACKING_ID) +} + + +/** + * Options for the Delta data source. + */ +class DeltaOptions( + @transient protected[delta] val options: CaseInsensitiveMap[String], + @transient protected val sqlConf: SQLConf) + extends DeltaWriteOptions with DeltaReadOptions with Serializable { + + DeltaOptions.verifyOptions(options) + + def this(options: Map[String, String], conf: SQLConf) = this(CaseInsensitiveMap(options), conf) +} + +object DeltaOptions extends DeltaLogging { + + /** An option to overwrite only the data that matches predicates over partition columns. */ + val REPLACE_WHERE_OPTION = "replaceWhere" + /** An option to allow automatic schema merging during a write operation. */ + val MERGE_SCHEMA_OPTION = "mergeSchema" + /** An option to allow overwriting schema and partitioning during an overwrite write operation. */ + val OVERWRITE_SCHEMA_OPTION = "overwriteSchema" + /** An option to specify user-defined metadata in commitInfo */ + val USER_METADATA_OPTION = "userMetadata" + + val PARTITION_OVERWRITE_MODE_OPTION = "partitionOverwriteMode" + val PARTITION_OVERWRITE_MODE_DYNAMIC = "DYNAMIC" + val PARTITION_OVERWRITE_MODE_STATIC = "STATIC" + val PARTITION_OVERWRITE_MODE_VALUES = + Set(PARTITION_OVERWRITE_MODE_STATIC, PARTITION_OVERWRITE_MODE_DYNAMIC) + + val MAX_FILES_PER_TRIGGER_OPTION = "maxFilesPerTrigger" + val MAX_FILES_PER_TRIGGER_OPTION_DEFAULT = 1000 + val MAX_BYTES_PER_TRIGGER_OPTION = "maxBytesPerTrigger" + val EXCLUDE_REGEX_OPTION = "excludeRegex" + val IGNORE_FILE_DELETION_OPTION = "ignoreFileDeletion" + val IGNORE_CHANGES_OPTION = "ignoreChanges" + val IGNORE_DELETES_OPTION = "ignoreDeletes" + val SKIP_CHANGE_COMMITS_OPTION = "skipChangeCommits" + val FAIL_ON_DATA_LOSS_OPTION = "failOnDataLoss" + val OPTIMIZE_WRITE_OPTION = "optimizeWrite" + val DATA_CHANGE_OPTION = "dataChange" + val STARTING_VERSION_OPTION = "startingVersion" + val STARTING_TIMESTAMP_OPTION = "startingTimestamp" + val CDC_START_VERSION = "startingVersion" + val CDC_START_TIMESTAMP = "startingTimestamp" + val CDC_END_VERSION = "endingVersion" + val CDC_END_TIMESTAMP = "endingTimestamp" + val CDC_READ_OPTION = "readChangeFeed" + val CDC_READ_OPTION_LEGACY = "readChangeData" + + val VERSION_AS_OF = "versionAsOf" + val TIMESTAMP_AS_OF = "timestampAsOf" + + val COMPRESSION = "compression" + val MAX_RECORDS_PER_FILE = "maxRecordsPerFile" + val TXN_APP_ID = "txnAppId" + val TXN_VERSION = "txnVersion" + + /** + * An option to allow column mapping enabled tables to conduct schema evolution during streaming + */ + val SCHEMA_TRACKING_LOCATION = "schemaTrackingLocation" + /** + * Alias for `schemaTrackingLocation`, so users familiar with AutoLoader can migrate easily. 
+ */ + val SCHEMA_TRACKING_LOCATION_ALIAS = "schemaLocation" + /** + * An option to instruct DeltaSource to pick a customized subdirectory for schema log in case of + * rare conflicts such as when a stream needs to do a self-union of two Delta sources from the + * same table. + * The final schema log location will be $parent/_schema_log_${tahoeId}_${sourceTrackingId}. + */ + val STREAMING_SOURCE_TRACKING_ID = "streamingSourceTrackingId" + + /** + * An option to control if delta will write partition columns to data files + */ + val WRITE_PARTITION_COLUMNS = "writePartitionColumns" + + val validOptionKeys : Set[String] = Set( + REPLACE_WHERE_OPTION, + MERGE_SCHEMA_OPTION, + EXCLUDE_REGEX_OPTION, + OVERWRITE_SCHEMA_OPTION, + USER_METADATA_OPTION, + PARTITION_OVERWRITE_MODE_OPTION, + MAX_FILES_PER_TRIGGER_OPTION, + IGNORE_FILE_DELETION_OPTION, + IGNORE_CHANGES_OPTION, + IGNORE_DELETES_OPTION, + FAIL_ON_DATA_LOSS_OPTION, + OPTIMIZE_WRITE_OPTION, + DATA_CHANGE_OPTION, + STARTING_TIMESTAMP_OPTION, + STARTING_VERSION_OPTION, + CDC_READ_OPTION, + CDC_READ_OPTION_LEGACY, + CDC_START_TIMESTAMP, + CDC_END_TIMESTAMP, + CDC_START_VERSION, + CDC_END_VERSION, + COMPRESSION, + MAX_RECORDS_PER_FILE, + TXN_APP_ID, + TXN_VERSION, + SCHEMA_TRACKING_LOCATION, + SCHEMA_TRACKING_LOCATION_ALIAS, + STREAMING_SOURCE_TRACKING_ID, + "queryName", + "checkpointLocation", + "path", + VERSION_AS_OF, + TIMESTAMP_AS_OF, + WRITE_PARTITION_COLUMNS + ) + + + /** Iterates over all user passed options and logs any that are not valid. */ + def verifyOptions(options: CaseInsensitiveMap[String]): Unit = { + val invalidUserOptions = SQLConf.get.redactOptions(options -- + validOptionKeys.map(_.toLowerCase(Locale.ROOT))) + if (invalidUserOptions.nonEmpty) { + recordDeltaEvent(null, + "delta.option.invalid", + data = invalidUserOptions + ) + } + } +} + +/** + * Definitions for the batch read schema mode for CDF + */ +sealed trait DeltaBatchCDFSchemaMode { + def name: String +} + +/** + * `latest` batch CDF schema mode specifies that the latest schema should be used when serving + * the CDF batch. + */ +case object BatchCDFSchemaLatest extends DeltaBatchCDFSchemaMode { + val name = "latest" +} + +/** + * `endVersion` batch CDF schema mode specifies that the query range's end version's schema should + * be used for serving the CDF batch. + * This is the current default for column mapping enabled tables so we could read using the exact + * schema at the versions being queried to reduce schema read compatibility mismatches. + */ +case object BatchCDFSchemaEndVersion extends DeltaBatchCDFSchemaMode { + val name = "endversion" +} + +/** + * `legacy` batch CDF schema mode specifies that neither latest nor end version's schema is + * strictly used for serving the CDF batch, e.g. when user uses TimeTravel with batch CDF and wants + * to respect the time travelled schema. + * This is the current default for non-column mapping tables. + */ +case object BatchCDFSchemaLegacy extends DeltaBatchCDFSchemaMode { + val name = "legacy" +} + +object DeltaBatchCDFSchemaMode { + def apply(name: String): DeltaBatchCDFSchemaMode = { + name.toLowerCase(Locale.ROOT) match { + case BatchCDFSchemaLatest.name => BatchCDFSchemaLatest + case BatchCDFSchemaEndVersion.name => BatchCDFSchemaEndVersion + case BatchCDFSchemaLegacy.name => BatchCDFSchemaLegacy + } + } +} + +/** + * Definitions for the starting version of a Delta stream. 
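+ * e.g. `.option("startingVersion", "latest")` maps to [[StartingVersionLatest]], while
+ * `.option("startingVersion", "3")` maps to `StartingVersion(3)` (see [[DeltaReadOptions]]).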
+ */ +sealed trait DeltaStartingVersion +case object StartingVersionLatest extends DeltaStartingVersion +case class StartingVersion(version: Long) extends DeltaStartingVersion diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala new file mode 100644 index 00000000000..6ed4a117fd5 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala @@ -0,0 +1,440 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.net.URI + +import scala.collection.mutable.ArrayBuffer +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.RowIndexFilterType +import org.apache.spark.sql.delta.DeltaParquetFileFormat._ +import org.apache.spark.sql.delta.actions.{DeletionVectorDescriptor, Metadata, Protocol} +import org.apache.spark.sql.delta.deletionvectors.{DropMarkedRowsFilter, KeepAllRowsFilter, KeepMarkedRowsFilter} +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce.Job +import org.apache.parquet.hadoop.ParquetOutputFormat +import org.apache.parquet.hadoop.util.ContextUtil + +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.datasources.OutputWriterFactory +import org.apache.spark.sql.execution.datasources.PartitionedFile +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.execution.vectorized.{OffHeapColumnVector, OnHeapColumnVector, WritableColumnVector} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.Filter +import org.apache.spark.sql.types.{ByteType, LongType, MetadataBuilder, StructField, StructType} +import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnarBatchRow, ColumnVector} +import org.apache.spark.util.SerializableConfiguration + +/** + * A thin wrapper over the Parquet file format to support + * - columns names without restrictions. + * - populated a column from the deletion vector of this file (if exists) to indicate + * whether the row is deleted or not according to the deletion vector. Consumers + * of this scan can use the column values to filter out the deleted rows. + */ +case class DeltaParquetFileFormat( + protocol: Protocol, + metadata: Metadata, + isSplittable: Boolean = true, + disablePushDowns: Boolean = false, + tablePath: Option[String] = None, + broadcastDvMap: Option[Broadcast[Map[URI, DeletionVectorDescriptorWithFilterType]]] = None, + broadcastHadoopConf: Option[Broadcast[SerializableConfiguration]] = None) + extends ParquetFileFormat { + // Validate either we have all arguments for DV enabled read or none of them. 
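+  // (These arguments are only expected to be set together via copyWithDVInfo, which also
+  // disables file splitting and filter pushdown.)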
+ if (hasDeletionVectorMap) { + require(tablePath.isDefined && !isSplittable && disablePushDowns, + "Wrong arguments for Delta table scan with deletion vectors") + } + + val columnMappingMode: DeltaColumnMappingMode = metadata.columnMappingMode + val referenceSchema: StructType = metadata.schema + + if (columnMappingMode == IdMapping) { + val requiredReadConf = SQLConf.PARQUET_FIELD_ID_READ_ENABLED + require(SparkSession.getActiveSession.exists(_.sessionState.conf.getConf(requiredReadConf)), + s"${requiredReadConf.key} must be enabled to support Delta id column mapping mode") + val requiredWriteConf = SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED + require(SparkSession.getActiveSession.exists(_.sessionState.conf.getConf(requiredWriteConf)), + s"${requiredWriteConf.key} must be enabled to support Delta id column mapping mode") + } + + /** + * prepareSchemaForRead must only be used for parquet read. + * It removes "PARQUET_FIELD_ID_METADATA_KEY" for name mapping mode which address columns by + * physical name instead of id. + */ + def prepareSchemaForRead(inputSchema: StructType): StructType = { + val schema = DeltaColumnMapping.createPhysicalSchema( + inputSchema, referenceSchema, columnMappingMode) + if (columnMappingMode == NameMapping) { + SchemaMergingUtils.transformColumns(schema) { (_, field, _) => + field.copy(metadata = new MetadataBuilder() + .withMetadata(field.metadata) + .remove(DeltaColumnMapping.PARQUET_FIELD_ID_METADATA_KEY) + .remove(DeltaColumnMapping.PARQUET_FIELD_NESTED_IDS_METADATA_KEY) + .build()) + } + } else schema + } + + override def isSplitable( + sparkSession: SparkSession, options: Map[String, String], path: Path): Boolean = isSplittable + + def hasDeletionVectorMap: Boolean = broadcastDvMap.isDefined && broadcastHadoopConf.isDefined + + /** + * We sometimes need to replace FileFormat within LogicalPlans, so we have to override + * `equals` to ensure file format changes are captured + */ + override def equals(other: Any): Boolean = { + other match { + case ff: DeltaParquetFileFormat => + ff.columnMappingMode == columnMappingMode && + ff.referenceSchema == referenceSchema && + ff.isSplittable == isSplittable && + ff.disablePushDowns == disablePushDowns + case _ => false + } + } + + override def hashCode(): Int = getClass.getCanonicalName.hashCode() + + override def buildReaderWithPartitionValues( + sparkSession: SparkSession, + dataSchema: StructType, + partitionSchema: StructType, + requiredSchema: StructType, + filters: Seq[Filter], + options: Map[String, String], + hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { + val pushdownFilters = if (disablePushDowns) Seq.empty else filters + + val parquetDataReader: PartitionedFile => Iterator[InternalRow] = + super.buildReaderWithPartitionValues( + sparkSession, + prepareSchemaForRead(dataSchema), + prepareSchemaForRead(partitionSchema), + prepareSchemaForRead(requiredSchema), + pushdownFilters, + options, + hadoopConf) + + val schemaWithIndices = requiredSchema.fields.zipWithIndex + def findColumn(name: String): Option[ColumnMetadata] = { + val results = schemaWithIndices.filter(_._1.name == name) + if (results.length > 1) { + throw new IllegalArgumentException( + s"There are more than one column with name=`$name` requested in the reader output") + } + results.headOption.map(e => ColumnMetadata(e._2, e._1)) + } + val isRowDeletedColumn = findColumn(IS_ROW_DELETED_COLUMN_NAME) + val rowIndexColumn = findColumn(ROW_INDEX_COLUMN_NAME) + + if (isRowDeletedColumn.isEmpty && rowIndexColumn.isEmpty) { + 
return parquetDataReader // no additional metadata is needed. + } else { + // verify the file splitting and filter pushdown are disabled. The new additional + // metadata columns cannot be generated with file splitting and filter pushdowns + require(!isSplittable, "Cannot generate row index related metadata with file splitting") + require(disablePushDowns, "Cannot generate row index related metadata with filter pushdown") + } + + if (hasDeletionVectorMap && isRowDeletedColumn.isEmpty) { + throw new IllegalArgumentException( + s"Expected a column $IS_ROW_DELETED_COLUMN_NAME in the schema") + } + + val useOffHeapBuffers = sparkSession.sessionState.conf.offHeapColumnVectorEnabled + (partitionedFile: PartitionedFile) => { + val rowIteratorFromParquet = parquetDataReader(partitionedFile) + try { + val iterToReturn = + iteratorWithAdditionalMetadataColumns( + partitionedFile, + rowIteratorFromParquet, + isRowDeletedColumn, + useOffHeapBuffers = useOffHeapBuffers, + rowIndexColumn = rowIndexColumn) + iterToReturn.asInstanceOf[Iterator[InternalRow]] + } catch { + case NonFatal(e) => + // Close the iterator if it is a closeable resource. The `ParquetFileFormat` opens + // the file and returns `RecordReaderIterator` (which implements `AutoCloseable` and + // `Iterator`) instance as a `Iterator`. + rowIteratorFromParquet match { + case resource: AutoCloseable => closeQuietly(resource) + case _ => // do nothing + } + throw e + } + } + } + + override def supportFieldName(name: String): Boolean = { + if (columnMappingMode != NoMapping) true else super.supportFieldName(name) + } + + override def metadataSchemaFields: Seq[StructField] = { + // Parquet reader in Spark has a bug where a file containing 2b+ rows in a single rowgroup + // causes it to run out of the `Integer` range (TODO: Create a SPARK issue) + // For Delta Parquet readers don't expose the row_index field as a metadata field. + super.metadataSchemaFields.filter(field => field != ParquetFileFormat.ROW_INDEX_FIELD) + } + + override def prepareWrite( + sparkSession: SparkSession, + job: Job, + options: Map[String, String], + dataSchema: StructType): OutputWriterFactory = { + val factory = super.prepareWrite(sparkSession, job, options, dataSchema) + val conf = ContextUtil.getConfiguration(job) + // Always write timestamp as TIMESTAMP_MICROS for Iceberg compat based on Iceberg spec + if (IcebergCompatV1.isEnabled(metadata) || IcebergCompatV2.isEnabled(metadata)) { + conf.set(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key, + SQLConf.ParquetOutputTimestampType.TIMESTAMP_MICROS.toString) + } + if (IcebergCompatV2.isEnabled(metadata)) { + // For Uniform with IcebergCompatV2, we need to write nested field IDs for list and map + // types to the parquet schema. Spark currently does not support it so we hook in our + // own write support class. + ParquetOutputFormat.setWriteSupportClass(job, classOf[DeltaParquetWriteSupport]) + } + factory + } + + def copyWithDVInfo( + tablePath: String, + broadcastDvMap: Broadcast[Map[URI, DeletionVectorDescriptorWithFilterType]], + broadcastHadoopConf: Broadcast[SerializableConfiguration]): DeltaParquetFileFormat = { + this.copy( + isSplittable = false, + disablePushDowns = true, + tablePath = Some(tablePath), + broadcastDvMap = Some(broadcastDvMap), + broadcastHadoopConf = Some(broadcastHadoopConf)) + } + + /** + * Modifies the data read from underlying Parquet reader by populating one or both of the + * following metadata columns. 
+ * - [[IS_ROW_DELETED_COLUMN_NAME]] - row deleted status from deletion vector corresponding + * to this file + * - [[ROW_INDEX_COLUMN_NAME]] - index of the row within the file. + */ + private def iteratorWithAdditionalMetadataColumns( + partitionedFile: PartitionedFile, + iterator: Iterator[Object], + isRowDeletedColumn: Option[ColumnMetadata], + rowIndexColumn: Option[ColumnMetadata], + useOffHeapBuffers: Boolean): Iterator[Object] = { + val pathUri = partitionedFile.pathUri + + val rowIndexFilter = isRowDeletedColumn.map { col => + // Fetch the DV descriptor from the broadcast map and create a row index filter + broadcastDvMap.get.value + .get(pathUri) + .map { case DeletionVectorDescriptorWithFilterType(dvDescriptor, filterType) => + filterType match { + case i if i == RowIndexFilterType.IF_CONTAINED => + DropMarkedRowsFilter.createInstance( + dvDescriptor, + broadcastHadoopConf.get.value.value, + tablePath.map(new Path(_))) + case i if i == RowIndexFilterType.IF_NOT_CONTAINED => + KeepMarkedRowsFilter.createInstance( + dvDescriptor, + broadcastHadoopConf.get.value.value, + tablePath.map(new Path(_))) + } + } + .getOrElse(KeepAllRowsFilter) + } + + val metadataColumns = Seq(isRowDeletedColumn, rowIndexColumn).filter(_.nonEmpty).map(_.get) + + // Unfortunately there is no way to verify the Parquet index is starting from 0. + // We disable the splits, so the assumption is ParquetFileFormat respects that + var rowIndex: Long = 0 + + // Used only when non-column row batches are received from the Parquet reader + val tempVector = new OnHeapColumnVector(1, ByteType) + + iterator.map { row => + row match { + case batch: ColumnarBatch => // When vectorized Parquet reader is enabled + val size = batch.numRows() + // Create vectors for all needed metadata columns. + // We can't use the one from Parquet reader as it set the + // [[WritableColumnVector.isAllNulls]] to true and it can't be reset with using any + // public APIs. + trySafely(useOffHeapBuffers, size, metadataColumns) { writableVectors => + val indexVectorTuples = new ArrayBuffer[(Int, ColumnVector)] + var index = 0 + isRowDeletedColumn.foreach { columnMetadata => + val isRowDeletedVector = writableVectors(index) + rowIndexFilter.get + .materializeIntoVector(rowIndex, rowIndex + size, isRowDeletedVector) + indexVectorTuples += (columnMetadata.index -> isRowDeletedVector) + index += 1 + } + + rowIndexColumn.foreach { columnMetadata => + val rowIndexVector = writableVectors(index) + // populate the row index column value + for (i <- 0 until size) { + rowIndexVector.putLong(i, rowIndex + i) + } + + indexVectorTuples += (columnMetadata.index -> rowIndexVector) + index += 1 + } + + val newBatch = replaceVectors(batch, indexVectorTuples.toSeq: _*) + rowIndex += size + newBatch + } + + case columnarRow: ColumnarBatchRow => + // When vectorized reader is enabled but returns immutable rows instead of + // columnar batches [[ColumnarBatchRow]]. So we have to copy the row as a + // mutable [[InternalRow]] and set the `row_index` and `is_row_deleted` + // column values. This is not efficient. It should affect only the wide + // tables. 
https://github.com/delta-io/delta/issues/2246
+          val newRow = columnarRow.copy()
+          isRowDeletedColumn.foreach { columnMetadata =>
+            rowIndexFilter.get.materializeIntoVector(rowIndex, rowIndex + 1, tempVector)
+            newRow.setByte(columnMetadata.index, tempVector.getByte(0))
+          }
+
+          rowIndexColumn.foreach(columnMetadata => newRow.setLong(columnMetadata.index, rowIndex))
+          rowIndex += 1
+          newRow
+
+        case rest: InternalRow => // When vectorized Parquet reader is disabled
+          // Temporary vector variable used to get DV values from RowIndexFilter
+          // Currently the RowIndexFilter only supports writing into a columnar vector
+          // and doesn't have methods to get the DV value for a specific row index.
+          // TODO: This is not efficient, but it is ok given the default reader is vectorized
+          isRowDeletedColumn.foreach { columnMetadata =>
+            rowIndexFilter.get.materializeIntoVector(rowIndex, rowIndex + 1, tempVector)
+            rest.setByte(columnMetadata.index, tempVector.getByte(0))
+          }
+
+          rowIndexColumn.foreach(columnMetadata => rest.setLong(columnMetadata.index, rowIndex))
+          rowIndex += 1
+          rest
+        case others =>
+          throw new RuntimeException(
+            s"Parquet reader returned an unknown row type: ${others.getClass.getName}")
+      }
+    }
+  }
+}
+
+object DeltaParquetFileFormat {
+  /**
+   * Column name used to identify whether the row read from the parquet file is marked
+   * as deleted according to the Delta table deletion vectors
+   */
+  val IS_ROW_DELETED_COLUMN_NAME = "__delta_internal_is_row_deleted"
+  val IS_ROW_DELETED_STRUCT_FIELD = StructField(IS_ROW_DELETED_COLUMN_NAME, ByteType)
+
+  /** Column name used to expose the index of the row within the file */
+  val ROW_INDEX_COLUMN_NAME = "__delta_internal_row_index"
+  val ROW_INDEX_STRUCT_FILED = StructField(ROW_INDEX_COLUMN_NAME, LongType)
+
+  /** Utility method to create a new writable vector */
+  private def newVector(
+      useOffHeapBuffers: Boolean, size: Int, dataType: StructField): WritableColumnVector = {
+    if (useOffHeapBuffers) {
+      OffHeapColumnVector.allocateColumns(size, Seq(dataType).toArray)(0)
+    } else {
+      OnHeapColumnVector.allocateColumns(size, Seq(dataType).toArray)(0)
+    }
+  }
+
+  /** Try the operation; if it fails, release the created resources */
+  private def trySafely[R <: WritableColumnVector, T](
+      useOffHeapBuffers: Boolean,
+      size: Int,
+      columns: Seq[ColumnMetadata])(f: Seq[WritableColumnVector] => T): T = {
+    val resources = new ArrayBuffer[WritableColumnVector](columns.size)
+    try {
+      columns.foreach(col => resources.append(newVector(useOffHeapBuffers, size, col.structField)))
+      f(resources.toSeq)
+    } catch {
+      case NonFatal(e) =>
+        resources.foreach(closeQuietly(_))
+        throw e
+    }
+  }
+
+  /** Utility method to quietly close an [[AutoCloseable]] */
+  private def closeQuietly(closeable: AutoCloseable): Unit = {
+    if (closeable != null) {
+      try {
+        closeable.close()
+      } catch {
+        case NonFatal(_) => // ignore
+      }
+    }
+  }
+
+  /**
+   * Helper method to replace the vectors in a given [[ColumnarBatch]].
+   * New vectors and their indexes in the batch are given as tuples.
+ */ + private def replaceVectors( + batch: ColumnarBatch, + indexVectorTuples: (Int, ColumnVector) *): ColumnarBatch = { + val vectors = ArrayBuffer[ColumnVector]() + for (i <- 0 until batch.numCols()) { + var replaced: Boolean = false + for (indexVectorTuple <- indexVectorTuples) { + val index = indexVectorTuple._1 + val vector = indexVectorTuple._2 + if (indexVectorTuple._1 == i) { + vectors += indexVectorTuple._2 + // Make sure to close the existing vector allocated in the Parquet + batch.column(i).close() + replaced = true + } + } + if (!replaced) { + vectors += batch.column(i) + } + } + new ColumnarBatch(vectors.toArray, batch.numRows()) + } + + /** Helper class to encapsulate column info */ + case class ColumnMetadata(index: Int, structField: StructField) + + /** Helper class that encapsulate an [[RowIndexFilterType]]. */ + case class DeletionVectorDescriptorWithFilterType( + descriptor: DeletionVectorDescriptor, + filterType: RowIndexFilterType) { + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetWriteSupport.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetWriteSupport.scala new file mode 100644 index 00000000000..41038246982 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetWriteSupport.scala @@ -0,0 +1,149 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import scala.util.Try + +import org.apache.spark.sql.delta.DeltaColumnMapping._ +import org.apache.hadoop.conf.Configuration +import org.apache.parquet.hadoop.api.WriteSupport.WriteContext +import org.apache.parquet.schema.{LogicalTypeAnnotation, Type, Types} +import org.apache.parquet.schema.LogicalTypeAnnotation.{ListLogicalTypeAnnotation, MapLogicalTypeAnnotation} + +import org.apache.spark.SparkRuntimeException +import org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser +import org.apache.spark.sql.catalyst.trees.Origin +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.datasources.parquet.{ParquetSchemaConverter, ParquetWriteSupport} +import org.apache.spark.sql.types.{DataType, StructField, StructType} + +class DeltaParquetWriteSupport extends ParquetWriteSupport { + + private def getNestedFieldId(field: StructField, path: Seq[String]): Int = { + field.metadata + .getMetadata(PARQUET_FIELD_NESTED_IDS_METADATA_KEY) + .getLong(path.mkString(".")) + .toInt + } + + private def findFieldInSparkSchema(schema: StructType, path: Seq[String]): StructField = { + schema.findNestedField(path, true) match { + case Some((_, field)) => field + case None => throw QueryCompilationErrors.invalidFieldName(Seq(path.head), path, Origin()) + } + } + + override def init(configuration: Configuration): WriteContext = { + val writeContext = super.init(configuration) + // Parse the Spark schema. 
This is the same as is done in super.init, however, the + // parsed schema is stored in [[ParquetWriteSupport.schema]], which is private so + // we can't access it here and need to parse it again. + val schemaString = configuration.get(ParquetWriteSupport.SPARK_ROW_SCHEMA) + // This code is copied from Spark StructType.fromString because it is not accessible here + val parsedSchema = Try(DataType.fromJson(schemaString)).getOrElse( + LegacyTypeStringParser.parseString(schemaString)) match { + case t: StructType => t + case _ => + // This code is copied from DataTypeErrors.failedParsingStructTypeError because + // it is not accessible here + throw new SparkRuntimeException( + errorClass = "FAILED_PARSE_STRUCT_TYPE", + messageParameters = Map("raw" -> s"'$schemaString'")) + } + + val messageType = writeContext.getSchema + val newMessageTypeBuilder = Types.buildMessage() + messageType.getFields.forEach { field => + val parentField = findFieldInSparkSchema(parsedSchema, Seq(field.getName)) + newMessageTypeBuilder.addField(convert( + field, parentField, parsedSchema, Seq(field.getName), Seq(field.getName))) + } + val newMessageType = newMessageTypeBuilder.named( + ParquetSchemaConverter.SPARK_PARQUET_SCHEMA_NAME) + new WriteContext(newMessageType, writeContext.getExtraMetaData) + } + + /** + * Recursively rewrites the parquet [[Type]] by adding the nested field + * IDs to list and map subtypes as defined in the schema. The + * recursion needs to keep track of the absolute field path in order + * to correctly identify the StructField in the spark schema for a + * corresponding parquet field. As nested field IDs are referenced + * by their relative path in a field's metadata, the recursion also needs + * to keep track of the relative path. + * + * For example, consider the following column type + * col1 STRUCT(a INT, b STRUCT(c INT, d ARRAY(INT))) + * + * The absolute path to the nested [[element]] field of the list is + * col1.b.d.element whereas the relative path is d.element, i.e. relative + * to the parent struct field. 
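+   *
+   * As a sketch of how the lookup works for the example above (the name `dField` is
+   * illustrative; the actual lookup happens in [[getNestedFieldId]]), the nested field ID of
+   * the list element is read from the metadata of the parent field `d`, keyed by the relative
+   * path:
+   * {{{
+   *   dField.metadata
+   *     .getMetadata(PARQUET_FIELD_NESTED_IDS_METADATA_KEY)
+   *     .getLong("d.element")
+   * }}}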
+ */ + private def convert( + field: Type, + parentField: StructField, + sparkSchema: StructType, + absolutePath: Seq[String], + relativePath: Seq[String]): Type = { + field.getLogicalTypeAnnotation match { + case _: ListLogicalTypeAnnotation => + val relElemFieldPath = relativePath :+ PARQUET_LIST_ELEMENT_FIELD_NAME + val id = getNestedFieldId(parentField, relElemFieldPath) + val elementField = + field.asGroupType().getFields.get(0).asGroupType().getFields.get(0).withId(id) + Types + .buildGroup(field.getRepetition).as(LogicalTypeAnnotation.listType()) + .addField( + Types.repeatedGroup() + .addField(convert(elementField, parentField, sparkSchema, + absolutePath :+ PARQUET_LIST_ELEMENT_FIELD_NAME, relElemFieldPath)) + .named("list")) + .id(field.getId.intValue()) + .named(field.getName) + case _: MapLogicalTypeAnnotation => + val relKeyFieldPath = relativePath :+ PARQUET_MAP_KEY_FIELD_NAME + val relValFieldPath = relativePath :+ PARQUET_MAP_VALUE_FIELD_NAME + val keyId = getNestedFieldId(parentField, relKeyFieldPath) + val valId = getNestedFieldId(parentField, relValFieldPath) + val keyField = + field.asGroupType().getFields.get(0).asGroupType().getFields.get(0).withId(keyId) + val valueField = + field.asGroupType().getFields.get(0).asGroupType().getFields.get(1).withId(valId) + Types + .buildGroup(field.getRepetition).as(LogicalTypeAnnotation.mapType()) + .addField( + Types + .repeatedGroup() + .addField(convert(keyField, parentField, sparkSchema, + absolutePath :+ PARQUET_MAP_KEY_FIELD_NAME, relKeyFieldPath)) + .addField(convert(valueField, parentField, sparkSchema, + absolutePath :+ PARQUET_MAP_VALUE_FIELD_NAME, relValFieldPath)) + .named("key_value")) + .id(field.getId.intValue()) + .named(field.getName) + case _ if field.isPrimitive => field + case _ => + val builder = Types.buildGroup(field.getRepetition) + field.asGroupType().getFields.forEach { field => + val absPath = absolutePath :+ field.getName + val parentField = findFieldInSparkSchema(sparkSchema, absPath) + builder.addField(convert(field, parentField, sparkSchema, absPath, Seq(field.getName))) + } + builder.id(field.getId.intValue()).named(field.getName) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaSharedExceptions.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaSharedExceptions.scala new file mode 100644 index 00000000000..0f07a125f95 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaSharedExceptions.scala @@ -0,0 +1,103 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import scala.collection.JavaConverters._ + +import org.antlr.v4.runtime.ParserRuleContext + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.parser.{ParseException, ParserUtils} +import org.apache.spark.sql.catalyst.trees.Origin + +class DeltaAnalysisException( + errorClass: String, + messageParameters: Array[String], + cause: Option[Throwable] = None, + origin: Option[Origin] = None) + extends AnalysisException( + message = DeltaThrowableHelper.getMessage(errorClass, messageParameters), + messageParameters = DeltaThrowableHelper + .getParameterNames(errorClass, errorSubClass = null) + .zip(messageParameters) + .toMap, + errorClass = Some(errorClass), + line = origin.flatMap(_.line), + startPosition = origin.flatMap(_.startPosition), + context = origin.map(_.getQueryContext).getOrElse(Array.empty), + cause = cause) + with DeltaThrowable { + def getMessageParametersArray: Array[String] = messageParameters +} + +class DeltaIllegalArgumentException( + errorClass: String, + messageParameters: Array[String] = Array.empty, + cause: Throwable = null) + extends IllegalArgumentException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters), cause) + with DeltaThrowable { + override def getErrorClass: String = errorClass + def getMessageParametersArray: Array[String] = messageParameters + + override def getMessageParameters: java.util.Map[String, String] = { + DeltaThrowableHelper.getParameterNames(errorClass, errorSubClass = null) + .zip(messageParameters).toMap.asJava + } +} + +class DeltaUnsupportedOperationException( + errorClass: String, + messageParameters: Array[String] = Array.empty) + extends UnsupportedOperationException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters)) + with DeltaThrowable { + override def getErrorClass: String = errorClass + def getMessageParametersArray: Array[String] = messageParameters + + override def getMessageParameters: java.util.Map[String, String] = { + DeltaThrowableHelper.getParameterNames(errorClass, errorSubClass = null) + .zip(messageParameters).toMap.asJava + } +} + +class DeltaParseException( + ctx: ParserRuleContext, + errorClass: String, + messageParameters: Map[String, String] = Map.empty) + extends ParseException( + Option(ParserUtils.command(ctx)), + ParserUtils.position(ctx.getStart), + ParserUtils.position(ctx.getStop), + errorClass, + messageParameters + ) with DeltaThrowable + +class DeltaArithmeticException( + errorClass: String, + messageParameters: Array[String]) + extends ArithmeticException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters)) + with DeltaThrowable { + override def getErrorClass: String = errorClass + + override def getMessageParameters: java.util.Map[String, String] = { + DeltaThrowableHelper.getParameterNames(errorClass, errorSubClass = null) + .zip(messageParameters).toMap.asJava + } +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala new file mode 100644 index 00000000000..1e2295ad472 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala @@ -0,0 +1,648 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import scala.util.{Failure, Success, Try} + +import org.apache.spark.sql.delta.files.{TahoeFileIndex, TahoeLogFileIndex} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.{DeltaSourceUtils, DeltaSQLConf} +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.sql.{Column, DataFrame} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, UnresolvedAttribute, UnresolvedLeafNode, UnresolvedTable} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke +import org.apache.spark.sql.catalyst.planning.NodeWithOnlyDeterministicProjectAndFilter +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LeafNode, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils +import org.apache.spark.sql.connector.catalog.Identifier +import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform} +import org.apache.spark.sql.execution.datasources.{FileFormat, FileIndex, HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** + * Extractor Object for pulling out the file index of a logical relation. + */ +object RelationFileIndex { + def unapply(a: LogicalRelation): Option[FileIndex] = a match { + case LogicalRelation(hrel: HadoopFsRelation, _, _, _) => Some(hrel.location) + case _ => None + } +} + +/** + * Extractor Object for pulling out the table scan of a Delta table. It could be a full scan + * or a partial scan. + */ +object DeltaTable { + def unapply(a: LogicalRelation): Option[TahoeFileIndex] = a match { + case RelationFileIndex(fileIndex: TahoeFileIndex) => Some(fileIndex) + case _ => None + } +} + +/** + * Extractor Object for pulling out the full table scan of a Delta table. + */ +object DeltaFullTable { + def unapply(a: LogicalPlan): Option[(LogicalRelation, TahoeLogFileIndex)] = a match { + // `DeltaFullTable` is not only used to match a certain query pattern, but also does + // some validations to throw errors. We need to match both Project and Filter here, + // so that we can check if Filter is present or not during validations. 
+ case NodeWithOnlyDeterministicProjectAndFilter(lr @ DeltaTable(index: TahoeLogFileIndex)) => + if (!index.deltaLog.tableExists) return None + val hasFilter = a.find(_.isInstanceOf[Filter]).isDefined + if (index.partitionFilters.isEmpty && index.versionToUse.isEmpty && !hasFilter) { + Some(lr -> index) + } else if (index.versionToUse.nonEmpty) { + throw DeltaErrors.failedScanWithHistoricalVersion(index.versionToUse.get) + } else { + throw DeltaErrors.unexpectedPartialScan(index.path) + } + // Convert V2 relations to V1 and perform the check + case DeltaRelation(lr) => unapply(lr) + case _ => + None + } +} + +object DeltaTableUtils extends PredicateHelper + with DeltaLogging { + + // The valid hadoop prefixes passed through `DeltaTable.forPath` or DataFrame APIs. + val validDeltaTableHadoopPrefixes: List[String] = List("fs.", "dfs.") + + /** Check whether this table is a Delta table based on information from the Catalog. */ + def isDeltaTable(table: CatalogTable): Boolean = DeltaSourceUtils.isDeltaTable(table.provider) + + + /** + * Check whether the provided table name is a Delta table based on information from the Catalog. + */ + def isDeltaTable(spark: SparkSession, tableName: TableIdentifier): Boolean = { + val catalog = spark.sessionState.catalog + val tableIsNotTemporaryTable = !catalog.isTempView(tableName) + val tableExists = { + (tableName.database.isEmpty || catalog.databaseExists(tableName.database.get)) && + catalog.tableExists(tableName) + } + tableIsNotTemporaryTable && tableExists && isDeltaTable(catalog.getTableMetadata(tableName)) + } + + /** Check if the provided path is the root or the children of a Delta table. */ + def isDeltaTable( + spark: SparkSession, + path: Path, + options: Map[String, String] = Map.empty): Boolean = { + findDeltaTableRoot(spark, path, options).isDefined + } + + /** + * Checks whether TableIdentifier is a path or a table name + * We assume it is a path unless the table and database both exist in the catalog + * @param catalog session catalog used to check whether db/table exist + * @param tableIdent the provided table or path + * @return true if using table name, false if using path, error otherwise + */ + def isCatalogTable(catalog: SessionCatalog, tableIdent: TableIdentifier): Boolean = { + val (dbExists, assumePath) = dbExistsAndAssumePath(catalog, tableIdent) + + // If we don't need to check that the table exists, return false since we think the tableIdent + // refers to a path at this point, because the database doesn't exist + if (assumePath) return false + + // check for dbexists otherwise catalog.tableExists may throw NoSuchDatabaseException + if ((dbExists || tableIdent.database.isEmpty) + && Try(catalog.tableExists(tableIdent)).getOrElse(false)) { + true + } else if (isValidPath(tableIdent)) { + false + } else { + throw new NoSuchTableException(tableIdent.database.getOrElse(""), tableIdent.table) + } + } + + /** + * It's possible that checking whether database exists can throw an exception. In that case, + * we want to surface the exception only if the provided tableIdentifier cannot be a path. 
+   *
+   * @param catalog session catalog used to check whether db/table exist
+   * @param ident the provided table or path
+   * @return a tuple where the first element indicates whether the database exists, and the
+   *         second indicates whether the identifier should be assumed to be a path
+   */
+  private def dbExistsAndAssumePath(
+      catalog: SessionCatalog,
+      ident: TableIdentifier): (Boolean, Boolean) = {
+    def databaseExists = {
+      ident.database.forall(catalog.databaseExists)
+    }
+
+    Try(databaseExists) match {
+      // DB exists, check table exists only if path is not valid
+      case Success(true) => (true, false)
+      // DB does not exist, check table exists only if path does not exist
+      case Success(false) => (false, new Path(ident.table).isAbsolute)
+      // Checking DB exists threw an exception; if the path is still valid then check for the table
+      case Failure(_) if isValidPath(ident) => (false, true)
+      // Checking DB exists threw an exception and the path is not valid, so rethrow
+      case Failure(e) => throw e
+    }
+  }
+
+  /**
+   * @param tableIdent the provided table or path
+   * @return whether or not the provided TableIdentifier can specify a path for parquet or delta
+   */
+  def isValidPath(tableIdent: TableIdentifier): Boolean = {
+    // If the db doesn't exist, or the db is called delta/tahoe, then check if the path exists
+    DeltaSourceUtils.isDeltaDataSourceName(tableIdent.database.getOrElse("")) &&
+      new Path(tableIdent.table).isAbsolute
+  }
+
+  /** Find the root of a Delta table from the provided path. */
+  def findDeltaTableRoot(
+      spark: SparkSession,
+      path: Path,
+      options: Map[String, String] = Map.empty): Option[Path] = {
+    // scalastyle:off deltahadoopconfiguration
+    val fs = path.getFileSystem(spark.sessionState.newHadoopConfWithOptions(options))
+    // scalastyle:on deltahadoopconfiguration
+
+    findDeltaTableRoot(fs, path)
+  }
+
+  /** Finds the root of a Delta table given a path, if it exists. */
+  def findDeltaTableRoot(fs: FileSystem, path: Path): Option[Path] = {
+    var currentPath = path
+    while (currentPath != null && currentPath.getName != "_delta_log" &&
+        currentPath.getName != "_samples") {
+      val deltaLogPath = safeConcatPaths(currentPath, "_delta_log")
+      if (Try(fs.exists(deltaLogPath)).getOrElse(false)) {
+        return Option(currentPath)
+      }
+      currentPath = currentPath.getParent
+    }
+    None
+  }
+
+  /** Whether a path should be hidden for delta-related file operations, such as Vacuum and Fsck. */
+  def isHiddenDirectory(partitionColumnNames: Seq[String], pathName: String): Boolean = {
+    // Names of the form partitionCol=[value] are partition directories, and should be
+    // GCed even if they'd normally be hidden. The _db_index directory contains (bloom filter)
+    // indexes and these must be GCed when the data they are tied to is GCed.
+    // The "metadata" name is reserved for converted Iceberg metadata with Delta universal format.
+    pathName.equals("metadata") ||
+      (pathName.startsWith(".") || pathName.startsWith("_")) &&
+      !pathName.startsWith("_delta_index") && !pathName.startsWith("_change_data") &&
+      !partitionColumnNames.exists(c => pathName.startsWith(c ++ "="))
+  }
+
+  /**
+   * Does the predicate reference only partition columns?
+   */
+  def isPredicatePartitionColumnsOnly(
+      condition: Expression,
+      partitionColumns: Seq[String],
+      spark: SparkSession): Boolean = {
+    val nameEquality = spark.sessionState.analyzer.resolver
+    condition.references.forall { r =>
+      partitionColumns.exists(nameEquality(r.name, _))
+    }
+  }
+
+  /**
+   * Partition the given condition into two sequences of conjunctive predicates:
+   * - predicates that can be evaluated using metadata only.
+   * - other predicates.
+   */
+  def splitMetadataAndDataPredicates(
+      condition: Expression,
+      partitionColumns: Seq[String],
+      spark: SparkSession): (Seq[Expression], Seq[Expression]) = {
+    val (metadataPredicates, dataPredicates) =
+      splitConjunctivePredicates(condition).partition(
+        isPredicateMetadataOnly(_, partitionColumns, spark))
+    // Extra metadata predicates that can be partially extracted from `dataPredicates`.
+    val extraMetadataPredicates =
+      if (dataPredicates.nonEmpty) {
+        extractMetadataPredicates(dataPredicates.reduce(And), partitionColumns, spark)
+          .map(splitConjunctivePredicates)
+          .getOrElse(Seq.empty)
+      } else {
+        Seq.empty
+      }
+    (metadataPredicates ++ extraMetadataPredicates, dataPredicates)
+  }
+
+  /**
+   * Returns a predicate whose references are a subset of `partitionColumns` and which contains
+   * as many constraints from `condition` as possible.
+   * When there is no such predicate, `None` is returned.
+   */
+  private def extractMetadataPredicates(
+      condition: Expression,
+      partitionColumns: Seq[String],
+      spark: SparkSession): Option[Expression] = {
+    condition match {
+      case And(left, right) =>
+        val lhs = extractMetadataPredicates(left, partitionColumns, spark)
+        val rhs = extractMetadataPredicates(right, partitionColumns, spark)
+        (lhs.toSeq ++ rhs.toSeq).reduceOption(And)
+
+      // The Or predicate is convertible when both of its children can be pushed down.
+      // That is to say, if one/both of the children can be partially pushed down, the Or
+      // predicate can be partially pushed down as well.
+      //
+      // Here is an example used to explain the reason.
+      // Let's say we have
+      // condition: (a1 AND a2) OR (b1 AND b2),
+      // outputSet: AttributeSet(a1, b1)
+      // a1 and b1 are convertible, while a2 and b2 are not.
+      // The predicate can be converted as
+      // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2)
+      // As per the logic in the And case above, we can push down (a1 OR b1).
+      case Or(left, right) =>
+        for {
+          lhs <- extractMetadataPredicates(left, partitionColumns, spark)
+          rhs <- extractMetadataPredicates(right, partitionColumns, spark)
+        } yield Or(lhs, rhs)
+
+      // Here we assume all the `Not` operators are already below all the `And` and `Or` operators
+      // after the optimization rule `BooleanSimplification`, so that we don't need to handle the
+      // `Not` operators here.
+      case other =>
+        if (isPredicatePartitionColumnsOnly(other, partitionColumns, spark)) {
+          Some(other)
+        } else {
+          None
+        }
+    }
+  }
+
+  /**
+   * Check if the condition involves a subquery expression.
+   */
+  def containsSubquery(condition: Expression): Boolean = {
+    SubqueryExpression.hasSubquery(condition)
+  }
+
+  /**
+   * Check if the condition can be evaluated using only metadata. In Delta, this means the
+   * condition only references partition columns and involves no subquery.
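+   *
+   * For example (illustrative predicates, assuming `date` is the only partition column):
+   * `date = '2024-01-01'` is metadata-only, whereas `id > 10` or
+   * `date = (SELECT max(date) FROM other)` are not, because they reference a data column or
+   * contain a subquery, respectively.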
+ */ + def isPredicateMetadataOnly( + condition: Expression, + partitionColumns: Seq[String], + spark: SparkSession): Boolean = { + isPredicatePartitionColumnsOnly(condition, partitionColumns, spark) && + !containsSubquery(condition) + } + + /** + * Replace the file index in a logical plan and return the updated plan. + * It's a common pattern that, in Delta commands, we use data skipping to determine a subset of + * files that can be affected by the command, so we replace the whole-table file index in the + * original logical plan with a new index of potentially affected files, while everything else in + * the original plan, e.g., resolved references, remain unchanged. + * + * @param target the logical plan in which we replace the file index + * @param fileIndex the new file index + */ + def replaceFileIndex( + target: LogicalPlan, + fileIndex: FileIndex): LogicalPlan = { + target transform { + case l @ LogicalRelation(hfsr: HadoopFsRelation, _, _, _) => + l.copy(relation = hfsr.copy(location = fileIndex)(hfsr.sparkSession)) + } + } + + /** + * Replace the file index in a logical plan and return the updated plan. + * It's a common pattern that, in Delta commands, we use data skipping to determine a subset of + * files that can be affected by the command, so we replace the whole-table file index in the + * original logical plan with a new index of potentially affected files, while everything else in + * the original plan, e.g., resolved references, remain unchanged. + * + * Many Delta meta-queries involve nondeterminstic functions, which interfere with automatic + * column pruning, so columns can be manually pruned from the new scan. Note that partition + * columns can never be dropped even if they're not referenced in the rest of the query. + * + * @param spark the spark session to use + * @param target the logical plan in which we replace the file index + * @param fileIndex the new file index + * @param columnsToDrop columns to drop from the scan + * @param newOutput If specified, new logical output to replace the current LogicalRelation. + * Used for schema evolution to produce the new schema-evolved types from + * old files, because `target` will have the old types. + */ + def replaceFileIndex( + spark: SparkSession, + target: LogicalPlan, + fileIndex: FileIndex, + columnsToDrop: Seq[String], + newOutput: Option[Seq[AttributeReference]]): LogicalPlan = { + val resolver = spark.sessionState.analyzer.resolver + + var actualNewOutput = newOutput + var hasChar = false + var newTarget = target transformDown { + case l @ LogicalRelation(hfsr: HadoopFsRelation, _, _, _) => + // Prune columns from the scan. + val finalOutput = actualNewOutput.getOrElse(l.output).filterNot { col => + columnsToDrop.exists(resolver(_, col.name)) + } + + // If the output columns were changed e.g. by schema evolution, we need to update + // the relation to expose all the columns that are expected after schema evolution. + val newDataSchema = StructType(finalOutput.map(attr => + StructField(attr.name, attr.dataType, attr.nullable, attr.metadata))) + val newBaseRelation = hfsr.copy( + location = fileIndex, dataSchema = newDataSchema)( + hfsr.sparkSession) + l.copy(relation = newBaseRelation, output = finalOutput) + + case p @ Project(projectList, _) => + // Spark does char type read-side padding via an additional Project over the scan node. + // `newOutput` references the Project attributes, we need to translate their expression IDs + // so that `newOutput` references attributes from the LogicalRelation instead. 
+ def hasCharPadding(e: Expression): Boolean = e.exists { + case s: StaticInvoke => s.staticObject == classOf[CharVarcharCodegenUtils] && + s.functionName == "readSidePadding" + case _ => false + } + val charColMapping = AttributeMap(projectList.collect { + case a: Alias if hasCharPadding(a.child) && a.references.size == 1 => + hasChar = true + val tableCol = a.references.head.asInstanceOf[AttributeReference] + a.toAttribute -> tableCol + }) + actualNewOutput = newOutput.map(_.map { attr => + charColMapping.get(attr).map { tableCol => + attr.withExprId(tableCol.exprId) + }.getOrElse(attr) + }) + p + } + + if (hasChar) { + // When char type read-side padding is applied, we need to apply column pruning for the + // Project as well, otherwise the Project will contain missing attributes. + newTarget = newTarget.transformUp { + case p @ Project(projectList, child) => + val newProjectList = projectList.filter { e => + e.references.subsetOf(child.outputSet) + } + p.copy(projectList = newProjectList) + } + } + newTarget + } + + /** + * Update FileFormat for a plan and return the updated plan + * + * @param target Target plan to update + * @param updatedFileFormat Updated file format + * @return Updated logical plan + */ + def replaceFileFormat( + target: LogicalPlan, + updatedFileFormat: FileFormat): LogicalPlan = { + target transform { + case l @ LogicalRelation(hfsr: HadoopFsRelation, _, _, _) => + l.copy( + relation = hfsr.copy(fileFormat = updatedFileFormat)(hfsr.sparkSession)) + } + } + + /** + * Check if the given path contains time travel syntax with the `@`. If the path genuinely exists, + * return `None`. If the path doesn't exist, but is specifying time travel, return the + * `DeltaTimeTravelSpec` as well as the real path. + */ + def extractIfPathContainsTimeTravel( + session: SparkSession, + path: String, + options: Map[String, String]): (String, Option[DeltaTimeTravelSpec]) = { + val conf = session.sessionState.conf + if (!DeltaTimeTravelSpec.isApplicable(conf, path)) return path -> None + + val maybePath = new Path(path) + + // scalastyle:off deltahadoopconfiguration + val fs = maybePath.getFileSystem(session.sessionState.newHadoopConfWithOptions(options)) + // scalastyle:on deltahadoopconfiguration + + // If the folder really exists, quit + if (fs.exists(maybePath)) return path -> None + + val (tt, realPath) = DeltaTimeTravelSpec.resolvePath(conf, path) + realPath -> Some(tt) + } + + /** + * Given a time travel node, resolve which version it is corresponding to for the given table and + * return the resolved version as well as the access type, i.e. by `version` or `timestamp`. + */ + def resolveTimeTravelVersion( + conf: SQLConf, + deltaLog: DeltaLog, + tt: DeltaTimeTravelSpec, + canReturnLastCommit: Boolean = false): (Long, String) = { + if (tt.version.isDefined) { + val userVersion = tt.version.get + deltaLog.history.checkVersionExists(userVersion) + userVersion -> "version" + } else { + val timestamp = tt.getTimestamp(conf) + deltaLog.history.getActiveCommitAtTime(timestamp, canReturnLastCommit).version -> "timestamp" + } + } + + def parseColToTransform(col: String): IdentityTransform = { + IdentityTransform(FieldReference(Seq(col))) + } + + // Workaround for withActive not being visible in io/delta. 
+  def withActiveSession[T](spark: SparkSession)(body: => T): T = spark.withActive(body)
+
+  /**
+   * Uses org.apache.hadoop.fs.Path(Path, String) to concatenate a base path
+   * and a relative child path and safely handles the case where the base path represents
+   * a Uri with an empty path component (e.g. s3://my-bucket, where my-bucket would be
+   * interpreted as the Uri authority).
+   *
+   * In that case, the child path is converted to an absolute path at the root, i.e. /childPath.
+   * This prevents a "URISyntaxException: Relative path in absolute URI", which would be thrown
+   * by org.apache.hadoop.fs.Path(Path, String) because it tries to convert the base path to a Uri
+   * and then resolve the child on top of it. This is invalid for an empty base path and a
+   * relative child path according to the Uri specification, which states that if an authority
+   * is defined, the path component needs to be either empty or start with a '/'.
+   */
+  def safeConcatPaths(basePath: Path, relativeChildPath: String): Path = {
+    if (basePath.toUri.getPath.isEmpty) {
+      new Path(basePath, s"/$relativeChildPath")
+    } else {
+      new Path(basePath, relativeChildPath)
+    }
+  }
+
+  /**
+   * A list of Spark internal metadata keys that we may save in a Delta table schema
+   * unintentionally due to SPARK-43123. We need to remove them before handing over the schema to
+   * Spark to avoid Spark interpreting table columns incorrectly.
+   *
+   * Hard-coded strings are used intentionally as we want to capture possible keys used before
+   * SPARK-43123 regardless of the Spark version. For example, if Spark changes any key string in
+   * the future after SPARK-43123, the new string won't be leaked, but we still want to clean up
+   * the old key.
+   */
+  val SPARK_INTERNAL_METADATA_KEYS = Seq(
+    "__autoGeneratedAlias",
+    "__metadata_col",
+    "__supports_qualified_star", // A key used by an old version. Doesn't exist in latest code
+    "__qualified_access_only",
+    "__file_source_metadata_col",
+    "__file_source_constant_metadata_col",
+    "__file_source_generated_metadata_col"
+  )
+
+  /**
+   * Remove leaked metadata keys from the persisted table schema. Old versions might leak metadata
+   * unintentionally. This method removes all possible metadata keys to avoid Spark interpreting
+   * table columns incorrectly.
+ */ + def removeInternalMetadata(spark: SparkSession, persistedSchema: StructType): StructType = { + val schema = ColumnWithDefaultExprUtils.removeDefaultExpressions(persistedSchema) + if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_SCHEMA_REMOVE_SPARK_INTERNAL_METADATA)) { + var updated = false + val updatedSchema = schema.map { field => + if (SPARK_INTERNAL_METADATA_KEYS.exists(field.metadata.contains)) { + updated = true + val newMetadata = new MetadataBuilder().withMetadata(field.metadata) + SPARK_INTERNAL_METADATA_KEYS.foreach(newMetadata.remove) + field.copy(metadata = newMetadata.build()) + } else { + field + } + } + if (updated) { + StructType(updatedSchema) + } else { + schema + } + } else { + schema + } + } +} + +sealed abstract class UnresolvedPathBasedDeltaTableBase(path: String) extends UnresolvedLeafNode { + def identifier: Identifier = Identifier.of(Array(DeltaSourceUtils.ALT_NAME), path) + def deltaTableIdentifier: DeltaTableIdentifier = DeltaTableIdentifier(Some(path), None) + +} + +/** Resolves to a [[ResolvedTable]] if the DeltaTable exists */ +case class UnresolvedPathBasedDeltaTable( + path: String, + options: Map[String, String], + commandName: String) extends UnresolvedPathBasedDeltaTableBase(path) + +/** Resolves to a [[DataSourceV2Relation]] if the DeltaTable exists */ +case class UnresolvedPathBasedDeltaTableRelation( + path: String, + options: CaseInsensitiveStringMap) extends UnresolvedPathBasedDeltaTableBase(path) + +/** + * This operator represents path-based tables in general including both Delta or non-Delta tables. + * It resolves to a [[ResolvedTable]] if the path is for delta table, + * [[ResolvedPathBasedNonDeltaTable]] if the path is for a non-Delta table. + */ +case class UnresolvedPathBasedTable( + path: String, + options: Map[String, String], + commandName: String) extends LeafNode { + override lazy val resolved: Boolean = false + override val output: Seq[Attribute] = Nil +} + +/** + * This operator is a placeholder that identifies a non-Delta path-based table. Given the fact + * that some Delta commands (e.g. DescribeDeltaDetail) support non-Delta table, we introduced + * ResolvedPathBasedNonDeltaTable as the resolved placeholder after analysis on a non delta path + * from UnresolvedPathBasedTable. + */ +case class ResolvedPathBasedNonDeltaTable( + path: String, + options: Map[String, String], + commandName: String) extends LeafNode { + override val output: Seq[Attribute] = Nil +} + +/** + * A helper object with an apply method to transform a path or table identifier to a LogicalPlan. + * If the path is set, it will be resolved to an [[UnresolvedPathBasedDeltaTable]] whereas if the + * tableIdentifier is set, the LogicalPlan will be an [[UnresolvedTable]]. If neither of the two + * options or both of them are set, [[apply]] will throw an exception. + */ +object UnresolvedDeltaPathOrIdentifier { + def apply( + path: Option[String], + tableIdentifier: Option[TableIdentifier], + cmd: String): LogicalPlan = { + (path, tableIdentifier) match { + case (Some(p), None) => UnresolvedPathBasedDeltaTable(p, Map.empty, cmd) + case (None, Some(t)) => + UnresolvedTable(t.nameParts, cmd, None) + case _ => throw new IllegalArgumentException( + s"Exactly one of path or tableIdentifier must be provided to $cmd") + } + } +} + +/** + * A helper object with an apply method to transform a path or table identifier to a LogicalPlan. + * This is required by Delta commands that can also run against non-Delta tables, e.g. DESC DETAIL, + * VACUUM command. 
If the tableIdentifier is set, the LogicalPlan will be an [[UnresolvedTable]]. + * If the tableIdentifier is not set but the path is set, it will be resolved to an + * [[UnresolvedPathBasedTable]] since we can not tell if the path is for delta table or non delta + * table at this stage. If neither of the two are set, throws an exception. + */ +object UnresolvedPathOrIdentifier { + def apply( + path: Option[String], + tableIdentifier: Option[TableIdentifier], + cmd: String): LogicalPlan = { + (path, tableIdentifier) match { + case (_, Some(t)) => + UnresolvedTable(t.nameParts, cmd, None) + case (Some(p), None) => UnresolvedPathBasedTable(p, Map.empty, cmd) + case _ => throw new IllegalArgumentException( + s"At least one of path or tableIdentifier must be provided to $cmd") + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableIdentifier.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableIdentifier.scala new file mode 100644 index 00000000000..cc96b07e4bb --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableIdentifier.scala @@ -0,0 +1,126 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSourceUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +// scalastyle:on import.ordering.noEmptyLine + +/** + * An identifier for a Delta table containing one of the path or the table identifier. + */ +case class DeltaTableIdentifier( + path: Option[String] = None, + table: Option[TableIdentifier] = None) { + assert(path.isDefined ^ table.isDefined, "Please provide one of the path or the table identifier") + + val identifier: String = path.getOrElse(table.get.identifier) + + def database: Option[String] = table.flatMap(_.database) + + def getPath(spark: SparkSession): Path = { + path.map(new Path(_)).getOrElse { + val metadata = spark.sessionState.catalog.getTableMetadata(table.get) + new Path(metadata.location) + } + } + + /** + * Escapes back-ticks within the identifier name with double-back-ticks. + */ + private def quoteIdentifier(name: String): String = name.replace("`", "``") + + def quotedString: String = { + val replacedId = quoteIdentifier(identifier) + val replacedDb = database.map(quoteIdentifier) + + if (replacedDb.isDefined) s"`${replacedDb.get}`.`$replacedId`" else s"`$replacedId`" + } + + def unquotedString: String = { + if (database.isDefined) s"${database.get}.$identifier" else identifier + } + + override def toString: String = quotedString +} + +/** + * Utilities for DeltaTableIdentifier. + * TODO(burak): Get rid of these utilities. DeltaCatalog should be the skinny-waist for figuring + * these things out. 
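+ *
+ * For example (illustrative), the SQL reference delta.`/tmp/events` parses into
+ * TableIdentifier("/tmp/events", Some("delta")); `DeltaTableIdentifier(spark, ident)` then
+ * resolves it as a path-based identifier, provided `runSQLonFile` is enabled and no matching
+ * catalog table or temp view exists.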
+ */ +object DeltaTableIdentifier extends DeltaLogging { + + /** + * Check the specified table identifier represents a Delta path. + */ + def isDeltaPath(spark: SparkSession, identifier: TableIdentifier): Boolean = { + val catalog = spark.sessionState.catalog + def tableIsTemporaryTable = catalog.isTempView(identifier) + def tableExists: Boolean = { + try { + catalog.databaseExists(identifier.database.get) && catalog.tableExists(identifier) + } catch { + case e: AnalysisException if gluePermissionError(e) => + logWarning("Received an access denied error from Glue. Will check to see if this " + + s"identifier ($identifier) is path based.", e) + false + } + } + + spark.sessionState.conf.runSQLonFile && + new Path(identifier.table).isAbsolute && + DeltaSourceUtils.isDeltaTable(identifier.database) && + !tableIsTemporaryTable && + !tableExists + } + + /** + * Creates a [[DeltaTableIdentifier]] if the specified table identifier represents a Delta table, + * otherwise returns [[None]]. + */ + def apply(spark: SparkSession, identifier: TableIdentifier) + : Option[DeltaTableIdentifier] = recordFrameProfile( + "DeltaAnalysis", "DeltaTableIdentifier.resolve") { + if (isDeltaPath(spark, identifier)) { + Some(DeltaTableIdentifier(path = Option(identifier.table))) + } else if (DeltaTableUtils.isDeltaTable(spark, identifier)) { + Some(DeltaTableIdentifier(table = Option(identifier))) + } else { + None + } + } + + /** + * When users try to access Delta tables by path, e.g. delta.`/some/path`, we need to first check + * if such a table exists in the MetaStore (due to Spark semantics :/). The Glue MetaStore may + * return Access Denied errors during this check. This method matches on this failure mode. + */ + def gluePermissionError(e: AnalysisException): Boolean = e.getCause match { + case h: Exception if h.getClass.getName == "org.apache.hadoop.hive.ql.metadata.HiveException" => + Seq("AWSGlue", "AccessDeniedException").forall { kw => + h.getMessage.contains(kw) + } + case _ => false + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableValueFunctions.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableValueFunctions.scala new file mode 100644 index 00000000000..521c1b0f501 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableValueFunctions.scala @@ -0,0 +1,197 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.text.SimpleDateFormat +import java.util.{Date, Locale} + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.sources.DeltaDataSource + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistryBase, NamedRelation, TableFunctionRegistry, UnresolvedLeafNode, UnresolvedRelation} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, ExpressionInfo, StringLiteral} +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, UnaryNode} +import org.apache.spark.sql.connector.catalog.V1Table +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.types.{IntegerType, LongType, StringType, TimestampType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** + * Resolve Delta specific table-value functions. + */ +object DeltaTableValueFunctions { + val CDC_NAME_BASED = "table_changes" + val CDC_PATH_BASED = "table_changes_by_path" + val supportedFnNames = Seq(CDC_NAME_BASED, CDC_PATH_BASED) + + // For use with SparkSessionExtensions + type TableFunctionDescription = + (FunctionIdentifier, ExpressionInfo, TableFunctionRegistry.TableFunctionBuilder) + + /** + * For a supported Delta table value function name, get the TableFunctionDescription to be + * injected in DeltaSparkSessionExtension + */ + def getTableValueFunctionInjection(fnName: String): TableFunctionDescription = { + val (info, builder) = fnName match { + case CDC_NAME_BASED => FunctionRegistryBase.build[CDCNameBased](fnName, since = None) + case CDC_PATH_BASED => FunctionRegistryBase.build[CDCPathBased](fnName, since = None) + case _ => throw DeltaErrors.invalidTableValueFunction(fnName) + } + val ident = FunctionIdentifier(fnName) + (ident, info, builder) + } +} + +/////////////////////////////////////////////////////////////////////////// +// Logical plans for Delta TVFs // +/////////////////////////////////////////////////////////////////////////// + +/** + * Represents an unresolved Delta Table Value Function + */ +trait DeltaTableValueFunction extends UnresolvedLeafNode { + def fnName: String + val functionArgs: Seq[Expression] +} + +/** + * Base trait for analyzing `table_changes` and `table_changes_for_path`. The resolution works as + * follows: + * 1. The TVF logical plan is resolved using the TableFunctionRegistry in the Analyzer. This uses + * reflection to create one of `CDCNameBased` or `CDCPathBased` by passing all the arguments. + * 2. DeltaAnalysis turns the plans to a `TableChanges` node to resolve the DeltaTable. This can + * be resolved by the DeltaCatalog for tables or DeltaAnalysis for the path based use. + * 3. TableChanges then turns into a LogicalRelation that returns the CDC relation. 
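+ *
+ * Illustrative SQL usage (argument values are examples only):
+ * {{{
+ *   SELECT * FROM table_changes('my_table', 1, 5)           -- by starting/ending version
+ *   SELECT * FROM table_changes_by_path('/tmp/delta/tbl', '2024-01-01 00:00:00')
+ * }}}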
+ */ +trait CDCStatementBase extends DeltaTableValueFunction { + /** Get the table that the function is being called on as an unresolved relation */ + protected def getTable(spark: SparkSession, name: Expression): LogicalPlan + + if (functionArgs.size < 2) { + throw new DeltaAnalysisException( + errorClass = "INCORRECT_NUMBER_OF_ARGUMENTS", + messageParameters = Array( + "not enough args", // failure + fnName, + "2", // minArgs + "3")) // maxArgs + } + if (functionArgs.size > 3) { + throw new DeltaAnalysisException( + errorClass = "INCORRECT_NUMBER_OF_ARGUMENTS", + messageParameters = Array( + "too many args", // failure + fnName, + "2", // minArgs + "3")) // maxArgs + } + + protected def getOptions: CaseInsensitiveStringMap = { + def toDeltaOption(keyPrefix: String, value: Expression): (String, String) = { + value.dataType match { + // We dont need to explicitly handle ShortType as it is parsed as IntegerType. + case _: IntegerType | LongType => (keyPrefix + "Version") -> value.eval().toString + case _: StringType => (keyPrefix + "Timestamp") -> value.eval().toString + case _: TimestampType => (keyPrefix + "Timestamp") -> { + val time = value.eval().toString + val fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") + // when evaluated the time is represented with microseconds, which needs to be trimmed. + fmt.format(new Date(time.toLong / 1000)) + } + case _ => + throw DeltaErrors.unsupportedExpression(s"${keyPrefix} option", value.dataType, + Seq("IntegerType", "LongType", "StringType", "TimestampType")) + } + } + + val startingOption = toDeltaOption("starting", functionArgs(1)) + val endingOption = functionArgs.drop(2).headOption.map(toDeltaOption("ending", _)) + val options = Map(DeltaDataSource.CDC_ENABLED_KEY -> "true", startingOption) ++ endingOption + new CaseInsensitiveStringMap(options.asJava) + } + + protected def getStringLiteral(e: Expression, whatFor: String): String = e match { + case StringLiteral(value) => value + case o => + throw DeltaErrors.unsupportedExpression(whatFor, o.dataType, Seq("StringType literal")) + } + + def toTableChanges(spark: SparkSession): TableChanges = + TableChanges(getTable(spark, functionArgs.head), fnName) +} + +/** + * Plan for the "table_changes" function + */ +case class CDCNameBased(override val functionArgs: Seq[Expression]) + extends CDCStatementBase { + override def fnName: String = DeltaTableValueFunctions.CDC_NAME_BASED + // Provide a constructor to get a better error message, when no expressions are provided + def this() = this(Nil) + + override protected def getTable(spark: SparkSession, name: Expression): LogicalPlan = { + val stringId = getStringLiteral(name, "table name") + val identifier = spark.sessionState.sqlParser.parseMultipartIdentifier(stringId) + UnresolvedRelation(identifier, getOptions, isStreaming = false) + } +} + +/** + * Plan for the "table_changes_by_path" function + */ +case class CDCPathBased(override val functionArgs: Seq[Expression]) + extends CDCStatementBase { + override def fnName: String = DeltaTableValueFunctions.CDC_PATH_BASED + // Provide a constructor to get a better error message, when no expressions are provided + def this() = this(Nil) + + override protected def getTable(spark: SparkSession, name: Expression): LogicalPlan = { + UnresolvedPathBasedDeltaTableRelation(getStringLiteral(name, "table path"), getOptions) + } +} + +case class TableChanges( + child: LogicalPlan, + fnName: String, + cdcAttr: Seq[Attribute] = CDCReader.cdcAttributes) extends UnaryNode { + + override lazy val resolved: Boolean = 
false + override def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = + this.copy(child = newChild) + + override def output: Seq[Attribute] = Nil + + /** Converts the table changes plan to a query over a Delta table */ + def toReadQuery: LogicalPlan = child.transformUp { + case DataSourceV2Relation(d: DeltaTableV2, _, _, _, options) => + // withOptions empties the catalog table stats + d.withOptions(options.asScala.toMap).toLogicalRelation + case r: NamedRelation => + throw DeltaErrors.notADeltaTableException(fnName, r.name) + case l: LogicalRelation => + val relationName = l.catalogTable.map(_.identifier.toString).getOrElse("relation") + throw DeltaErrors.notADeltaTableException(fnName, relationName) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowable.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowable.scala new file mode 100644 index 00000000000..5d8b185ba70 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowable.scala @@ -0,0 +1,32 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.SparkThrowable + +/** + * The trait for all exceptions of Delta code path. + */ +trait DeltaThrowable extends SparkThrowable { + // Portable error identifier across SQL engines + // If null, error class or SQLSTATE is not set + override def getSqlState: String = + DeltaThrowableHelper.getSqlState(this.getErrorClass.split('.').head) + + // True if this error is an internal error. + override def isInternalError: Boolean = DeltaThrowableHelper.isInternalError(this.getErrorClass) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowableHelper.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowableHelper.scala new file mode 100644 index 00000000000..138caaa20ed --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowableHelper.scala @@ -0,0 +1,79 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.FileNotFoundException +import java.net.URL + +import org.apache.spark.ErrorClassesJsonReader +import org.apache.spark.util.Utils + +/** + * The helper object for Delta code base to pick error class template and compile + * the exception message. 
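+ *
+ * For example (hypothetical error class and template, for illustration only), given a
+ * template "Cannot write to <tableName>.", the call below yields
+ * "[DELTA_EXAMPLE_ERROR] Cannot write to `t`.":
+ * {{{
+ *   DeltaThrowableHelper.getMessage("DELTA_EXAMPLE_ERROR", Array("`t`"))
+ * }}}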
+ */ +object DeltaThrowableHelper +{ + /** + * Try to find the error class source file and throw exception if it is no found. + */ + private def safeGetErrorClassesSource(sourceFile: String): URL = { + val classLoader = Utils.getContextOrSparkClassLoader + Option(classLoader.getResource(sourceFile)).getOrElse { + throw new FileNotFoundException( + s"""Cannot find the error class definition file on path $sourceFile" through the """ + + s"class loader ${classLoader.toString}") + } + } + + lazy val sparkErrorClassSource: URL = { + safeGetErrorClassesSource("error/error-classes.json") + } + + def deltaErrorClassSource: URL = { + safeGetErrorClassesSource("error/delta-error-classes.json") + } + + private val errorClassReader = new ErrorClassesJsonReader( + Seq(deltaErrorClassSource, sparkErrorClassSource)) + + def getMessage(errorClass: String, messageParameters: Array[String]): String = { + val template = errorClassReader.getMessageTemplate(errorClass) + val message = String.format(template.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), + messageParameters: _*) + s"[$errorClass] $message" + } + + def getSqlState(errorClass: String): String = errorClassReader.getSqlState(errorClass) + + def isInternalError(errorClass: String): Boolean = errorClass == "INTERNAL_ERROR" + + def getParameterNames(errorClass: String, errorSubClass: String): Array[String] = { + val wholeErrorClass = if (errorSubClass == null) { + errorClass + } else { + errorClass + "." + errorSubClass + } + val parameterizedMessage = errorClassReader.getMessageTemplate(wholeErrorClass) + val pattern = "<[a-zA-Z0-9_-]+>".r + val matches = pattern.findAllIn(parameterizedMessage) + val parameterSeq = matches.toArray + val parameterNames = parameterSeq.map(p => p.stripPrefix("<").stripSuffix(">")) + parameterNames + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTimeTravelSpec.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTimeTravelSpec.scala new file mode 100644 index 00000000000..ec0e245d95d --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTimeTravelSpec.scala @@ -0,0 +1,135 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.sql.Timestamp + +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.commons.lang3.time.FastDateFormat + +import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, Literal, PreciseTimestampConversion, RuntimeReplaceable, Unevaluable} +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{LongType, TimestampType} + +/** + * The specification to time travel a Delta Table to the given `timestamp` or `version`. + * @param timestamp An expression that can be evaluated into a timestamp. The expression cannot + * be a subquery. 
+ * @param version The version of the table to time travel to. Must be >= 0. + * @param creationSource The API used to perform time travel, e.g. `atSyntax`, `dfReader` or SQL + */ +case class DeltaTimeTravelSpec( + timestamp: Option[Expression], + version: Option[Long], + creationSource: Option[String]) extends DeltaLogging { + + assert(version.isEmpty ^ timestamp.isEmpty, + "Either the version or timestamp should be provided for time travel") + + /** + * Compute the timestamp to use for time travelling the relation from the given expression for + * the given time zone. + */ + def getTimestamp(conf: SQLConf): Timestamp = { + // note @brkyvz (2020-04-13): not great that we need to handle RuntimeReplaceable expressions... + val timeZone = conf.sessionLocalTimeZone + val evaluable = timestamp match { + case Some(e) => e.transform { + case rr: RuntimeReplaceable => + rr.children.head + case e: Unevaluable => + recordDeltaEvent(null, "delta.timeTravel.unexpected", data = e.sql) + throw new IllegalStateException(s"Unsupported expression (${e.sql}) for time travel.") + } + case None => + // scalastyle:off throwerror + throw new AssertionError( + "Should not ask to get Timestamp for time travel when the timestamp was not available") + // scalastyle:on throwerror + } + val strict = conf.getConf(DeltaSQLConf.DELTA_TIME_TRAVEL_STRICT_TIMESTAMP_PARSING) + val castResult = Cast(evaluable, TimestampType, Option(timeZone), ansiEnabled = false).eval() + if (strict && castResult == null) { + throw DeltaErrors.timestampInvalid(evaluable) + } + DateTimeUtils.toJavaTimestamp(castResult.asInstanceOf[java.lang.Long]) + } +} + +object DeltaTimeTravelSpec { + /** A regex which looks for the pattern ...@v(some numbers) for extracting the version number */ + private val VERSION_URI_FOR_TIME_TRAVEL = ".*@[vV](\\d+)$".r + + /** The timestamp format which we accept after the `@` character. */ + private val TIMESTAMP_FORMAT = "yyyyMMddHHmmssSSS" + + /** Length of yyyyMMddHHmmssSSS */ + private val TIMESTAMP_FORMAT_LENGTH = TIMESTAMP_FORMAT.length + + /** A regex which looks for the pattern ...@(yyyyMMddHHmmssSSS) for extracting timestamps. */ + private val TIMESTAMP_URI_FOR_TIME_TRAVEL = s".*@(\\d{$TIMESTAMP_FORMAT_LENGTH})$$".r + + /** Returns whether the given table identifier may contain time travel syntax. */ + def isApplicable(conf: SQLConf, identifier: String): Boolean = { + conf.getConf(DeltaSQLConf.RESOLVE_TIME_TRAVEL_ON_IDENTIFIER) && + identifierContainsTimeTravel(identifier) + } + + /** Checks if the table identifier contains patterns that resemble time travel syntax. */ + private def identifierContainsTimeTravel(identifier: String): Boolean = identifier match { + case TIMESTAMP_URI_FOR_TIME_TRAVEL(ts) => true + case VERSION_URI_FOR_TIME_TRAVEL(v) => true + case _ => false + } + + /** Adds a time travel node based on the special syntax in the table identifier. */ + def resolvePath(conf: SQLConf, identifier: String): (DeltaTimeTravelSpec, String) = { + identifier match { + case TIMESTAMP_URI_FOR_TIME_TRAVEL(ts) => + val timestamp = parseTimestamp(ts, conf.sessionLocalTimeZone) + // Drop the 18 characters in the right, which is the timestamp format and the @ character. 
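// Illustrative example (hypothetical path, not part of this patch): for
// "/data/events@20190101000000000" the regex captures the 17-digit timestamp
// "20190101000000000" and realIdentifier below becomes "/data/events"; an identifier like
// "/data/events@v12" is handled by the version branch instead.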
+ val realIdentifier = identifier.dropRight(TIMESTAMP_FORMAT_LENGTH + 1) + + DeltaTimeTravelSpec(Some(timestamp), None, Some("atSyntax.path")) -> realIdentifier + case VERSION_URI_FOR_TIME_TRAVEL(v) => + // Drop the version, and `@v` characters from the identifier + val realIdentifier = identifier.dropRight(v.length + 2) + DeltaTimeTravelSpec(None, Some(v.toLong), Some("atSyntax.path")) -> realIdentifier + } + } + + /** + * Parse the given timestamp string into a proper Catalyst TimestampType. We support millisecond + * level precision, therefore don't use standard SQL timestamp functions, which only support + * second level precision. + * + * @throws `AnalysisException` when the timestamp format doesn't match our criteria + */ + private def parseTimestamp(ts: String, timeZone: String): Expression = { + val format = FastDateFormat.getInstance(TIMESTAMP_FORMAT, DateTimeUtils.getTimeZone(timeZone)) + + try { + val sqlTs = DateTimeUtils.fromJavaTimestamp(new java.sql.Timestamp(format.parse(ts).getTime)) + PreciseTimestampConversion(Literal(sqlTs), LongType, TimestampType) + } catch { + case e: java.text.ParseException => + throw DeltaErrors.invalidTimestampFormat(ts, TIMESTAMP_FORMAT, Some(e)) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaUDF.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaUDF.scala new file mode 100644 index 00000000000..fb5647b82fb --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaUDF.scala @@ -0,0 +1,91 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.expressions.{SparkUserDefinedFunction, UserDefinedFunction} +import org.apache.spark.sql.functions.udf + +/** + * Define a few templates for udfs used by Delta. Use these templates to create + * `SparkUserDefinedFunction` to avoid creating new Encoders. This would save us from touching + * `ScalaReflection` to reduce the lock contention in concurrent queries. 
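 *
 * Illustrative usage (not part of this patch): DeltaUDF.stringFromString(_.toUpperCase)
 * yields a UserDefinedFunction that, when INTERNAL_UDF_OPTIMIZATION_ENABLED is on, copies
 * the pre-built encoders from stringFromStringTemplate instead of deriving new ones via
 * ScalaReflection; otherwise it falls back to a plain udf(f).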
+ */ +object DeltaUDF { + + def stringFromString(f: String => String): UserDefinedFunction = + createUdfFromTemplateUnsafe(stringFromStringTemplate, f, udf(f)) + + def intFromString(f: String => Int): UserDefinedFunction = + createUdfFromTemplateUnsafe(intFromStringTemplate, f, udf(f)) + + def intFromStringBoolean(f: (String, Boolean) => Int): UserDefinedFunction = + createUdfFromTemplateUnsafe(intFromStringBooleanTemplate, f, udf(f)) + + def boolean(f: () => Boolean): UserDefinedFunction = + createUdfFromTemplateUnsafe(booleanTemplate, f, udf(f)) + + def stringFromMap(f: Map[String, String] => String): UserDefinedFunction = + createUdfFromTemplateUnsafe(stringFromMapTemplate, f, udf(f)) + + def booleanFromMap(f: Map[String, String] => Boolean): UserDefinedFunction = + createUdfFromTemplateUnsafe(booleanFromMapTemplate, f, udf(f)) + + def booleanFromByte(x: Byte => Boolean): UserDefinedFunction = + createUdfFromTemplateUnsafe(booleanFromByteTemplate, x, udf(x)) + + private lazy val stringFromStringTemplate = + udf[String, String](identity).asInstanceOf[SparkUserDefinedFunction] + + private lazy val booleanTemplate = udf(() => true).asInstanceOf[SparkUserDefinedFunction] + + private lazy val intFromStringTemplate = + udf((_: String) => 1).asInstanceOf[SparkUserDefinedFunction] + + private lazy val intFromStringBooleanTemplate = + udf((_: String, _: Boolean) => 1).asInstanceOf[SparkUserDefinedFunction] + + private lazy val stringFromMapTemplate = + udf((_: Map[String, String]) => "").asInstanceOf[SparkUserDefinedFunction] + + private lazy val booleanFromMapTemplate = + udf((_: Map[String, String]) => true).asInstanceOf[SparkUserDefinedFunction] + + private lazy val booleanFromByteTemplate = + udf((_: Byte) => true).asInstanceOf[SparkUserDefinedFunction] + + /** + * Return a `UserDefinedFunction` for the given `f` from `template` if + * `INTERNAL_UDF_OPTIMIZATION_ENABLED` is enabled. Otherwise, `orElse` will be called to create a + * new `UserDefinedFunction`. + */ + private def createUdfFromTemplateUnsafe( + template: SparkUserDefinedFunction, + f: AnyRef, + orElse: => UserDefinedFunction): UserDefinedFunction = { + if (SparkSession.active.sessionState.conf + .getConf(DeltaSQLConf.INTERNAL_UDF_OPTIMIZATION_ENABLED)) { + val inputEncoders = template.inputEncoders.map(_.map(_.copy())) + val outputEncoder = template.outputEncoder.map(_.copy()) + template.copy(f = f, inputEncoders = inputEncoders, outputEncoder = outputEncoder) + } else { + orElse + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaUnsupportedOperationsCheck.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaUnsupportedOperationsCheck.scala new file mode 100644 index 00000000000..e94770a9e09 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaUnsupportedOperationsCheck.scala @@ -0,0 +1,131 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import scala.util.control.NonFatal + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSourceUtils + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.ResolvedTable +import org.apache.spark.sql.catalyst.catalog.CatalogTableType +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, DropTable, LogicalPlan, OverwriteByExpression, ShowCreateTable, V2WriteCommand} +import org.apache.spark.sql.execution.command._ +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation + +/** + * A rule to add helpful error messages when Delta is being used with unsupported Hive operations + * or if an unsupported operation is being made, e.g. a DML operation like + * INSERT/UPDATE/DELETE/MERGE when a table doesn't exist. + */ +case class DeltaUnsupportedOperationsCheck(spark: SparkSession) + extends (LogicalPlan => Unit) + with DeltaLogging { + + private def fail(operation: String, tableIdent: TableIdentifier): Unit = { + val metadata = try Some(spark.sessionState.catalog.getTableMetadata(tableIdent)) catch { + case NonFatal(_) => None + } + if (metadata.exists(DeltaTableUtils.isDeltaTable)) { + throw DeltaErrors.operationNotSupportedException(operation, tableIdent) + } + } + + private def fail(operation: String, provider: String): Unit = { + if (DeltaSourceUtils.isDeltaDataSourceName(provider)) { + throw DeltaErrors.operationNotSupportedException(operation) + } + } + + def apply(plan: LogicalPlan): Unit = plan.foreach { + // Unsupported Hive commands + + case a: AnalyzePartitionCommand => + recordDeltaEvent(null, "delta.unsupported.analyzePartition") + fail(operation = "ANALYZE TABLE PARTITION", a.tableIdent) + + case a: AlterTableAddPartitionCommand => + recordDeltaEvent(null, "delta.unsupported.addPartition") + fail(operation = "ALTER TABLE ADD PARTITION", a.tableName) + + case a: AlterTableDropPartitionCommand => + recordDeltaEvent(null, "delta.unsupported.dropPartition") + fail(operation = "ALTER TABLE DROP PARTITION", a.tableName) + + case a: RepairTableCommand => + recordDeltaEvent(null, "delta.unsupported.recoverPartitions") + fail(operation = "ALTER TABLE RECOVER PARTITIONS", a.tableName) + + case a: AlterTableSerDePropertiesCommand => + recordDeltaEvent(null, "delta.unsupported.alterSerDe") + fail(operation = "ALTER TABLE SET SERDEPROPERTIES", a.tableName) + + case l: LoadDataCommand => + recordDeltaEvent(null, "delta.unsupported.loadData") + fail(operation = "LOAD DATA", l.table) + + case i: InsertIntoDataSourceDirCommand => + recordDeltaEvent(null, "delta.unsupported.insertDirectory") + fail(operation = "INSERT OVERWRITE DIRECTORY", i.provider) + + case ShowCreateTable(t: ResolvedTable, _, _) if t.table.isInstanceOf[DeltaTableV2] => + recordDeltaEvent(null, "delta.unsupported.showCreateTable") + fail(operation = "SHOW CREATE TABLE", "DELTA") + + // Delta table checks + case append: AppendData => + val op = if (append.isByName) "APPEND" else "INSERT" + checkDeltaTableExists(append, op) + + case overwrite: OverwriteByExpression => + checkDeltaTableExists(overwrite, "OVERWRITE") + + case _: DropTable => + // For Delta tables being dropped, we do not need the underlying Delta log to exist so this is + // OK + return + + case DataSourceV2Relation(tbl: DeltaTableV2, _, _, _, _) if 
!tbl.tableExists => + throw DeltaErrors.pathNotExistsException(tbl.deltaLog.dataPath.toString) + + case r: ResolvedTable if r.table.isInstanceOf[DeltaTableV2] && + !r.table.asInstanceOf[DeltaTableV2].tableExists => + throw DeltaErrors.pathNotExistsException( + r.table.asInstanceOf[DeltaTableV2].deltaLog.dataPath.toString) + + case _ => // OK + } + + /** + * Check that the given operation is being made on a full Delta table that exists. + */ + private def checkDeltaTableExists(command: V2WriteCommand, operation: String): Unit = { + command.table match { + case DeltaRelation(lr) => + // the extractor performs the check that we want if this is indeed being called on a Delta + // table. It should leave others unchanged + if (DeltaFullTable.unapply(lr).isEmpty) { + throw DeltaErrors.notADeltaTableException(operation) + } + case _ => + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaViewHelper.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaViewHelper.scala new file mode 100644 index 00000000000..7dcb9a67184 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaViewHelper.scala @@ -0,0 +1,108 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Cast, NamedExpression} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, SubqueryAlias, View} +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.internal.SQLConf + +object DeltaViewHelper { + def stripTempViewForMerge(plan: LogicalPlan, conf: SQLConf): LogicalPlan = { + // Check that the two expression lists have the same names and types in the same order, and + // are either attributes or direct casts of attributes. + def attributesMatch(left: Seq[NamedExpression], right: Seq[NamedExpression]): Boolean = { + if (left.length != right.length) return false + + val allowedExprs = (left ++ right).forall { + case _: Attribute => true + case Alias(Cast(_: Attribute, dataType, timeZone, _), name) => true + case _ => false + } + + val exprsMatch = left.zip(right).forall { + case (a, b) => a.dataType == b.dataType && conf.resolver(a.name, b.name) + } + + allowedExprs && exprsMatch + } + + + // We have to do a pretty complicated transformation here to support using two specific things + // which are not a Delta table as the target of Delta DML commands: + // A view defined as `SELECT * FROM underlying_tbl` + // A view defined as `SELECT * FROM underlying_tbl as alias` + // This requires stripping their intermediate nodes and pulling out just the scan, because + // some of our internal attribute fiddling requires the target plan to have the same attribute + // IDs as the underlying scan. 
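  // Illustrative walk-through (hypothetical names, not part of this patch): a view created
  // as `CREATE TEMP VIEW v AS SELECT * FROM delta_tbl` analyzes roughly to
  // View(Project(Project(SubqueryAlias(LogicalRelation)))); the ViewPlan extractor below
  // recognizes that shape and collapses it back to the underlying scan, so a command such
  // as `MERGE INTO v ...` ends up targeting the Delta table's own attribute IDs.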
+ object ViewPlan { + def unapply( + plan: LogicalPlan): Option[(CatalogTable, Seq[NamedExpression], LogicalRelation)] = { + // A `SELECT * from underlying_table` view will have: + // * A View node marking it as a view. + // * An outer Project explicitly casting the scanned types to the types defined in the + // metastore for the view. We don't need this cast for Delta DML commands and it will + // end up being eliminated. + // * An inner no-op project. + // * A SubqueryAlias explicitly aliasing the scan to its own name (plus another if there's + // a user specified alias. + // * The actual scan of the Delta table. + // We check for these Projects by ensuring that the name lists are an exact match, and + // produce a scan with the outer list's attribute IDs aliased to the view's name. + plan match { + case View(desc, true, // isTempView + Project(outerList, + Project(innerList, + SubqueryAlias(innerAlias, scan: LogicalRelation)))) + if attributesMatch(outerList, innerList) && attributesMatch(outerList, scan.output) => + Some(desc, outerList, scan) + case View(desc, true, // isTempView + Project(outerList, + Project(innerList, + SubqueryAlias(innerAlias, SubqueryAlias(subalias, scan: LogicalRelation))))) + if attributesMatch(outerList, innerList) && attributesMatch(outerList, scan.output) => + Some(desc, outerList, scan) + case _ => None + } + } + } + + plan.transformUp { + case ViewPlan(desc, outerList, scan) => + val newOutput = scan.output.map { oldAttr => + val newId = outerList.collectFirst { + case newAttr if conf.resolver(oldAttr.qualifiedName, newAttr.qualifiedName) => + newAttr.exprId + }.getOrElse { + throw DeltaErrors.noNewAttributeId(oldAttr) + } + oldAttr.withExprId(newId) + } + SubqueryAlias(desc.qualifiedName, scan.copy(output = newOutput)) + + case v: View if v.isTempView => + v.child + } + } + + def stripTempView(plan: LogicalPlan, conf: SQLConf): LogicalPlan = { + plan.transformUp { + case v: View if v.isTempView => v.child + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DomainMetadataUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DomainMetadataUtils.scala new file mode 100644 index 00000000000..af633012234 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DomainMetadataUtils.scala @@ -0,0 +1,99 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.actions.{Action, DomainMetadata, Protocol} +import org.apache.spark.sql.delta.metering.DeltaLogging + +object DomainMetadataUtils extends DeltaLogging { + // List of metadata domains that will be removed for the REPLACE TABLE operation. + private val METADATA_DOMAINS_TO_REMOVE_FOR_REPLACE_TABLE: Set[String] = Set( + ) + // List of metadata domains that will be copied from the table we are restoring to. 
+ private val METADATA_DOMAIN_TO_COPY_FOR_RESTORE_TABLE = + METADATA_DOMAINS_TO_REMOVE_FOR_REPLACE_TABLE + + /** + * Returns whether the protocol version supports the [[DomainMetadata]] action. + */ + def domainMetadataSupported(protocol: Protocol): Boolean = + protocol.isFeatureSupported(DomainMetadataTableFeature) + + /** + * Given a list of [[Action]]s, build a domain name to [[DomainMetadata]] map. + * Note duplicated domain name is not expected otherwise an internal error is thrown. + */ + def extractDomainMetadatasMap(actions: Seq[Action]): Map[String, DomainMetadata] = { + actions + .collect { case action: DomainMetadata => action } + .groupBy(_.domain) + .map { case (name, domains) => + if (domains.length != 1) { + throw DeltaErrors.domainMetadataDuplicate(domains.head.domain) + } + name -> domains.head + } + } + + /** + * Validate there are no two [[DomainMetadata]] actions with the same domain name. An internal + * exception is thrown if any duplicated domains are detected. + * + * @param actions: Actions the current transaction wants to commit. + */ + def validateDomainMetadataSupportedAndNoDuplicate( + actions: Seq[Action], protocol: Protocol): Seq[DomainMetadata] = { + val domainMetadatas = extractDomainMetadatasMap(actions) + if (domainMetadatas.nonEmpty && !domainMetadataSupported(protocol)) { + throw DeltaErrors.domainMetadataTableFeatureNotSupported( + domainMetadatas.map(_._2.domain).mkString("[", ",", "]")) + } + domainMetadatas.values.toSeq + } + + /** + * Generates a new sequence of DomainMetadata to commits for REPLACE TABLE. + * - By default, existing metadata domains survive as long as they don't appear in the + * new metadata domains, in which case new metadata domains overwrite the existing ones. + * - Existing domains will be removed only if they appear in the pre-defined + * "removal" list (e.g., table features require some specific domains to be removed). + */ + def handleDomainMetadataForReplaceTable( + existingDomainMetadatas: Seq[DomainMetadata], + newDomainMetadatas: Seq[DomainMetadata]): Seq[DomainMetadata] = { + val newDomainNames = newDomainMetadatas.map(_.domain).toSet + existingDomainMetadatas + // Filter out metadata domains unless they are in the list to be removed + // and they don't appear in the new metadata domains. + .filter(m => !newDomainNames.contains(m.domain) && + METADATA_DOMAINS_TO_REMOVE_FOR_REPLACE_TABLE.contains(m.domain)) + .map(_.copy(removed = true)) ++ newDomainMetadatas + } + + /** + * Generates a new sequence of DomainMetadata to commits for RESTORE TABLE. + * - Source (table to restore to) domains will be copied if they appear in the pre-defined + * "copy" list (e.g., table features require some specific domains to be copied). + * - All other domains not in the list are "retained". + */ + def handleDomainMetadataForRestoreTable( + sourceDomainMetadatas: Seq[DomainMetadata]): Seq[DomainMetadata] = { + sourceDomainMetadatas.filter { m => + METADATA_DOMAIN_TO_COPY_FOR_RESTORE_TABLE.contains(m.domain) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/GeneratedColumn.scala b/spark/src/main/scala/org/apache/spark/sql/delta/GeneratedColumn.scala new file mode 100644 index 00000000000..7aafadf31d1 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/GeneratedColumn.scala @@ -0,0 +1,598 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.util.Locale + +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} +import org.apache.spark.sql.delta.files.{TahoeBatchFileIndex, TahoeFileIndex} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils.quoteIdentifier +import org.apache.spark.sql.delta.sources.DeltaSourceUtils.GENERATION_EXPRESSION_METADATA_KEY +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.AnalysisHelper + +import org.apache.spark.sql.{AnalysisException, Column, Dataset, SparkSession} +import org.apache.spark.sql.catalyst.analysis.Analyzer +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes +import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, CaseInsensitiveMap} +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.{Metadata => FieldMetadata} +/** + * Provide utility methods to implement Generated Columns for Delta. Users can use the following + * SQL syntax to create a table with generated columns. + * + * ``` + * CREATE TABLE table_identifier( + * column_name column_type, + * column_name column_type GENERATED ALWAYS AS ( generation_expr ), + * ... + * ) + * USING delta + * [ PARTITIONED BY (partition_column_name, ...) ] + * ``` + * + * This is an example: + * ``` + * CREATE TABLE foo( + * id bigint, + * type string, + * subType string GENERATED ALWAYS AS ( SUBSTRING(type FROM 0 FOR 4) ), + * data string, + * eventTime timestamp, + * day date GENERATED ALWAYS AS ( days(eventTime) ) + * USING delta + * PARTITIONED BY (type, day) + * ``` + * + * When writing to a table, for these generated columns: + * - If the output is missing a generated column, we will add an expression to generate it. + * - If a generated column exists in the output, in other words, we will add a constraint to ensure + * the given value doesn't violate the generation expression. + */ +object GeneratedColumn extends DeltaLogging with AnalysisHelper { + + def satisfyGeneratedColumnProtocol(protocol: Protocol): Boolean = + protocol.isFeatureSupported(GeneratedColumnsTableFeature) + + /** + * Whether the field contains the generation expression. Note: this doesn't mean the column is a + * generated column. A column is a generated column only if the table's + * `minWriterVersion` >= `GeneratedColumn.MIN_WRITER_VERSION` and the column metadata contains + * generation expressions. Use the other `isGeneratedColumn` to check whether it's a generated + * column instead. 
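 *
 * (Illustrative aside, not part of this patch: such a field is typically produced by a
 * column declared with `GENERATED ALWAYS AS (...)`, which stores the generation expression
 * string under GENERATION_EXPRESSION_METADATA_KEY in the StructField metadata; this helper
 * only checks for the presence of that key.)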
+ */ + private[delta] def isGeneratedColumn(field: StructField): Boolean = { + field.metadata.contains(GENERATION_EXPRESSION_METADATA_KEY) + } + + /** Whether a column is a generated column. */ + def isGeneratedColumn(protocol: Protocol, field: StructField): Boolean = { + satisfyGeneratedColumnProtocol(protocol) && isGeneratedColumn(field) + } + + /** + * Whether any generation expressions exist in the schema. Note: this doesn't mean the table + * contains generated columns. A table has generated columns only if its protocol satisfies + * Generated Column (listed in Table Features or supported implicitly) and some of columns in + * the table schema contain generation expressions. Use `enforcesGeneratedColumns` to check + * generated column tables instead. + */ + def hasGeneratedColumns(schema: StructType): Boolean = { + schema.exists(isGeneratedColumn) + } + + /** + * Returns the generated columns of a table. A column is a generated column requires: + * - The table writer protocol >= GeneratedColumn.MIN_WRITER_VERSION; + * - It has a generation expression in the column metadata. + */ + def getGeneratedColumns(snapshot: Snapshot): Seq[StructField] = { + if (satisfyGeneratedColumnProtocol(snapshot.protocol)) { + snapshot.metadata.schema.partition(isGeneratedColumn)._1 + } else { + Nil + } + } + + /** + * Whether the table has generated columns. A table has generated columns only if its + * protocol satisfies Generated Column (listed in Table Features or supported implicitly) and + * some of columns in the table schema contain generation expressions. + * + * As Spark will propagate column metadata storing the generation expression through + * the entire plan, old versions that don't support generated columns may create tables whose + * schema contain generation expressions. However, since these old versions has a lower writer + * version, we can use the table's `minWriterVersion` to identify such tables and treat them as + * normal tables. + * + * @param protocol the table protocol. + * @param metadata the table metadata. + */ + def enforcesGeneratedColumns(protocol: Protocol, metadata: Metadata): Boolean = { + satisfyGeneratedColumnProtocol(protocol) && metadata.schema.exists(isGeneratedColumn) + } + + /** Return the generation expression from a field metadata if any. */ + def getGenerationExpressionStr(metadata: FieldMetadata): Option[String] = { + if (metadata.contains(GENERATION_EXPRESSION_METADATA_KEY)) { + Some(metadata.getString(GENERATION_EXPRESSION_METADATA_KEY)) + } else { + None + } + } + + /** + * Return the generation expression from a field if any. This method doesn't check the protocl. + * The caller should make sure the table writer protocol meets `satisfyGeneratedColumnProtocol` + * before calling method. + */ + def getGenerationExpression(field: StructField): Option[Expression] = { + getGenerationExpressionStr(field.metadata).map { exprStr => + parseGenerationExpression(SparkSession.active, exprStr) + } + } + + /** Return the generation expression from a field if any. */ + private def getGenerationExpressionStr(field: StructField): Option[String] = { + getGenerationExpressionStr(field.metadata) + } + + /** Parse a generation expression string and convert it to an [[Expression]] object. */ + private def parseGenerationExpression(spark: SparkSession, exprString: String): Expression = { + spark.sessionState.sqlParser.parseExpression(exprString) + } + + /** + * SPARK-27561 added support for lateral column alias. 
This means generation expressions that + * reference other generated columns no longer fail analysis in `validateGeneratedColumns`. + * + * This method checks for and throws an error if: + * - A generated column references itself + * - A generated column references another generated column + */ + def validateColumnReferences( + spark: SparkSession, + fieldName: String, + expression: Expression, + schema: StructType): Unit = { + val allowedBaseColumns = schema + .filterNot(_.name == fieldName) // Can't reference itself + .filterNot(isGeneratedColumn) // Can't reference other generated columns + val relation = new LocalRelation(toAttributes(StructType(allowedBaseColumns))) + try { + val analyzer: Analyzer = spark.sessionState.analyzer + val analyzed = analyzer.execute(Project(Seq(Alias(expression, fieldName)()), relation)) + analyzer.checkAnalysis(analyzed) + } catch { + case ex: AnalysisException => + // Improve error message if possible + if (ex.getErrorClass == "UNRESOLVED_COLUMN.WITH_SUGGESTION") { + throw DeltaErrors.generatedColumnsReferToWrongColumns(ex) + } + throw ex + } + } + + /** + * If the schema contains generated columns, check the following unsupported cases: + * - Refer to a non-existent column or another generated column. + * - Use an unsupported expression. + * - The expression type is not the same as the column type. + */ + def validateGeneratedColumns(spark: SparkSession, schema: StructType): Unit = { + val (generatedColumns, normalColumns) = schema.partition(isGeneratedColumn) + // Create a fake relation using the normal columns and add a project with generation expressions + // on top of it to ask Spark to analyze the plan. This will help us find out the following + // errors: + // - Refer to a non existent column in a generation expression. + // - Refer to a generated column in another one. + val relation = new LocalRelation(toAttributes(StructType(normalColumns))) + val selectExprs = generatedColumns.map { f => + getGenerationExpressionStr(f) match { + case Some(exprString) => + val expr = parseGenerationExpression(spark, exprString) + validateColumnReferences(spark, f.name, expr, schema) + new Column(expr).alias(f.name) + case None => + // Should not happen + throw DeltaErrors.expressionsNotFoundInGeneratedColumn(f.name) + } + } + val dfWithExprs = try { + val plan = Project(selectExprs.map(_.expr.asInstanceOf[NamedExpression]), relation) + Dataset.ofRows(spark, plan) + } catch { + case e: AnalysisException if e.getMessage != null => + val regexCandidates = Seq( + ("A column, variable, or function parameter with name .*?cannot be resolved. " + + "Did you mean one of the following?.*?").r, + "cannot resolve.*?given input columns:.*?".r, + "Column.*?does not exist.".r + ) + if (regexCandidates.exists(_.findFirstMatchIn(e.getMessage).isDefined)) { + throw DeltaErrors.generatedColumnsReferToWrongColumns(e) + } else { + throw e + } + } + // Check whether the generation expressions are valid + dfWithExprs.queryExecution.analyzed.transformAllExpressions { + case expr: Alias => + // Alias will be non deterministic if it points to a non deterministic expression. + // Skip `Alias` to provide a better error for a non deterministic expression. + expr + case expr @ (_: GetStructField | _: GetArrayItem) => + // The complex type extractors don't have a function name, so we need to check them + // separately. `GetMapValue` and `GetArrayStructFields` are not supported because Delta + // Invariant Check doesn't support them. 
+ expr + case expr: UserDefinedExpression => + throw DeltaErrors.generatedColumnsUDF(expr) + case expr if !expr.deterministic => + throw DeltaErrors.generatedColumnsNonDeterministicExpression(expr) + case expr if expr.isInstanceOf[AggregateExpression] => + throw DeltaErrors.generatedColumnsAggregateExpression(expr) + case expr if !SupportedGenerationExpressions.expressions.contains(expr.getClass) => + throw DeltaErrors.generatedColumnsUnsupportedExpression(expr) + } + // Compare the columns types defined in the schema and the expression types. + generatedColumns.zip(dfWithExprs.schema).foreach { case (column, expr) => + if (column.dataType != expr.dataType) { + throw DeltaErrors.generatedColumnsTypeMismatch(column.name, column.dataType, expr.dataType) + } + } + } + + def getGeneratedColumnsAndColumnsUsedByGeneratedColumns(schema: StructType): Set[String] = { + val generationExprs = schema.flatMap { col => + getGenerationExpressionStr(col).map { exprStr => + val expr = parseGenerationExpression(SparkSession.active, exprStr) + new Column(expr).alias(col.name) + } + } + if (generationExprs.isEmpty) { + return Set.empty + } + + val df = Dataset.ofRows(SparkSession.active, new LocalRelation(toAttributes(schema))) + val generatedColumnsAndColumnsUsedByGeneratedColumns = + df.select(generationExprs: _*).queryExecution.analyzed match { + case Project(exprs, _) => + exprs.flatMap { + case Alias(expr, column) => + expr.references.map { + case a: AttributeReference => a.name + case other => + // Should not happen since the columns should be resolved + throw DeltaErrors.unexpectedAttributeReference(s"$other") + }.toSeq :+ column + case other => + // Should not happen since we use `Alias` expressions. + throw DeltaErrors.unexpectedAlias(s"$other") + } + case other => + // Should not happen since `select` should use `Project`. + throw DeltaErrors.unexpectedProject(other.toString()) + } + // Converting columns to lower case is fine since Delta's schema is always case insensitive. + generatedColumnsAndColumnsUsedByGeneratedColumns.map(_.toLowerCase(Locale.ROOT)).toSet + } + + private def createFieldPath(nameParts: Seq[String]): String = { + nameParts.map(quoteIfNeeded _).mkString(".") + } + + /** + * Try to get `OptimizablePartitionExpression`s of a data column when a partition column is + * defined as a generated column and refers to this data column. + * + * @param schema the table schema + * @param partitionSchema the partition schema. If a partition column is defined as a generated + * column, its column metadata should contain the generation expression. + */ + def getOptimizablePartitionExpressions( + schema: StructType, + partitionSchema: StructType): Map[String, Seq[OptimizablePartitionExpression]] = { + val partitionGenerationExprs = partitionSchema.flatMap { col => + getGenerationExpressionStr(col).map { exprStr => + val expr = parseGenerationExpression(SparkSession.active, exprStr) + new Column(expr).alias(col.name) + } + } + if (partitionGenerationExprs.isEmpty) { + return Map.empty + } + + val spark = SparkSession.active + val resolver = spark.sessionState.analyzer.resolver + + // `a.name` comes from the generation expressions which users may use different cases. We + // need to normalize it to the same case so that we can group expressions for the same + // column name together. 
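    // Illustrative example (hypothetical column names, not part of this patch): with
    // case-insensitive analysis, a partition column generated as YEAR(EventTime) and a data
    // column named "eventTime" both normalize to "eventtime", so their expressions are
    // grouped under the same key.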
+ val nameNormalizer: String => String = + if (spark.sessionState.conf.caseSensitiveAnalysis) x => x else _.toLowerCase(Locale.ROOT) + + /** + * Returns a normalized column name with its `OptimizablePartitionExpression` + */ + def createExpr(nameParts: Seq[String])(func: => OptimizablePartitionExpression): + Option[(String, OptimizablePartitionExpression)] = { + if (schema.findNestedField(nameParts, resolver = resolver).isDefined) { + Some(nameNormalizer(createFieldPath(nameParts)) -> func) + } else { + None + } + } + + val df = Dataset.ofRows(SparkSession.active, new LocalRelation(toAttributes(schema))) + val extractedPartitionExprs = + df.select(partitionGenerationExprs: _*).queryExecution.analyzed match { + case Project(exprs, _) => + exprs.flatMap { + case Alias(expr, partColName) => + expr match { + case Cast(ExtractBaseColumn(name, TimestampType), DateType, _, _) => + createExpr(name)(DatePartitionExpr(partColName)) + case Cast(ExtractBaseColumn(name, DateType), DateType, _, _) => + createExpr(name)(DatePartitionExpr(partColName)) + case Year(ExtractBaseColumn(name, DateType)) => + createExpr(name)(YearPartitionExpr(partColName)) + case Year(Cast(ExtractBaseColumn(name, TimestampType), DateType, _, _)) => + createExpr(name)(YearPartitionExpr(partColName)) + case Year(Cast(ExtractBaseColumn(name, DateType), DateType, _, _)) => + createExpr(name)(YearPartitionExpr(partColName)) + case Month(Cast(ExtractBaseColumn(name, TimestampType), DateType, _, _)) => + createExpr(name)(MonthPartitionExpr(partColName)) + case DateFormatClass( + Cast(ExtractBaseColumn(name, DateType), TimestampType, _, _), + StringLiteral(format), _) => + format match { + case DATE_FORMAT_YEAR_MONTH => + createExpr(name)( + DateFormatPartitionExpr(partColName, DATE_FORMAT_YEAR_MONTH)) + case _ => None + } + case DateFormatClass(ExtractBaseColumn(name, TimestampType), + StringLiteral(format), _) => + format match { + case DATE_FORMAT_YEAR_MONTH => + createExpr(name)( + DateFormatPartitionExpr(partColName, DATE_FORMAT_YEAR_MONTH)) + case DATE_FORMAT_YEAR_MONTH_DAY => + createExpr(name)( + DateFormatPartitionExpr(partColName, DATE_FORMAT_YEAR_MONTH_DAY)) + case DATE_FORMAT_YEAR_MONTH_DAY_HOUR => + createExpr(name)( + DateFormatPartitionExpr(partColName, DATE_FORMAT_YEAR_MONTH_DAY_HOUR)) + case _ => None + } + case DayOfMonth(Cast(ExtractBaseColumn(name, TimestampType), + DateType, _, _)) => + createExpr(name)(DayPartitionExpr(partColName)) + case Hour(ExtractBaseColumn(name, TimestampType), _) => + createExpr(name)(HourPartitionExpr(partColName)) + case Substring(ExtractBaseColumn(name, StringType), IntegerLiteral(pos), + IntegerLiteral(len)) => + createExpr(name)(SubstringPartitionExpr(partColName, pos, len)) + case TruncTimestamp( + StringLiteral(format), ExtractBaseColumn(name, TimestampType), _) => + createExpr(name)(TimestampTruncPartitionExpr(format, partColName)) + case TruncTimestamp( + StringLiteral(format), + Cast(ExtractBaseColumn(name, DateType), TimestampType, _, _), _) => + createExpr(name)(TimestampTruncPartitionExpr(format, partColName)) + case ExtractBaseColumn(name, _) => + createExpr(name)(IdentityPartitionExpr(partColName)) + case TruncDate(ExtractBaseColumn(name, DateType), StringLiteral(format)) => + createExpr(name)(TruncDatePartitionExpr(partColName, + format)) + case TruncDate(Cast( + ExtractBaseColumn(name, TimestampType | StringType), DateType, _, _), + StringLiteral(format)) => + createExpr(name)(TruncDatePartitionExpr(partColName, + format)) + case _ => None + } + case other => + // Should 
not happen since we use `Alias` expressions. + throw DeltaErrors.unexpectedAlias(s"$other") + } + case other => + // Should not happen since `select` should use `Project`. + throw DeltaErrors.unexpectedProject(other.toString()) + } + extractedPartitionExprs.groupBy(_._1).map { case (name, group) => + val groupedExprs = group.map(_._2) + val mergedExprs = mergePartitionExpressionsIfPossible(groupedExprs) + if (log.isDebugEnabled) { + logDebug(s"Optimizable partition expressions for column $name:") + mergedExprs.foreach(expr => logDebug(expr.toString)) + } + name -> mergedExprs + } + } + + /** + * Merge multiple partition expressions into one if possible. For example, users may define + * three partitions columns, `year`, `month` and `day`, rather than defining a single `date` + * partition column. Hence, we need to take the multiple partition columns into a single + * part to consider when optimizing queries. + */ + private def mergePartitionExpressionsIfPossible( + exprs: Seq[OptimizablePartitionExpression]): Seq[OptimizablePartitionExpression] = { + def isRedundantPartitionExpr(f: OptimizablePartitionExpression): Boolean = { + f.isInstanceOf[YearPartitionExpr] || + f.isInstanceOf[MonthPartitionExpr] || + f.isInstanceOf[DayPartitionExpr] || + f.isInstanceOf[HourPartitionExpr] + } + + // Take the first option because it's safe to drop other duplicate partition expressions + val year = exprs.collect { case y: YearPartitionExpr => y }.headOption + val month = exprs.collect { case m: MonthPartitionExpr => m }.headOption + val day = exprs.collect { case d: DayPartitionExpr => d }.headOption + val hour = exprs.collect { case h: HourPartitionExpr => h }.headOption + (year ++ month ++ day ++ hour) match { + case Seq( + year: YearPartitionExpr, + month: MonthPartitionExpr, + day: DayPartitionExpr, + hour: HourPartitionExpr) => + exprs.filterNot(isRedundantPartitionExpr) :+ + YearMonthDayHourPartitionExpr(year.yearPart, month.monthPart, day.dayPart, hour.hourPart) + case Seq(year: YearPartitionExpr, month: MonthPartitionExpr, day: DayPartitionExpr) => + exprs.filterNot(isRedundantPartitionExpr) :+ + YearMonthDayPartitionExpr(year.yearPart, month.monthPart, day.dayPart) + case Seq(year: YearPartitionExpr, month: MonthPartitionExpr) => + exprs.filterNot(isRedundantPartitionExpr) :+ + YearMonthPartitionExpr(year.yearPart, month.monthPart) + case _ => + exprs + } + } + + def partitionFilterOptimizationEnabled(spark: SparkSession): Boolean = { + spark.sessionState.conf + .getConf(DeltaSQLConf.GENERATED_COLUMN_PARTITION_FILTER_OPTIMIZATION_ENABLED) + } + + + /** + * Try to generate partition filters from data filters if possible. + * + * @param delta the logical plan that outputs the same attributes as the table schema. This will + * be used to resolve auto generated expressions. + */ + def generatePartitionFilters( + spark: SparkSession, + snapshot: SnapshotDescriptor, + dataFilters: Seq[Expression], + delta: LogicalPlan): Seq[Expression] = { + if (!satisfyGeneratedColumnProtocol(snapshot.protocol)) { + return Nil + } + if (snapshot.metadata.optimizablePartitionExpressions.isEmpty) { + return Nil + } + + val optimizablePartitionExpressions = + if (spark.sessionState.conf.caseSensitiveAnalysis) { + snapshot.metadata.optimizablePartitionExpressions + } else { + CaseInsensitiveMap(snapshot.metadata.optimizablePartitionExpressions) + } + + /** + * Preprocess the data filter such as reordering to ensure the column name appears on the left + * and the literal appears on the right. 
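     * For example (illustrative): a filter written as `5 > eventTime` arrives as
     * GreaterThan(Literal(5), col) and is rewritten to LessThan(col, Literal(5)), so the
     * pattern matches below only need to handle the column-on-the-left shape.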
+ */ + def preprocess(filter: Expression): Expression = filter match { + case LessThan(lit: Literal, e: Expression) => + GreaterThan(e, lit) + case LessThanOrEqual(lit: Literal, e: Expression) => + GreaterThanOrEqual(e, lit) + case EqualTo(lit: Literal, e: Expression) => + EqualTo(e, lit) + case GreaterThan(lit: Literal, e: Expression) => + LessThan(e, lit) + case GreaterThanOrEqual(lit: Literal, e: Expression) => + LessThanOrEqual(e, lit) + case e => e + } + + /** + * Find the `OptimizablePartitionExpression`s of column `a` and apply them to get the partition + * filters. + */ + def toPartitionFilter( + nameParts: Seq[String], + func: (OptimizablePartitionExpression) => Option[Expression]): Seq[Expression] = { + optimizablePartitionExpressions.get(createFieldPath(nameParts)).toSeq.flatMap { exprs => + exprs.flatMap(expr => func(expr)) + } + } + + val partitionFilters = dataFilters.flatMap { filter => + preprocess(filter) match { + case LessThan(ExtractBaseColumn(nameParts, _), lit: Literal) => + toPartitionFilter(nameParts, _.lessThan(lit)) + case LessThanOrEqual(ExtractBaseColumn(nameParts, _), lit: Literal) => + toPartitionFilter(nameParts, _.lessThanOrEqual(lit)) + case EqualTo(ExtractBaseColumn(nameParts, _), lit: Literal) => + toPartitionFilter(nameParts, _.equalTo(lit)) + case GreaterThan(ExtractBaseColumn(nameParts, _), lit: Literal) => + toPartitionFilter(nameParts, _.greaterThan(lit)) + case GreaterThanOrEqual(ExtractBaseColumn(nameParts, _), lit: Literal) => + toPartitionFilter(nameParts, _.greaterThanOrEqual(lit)) + case IsNull(ExtractBaseColumn(nameParts, _)) => + toPartitionFilter(nameParts, _.isNull()) + case _ => Nil + } + } + + val resolvedPartitionFilters = resolveReferencesForExpressions(spark, partitionFilters, delta) + + if (log.isDebugEnabled) { + logDebug("User provided data filters:") + dataFilters.foreach(f => logDebug(f.sql)) + logDebug("Auto generated partition filters:") + partitionFilters.foreach(f => logDebug(f.sql)) + logDebug("Resolved generated partition filters:") + resolvedPartitionFilters.foreach(f => logDebug(f.sql)) + } + + val executionId = Option(spark.sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)) + .getOrElse("unknown") + recordDeltaEvent( + snapshot.deltaLog, + "delta.generatedColumns.optimize", + data = Map( + "executionId" -> executionId, + "triggered" -> resolvedPartitionFilters.nonEmpty + )) + + resolvedPartitionFilters + } + + private val DATE_FORMAT_YEAR_MONTH = "yyyy-MM" + private val DATE_FORMAT_YEAR_MONTH_DAY = "yyyy-MM-dd" + private val DATE_FORMAT_YEAR_MONTH_DAY_HOUR = "yyyy-MM-dd-HH" +} + +/** + * Finds the full dot-separated path to a field and the data type of the field. This unifies + * handling of nested and non-nested fields, and allows pattern matching on the data type. + */ +object ExtractBaseColumn { + def unapply(e: Expression): Option[(Seq[String], DataType)] = e match { + case AttributeReference(name, dataType, _, _) => + Some(Seq(name), dataType) + case g: GetStructField => g.child match { + case ExtractBaseColumn(nameParts, _) => + Some(nameParts :+ g.extractFieldName, g.dataType) + case _ => None + } + case _ => None + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/IcebergCompat.scala b/spark/src/main/scala/org/apache/spark/sql/delta/IcebergCompat.scala new file mode 100644 index 00000000000..3910e37bcea --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/IcebergCompat.scala @@ -0,0 +1,418 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.actions.{Action, AddFile, Metadata, Protocol} +import org.apache.spark.sql.delta.commands.DeletionVectorUtils +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils + +import org.apache.spark.sql.types._ + +/** + * Utils to validate the IcebergCompatV1 table feature, which is responsible for keeping Delta + * tables in valid states (see the Delta spec for full invariants, dependencies, and requirements) + * so that they are capable of having Delta to Iceberg metadata conversion applied to them. The + * IcebergCompatV1 table feature does not implement, specify, or control the actual metadata + * conversion; that is handled by the Delta UniForm feature. + * + * Note that UniForm (Iceberg) depends on IcebergCompatV1, but IcebergCompatV1 does not depend on or + * require UniForm (Iceberg). It is perfectly valid for a Delta table to have IcebergCompatV1 + * enabled but UniForm (Iceberg) not enabled. + */ + +object IcebergCompatV1 extends IcebergCompat( + version = 1, + config = DeltaConfigs.ICEBERG_COMPAT_V1_ENABLED, + requiredTableFeatures = Seq(ColumnMappingTableFeature), + requiredTableProperties = Seq(RequireColumnMapping), + checks = Seq( + CheckOnlySingleVersionEnabled, + CheckAddFileHasStats, + CheckNoPartitionEvolution, + CheckNoListMapNullType, + CheckNoDeletionVector, + CheckVersionChangeNeedsRewrite) +) + +object IcebergCompatV2 extends IcebergCompat( + version = 2, + config = DeltaConfigs.ICEBERG_COMPAT_V2_ENABLED, + requiredTableFeatures = Seq(ColumnMappingTableFeature), + requiredTableProperties = Seq(RequireColumnMapping), + checks = Seq( + CheckOnlySingleVersionEnabled, + CheckAddFileHasStats, + CheckTypeInV2AllowList, + CheckNoPartitionEvolution, + CheckNoDeletionVector, + CheckVersionChangeNeedsRewrite) +) + +/** + * All IcebergCompatVx should extend from this base class + * + * @param version the compat version number + * @param config the DeltaConfig for this IcebergCompat version + * @param requiredTableFeatures a list of table features it relies on + * @param requiredTableProperties a list of table properties it relies on. + * See [[RequiredDeltaTableProperty]] + * @param checks a list of checks this IcebergCompatVx will perform. + * @see [[RequiredDeltaTableProperty]] + */ +case class IcebergCompat( + version: Integer, + config: DeltaConfig[Option[Boolean]], + requiredTableFeatures: Seq[TableFeature], + requiredTableProperties: Seq[RequiredDeltaTableProperty[_<:Any]], + checks: Seq[IcebergCompatCheck]) extends DeltaLogging { + def isEnabled(metadata: Metadata): Boolean = config.fromMetaData(metadata).getOrElse(false) + + /** + * Expected to be called after the newest metadata and protocol have been ~ finalized. + * + * Furthermore, this should be called *after* + * [[UniversalFormat.enforceIcebergInvariantsAndDependencies]]. 
+ * + * If you are enabling IcebergCompatV1 and are creating a new table, this method will + * automatically upgrade the table protocol to support ColumnMapping and set it to 'name' mode, + * too. + * + * If you are disabling IcebergCompatV1, this method will also disable Universal Format (Iceberg), + * if it is enabled. + * + * @param actions The actions to be committed in the txn. We will only look at the [[AddFile]]s. + * + * @return tuple of options of (updatedProtocol, updatedMetadata). For either action, if no + * updates need to be applied, will return None. + */ + def enforceInvariantsAndDependencies( + prevSnapshot: Snapshot, + newestProtocol: Protocol, + newestMetadata: Metadata, + isCreatingOrReorgTable: Boolean, + actions: Seq[Action]): (Option[Protocol], Option[Metadata]) = { + val prevProtocol = prevSnapshot.protocol + val prevMetadata = prevSnapshot.metadata + val wasEnabled = this.isEnabled(prevMetadata) + val isEnabled = this.isEnabled(newestMetadata) + val tableId = newestMetadata.id + + (wasEnabled, isEnabled) match { + case (_, false) => (None, None) // not enable or disabling, Ignore + case (_, true) => // Enabling now or already-enabled + val tblFeatureUpdates = scala.collection.mutable.Set.empty[TableFeature] + val tblPropertyUpdates = scala.collection.mutable.Map.empty[String, String] + + // Check we have all required table features + requiredTableFeatures.foreach { f => + (prevProtocol.isFeatureSupported(f), newestProtocol.isFeatureSupported(f)) match { + case (_, true) => // all good + case (false, false) => // txn has not supported it! + // Note: this code path should be impossible, since the IcebergCompatVxTableFeature + // specifies ColumnMappingTableFeature as a required table feature. Thus, + // it should already have been added during + // OptimisticTransaction::updateMetadataInternal + if (isCreatingOrReorgTable) { + tblFeatureUpdates += f + } else { + throw DeltaErrors.icebergCompatMissingRequiredTableFeatureException(version, f) + } + case (true, false) => // txn is removing/un-supporting it! + throw DeltaErrors.icebergCompatDisablingRequiredTableFeatureException(version, f) + } + } + + // Check we have all required delta table properties + requiredTableProperties.foreach { + case RequiredDeltaTableProperty(deltaConfig, validator, autoSetValue) => + val newestValue = deltaConfig.fromMetaData(newestMetadata) + val newestValueOkay = validator(newestValue) + val newestValueExplicitlySet = newestMetadata.configuration.contains(deltaConfig.key) + + val err = DeltaErrors.icebergCompatWrongRequiredTablePropertyException( + version, deltaConfig.key, newestValue.toString, autoSetValue) + + if (!newestValueOkay) { + if (!newestValueExplicitlySet && isCreatingOrReorgTable) { + // This case covers both CREATE and REPLACE TABLE commands that + // did not explicitly specify the required deltaConfig. In these + // cases, we set the property automatically. 
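                // (Illustrative, not part of this patch: for the RequireColumnMapping
                // property defined below, this is the step that auto-sets the column
                // mapping mode to "name" when a CREATE or REPLACE TABLE enables
                // IcebergCompat without specifying the mode explicitly.)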
+ tblPropertyUpdates += deltaConfig.key -> autoSetValue + } else { + // In all other cases, if the property value is not compatible + // with the IcebergV1 requirements, we fail + throw err + } + } + } + + // Update Protocol and Metadata if necessary + val protocolResult = if (tblFeatureUpdates.nonEmpty) { + logInfo(s"[tableId=$tableId] IcebergCompatV1 auto-supporting table features: " + + s"${tblFeatureUpdates.map(_.name)}") + Some(newestProtocol.merge(tblFeatureUpdates.map(Protocol.forTableFeature).toSeq: _*)) + } else None + + val metadataResult = if (tblPropertyUpdates.nonEmpty) { + logInfo(s"[tableId=$tableId] IcebergCompatV1 auto-setting table properties: " + + s"$tblPropertyUpdates") + val newConfiguration = newestMetadata.configuration ++ tblPropertyUpdates.toMap + var tmpNewMetadata = newestMetadata.copy(configuration = newConfiguration) + + requiredTableProperties.foreach { tp => + tmpNewMetadata = tp.postProcess(prevMetadata, tmpNewMetadata, isCreatingOrReorgTable) + } + + Some(tmpNewMetadata) + } else None + + // Apply additional checks + val context = IcebergCompatContext(prevSnapshot, + protocolResult.getOrElse(newestProtocol), + metadataResult.getOrElse(newestMetadata), + isCreatingOrReorgTable, actions, tableId, version) + checks.foreach(_.apply(context)) + + (protocolResult, metadataResult) + } + } +} + +/** + * Util methods to manage between IcebergCompat versions + */ +object IcebergCompat extends DeltaLogging { + + val knownVersions = Seq( + DeltaConfigs.ICEBERG_COMPAT_V1_ENABLED -> 1, + DeltaConfigs.ICEBERG_COMPAT_V2_ENABLED -> 2) + + /** + * Fetch from Metadata the current enabled IcebergCompat version. + * @return a number indicate the version. E.g., 1 for CompatV1. + * None if no version enabled. + */ + def getEnabledVersion(metadata: Metadata): Option[Int] = + knownVersions + .find{ case (config, _) => config.fromMetaData(metadata).getOrElse(false) } + .map{ case (_, version) => version } + + /** + * Get the DeltaConfig for the given IcebergCompat version. If version is not valid, + * throw an exception. + * @return the DeltaConfig for the given version. E.g., + * [[DeltaConfigs.ICEBERG_COMPAT_V1_ENABLED]] for version 1. + */ + def getIcebergCompatVersionConfigForValidVersion(version: Int): DeltaConfig[Option[Boolean]] = { + if (version <= 0 || version > knownVersions.length) { + throw DeltaErrors.icebergCompatVersionNotSupportedException( + version, knownVersions.length + ) + } + knownVersions(version - 1)._1 + } + + /** + * @return true if any version of IcebergCompat is enabled + */ + def isAnyEnabled(metadata: Metadata): Boolean = + knownVersions.exists{ case (config, _) => config.fromMetaData(metadata).getOrElse(false) } + + /** + * @return true if the target version is enabled on the table. + */ + def isVersionEnabled(metadata: Metadata, version: Integer): Boolean = + knownVersions.exists{ case (_, v) => v == version } +} + +/** + * Wrapper class for table property validation + * + * @param deltaConfig [[DeltaConfig]] we are checking + * @param validator A generic method to validate the given value + * @param autoSetValue The value to set if we can auto-set this value (e.g. during table creation) + */ +case class RequiredDeltaTableProperty[T]( + deltaConfig: DeltaConfig[T], + validator: T => Boolean, + autoSetValue: String) { + /** + * A callback after all required properties are added to the new metadata. + * @return Updated metadata. 
None if no change + */ + def postProcess( + prevMetadata: Metadata, + newMetadata: Metadata, + isCreatingNewTable: Boolean) : Metadata = newMetadata +} + +object RequireColumnMapping extends RequiredDeltaTableProperty( + deltaConfig = DeltaConfigs.COLUMN_MAPPING_MODE, + validator = (mode: DeltaColumnMappingMode) => (mode == NameMapping || mode == IdMapping), + autoSetValue = NameMapping.name) { + override def postProcess( + prevMetadata: Metadata, + newMetadata: Metadata, + isCreatingNewTable: Boolean): Metadata = { + if (newMetadata.configuration.contains(DeltaConfigs.COLUMN_MAPPING_MODE.key)) { + assert(isCreatingNewTable, "we only auto-upgrade Column Mapping on new tables") + val tmpNewMetadata = DeltaColumnMapping.assignColumnIdAndPhysicalName( + newMetadata = newMetadata, + oldMetadata = prevMetadata, + isChangingModeOnExistingTable = false, + isOverwritingSchema = false + ) + DeltaColumnMapping.checkColumnIdAndPhysicalNameAssignments(tmpNewMetadata) + tmpNewMetadata + } else { + newMetadata + } + } +} + +case class IcebergCompatContext( + prevSnapshot: Snapshot, + newestProtocol: Protocol, + newestMetadata: Metadata, + isCreatingOrReorgTable: Boolean, + actions: Seq[Action], + tableId: String, + version: Integer) { + def prevMetadata: Metadata = prevSnapshot.metadata + + def prevProtocol: Protocol = prevSnapshot.protocol +} + +trait IcebergCompatCheck extends (IcebergCompatContext => Unit) + +/** + * Checks that ensures no more than one IcebergCompatVx is enabled. + */ +object CheckOnlySingleVersionEnabled extends IcebergCompatCheck { + override def apply(context: IcebergCompatContext): Unit = { + val numEnabled = IcebergCompat.knownVersions + .map{ case (config, _) => + if (config.fromMetaData(context.newestMetadata).getOrElse(false)) 1 else 0 } + .sum + if (numEnabled > 1) { + throw DeltaErrors.icebergCompatVersionMutualExclusive(context.version) + } + } +} + +object CheckAddFileHasStats extends IcebergCompatCheck { + override def apply(context: IcebergCompatContext): Unit = { + // If this field is empty, then the AddFile is missing the `numRecords` statistic. + context.actions.collect { case a: AddFile if a.numLogicalRecords.isEmpty => + throw new UnsupportedOperationException(s"[tableId=${context.tableId}] " + + s"IcebergCompatV${context.version} requires all AddFiles to contain " + + s"the numRecords statistic. AddFile ${a.path} is missing this statistic. " + + s"Stats: ${a.stats}") + } + } +} + +object CheckNoPartitionEvolution extends IcebergCompatCheck { + override def apply(context: IcebergCompatContext): Unit = { + // Note: Delta doesn't support partition evolution, but you can change the partitionColumns + // by doing a REPLACE or DataFrame overwrite. 
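+ // Illustrative sketch: REPLACE-ing a table partitioned by "date" with one partitioned
+ // by "country" trips this check, whereas REPLACE-ing an unpartitioned table with a
+ // partitioned one does not, because prevMetadata.partitionColumns is empty.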
+ // + // Iceberg-Spark itself *doesn't* support the following cases + // - CREATE TABLE partitioned by colA; REPLACE TABLE partitioned by colB + // - CREATE TABLE partitioned by colA; REPLACE TABLE not partitioned + // + // While Iceberg-Spark *does* support + // - CREATE TABLE not partitioned; REPLACE TABLE not partitioned + // - CREATE TABLE not partitioned; REPLACE TABLE partitioned by colA + // - CREATE TABLE partitioned by colA dataType1; REPLACE TABLE partitioned by colA dataType2 + if (context.prevMetadata.partitionColumns.nonEmpty && + context.prevMetadata.partitionColumns != context.newestMetadata.partitionColumns) { + throw DeltaErrors.icebergCompatReplacePartitionedTableException( + context.version, + context.prevMetadata.partitionColumns, + context.newestMetadata.partitionColumns) + } + } +} + +object CheckNoListMapNullType extends IcebergCompatCheck { + override def apply(context: IcebergCompatContext): Unit = { + SchemaUtils.findAnyTypeRecursively(context.newestMetadata.schema) { f => + f.isInstanceOf[MapType] || f.isInstanceOf[ArrayType] || f.isInstanceOf[NullType] + } match { + case Some(unsupportedType) => + throw DeltaErrors.icebergCompatUnsupportedDataTypeException( + context.version, unsupportedType, context.newestMetadata.schema) + case _ => + } + } +} + +object CheckTypeInV2AllowList extends IcebergCompatCheck { + private val allowTypes = Set[Class[_]] ( + ByteType.getClass, ShortType.getClass, IntegerType.getClass, LongType.getClass, + FloatType.getClass, DoubleType.getClass, classOf[DecimalType], + StringType.getClass, BinaryType.getClass, + BooleanType.getClass, + TimestampType.getClass, TimestampNTZType.getClass, DateType.getClass, + classOf[ArrayType], classOf[MapType], classOf[StructType] + ) + override def apply(context: IcebergCompatContext): Unit = { + SchemaUtils + .findAnyTypeRecursively(context.newestMetadata.schema)(t => !allowTypes.contains(t.getClass)) + match { + case Some(unsupportedType) => + throw DeltaErrors.icebergCompatUnsupportedDataTypeException( + context.version, unsupportedType, context.newestMetadata.schema) + case _ => + } + } +} + +object CheckNoDeletionVector extends IcebergCompatCheck { + + override def apply(context: IcebergCompatContext): Unit = { + // Check for incompatible table features; + // Deletion Vectors cannot be writeable; Note that concurrent txns are also covered + // to NOT write deletion vectors as that txn would need to make DVs writable, which + // would conflict with current txn because of metadata change. 
+ if (DeletionVectorUtils.deletionVectorsWritable( + context.newestProtocol, context.newestMetadata)) { + throw DeltaErrors.icebergCompatDeletionVectorsShouldBeDisabledException(context.version) + } + } +} + + +/** + * Check if change IcebergCompat version needs a REORG operation + */ +object CheckVersionChangeNeedsRewrite extends IcebergCompatCheck { + + private val versionChangesWithoutRewrite: Map[Int, Set[Int]] = + Map(0 -> Set(0, 1), 1 -> Set(0, 1), 2 -> Set(0, 1, 2)) + override def apply(context: IcebergCompatContext): Unit = { + if (!context.isCreatingOrReorgTable) { + val oldVersion = IcebergCompat.getEnabledVersion(context.prevMetadata).getOrElse(0) + val allowedChanges = versionChangesWithoutRewrite.getOrElse(oldVersion, Set.empty[Int]) + if (!allowedChanges.contains(context.version)) { + throw DeltaErrors.icebergCompatChangeVersionNeedRewrite(oldVersion, context.version) + } + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/JsonMetadataDomain.scala b/spark/src/main/scala/org/apache/spark/sql/delta/JsonMetadataDomain.scala new file mode 100644 index 00000000000..2dd783dd649 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/JsonMetadataDomain.scala @@ -0,0 +1,53 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.actions.DomainMetadata +import org.apache.spark.sql.delta.util.JsonUtils + +/** + * A trait for capturing metadata domain of type T. + */ +trait JsonMetadataDomain[T] { + val domainName: String + + /** + * Creates [[DomainMetadata]] with configuration set as a JSON-serialized value of + * the metadata domain of type T. + */ + def toDomainMetadata[T: Manifest]: DomainMetadata = + DomainMetadata(domainName, JsonUtils.toJson(this.asInstanceOf[T]), removed = false) +} + +abstract class JsonMetadataDomainUtils[T: Manifest] { + protected val domainName: String + + /** + * Returns the metadata domain's configuration as type T for domain metadata that + * matches "domainName" in the given snapshot. Returns None if there is no matching + * domain metadata. + */ + def fromSnapshot(snapshot: Snapshot): Option[T] = { + snapshot.domainMetadata + .find(_.domain == domainName) + .map(m => fromJsonConfiguration(m)) + } + + protected def fromJsonConfiguration(domain: DomainMetadata): T = + JsonUtils.fromJson[T](domain.configuration) +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/LastCheckpointInfo.scala b/spark/src/main/scala/org/apache/spark/sql/delta/LastCheckpointInfo.scala new file mode 100644 index 00000000000..3d23153af36 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/LastCheckpointInfo.scala @@ -0,0 +1,274 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.delta.actions.{CheckpointMetadata, SidecarFile, SingleAction} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.FileNames.{checkpointVersion, numCheckpointParts} +import org.apache.spark.sql.delta.util.JsonUtils +import com.fasterxml.jackson.annotation.{JsonIgnore, JsonIgnoreProperties, JsonPropertyOrder} +import com.fasterxml.jackson.databind.{DeserializationFeature, JsonNode} +import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import com.fasterxml.jackson.databind.node.ObjectNode +import org.apache.commons.codec.digest.DigestUtils +import org.apache.hadoop.fs.FileStatus + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.types.StructType + +/** + * Information about the V2 Checkpoint in the LAST_CHECKPOINT file + * @param path file name corresponding to the uuid-named v2 checkpoint + * @param sizeInBytes size in bytes for the uuid-named v2 checkpoint + * @param modificationTime modification time for the uuid-named v2 checkpoint + * @param nonFileActions all non file actions for the v2 checkpoint. This info may or may not be + * available. A None value means that info is missing. + * If it is not None, then it should have all the non-FileAction + * corresponding to the checkpoint. + * @param sidecarFiles sidecar files corresponding to the v2 checkpoint. This info may or may + * not be available. A None value means that this info is missing. + * An empty list denotes that the v2 checkpoint has no sidecars. + */ +case class LastCheckpointV2( + path: String, + sizeInBytes: Long, + modificationTime: Long, + nonFileActions: Option[Seq[SingleAction]], + sidecarFiles: Option[Seq[SidecarFile]]) { + + @JsonIgnore + lazy val checkpointMetadataOpt: Option[CheckpointMetadata] = + nonFileActions.flatMap(_.map(_.unwrap).collectFirst { case cm: CheckpointMetadata => cm }) + +} + +object LastCheckpointV2 { + def apply( + fileStatus: FileStatus, + nonFileActions: Option[Seq[SingleAction]] = None, + sidecarFiles: Option[Seq[SidecarFile]] = None): LastCheckpointV2 = { + LastCheckpointV2( + path = fileStatus.getPath.getName, + sizeInBytes = fileStatus.getLen, + modificationTime = fileStatus.getModificationTime, + nonFileActions = nonFileActions, + sidecarFiles = sidecarFiles) + } +} + +/** + * Records information about a checkpoint. + * + * This class provides the checksum validation logic, needed to ensure that content of + * LAST_CHECKPOINT file points to a valid json. The readers might read some part from old file and + * some part from the new file (if the file is read across multiple requests). In some rare + * scenarios, the split read might produce a valid json and readers will be able to parse it and + * convert it into a [[LastCheckpointInfo]] object that contains invalid data. In order to prevent + * using it, we do a checksum match on the read json to validate that it is consistent. 
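+ *
+ * A round-trip sketch (the `info` value is hypothetical):
+ * {{{
+ *   val json = LastCheckpointInfo.serializeToJson(info, addChecksum = true)
+ *   // Fails with an IllegalStateException if the stored checksum does not match
+ *   // the checksum recomputed from the canonicalized json.
+ *   val parsed = LastCheckpointInfo.deserializeFromJson(json, validate = true)
+ * }}}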
+ * + * For old Delta versions, which do not have checksum logic, we want to make sure that the old + * fields (i.e. version, size, parts) are together in the beginning of last_checkpoint json. All + * these fields together are less than 50 bytes, so even in split read scenario, we want to make + * sure that old delta readers which do not do have checksum validation logic, gets all 3 fields + * from one read request. For this reason, we use `JsonPropertyOrder` to force them in the beginning + * together. + * + * @param version the version of this checkpoint + * @param size the number of actions in the checkpoint, -1 if the information is unavailable. + * @param parts the number of parts when the checkpoint has multiple parts. None if this is a + * singular checkpoint + * @param sizeInBytes the number of bytes of the checkpoint + * @param numOfAddFiles the number of AddFile actions in the checkpoint + * @param checkpointSchema the schema of the underlying checkpoint files + * @param checksum the checksum of the [[LastCheckpointInfo]]. + */ +@JsonPropertyOrder(Array("version", "size", "parts")) +case class LastCheckpointInfo( + version: Long, + size: Long, + parts: Option[Int], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + sizeInBytes: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + numOfAddFiles: Option[Long], + checkpointSchema: Option[StructType], + v2Checkpoint: Option[LastCheckpointV2] = None, + checksum: Option[String] = None) { + + @JsonIgnore + def getFormatEnum(): CheckpointInstance.Format = parts match { + case _ if v2Checkpoint.nonEmpty => CheckpointInstance.Format.V2 + case Some(_) => CheckpointInstance.Format.WITH_PARTS + case None => CheckpointInstance.Format.SINGLE + } + + /** Whether two [[LastCheckpointInfo]] represents the same checkpoint */ + def semanticEquals(other: LastCheckpointInfo): Boolean = { + CheckpointInstance(this) == CheckpointInstance(other) + } +} + +object LastCheckpointInfo { + + val STORED_CHECKSUM_KEY = "checksum" + + /** Whether to store checksum OR do checksum validations around [[LastCheckpointInfo]] */ + def checksumEnabled(spark: SparkSession): Boolean = + spark.sessionState.conf.getConf(DeltaSQLConf.LAST_CHECKPOINT_CHECKSUM_ENABLED) + + /** + * Returns the json representation of this [[LastCheckpointInfo]] object. + * Also adds the checksum to the returned json if `addChecksum` is set. The checksum can be + * used by readers to validate consistency of the [[LastCheckpointInfo]]. + * It is calculated using rules mentioned in "JSON checksum" section in PROTOCOL.md. + */ + def serializeToJson( + lastCheckpointInfo: LastCheckpointInfo, + addChecksum: Boolean, + suppressOptionalFields: Boolean = false): String = { + if (suppressOptionalFields) { + return JsonUtils.toJson( + LastCheckpointInfo( + lastCheckpointInfo.version, + lastCheckpointInfo.size, + lastCheckpointInfo.parts, + sizeInBytes = None, + numOfAddFiles = None, + v2Checkpoint = None, + checkpointSchema = None)) + } + + val jsonStr: String = JsonUtils.toJson(lastCheckpointInfo.copy(checksum = None)) + if (!addChecksum) return jsonStr + val rootNode = JsonUtils.mapper.readValue(jsonStr, classOf[ObjectNode]) + val checksum = treeNodeToChecksum(rootNode) + rootNode.put(STORED_CHECKSUM_KEY, checksum).toString + } + + /** + * Converts the given `jsonStr` into a [[LastCheckpointInfo]] object. + * if `validate` is set, then it also validates the consistency of the json: + * - calculating the checksum and comparing it with the `storedChecksum`. 
+ * - json should not have any duplicates. + */ + def deserializeFromJson(jsonStr: String, validate: Boolean): LastCheckpointInfo = { + if (validate) { + val (storedChecksumOpt, actualChecksum) = LastCheckpointInfo.getChecksums(jsonStr) + storedChecksumOpt.filter(_ != actualChecksum).foreach { storedChecksum => + throw new IllegalStateException(s"Checksum validation failed for json: $jsonStr,\n" + + s"storedChecksum:$storedChecksum, actualChecksum:$actualChecksum") + } + } + + // This means: + // 1) EITHER: Checksum validation is config-disabled + // 2) OR: The json lacked a checksum (e.g. written by old client). Nothing to validate. + // 3) OR: The Stored checksum matches the calculated one. Validation succeeded. + JsonUtils.fromJson[LastCheckpointInfo](jsonStr) + } + + /** + * Analyzes the json representation of [[LastCheckpointInfo]] and returns checksum tuple where + * - first element refers to the stored checksum in the json representation of + * [[LastCheckpointInfo]], None if the checksum is not present. + * - second element refers to the checksum computed from the canonicalized json representation of + * the [[LastCheckpointInfo]]. + */ + def getChecksums(jsonStr: String): (Option[String], String) = { + val reader = + JsonUtils.mapper.reader().withFeatures(DeserializationFeature.FAIL_ON_READING_DUP_TREE_KEY) + val rootNode = reader.readTree(jsonStr) + val storedChecksum = if (rootNode.has(STORED_CHECKSUM_KEY)) { + Some(rootNode.get(STORED_CHECKSUM_KEY).asText()) + } else { + None + } + val actualChecksum = treeNodeToChecksum(rootNode) + storedChecksum -> actualChecksum + } + + /** + * Canonicalizes the given `treeNode` json and returns its md5 checksum. + * Refer to "JSON checksum" section in PROTOCOL.md for canonicalization steps. + */ + def treeNodeToChecksum(treeNode: JsonNode): String = { + val jsonEntriesBuffer = ArrayBuffer.empty[(String, String)] + + import scala.collection.JavaConverters._ + def traverseJsonNode(currentNode: JsonNode, prefix: ArrayBuffer[String]): Unit = { + if (currentNode.isObject) { + currentNode.fields().asScala.foreach { entry => + prefix.append(encodeString(entry.getKey)) + traverseJsonNode(entry.getValue, prefix) + prefix.trimEnd(1) + } + } else if (currentNode.isArray) { + currentNode.asScala.zipWithIndex.foreach { case (jsonNode, index) => + prefix.append(index.toString) + traverseJsonNode(jsonNode, prefix) + prefix.trimEnd(1) + } + } else { + var nodeValue = currentNode.asText() + if (currentNode.isTextual) nodeValue = encodeString(nodeValue) + jsonEntriesBuffer.append(prefix.mkString("+") -> nodeValue) + } + } + traverseJsonNode(treeNode, prefix = ArrayBuffer.empty) + import Ordering.Implicits._ + val normalizedJsonKeyValues = jsonEntriesBuffer + .filter { case (k, _) => k != s""""$STORED_CHECKSUM_KEY"""" } + .map { case (k, v) => s"$k=$v" } + .sortBy(_.toSeq: Seq[Char]) + .mkString(",") + DigestUtils.md5Hex(normalizedJsonKeyValues) + } + + private val isUnreservedOctet = + (Set.empty ++ ('a' to 'z') ++ ('A' to 'Z') ++ ('0' to '9') ++ "-._~").map(_.toByte) + + /** + * URL encodes a String based on the following rules: + * 1. Use uppercase hexadecimals for all percent encodings + * 2. percent-encode everything other than unreserved characters + * 3. unreserved characters are = a-z / A-Z / 0-9 / "-" / "." 
/ "_" / "~" + */ + private def encodeString(str: String): String = { + val result = str.getBytes(java.nio.charset.StandardCharsets.UTF_8).map { + case b if isUnreservedOctet(b) => b.toChar.toString + case b => + // convert to char equivalent of unsigned byte + val c = (b & 0xff) + f"%%$c%02X" + }.mkString + s""""$result"""" + } + + def fromFiles(files: Seq[FileStatus]): LastCheckpointInfo = { + assert(files.nonEmpty, "files should be non empty to construct LastCheckpointInfo") + LastCheckpointInfo( + version = checkpointVersion(files.head), + size = -1L, + parts = numCheckpointParts(files.head.getPath), + sizeInBytes = Some(files.map(_.getLen).sum), + numOfAddFiles = None, + checkpointSchema = None + ) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/MaterializedRowTrackingColumn.scala b/spark/src/main/scala/org/apache/spark/sql/delta/MaterializedRowTrackingColumn.scala new file mode 100644 index 00000000000..e10245fea55 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/MaterializedRowTrackingColumn.scala @@ -0,0 +1,158 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.util.UUID + +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.catalyst.expressions.Attribute + +/** + * Represents a materialized row tracking column. Concrete implementations are [[MaterializedRowId]] + * and [[MaterializedRowCommitVersion]]. + */ +abstract class MaterializedRowTrackingColumn { + /** + * Table metadata configuration property name storing the name of this materialized row tracking + * column. + */ + val MATERIALIZED_COLUMN_NAME_PROP: String + + /** Prefix to use for the name of this materialized row tracking column */ + val MATERIALIZED_COLUMN_NAME_PREFIX: String + + /** + * Returns the exception to throw when the materialized column name is not set in the table + * metadata. The table name is passed as argument. + */ + def missingMetadataException: String => Throwable + + /** + * Generate a random name for a materialized row tracking column. The generated name contains a + * unique UUID, we assume it shall not conflict with existing column. + */ + private def generateMaterializedColumnName: String = + MATERIALIZED_COLUMN_NAME_PREFIX + UUID.randomUUID().toString + + /** + * Update this materialized row tracking column name in the metadata. + * - If row tracking is not allowed or not supported, this operation is a noop. + * - If row tracking is supported on the table and no name is assigned to the old metadata, we + * assign a name. If a name was already assigned, we copy over this name. + * Throws in case the assignment of a new name fails due to a conflict. 
+ */ + private[delta] def updateMaterializedColumnName( + protocol: Protocol, + oldMetadata: Metadata, + newMetadata: Metadata): Metadata = { + if (!RowTracking.isSupported(protocol)) { + // During a CLONE we might not enable row tracking, but still receive the materialized column + // name from the source. In this case, we need to remove the column name to not have the same + // column name in two different tables. + return newMetadata.copy( + configuration = newMetadata.configuration - MATERIALIZED_COLUMN_NAME_PROP) + } + + // Take the materialized column name from the old metadata, as this is the materialized column + // name of the current table. We overwrite the materialized column name of the new metadata as + // it could contain a materialized column name from another table, e.g. the source table during + // a CLONE. + val materializedColumnName = oldMetadata.configuration + .getOrElse(MATERIALIZED_COLUMN_NAME_PROP, generateMaterializedColumnName) + newMetadata.copy(configuration = newMetadata.configuration + + (MATERIALIZED_COLUMN_NAME_PROP -> materializedColumnName)) + } + + /** + * Throws an exception if row tracking is allowed and the materialized column name conflicts with + * another column name. + */ + private[delta] def throwIfMaterializedColumnNameConflictsWithSchema(metadata: Metadata): Unit = { + val logicalColumnNames = metadata.schema.fields.map(_.name) + val physicalColumnNames = metadata.schema.fields + .map(field => DeltaColumnMapping.getPhysicalName(field)) + + metadata.configuration.get(MATERIALIZED_COLUMN_NAME_PROP).foreach { columnName => + if (logicalColumnNames.contains(columnName) || physicalColumnNames.contains(columnName)) { + throw DeltaErrors.addingColumnWithInternalNameFailed(columnName) + } + } + } + + /** Extract the materialized column name from the [[Metadata]] of a [[DeltaLog]]. */ + def getMaterializedColumnName(protocol: Protocol, metadata: Metadata): Option[String] = { + if (RowTracking.isEnabled(protocol, metadata)) { + metadata.configuration.get(MATERIALIZED_COLUMN_NAME_PROP) + } else { + None + } + } + + /** Convenience method that throws if the materialized column name cannot be extracted. */ + def getMaterializedColumnNameOrThrow( + protocol: Protocol, metadata: Metadata, tableId: String): String = { + getMaterializedColumnName(protocol, metadata).getOrElse { + throw missingMetadataException(tableId) + } + } + + /** + * If Row tracking is enabled, return an Expression referencing this Row tracking column Attribute + * in 'dataFrame' if one is available. Otherwise returns None. + */ + private[delta] def getAttribute( + snapshot: Snapshot, dataFrame: DataFrame): Option[Attribute] = { + if (!RowTracking.isEnabled(snapshot.protocol, snapshot.metadata)) { + return None + } + + val materializedColumnName = getMaterializedColumnNameOrThrow( + snapshot.protocol, snapshot.metadata, snapshot.deltaLog.tableId) + + val analyzedPlan = dataFrame.queryExecution.analyzed + analyzedPlan.outputSet.view.find(attr => materializedColumnName == attr.name) + } +} + +object MaterializedRowId extends MaterializedRowTrackingColumn { + /** + * Table metadata configuration property name storing the name of the column in which the + * Row IDs are materialized. 
+ */ + val MATERIALIZED_COLUMN_NAME_PROP = "delta.rowTracking.materializedRowIdColumnName" + + /** Prefix to use for the name of the materialized Row ID column */ + val MATERIALIZED_COLUMN_NAME_PREFIX = "_row-id-col-" + + def missingMetadataException: String => Throwable = DeltaErrors.materializedRowIdMetadataMissing +} + +object MaterializedRowCommitVersion extends MaterializedRowTrackingColumn { + /** + * Table metadata configuration property name storing the name of the column in which the + * Row commit versions are materialized. + */ + val MATERIALIZED_COLUMN_NAME_PROP = "delta.rowTracking.materializedRowCommitVersionColumnName" + + /** Prefix to use for the name of the materialized Row commit version column */ + val MATERIALIZED_COLUMN_NAME_PREFIX = "_row-commit-version-col-" + + def missingMetadataException: String => Throwable = + DeltaErrors.materializedRowCommitVersionMetadataMissing +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/MetadataCleanup.scala b/spark/src/main/scala/org/apache/spark/sql/delta/MetadataCleanup.scala new file mode 100644 index 00000000000..9de270c359f --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/MetadataCleanup.scala @@ -0,0 +1,384 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.util.{Calendar, TimeZone} + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.delta.DeltaHistoryManager.BufferingLogDeletionIterator +import org.apache.spark.sql.delta.TruncationGranularity.{DAY, HOUR, MINUTE, TruncationGranularity} +import org.apache.spark.sql.delta.actions.{Action, Metadata} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.util.FileNames +import org.apache.spark.sql.delta.util.FileNames.{checkpointVersion, listingPrefix, CheckpointFile, DeltaFile} +import org.apache.commons.lang3.time.DateUtils +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} + +private[delta] object TruncationGranularity extends Enumeration { + type TruncationGranularity = Value + val DAY, HOUR, MINUTE = Value +} + +/** Cleans up expired Delta table metadata. */ +trait MetadataCleanup extends DeltaLogging { + self: DeltaLog => + + /** Whether to clean up expired log files and checkpoints. */ + def enableExpiredLogCleanup(metadata: Metadata): Boolean = + DeltaConfigs.ENABLE_EXPIRED_LOG_CLEANUP.fromMetaData(metadata) + + /** + * Returns the duration in millis for how long to keep around obsolete logs. We may keep logs + * beyond this duration until the next calendar day to avoid constantly creating checkpoints. 
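+ * For example, with the default 'delta.logRetentionDuration' of "interval 30 days",
+ * this evaluates to 30 * 24 * 60 * 60 * 1000 milliseconds (a sketch; the actual default
+ * is whatever [[DeltaConfigs.LOG_RETENTION]] specifies).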
+ */ + def deltaRetentionMillis(metadata: Metadata): Long = { + val interval = DeltaConfigs.LOG_RETENTION.fromMetaData(metadata) + DeltaConfigs.getMilliSeconds(interval) + } + + override def doLogCleanup(snapshotToCleanup: Snapshot): Unit = { + if (enableExpiredLogCleanup(snapshot.metadata)) { + cleanUpExpiredLogs(snapshotToCleanup) + } + } + + /** Clean up expired delta and checkpoint logs. Exposed for testing. */ + private[delta] def cleanUpExpiredLogs( + snapshotToCleanup: Snapshot, + deltaRetentionMillisOpt: Option[Long] = None, + cutoffTruncationGranularity: TruncationGranularity = DAY): Unit = { + recordDeltaOperation(this, "delta.log.cleanup") { + val retentionMillis = + deltaRetentionMillisOpt.getOrElse(deltaRetentionMillis(snapshot.metadata)) + val fileCutOffTime = + truncateDate(clock.getTimeMillis() - retentionMillis, cutoffTruncationGranularity).getTime + val formattedDate = fileCutOffTime.toGMTString + logInfo(s"Starting the deletion of log files older than $formattedDate") + + val fs = logPath.getFileSystem(newDeltaHadoopConf()) + var numDeleted = 0 + val expiredDeltaLogs = listExpiredDeltaLogs(fileCutOffTime.getTime) + if (expiredDeltaLogs.hasNext) { + // Trigger compatibility checkpoint creation logic only when this round of metadata cleanup + // is going to delete any deltas/checkpoint files. + // We need to create compat checkpoint before deleting delta/checkpoint files so that we + // don't have a window in b/w where the old checkpoint is deleted and there is no + // compat-checkpoint available. + val v2CompatCheckpointMetrics = new V2CompatCheckpointMetrics + createSinglePartCheckpointForBackwardCompat(snapshotToCleanup, v2CompatCheckpointMetrics) + logInfo(s"Compatibility checkpoint creation metrics: $v2CompatCheckpointMetrics") + } + var wasCheckpointDeleted = false + expiredDeltaLogs.map(_.getPath).foreach { path => + // recursive = false + if (fs.delete(path, false)) { + numDeleted += 1 + if (FileNames.isCheckpointFile(path)) { + wasCheckpointDeleted = true + } + } + } + if (wasCheckpointDeleted) { + // Trigger sidecar deletion only when some checkpoints have been deleted as part of this + // round of Metadata cleanup. + val sidecarDeletionMetrics = new SidecarDeletionMetrics + identifyAndDeleteUnreferencedSidecarFiles( + snapshotToCleanup, + fileCutOffTime.getTime, + sidecarDeletionMetrics) + logInfo(s"Sidecar deletion metrics: $sidecarDeletionMetrics") + } + logInfo(s"Deleted $numDeleted log files older than $formattedDate") + } + } + + /** + * Returns an iterator of expired delta logs that can be cleaned up. For a delta log to be + * considered as expired, it must: + * - have a checkpoint file after it + * - be older than `fileCutOffTime` + */ + private def listExpiredDeltaLogs(fileCutOffTime: Long): Iterator[FileStatus] = { + import org.apache.spark.sql.delta.util.FileNames._ + + val latestCheckpoint = readLastCheckpointFile() + if (latestCheckpoint.isEmpty) return Iterator.empty + val threshold = latestCheckpoint.get.version - 1L + val files = store.listFrom(listingPrefix(logPath, 0), newDeltaHadoopConf()) + .filter(f => isCheckpointFile(f) || isDeltaFile(f)) + def getVersion(filePath: Path): Long = { + if (isCheckpointFile(filePath)) { + checkpointVersion(filePath) + } else { + deltaVersion(filePath) + } + } + + new BufferingLogDeletionIterator(files, fileCutOffTime, threshold, getVersion) + } + + /** + * Truncates a timestamp down to a given unit. The unit can be either DAY, HOUR or MINUTE. + * - DAY: The timestamp it truncated to the previous midnight. 
+ * - HOUR: The timestamp it truncated to the last hour. + * - MINUTE: The timestamp it truncated to the last minute. + */ + private[delta] def truncateDate(timeMillis: Long, unit: TruncationGranularity): Calendar = { + val date = Calendar.getInstance(TimeZone.getTimeZone("UTC")) + date.setTimeInMillis(timeMillis) + + val calendarUnit = unit match { + case DAY => Calendar.DAY_OF_MONTH + case HOUR => Calendar.HOUR_OF_DAY + case MINUTE => Calendar.MINUTE + } + + DateUtils.truncate(date, calendarUnit) + } + + /** Truncates a timestamp down to the previous midnight and returns the time. */ + private[delta] def truncateDay(timeMillis: Long): Calendar = { + truncateDate(timeMillis, TruncationGranularity.DAY) + } + + /** + * Helper method to create a compatibility classic single file checkpoint file for this table. + * This is needed so that any legacy reader which do not understand [[V2CheckpointTableFeature]] + * could read the legacy classic checkpoint file and fail gracefully with Protocol requirement + * failure. + */ + protected[delta] def createSinglePartCheckpointForBackwardCompat( + snapshotToCleanup: Snapshot, + metrics: V2CompatCheckpointMetrics): Unit = { + // Do nothing if this table does not use V2 Checkpoints, or has no checkpoints at all. + if (!CheckpointProvider.isV2CheckpointEnabled(snapshotToCleanup)) return + if (snapshotToCleanup.checkpointProvider.isEmpty) return + + val startTimeMs = System.currentTimeMillis() + val hadoopConf = newDeltaHadoopConf() + val checkpointInstance = + CheckpointInstance(snapshotToCleanup.checkpointProvider.topLevelFiles.head.getPath) + // The current checkpoint provider is already using a checkpoint with the naming + // scheme of classic checkpoints. There is no need to create a compatibility checkpoint + // in this case. + if (checkpointInstance.format != CheckpointInstance.Format.V2) return + + val checkpointVersion = snapshotToCleanup.checkpointProvider.version + val checkpoints = listFrom(checkpointVersion) + .takeWhile(file => FileNames.getFileVersionOpt(file.getPath).exists(_ <= checkpointVersion)) + .collect { + case file if FileNames.isCheckpointFile(file) => CheckpointInstance(file.getPath) + } + .filter(_.format != CheckpointInstance.Format.V2) + .toArray + val availableNonV2Checkpoints = + getLatestCompleteCheckpointFromList(checkpoints, Some(checkpointVersion)) + if (availableNonV2Checkpoints.nonEmpty) { + metrics.v2CheckpointCompatLogicTimeTakenMs = System.currentTimeMillis() - startTimeMs + return + } + + // topLevelFileIndex must be non-empty when topLevelFiles are present + val shallowCopyDf = + loadIndex(snapshotToCleanup.checkpointProvider.topLevelFileIndex.get, Action.logSchema) + val finalPath = + FileNames.checkpointFileSingular(snapshotToCleanup.deltaLog.logPath, checkpointVersion) + Checkpoints.createCheckpointV2ParquetFile( + spark, + shallowCopyDf, + finalPath, + hadoopConf, + useRename = false) + metrics.v2CheckpointCompatLogicTimeTakenMs = System.currentTimeMillis() - startTimeMs + metrics.checkpointVersion = checkpointVersion + } + + /** Deletes any unreferenced files from the sidecar directory `_delta_log/_sidecar` */ + protected def identifyAndDeleteUnreferencedSidecarFiles( + snapshotToCleanup: Snapshot, + checkpointRetention: Long, + metrics: SidecarDeletionMetrics): Unit = { + val startTimeMs = System.currentTimeMillis() + // If v2 checkpoints are not enabled on the table, we don't need to attempt the sidecar cleanup. 
+ if (!CheckpointProvider.isV2CheckpointEnabled(snapshotToCleanup)) return + + val hadoopConf = newDeltaHadoopConf() + val fs = sidecarDirPath.getFileSystem(hadoopConf) + // This can happen when the V2 Checkpoint feature is present in the Protocol but + // only Classic checkpoints have been created for the table. + if (!fs.exists(sidecarDirPath)) return + + val (parquetCheckpointFiles, otherFiles) = store + .listFrom(listingPrefix(logPath, 0), hadoopConf) + .collect { case CheckpointFile(status, _) => (status, CheckpointInstance(status.getPath)) } + .collect { case (fileStatus, ci) if ci.format.usesSidecars => fileStatus } + .toSeq + .partition(_.getPath.getName.endsWith("parquet")) + val (jsonCheckpointFiles, unknownFormatCheckpointFiles) = + otherFiles.partition(_.getPath.getName.endsWith("json")) + if (unknownFormatCheckpointFiles.nonEmpty) { + logWarning( + "Found checkpoint files other than parquet and json: " + + s"${unknownFormatCheckpointFiles.map(_.getPath.toString).mkString(",")}") + } + metrics.numActiveParquetCheckpointFiles = parquetCheckpointFiles.size + metrics.numActiveJsonCheckpointFiles = jsonCheckpointFiles.size + val parquetCheckpointsFileIndex = + DeltaLogFileIndex(DeltaLogFileIndex.CHECKPOINT_FILE_FORMAT_PARQUET, parquetCheckpointFiles) + val jsonCheckpointsFileIndex = + DeltaLogFileIndex(DeltaLogFileIndex.CHECKPOINT_FILE_FORMAT_JSON, jsonCheckpointFiles) + val identifyActiveSidecarsStartTimeMs = System.currentTimeMillis() + metrics.activeCheckpointsListingTimeTakenMs = identifyActiveSidecarsStartTimeMs - startTimeMs + import org.apache.spark.sql.delta.implicits._ + val df = (parquetCheckpointsFileIndex ++ jsonCheckpointsFileIndex) + .map(loadIndex(_, Action.logSchema(Set("sidecar")))) + .reduceOption(_ union _) + .getOrElse { return } + + val activeSidecarFiles = df + .select("sidecar.path") + .where("path is not null") + .as[String] + .collect() + .map(p => new Path(p).getName) // Get bare file names + .toSet + + val identifyAndDeleteSidecarsStartTimeMs = System.currentTimeMillis() + metrics.identifyActiveSidecarsTimeTakenMs = + identifyAndDeleteSidecarsStartTimeMs - identifyActiveSidecarsStartTimeMs + // Retain all files created in the checkpoint retention window - irrespective of whether they + // are referenced in a checkpoint or not. This is to make sure that we don't end up deleting an + // in-progress checkpoint. 
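+ // Concretely: a sidecar file is deleted only if its modification time is older than
+ // `checkpointRetention` AND it is not referenced by any active checkpoint.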
+ val retentionTimestamp: Long = checkpointRetention + val sidecarFilesIterator = new Iterator[FileStatus] { + // Hadoop's RemoteIterator is neither java nor scala Iterator, so have to wrap it + val remoteIterator = fs.listStatusIterator(sidecarDirPath) + override def hasNext: Boolean = remoteIterator.hasNext() + override def next(): FileStatus = remoteIterator.next() + } + val sidecarFilesToDelete = sidecarFilesIterator + .collect { case file if file.getModificationTime < retentionTimestamp => file.getPath } + .filterNot(path => activeSidecarFiles.contains(path.getName)) + val sidecarDeletionStartTimeMs = System.currentTimeMillis() + logInfo(s"Starting the deletion of unreferenced sidecar files") + val count = deleteMultiple(fs, sidecarFilesToDelete) + + logInfo(s"Deleted $count sidecar files") + metrics.numSidecarFilesDeleted = count + val endTimeMs = System.currentTimeMillis() + metrics.identifyAndDeleteSidecarsTimeTakenMs = + sidecarDeletionStartTimeMs - identifyAndDeleteSidecarsStartTimeMs + metrics.overallSidecarProcessingTimeTakenMs = endTimeMs - startTimeMs + } + + private def deleteMultiple(fs: FileSystem, paths: Iterator[Path]): Long = { + paths.map { path => + if (fs.delete(path, false)) 1L else 0L + }.sum + } + + /** Class to track metrics related to V2 Checkpoint Sidecars deletion. */ + protected class SidecarDeletionMetrics { + // number of sidecar files deleted + var numSidecarFilesDeleted: Long = -1 + // number of active parquet checkpoint files present in delta log directory + var numActiveParquetCheckpointFiles: Long = -1 + // number of active json checkpoint files present in delta log directory + var numActiveJsonCheckpointFiles: Long = -1 + // time taken (in ms) to list and identify active checkpoints + var activeCheckpointsListingTimeTakenMs: Long = -1 + // time taken (in ms) to list the sidecar directory to get all sidecars and delete those which + // aren't referenced by any checkpoint anymore + var identifyAndDeleteSidecarsTimeTakenMs: Long = -1 + // time taken (in ms) to read the active checkpoint json / parquet files and identify active + // sidecar files + var identifyActiveSidecarsTimeTakenMs: Long = -1 + // time taken (in ms) for everything related to sidecar processing + var overallSidecarProcessingTimeTakenMs: Long = -1 + } + + /** Class to track metrics related to V2 Compatibility checkpoint creation. */ + protected[delta] class V2CompatCheckpointMetrics { + // time taken (in ms) to run the v2 checkpoint compat logic + var v2CheckpointCompatLogicTimeTakenMs: Long = -1 + + // the version at which we have created a v2 compat checkpoint, -1 if no compat checkpoint was + // created. + var checkpointVersion: Long = -1 + } + + /** + * Finds a checkpoint such that we are able to construct table snapshot for all versions at or + * greater than the checkpoint version returned. + */ + def findEarliestReliableCheckpoint: Option[Long] = { + val hadoopConf = newDeltaHadoopConf() + var earliestCheckpointVersionOpt: Option[Long] = None + // This is used to collect the checkpoint files from the current version that we are listing. + // When we list a file that is not part of the checkpoint, then we must have seen the entire + // checkpoint. We then verify if the checkpoint was complete, and if it is not, we clear the + // collection and wait for the next checkpoint to appear in the file listing. + // Whenever we see a complete checkpoint for the first time, we remember it as the earliest + // checkpoint. 
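+ //
+ // Worked sketch (hypothetical listing): for
+ //   00000.json, 00001.json, 00002.checkpoint.parquet, 00002.json, 00003.json
+ // the checkpoint at version 2 is complete and its commit file is present, so this
+ // returns Some(2).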
+ val currentCheckpointFiles = ArrayBuffer.empty[Path] + var prevCommitVersion = 0L + + def currentCheckpointVersionOpt: Option[Long] = + currentCheckpointFiles.headOption.map(checkpointVersion(_)) + + def isCurrentCheckpointComplete: Boolean = { + val instances = currentCheckpointFiles.map(CheckpointInstance(_)).toArray + getLatestCompleteCheckpointFromList(instances).isDefined + } + + store.listFrom(listingPrefix(logPath, 0L), hadoopConf) + .map(_.getPath) + .foreach { + case CheckpointFile(f, checkpointVersion) if earliestCheckpointVersionOpt.isEmpty => + if (!currentCheckpointVersionOpt.contains(checkpointVersion)) { + // If it's a different checkpoint, clear the existing one. + currentCheckpointFiles.clear() + } + currentCheckpointFiles += f + case DeltaFile(_, deltaVersion) => + if (earliestCheckpointVersionOpt.isEmpty && isCurrentCheckpointComplete) { + // We have found a complete checkpoint, but we should not stop here. If a future + // commit version is missing, then this checkpoint will be discarded and we will need + // to restart the search from that point. + + // Ensure that the commit json is there at the checkpoint version. If it's not there, + // we don't consider such a checkpoint as a reliable checkpoint. + if (currentCheckpointVersionOpt.contains(deltaVersion)) { + earliestCheckpointVersionOpt = currentCheckpointVersionOpt + prevCommitVersion = deltaVersion + } + } + // Need to clear it so that if there is a gap in commit versions, we are forced to + // look for a new complete checkpoint. + currentCheckpointFiles.clear() + if (deltaVersion > prevCommitVersion + 1) { + // Missing commit versions. Restart the search. + earliestCheckpointVersionOpt = None + } + prevCommitVersion = deltaVersion + case _ => + } + + earliestCheckpointVersionOpt + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/OptimisticTransaction.scala b/spark/src/main/scala/org/apache/spark/sql/delta/OptimisticTransaction.scala new file mode 100644 index 00000000000..18cc7f132d5 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/OptimisticTransaction.scala @@ -0,0 +1,2010 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.nio.file.FileAlreadyExistsException +import java.util.{ConcurrentModificationException, UUID} +import java.util.concurrent.TimeUnit.NANOSECONDS + +import scala.collection.mutable +import scala.collection.mutable.{ArrayBuffer, HashSet} +import scala.util.control.NonFatal + +import com.databricks.spark.util.TagDefinitions.TAG_LOG_STORE_CLASS +import org.apache.spark.sql.delta.DeltaOperations.{ChangeColumn, CreateTable, Operation, ReplaceColumns, ReplaceTable, UpdateSchema} +import org.apache.spark.sql.delta.RowId.RowTrackingMetadataDomain +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands.DeletionVectorUtils +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.files._ +import org.apache.spark.sql.delta.hooks.{CheckpointHook, GenerateSymlinkManifest, IcebergConverterHook, PostCommitHook, UpdateCatalogFactory} +import org.apache.spark.sql.delta.implicits.addFileEncoder +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.{SchemaMergingUtils, SchemaUtils} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats._ +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.SparkException +import org.apache.spark.sql.{AnalysisException, Column, DataFrame, SparkSession} +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, ResolveDefaultColumns} +import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.util.{Clock, Utils} + +/** Record metrics about a successful commit. */ +case class CommitStats( + /** The version read by the txn when it starts. */ + startVersion: Long, + /** The version committed by the txn. */ + commitVersion: Long, + /** The version read by the txn right after it commits. It usually equals to commitVersion, + * but can be larger than commitVersion when there are concurrent commits. */ + readVersion: Long, + txnDurationMs: Long, + commitDurationMs: Long, + fsWriteDurationMs: Long, + stateReconstructionDurationMs: Long, + numAdd: Int, + numRemove: Int, + /** The number of [[SetTransaction]] actions in the committed actions. */ + numSetTransaction: Int, + bytesNew: Long, + /** The number of files in the table as of version `readVersion`. */ + numFilesTotal: Long, + /** The table size in bytes as of version `readVersion`. */ + sizeInBytesTotal: Long, + /** The number and size of CDC files added in this operation. */ + numCdcFiles: Long, + cdcBytesNew: Long, + /** The protocol as of version `readVersion`. 
*/ + protocol: Protocol, + /** The size of the newly committed (usually json) file */ + commitSizeBytes: Long, + /** The size of the checkpoint committed, if present */ + checkpointSizeBytes: Long, + totalCommitsSizeSinceLastCheckpoint: Long, + /** Will we attempt a checkpoint after this commit is completed */ + checkpointAttempt: Boolean, + info: CommitInfo, + newMetadata: Option[Metadata], + numAbsolutePathsInAdd: Int, + numDistinctPartitionsInAdd: Int, + numPartitionColumnsInTable: Int, + isolationLevel: String, + fileSizeHistogram: Option[FileSizeHistogram] = None, + addFilesHistogram: Option[FileSizeHistogram] = None, + removeFilesHistogram: Option[FileSizeHistogram] = None, + numOfDomainMetadatas: Long = 0, + txnId: Option[String] = None +) + +/** + * Represents the partition and data predicates of a query on a Delta table. + * + * Partition predicates can either reference the table's logical partition columns, or the + * physical [[AddFile]]'s schema. When a predicate refers to the logical partition columns it needs + * to be rewritten to be over the [[AddFile]]'s schema before filtering files. This is indicated + * with shouldRewriteFilter=true. + * + * Currently the only path for a predicate with shouldRewriteFilter=false is through DPO + * (dynamic partition overwrite) since we filter directly on [[AddFile.partitionValues]]. + * + * For example, consider a table with the schema below and partition column "a" + * |-- a: integer {physicalName = "XX"} + * |-- b: integer {physicalName = "YY"} + * + * An example of a predicate that needs to be written is: (a = 0) + * Before filtering the [[AddFile]]s, this predicate needs to be rewritten to: + * (partitionValues.XX = 0) + * + * An example of a predicate that does not need to be rewritten is: + * (partitionValues = Map(XX -> 0)) + */ +private[delta] case class DeltaTableReadPredicate( + partitionPredicates: Seq[Expression] = Seq.empty, + dataPredicates: Seq[Expression] = Seq.empty, + shouldRewriteFilter: Boolean = true) { + + val partitionPredicate: Expression = + partitionPredicates.reduceLeftOption(And).getOrElse(Literal.TrueLiteral) +} + + /** + * Used to perform a set of reads in a transaction and then commit a set of updates to the + * state of the log. All reads from the [[DeltaLog]], MUST go through this instance rather + * than directly to the [[DeltaLog]] otherwise they will not be check for logical conflicts + * with concurrent updates. + * + * This class is not thread-safe. + * + * @param deltaLog The Delta Log for the table this transaction is modifying. + * @param snapshot The snapshot that this transaction is reading at. + */ +class OptimisticTransaction( + override val deltaLog: DeltaLog, + override val catalogTable: Option[CatalogTable], + override val snapshot: Snapshot) + extends OptimisticTransactionImpl + with DeltaLogging { + def this( + deltaLog: DeltaLog, + catalogTable: Option[CatalogTable], + snapshotOpt: Option[Snapshot] = None) = + this(deltaLog, catalogTable, snapshotOpt.getOrElse(deltaLog.update())) +} + +object OptimisticTransaction { + + private val active = new ThreadLocal[OptimisticTransaction] + + /** Get the active transaction */ + def getActive(): Option[OptimisticTransaction] = Option(active.get()) + + /** + * Runs the passed block of code with the given active transaction. This fails if a transaction is + * already active unless `overrideExistingTransaction` is set. 
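+ *
+ * Usage sketch (the `txn` value and the body are illustrative):
+ * {{{
+ *   OptimisticTransaction.withActive(txn) {
+ *     // work that relies on OptimisticTransaction.getActive() returning `txn`
+ *   }
+ * }}}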
+ */ + def withActive[T]( + activeTransaction: OptimisticTransaction, + overrideExistingTransaction: Boolean = false)(block: => T): T = { + val original = getActive() + if (overrideExistingTransaction) { + clearActive() + } + setActive(activeTransaction) + try { + block + } finally { + clearActive() + if (original.isDefined) { + setActive(original.get) + } + } + } + + /** + * Sets a transaction as the active transaction. + * + * @note This is not meant for being called directly, only from + * `OptimisticTransaction.withNewTransaction`. Use that to create and set active txns. + */ + private[delta] def setActive(txn: OptimisticTransaction): Unit = { + if (active.get != null) { + throw DeltaErrors.activeTransactionAlreadySet() + } + active.set(txn) + } + + /** + * Clears the active transaction as the active transaction. + * + * @note This is not meant for being called directly, `OptimisticTransaction.withNewTransaction`. + */ + private[delta] def clearActive(): Unit = { + active.set(null) + } +} + +/** + * Used to perform a set of reads in a transaction and then commit a set of updates to the + * state of the log. All reads from the [[DeltaLog]], MUST go through this instance rather + * than directly to the [[DeltaLog]] otherwise they will not be check for logical conflicts + * with concurrent updates. + * + * This trait is not thread-safe. + */ +trait OptimisticTransactionImpl extends TransactionalWrite + with SQLMetricsReporting + with DeltaScanGenerator + with DeltaLogging { + + import org.apache.spark.sql.delta.util.FileNames._ + + val deltaLog: DeltaLog + val catalogTable: Option[CatalogTable] + val snapshot: Snapshot + def clock: Clock = deltaLog.clock + + protected def spark = SparkSession.active + + /** Tracks the appIds that have been seen by this transaction. */ + protected val readTxn = new ArrayBuffer[String] + + /** + * Tracks the data that could have been seen by recording the partition + * predicates by which files have been queried by this transaction. + */ + protected val readPredicates = new ArrayBuffer[DeltaTableReadPredicate] + + /** Tracks specific files that have been seen by this transaction. */ + protected val readFiles = new HashSet[AddFile] + + /** Whether the whole table was read during the transaction. */ + protected var readTheWholeTable = false + + /** Tracks if this transaction has already committed. */ + protected var committed = false + + /** + * Stores the updated metadata (if any) that will result from this txn. + * + * This is just one way to change metadata. + * New metadata can also be added during commit from actions. + * But metadata should *not* be updated via both paths. + */ + protected var newMetadata: Option[Metadata] = None + + /** Stores the updated protocol (if any) that will result from this txn. */ + protected var newProtocol: Option[Protocol] = None + + /** The transaction start time. */ + protected val txnStartNano = System.nanoTime() + + override val snapshotToScan: Snapshot = snapshot + + /** + * Tracks the first-access snapshots of other Delta logs read by this transaction. + * The snapshots are keyed by the log's unique id. + */ + protected var readSnapshots = new java.util.concurrent.ConcurrentHashMap[(String, Path), Snapshot] + + /** The transaction commit start time. */ + protected var commitStartNano = -1L + + /** The transaction commit end time. 
*/ + protected var commitEndNano = -1L; + + protected var commitInfo: CommitInfo = _ + + /** Whether the txn should trigger a checkpoint after the commit */ + private[delta] var needsCheckpoint = false + + // Whether this transaction is creating a new table. + private var isCreatingNewTable: Boolean = false + + // Whether this transaction is overwriting the existing schema (i.e. overwriteSchema = true). + // When overwriting schema (and data) of a table, `isCreatingNewTable` should not be true. + private var isOverwritingSchema: Boolean = false + + // Whether this is a transaction that can select any new protocol, potentially downgrading + // the existing protocol of the table during REPLACE table operations. + private def canAssignAnyNewProtocol: Boolean = + readVersion == -1 || + (isCreatingNewTable && spark.conf.get(DeltaSQLConf.REPLACE_TABLE_PROTOCOL_DOWNGRADE_ALLOWED)) + + /** + * Tracks the start time since we started trying to write a particular commit. + * Used for logging duration of retried transactions. + */ + protected var commitAttemptStartTime: Long = _ + + /** + * Tracks actions within the transaction, will commit along with the passed-in actions in the + * commit function. + */ + protected val actions = new ArrayBuffer[Action] + + /** + * Record a SetTransaction action that will be committed as part of this transaction. + */ + def updateSetTransaction(appId: String, version: Long, lastUpdate: Option[Long]): Unit = { + actions += SetTransaction(appId, version, lastUpdate) + } + + /** The version that this transaction is reading from. */ + def readVersion: Long = snapshot.version + + /** Creates new metadata with global Delta configuration defaults. */ + private def withGlobalConfigDefaults(metadata: Metadata): Metadata = { + val conf = spark.sessionState.conf + metadata.copy(configuration = DeltaConfigs.mergeGlobalConfigs( + conf, metadata.configuration)) + } + + protected val postCommitHooks = new ArrayBuffer[PostCommitHook]() + catalogTable.foreach { ct => + registerPostCommitHook(UpdateCatalogFactory.getUpdateCatalogHook(ct, spark)) + } + // The CheckpointHook will only checkpoint if necessary, so always register it to run. + registerPostCommitHook(CheckpointHook) + registerPostCommitHook(IcebergConverterHook) + + /** The protocol of the snapshot that this transaction is reading at. */ + def protocol: Protocol = newProtocol.getOrElse(snapshot.protocol) + + /** Start time of txn in nanoseconds */ + def txnStartTimeNs: Long = txnStartNano + + /** Unique identifier for the transaction */ + val txnId = UUID.randomUUID().toString + + /** Whether to check unsupported data type when updating the table schema */ + protected var checkUnsupportedDataType: Boolean = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_SCHEMA_TYPE_CHECK) + + // Some operations (e.g. stats collection) may set files with DVs back to tight bounds. + // In that case they need to skip this check. + protected var checkDeletionVectorFilesHaveWideBounds: Boolean = true + /** + * Disable the check that ensures that all files with DVs added have tightBounds set to false. + * + * This is necessary when recomputing the stats on a table with DVs. + */ + def disableDeletionVectorFilesHaveWideBoundsCheck(): Unit = { + checkDeletionVectorFilesHaveWideBounds = false + } + + /** The set of distinct partitions that contain added files by current transaction. */ + protected[delta] var partitionsAddedToOpt: Option[mutable.HashSet[Map[String, String]]] = None + + /** True if this transaction is a blind append. 
This is only valid after commit. */ + protected[delta] var isBlindAppend: Boolean = false + + /** + * The logSegment of the snapshot prior to the commit. + * Will be updated only when retrying due to a conflict. + */ + private[delta] var preCommitLogSegment: LogSegment = + snapshot.logSegment.copy(checkpointProvider = snapshot.checkpointProvider) + + /** The end to end execution time of this transaction. */ + def txnExecutionTimeMs: Option[Long] = if (commitEndNano == -1) { + None + } else { + Some(NANOSECONDS.toMillis((commitEndNano - txnStartNano))) + } + + /** Gets the stats collector for the table at the snapshot this transaction has. */ + def statsCollector: Column = snapshot.statsCollector + + /** + * Returns the metadata for this transaction. The metadata refers to the metadata of the snapshot + * at the transaction's read version unless updated during the transaction. + */ + def metadata: Metadata = newMetadata.getOrElse(snapshot.metadata) + + /** + * Records an update to the metadata that should be committed with this transaction. + * Note that this must be done before writing out any files so that file writing + * and checks happen with the final metadata for the table. + * + * IMPORTANT: It is the responsibility of the caller to ensure that files currently + * present in the table are still valid under the new metadata. + */ + def updateMetadata( + proposedNewMetadata: Metadata, + ignoreDefaultProperties: Boolean = false): Unit = { + assert(!hasWritten, + "Cannot update the metadata in a transaction that has already written data.") + assert(newMetadata.isEmpty, + "Cannot change the metadata more than once in a transaction.") + updateMetadataInternal(proposedNewMetadata, ignoreDefaultProperties) + } + + /** + * Can this transaction still update the metadata? + * This is allowed only once per transaction. + */ + def canUpdateMetadata: Boolean = { + !hasWritten && newMetadata.isEmpty + } + + /** + * This updates the protocol for the table with a given protocol. + * Note that the protocol set by this method can be overwritten by other methods, + * such as [[updateMetadata]]. + */ + def updateProtocol(protocol: Protocol): Unit = { + newProtocol = Some(protocol) + } + + /** + * Do the actual checks and works to update the metadata and save it into the `newMetadata` + * field, which will be added to the actions to commit in [[prepareCommit]]. + */ + protected def updateMetadataInternal( + proposedNewMetadata: Metadata, + ignoreDefaultProperties: Boolean): Unit = { + var newMetadataTmp = proposedNewMetadata + // Validate all indexed columns are inside table's schema. + StatisticsCollection.validateDeltaStatsColumns(newMetadataTmp) + if (readVersion == -1 || isCreatingNewTable) { + // We need to ignore the default properties when trying to create an exact copy of a table + // (as in CLONE and SHALLOW CLONE). + if (!ignoreDefaultProperties) { + newMetadataTmp = withGlobalConfigDefaults(newMetadataTmp) + } + isCreatingNewTable = true + } + val protocolBeforeUpdate = protocol + // The `.schema` cannot be generated correctly unless the column mapping metadata is correctly + // filled for all the fields. Therefore, the column mapping changes need to happen first. 
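+ // (Illustrative: with column mapping enabled, a logical column "a" may be stored under a
+ // physical name such as "col-<uuid>" carried in the field metadata; `.schema` and the
+ // predicate rewriting over [[AddFile.partitionValues]] rely on that mapping being present
+ // on every field.)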
+ newMetadataTmp = DeltaColumnMapping.verifyAndUpdateMetadataChange( + deltaLog, + protocolBeforeUpdate, + snapshot.metadata, + newMetadataTmp, + isCreatingNewTable, + isOverwritingSchema) + + if (newMetadataTmp.schemaString != null) { + // Replace CHAR and VARCHAR with StringType + val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema( + newMetadataTmp.schema) + newMetadataTmp = newMetadataTmp.copy(schemaString = schema.json) + } + + newMetadataTmp = if (snapshot.metadata.schemaString == newMetadataTmp.schemaString) { + // Shortcut when the schema hasn't changed to avoid generating spurious schema change logs. + // It's fine if two different but semantically equivalent schema strings skip this special + // case - that indicates that something upstream attempted to do a no-op schema change, and + // we'll just end up doing a bit of redundant work in the else block. + newMetadataTmp + } else { + val fixedSchema = SchemaUtils.removeUnenforceableNotNullConstraints( + newMetadataTmp.schema, spark.sessionState.conf).json + newMetadataTmp.copy(schemaString = fixedSchema) + } + + + if (canAssignAnyNewProtocol) { + // Check for the new protocol version after the removal of the unenforceable not null + // constraints + newProtocol = Some(Protocol.forNewTable(spark, Some(newMetadataTmp))) + } else if (newMetadataTmp.configuration.contains(Protocol.MIN_READER_VERSION_PROP) || + newMetadataTmp.configuration.contains(Protocol.MIN_WRITER_VERSION_PROP)) { + // Table features Part 1: bump protocol version numbers + // + // Collect new reader and writer versions from table properties, which could be provided by + // the user in `ALTER TABLE TBLPROPERTIES` or copied over from session defaults. + val readerVersionAsTableProp = + Protocol.getReaderVersionFromTableConf(newMetadataTmp.configuration) + .getOrElse(protocolBeforeUpdate.minReaderVersion) + val writerVersionAsTableProp = + Protocol.getWriterVersionFromTableConf(newMetadataTmp.configuration) + .getOrElse(protocolBeforeUpdate.minWriterVersion) + + val newProtocolForLatestMetadata = + Protocol(readerVersionAsTableProp, writerVersionAsTableProp) + val proposedNewProtocol = protocolBeforeUpdate.merge(newProtocolForLatestMetadata) + + if (proposedNewProtocol != protocolBeforeUpdate) { + // The merged protocol has higher versions and/or supports more features. + // It's a valid upgrade. + newProtocol = Some(proposedNewProtocol) + } else { + // The merged protocol is identical to the original one. Two possibilities: + // (1) the provided versions are lower than the original one, and all features supported by + // the provided versions are already supported. This is a no-op. + if (readerVersionAsTableProp < protocolBeforeUpdate.minReaderVersion || + writerVersionAsTableProp < protocolBeforeUpdate.minWriterVersion) { + recordProtocolChanges( + "delta.protocol.downgradeIgnored", + fromProtocol = protocolBeforeUpdate, + toProtocol = newProtocolForLatestMetadata, + isCreatingNewTable = false) + } else { + // (2) the new protocol versions is identical to the existing versions. Also a no-op. + } + } + } + + newMetadataTmp = if (isCreatingNewTable) { + // Creating a new table will drop all existing data, so we don't need to fix the old + // metadata. + newMetadataTmp + } else { + // This is not a new table. The new schema may be merged from the existing schema. We + // decide whether we should keep the Generated or IDENTITY columns by checking whether the + // protocol satisfies the requirements. 
+ val keepGeneratedColumns = + GeneratedColumn.satisfyGeneratedColumnProtocol(protocolBeforeUpdate) + val keepIdentityColumns = + ColumnWithDefaultExprUtils.satisfiesIdentityColumnProtocol(protocolBeforeUpdate) + if (keepGeneratedColumns && keepIdentityColumns) { + // If a protocol satisfies both requirements, we do nothing here. + newMetadataTmp + } else { + // As the protocol doesn't match, this table is created by an old version that doesn't + // support generated columns or identity columns. We should remove the generation + // expressions to fix the schema to avoid bumping the writer version incorrectly. + val newSchema = ColumnWithDefaultExprUtils.removeDefaultExpressions( + newMetadataTmp.schema, + keepGeneratedColumns = keepGeneratedColumns, + keepIdentityColumns = keepIdentityColumns) + if (newSchema ne newMetadataTmp.schema) { + newMetadataTmp.copy(schemaString = newSchema.json) + } else { + newMetadataTmp + } + } + } + + // Table features Part 2: add manually-supported features specified in table properties, aka + // those start with [[FEATURE_PROP_PREFIX]]. + // + // This transaction's new metadata might contain some table properties to support some + // features (props start with [[FEATURE_PROP_PREFIX]]). We silently add them to the `protocol` + // action, and bump the protocol version to (3, 7) or (_, 7), depending on the existence of + // any reader-writer feature. + val newProtocolBeforeAddingFeatures = newProtocol.getOrElse(protocolBeforeUpdate) + val newFeaturesFromTableConf = + TableFeatureProtocolUtils.getSupportedFeaturesFromTableConfigs(newMetadataTmp.configuration) + val readerVersionForNewProtocol = { + // All features including those required features are considered to decide reader version. + if (Protocol() + .withFeatures(newFeaturesFromTableConf) + .readerAndWriterFeatureNames + .flatMap(TableFeature.featureNameToFeature) + .exists(_.isReaderWriterFeature)) { + TableFeatureProtocolUtils.TABLE_FEATURES_MIN_READER_VERSION + } else { + newProtocolBeforeAddingFeatures.minReaderVersion + } + } + val existingFeatureNames = newProtocolBeforeAddingFeatures.readerAndWriterFeatureNames + if (!newFeaturesFromTableConf.map(_.name).subsetOf(existingFeatureNames)) { + newProtocol = Some( + Protocol( + readerVersionForNewProtocol, + TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION) + .merge(newProtocolBeforeAddingFeatures) + .withFeatures(newFeaturesFromTableConf)) + } + + // We are done with protocol versions and features, time to remove related table properties. + val configsWithoutProtocolProps = newMetadataTmp.configuration.filterNot { + case (k, _) => TableFeatureProtocolUtils.isTableProtocolProperty(k) + } + newMetadataTmp = newMetadataTmp.copy(configuration = configsWithoutProtocolProps) + + // Table features Part 3: add automatically-enabled features by looking at the new table + // metadata. + // + // This code path is for existing tables and during `REPLACE` if the downgrade flag is not set. + // The new table case has been handled by [[Protocol.forNewTable]] earlier in this method. 
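+ // (Illustrative: a table property such as `delta.enableChangeDataFeed = true` or
+ // `delta.enableDeletionVectors = true` in the new metadata implicitly requires the
+ // corresponding table feature, so the protocol may be upgraded here even though the user
+ // never set delta.minReaderVersion / delta.minWriterVersion explicitly.)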
+ if (!canAssignAnyNewProtocol) { + setNewProtocolWithFeaturesEnabledByMetadata(newMetadataTmp) + } + + newMetadataTmp = MaterializedRowId.updateMaterializedColumnName( + protocol, oldMetadata = snapshot.metadata, newMetadataTmp) + newMetadataTmp = MaterializedRowCommitVersion.updateMaterializedColumnName( + protocol, oldMetadata = snapshot.metadata, newMetadataTmp) + + RowId.verifyMetadata( + snapshot.protocol, protocol, snapshot.metadata, newMetadataTmp, isCreatingNewTable) + + assertMetadata(newMetadataTmp) + logInfo(s"Updated metadata from ${newMetadata.getOrElse("-")} to $newMetadataTmp") + newMetadata = Some(newMetadataTmp) + } + + /** + * Records an update to the metadata that should be committed with this transaction and when + * this transaction is logically creating a new table, e.g. replacing a previous table with new + * metadata. Note that this must be done before writing out any files so that file writing + * and checks happen with the final metadata for the table. + * IMPORTANT: It is the responsibility of the caller to ensure that files currently + * present in the table are still valid under the new metadata. + */ + def updateMetadataForNewTable(metadata: Metadata): Unit = { + isCreatingNewTable = true + updateMetadata(metadata) + } + + /** + * Records an update to the metadata that should be committed with this transaction and when + * this transaction is attempt to overwrite the data and schema using .mode('overwrite') and + * .option('overwriteSchema', true). + * REPLACE the table is not considered in this category, because that is logically equivalent + * to DROP and RECREATE the table. + */ + def updateMetadataForTableOverwrite(proposedNewMetadata: Metadata): Unit = { + isOverwritingSchema = true + updateMetadata(proposedNewMetadata) + } + + protected def assertMetadata(metadata: Metadata): Unit = { + assert(!CharVarcharUtils.hasCharVarchar(metadata.schema), + "The schema in Delta log should not contain char/varchar type.") + SchemaMergingUtils.checkColumnNameDuplication(metadata.schema, "in the metadata update") + if (metadata.columnMappingMode == NoMapping) { + SchemaUtils.checkSchemaFieldNames(metadata.dataSchema, metadata.columnMappingMode) + val partitionColCheckIsFatal = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_PARTITION_COLUMN_CHECK_ENABLED) + try { + SchemaUtils.checkFieldNames(metadata.partitionColumns) + } catch { + case e: AnalysisException => + recordDeltaEvent( + deltaLog, + "delta.schema.invalidPartitionColumn", + data = Map( + "checkEnabled" -> partitionColCheckIsFatal, + "columns" -> metadata.partitionColumns + ) + ) + if (partitionColCheckIsFatal) throw DeltaErrors.invalidPartitionColumn(e) + } + } else { + DeltaColumnMapping.checkColumnIdAndPhysicalNameAssignments(metadata) + } + + if (GeneratedColumn.hasGeneratedColumns(metadata.schema)) { + recordDeltaOperation(deltaLog, "delta.generatedColumns.check") { + GeneratedColumn.validateGeneratedColumns(spark, metadata.schema) + } + recordDeltaEvent(deltaLog, "delta.generatedColumns.definition") + } + + if (checkUnsupportedDataType) { + val unsupportedTypes = SchemaUtils.findUnsupportedDataTypes(metadata.schema) + if (unsupportedTypes.nonEmpty) { + throw DeltaErrors.unsupportedDataTypes(unsupportedTypes.head, unsupportedTypes.tail: _*) + } + } + + if (spark.conf.get(DeltaSQLConf.DELTA_TABLE_PROPERTY_CONSTRAINTS_CHECK_ENABLED)) { + Protocol.assertTablePropertyConstraintsSatisfied(spark, metadata, snapshot) + } + MaterializedRowId.throwIfMaterializedColumnNameConflictsWithSchema(metadata) + 
MaterializedRowCommitVersion.throwIfMaterializedColumnNameConflictsWithSchema(metadata) + } + + private def setNewProtocolWithFeaturesEnabledByMetadata(metadata: Metadata): Unit = { + val requiredProtocolOpt = + Protocol.upgradeProtocolFromMetadataForExistingTable(spark, metadata, protocol) + if (requiredProtocolOpt.isDefined) { + newProtocol = requiredProtocolOpt + } + } + + /** + * Must make sure that deletion vectors are never added to a table where that isn't allowed. + * Note, statistics recomputation is still allowed even though DVs might be currently disabled. + * + * This method returns a function that can be used to validate a single Action. + */ + protected def getAssertDeletionVectorWellFormedFunc( + spark: SparkSession, + op: DeltaOperations.Operation): (Action => Unit) = { + val deletionVectorCreationAllowed = + DeletionVectorUtils.deletionVectorsWritable(snapshot, newProtocol, newMetadata) + val isComputeStatsOperation = op.isInstanceOf[DeltaOperations.ComputeStats] + val commitCheckEnabled = spark.conf.get(DeltaSQLConf.DELETION_VECTORS_COMMIT_CHECK_ENABLED) + + val deletionVectorDisallowedForAddFiles = + commitCheckEnabled && !isComputeStatsOperation && !deletionVectorCreationAllowed + + val addFileMustHaveWideBounds = deletionVectorCreationAllowed && + checkDeletionVectorFilesHaveWideBounds + + action => action match { + case a: AddFile => + if (deletionVectorDisallowedForAddFiles && a.deletionVector != null) { + throw DeltaErrors.addingDeletionVectorsDisallowedException() + } + // Protocol requirement checks: + // 1. All files with DVs must have `stats` with `numRecords`. + if (a.deletionVector != null && (a.stats == null || a.numPhysicalRecords.isEmpty)) { + throw DeltaErrors.addFileWithDVsMissingNumRecordsException + } + + // 2. All operations that add new DVs should always turn bounds to wide. + // Operations that only update files with existing DVs may opt-out from this rule + // via `disableDeletionVectorFilesHaveWideBoundsCheck()`. + // (e.g. stats collection, metadata-only updates.) + // Note, the absence of the tightBounds column when DVs exist is also an illegal state. + if (addFileMustHaveWideBounds && + a.deletionVector != null && + // Extra inversion to also catch absent `tightBounds`. + !a.tightBounds.contains(false)) { + throw DeltaErrors.addFileWithDVsAndTightBoundsException() + } + case _ => // Not an AddFile, nothing to do. + } + } + + /** + * Returns the [[DeltaScanGenerator]] for the given log, which will be used to generate + * [[DeltaScan]]s. Every time this method is called on a log, the returned generator + * generator will read a snapshot that is pinned on the first access for that log. + * + * Internally, if the given log is the same as the log associated with this + * transaction, then it returns this transaction, otherwise it will return a snapshot of + * given log + */ + def getDeltaScanGenerator(index: TahoeLogFileIndex): DeltaScanGenerator = { + if (index.deltaLog.isSameLogAs(deltaLog)) return this + + val compositeId = index.deltaLog.compositeId + // Will be called only when the log is accessed the first time + readSnapshots.computeIfAbsent(compositeId, _ => index.getSnapshot) + } + + /** Returns a[[DeltaScan]] based on the given filters. 
*/ + override def filesForScan( + filters: Seq[Expression], + keepNumRecords: Boolean = false + ): DeltaScan = { + val scan = snapshot.filesForScan(filters, keepNumRecords) + trackReadPredicates(filters) + trackFilesRead(scan.files) + scan + } + + /** Returns a[[DeltaScan]] based on the given partition filters, projections and limits. */ + override def filesForScan( + limit: Long, + partitionFilters: Seq[Expression]): DeltaScan = { + partitionFilters.foreach { f => + assert( + DeltaTableUtils.isPredicatePartitionColumnsOnly(f, metadata.partitionColumns, spark), + s"Only filters on partition columns [${metadata.partitionColumns.mkString(", ")}]" + + s" expected, found $f") + } + val scan = snapshot.filesForScan(limit, partitionFilters) + trackReadPredicates(partitionFilters, partitionOnly = true) + trackFilesRead(scan.files) + scan + } + + override def filesWithStatsForScan(partitionFilters: Seq[Expression]): DataFrame = { + val metadata = snapshot.filesWithStatsForScan(partitionFilters) + trackReadPredicates(partitionFilters, partitionOnly = true) + trackFilesRead(filterFiles(partitionFilters)) + metadata + } + + /** Returns files matching the given predicates. */ + def filterFiles(): Seq[AddFile] = filterFiles(Seq(Literal.TrueLiteral)) + + /** Returns files matching the given predicates. */ + def filterFiles(filters: Seq[Expression], keepNumRecords: Boolean = false): Seq[AddFile] = { + val scan = snapshot.filesForScan(filters, keepNumRecords) + trackReadPredicates(filters) + trackFilesRead(scan.files) + scan.files + } + + /** + * Returns files within the given partitions. + * + * `partitions` is a set of the `partitionValues` stored in [[AddFile]]s. This means they refer to + * the physical column names, and values are stored as strings. + * */ + def filterFiles(partitions: Set[Map[String, String]]): Seq[AddFile] = { + import org.apache.spark.sql.functions.col + val df = snapshot.allFiles.toDF() + val isFileInTouchedPartitions = + DeltaUDF.booleanFromMap(partitions.contains)(col("partitionValues")) + val filteredFiles = df + .filter(isFileInTouchedPartitions) + .withColumn("stats", DataSkippingReader.nullStringLiteral) + .as[AddFile] + .collect() + trackReadPredicates( + Seq(isFileInTouchedPartitions.expr), partitionOnly = true, shouldRewriteFilter = false) + filteredFiles + } + + /** Mark the entire table as tainted by this transaction. */ + def readWholeTable(): Unit = { + trackReadPredicates(Seq.empty) + readTheWholeTable = true + } + + /** Mark the given files as read within this transaction. */ + def trackFilesRead(files: Seq[AddFile]): Unit = { + readFiles ++= files + } + + /** Mark the predicates that have been queried by this transaction. */ + def trackReadPredicates( + filters: Seq[Expression], + partitionOnly: Boolean = false, + shouldRewriteFilter: Boolean = true): Unit = { + val (partitionFilters, dataFilters) = if (partitionOnly) { + (filters, Seq.empty[Expression]) + } else { + filters.partition { f => + DeltaTableUtils.isPredicatePartitionColumnsOnly(f, metadata.partitionColumns, spark) + } + } + + readPredicates += DeltaTableReadPredicate( + partitionPredicates = partitionFilters, + dataPredicates = dataFilters, + shouldRewriteFilter = shouldRewriteFilter) + } + + /** + * Returns the latest version that has committed for the idempotent transaction with given `id`. 
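+ *
+ * A hedged sketch of idempotent-write usage (`appId` and `batchId` are illustrative):
+ * {{{
+ *   val lastCommitted = txn.txnVersion(appId)  // -1L if this appId has never committed
+ *   if (batchId > lastCommitted) {
+ *     txn.updateSetTransaction(appId, batchId, Some(System.currentTimeMillis()))
+ *     // ... stage the batch's file actions and commit ...
+ *   }
+ * }}}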
+ */ + def txnVersion(id: String): Long = { + readTxn += id + snapshot.transactions.getOrElse(id, -1L) + } + + /** + * Return the operation metrics for the operation if it is enabled + */ + def getOperationMetrics(op: Operation): Option[Map[String, String]] = { + if (spark.conf.get(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED)) { + Some(getMetricsForOperation(op)) + } else { + None + } + } + + /** + * Return the user-defined metadata for the operation. + */ + def getUserMetadata(op: Operation): Option[String] = { + // option wins over config if both are set + op.userMetadata match { + case data @ Some(_) => data + case None => spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_USER_METADATA) + } + } + + def reportAutoCompactStatsError(e: Throwable): Unit = { + recordDeltaEvent(deltaLog, "delta.collectStats", data = Map("message" -> e.getMessage)) + logError(e.getMessage) + } + + def collectAutoOptimizeStats(numAdd: Long, numRemove: Long, actions: Iterator[Action]): Unit = { + // Early exit if no files were added or removed. + if (numAdd == 0 && numRemove == 0) return + val collector = createAutoCompactStatsCollector() + if (collector.isInstanceOf[DisabledAutoCompactPartitionStatsCollector]) return + AutoCompactPartitionStats.instance(spark) + .collectPartitionStats(collector, deltaLog.tableId, actions) + } + + /** + * A subclass of AutoCompactPartitionStatsCollector that's to be used if the config to collect + * auto compaction stats is turned off. This subclass intentionally does nothing. + */ + class DisabledAutoCompactPartitionStatsCollector extends AutoCompactPartitionStatsCollector { + override def collectPartitionStatsForAdd(file: AddFile): Unit = {} + override def collectPartitionStatsForRemove(file: RemoveFile): Unit = {} + override def finalizeStats(tableId: String): Unit = {} + } + + def createAutoCompactStatsCollector(): AutoCompactPartitionStatsCollector = { + try { + if (spark.conf.get(DeltaSQLConf.DELTA_AUTO_COMPACT_RECORD_PARTITION_STATS_ENABLED)) { + val minFileSize = spark.conf + .get(DeltaSQLConf.DELTA_AUTO_COMPACT_MIN_FILE_SIZE) + .getOrElse(Long.MaxValue) + return AutoCompactPartitionStats.instance(spark) + .createStatsCollector(minFileSize, reportAutoCompactStatsError) + } + } catch { + case NonFatal(e) => reportAutoCompactStatsError(e) + } + + // If config-disabled, or error caught, fall though and use a no-op stats collector. + new DisabledAutoCompactPartitionStatsCollector + } + + /** + * Checks if the new schema contains any CDC columns (which is invalid) and throws the appropriate + * error + */ + protected def performCdcMetadataCheck(): Unit = { + if (newMetadata.nonEmpty) { + if (CDCReader.isCDCEnabledOnTable(newMetadata.get, spark)) { + val schema = newMetadata.get.schema.fieldNames + val reservedColumnsUsed = CDCReader.cdcReadSchema(new StructType()).fieldNames + .intersect(schema) + if (reservedColumnsUsed.length > 0) { + if (!CDCReader.isCDCEnabledOnTable(snapshot.metadata, spark)) { + // cdc was not enabled previously but reserved columns are present in the new schema. + throw DeltaErrors.tableAlreadyContainsCDCColumns(reservedColumnsUsed) + } else { + // cdc was enabled but reserved columns are present in the new metadata. + throw DeltaErrors.cdcColumnsInData(reservedColumnsUsed) + } + } + } + } + } + + /** + * Checks if the passed-in actions have internal SetTransaction conflicts, will throw exceptions + * in case of conflicts. This function will also remove duplicated [[SetTransaction]]s. 
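+ *
+ * For example (illustrative): two occurrences of SetTransaction("app", 5, _) are collapsed
+ * into one, whereas SetTransaction("app", 5, _) together with SetTransaction("app", 6, _)
+ * throws a setTransactionVersionConflict error.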
+ */ + protected def checkForSetTransactionConflictAndDedup(actions: Seq[Action]): Seq[Action] = { + val finalActions = new ArrayBuffer[Action] + val txnIdToVersionMap = new mutable.HashMap[String, Long].empty + for (action <- actions) { + action match { + case st: SetTransaction => + txnIdToVersionMap.get(st.appId).map { version => + if (version != st.version) { + throw DeltaErrors.setTransactionVersionConflict(st.appId, version, st.version) + } + } getOrElse { + txnIdToVersionMap += (st.appId -> st.version) + finalActions += action + } + case _ => finalActions += action + } + } + finalActions.toSeq + } + + /** + * We want to future-proof and explicitly block any occurrences of + * - table has CDC enabled and there are FileActions to write, AND + * - table has column mapping enabled and there is a column mapping related metadata action + * + * This is because the semantics for this combination of features and file changes is undefined. + */ + private def performCdcColumnMappingCheck( + actions: Seq[Action], + op: DeltaOperations.Operation): Unit = { + if (newMetadata.nonEmpty) { + val _newMetadata = newMetadata.get + val _currentMetadata = snapshot.metadata + + val cdcEnabled = CDCReader.isCDCEnabledOnTable(_newMetadata, spark) + + val columnMappingEnabled = _newMetadata.columnMappingMode != NoMapping + + val isColumnMappingUpgrade = DeltaColumnMapping.isColumnMappingUpgrade( + oldMode = _currentMetadata.columnMappingMode, + newMode = _newMetadata.columnMappingMode + ) + + def dropColumnOp: Boolean = DeltaColumnMapping.isDropColumnOperation( + _newMetadata, _currentMetadata) + + def renameColumnOp: Boolean = DeltaColumnMapping.isRenameColumnOperation( + _newMetadata, _currentMetadata) + + def columnMappingChange: Boolean = isColumnMappingUpgrade || dropColumnOp || renameColumnOp + + def existsFileActions: Boolean = actions.exists { _.isInstanceOf[FileAction] } + + if (cdcEnabled && columnMappingEnabled && columnMappingChange && existsFileActions) { + throw DeltaErrors.blockColumnMappingAndCdcOperation(op) + } + } + } + + /** + * Modifies the state of the log by adding a new commit that is based on a read at + * [[readVersion]]. In the case of a conflict with a concurrent writer this + * method will throw an exception. + * + * Also skips creating the commit if the configured [[IsolationLevel]] doesn't need us to record + * the commit from correctness perspective. + */ + def commitIfNeeded( + actions: Seq[Action], + op: DeltaOperations.Operation, + tags: Map[String, String] = Map.empty): Unit = { + commitImpl(actions, op, canSkipEmptyCommits = true, tags = tags) + } + + /** + * Modifies the state of the log by adding a new commit that is based on a read at + * [[readVersion]]. In the case of a conflict with a concurrent writer this + * method will throw an exception. + * + * @param actions Set of actions to commit + * @param op Details of operation that is performing this transactional commit + */ + def commit( + actions: Seq[Action], + op: DeltaOperations.Operation): Long = { + commitImpl(actions, op, canSkipEmptyCommits = false, tags = Map.empty).getOrElse { + throw new SparkException(s"Unknown error while trying to commit for operation $op") + } + } + + /** + * Modifies the state of the log by adding a new commit that is based on a read at + * [[readVersion]]. In the case of a conflict with a concurrent writer this + * method will throw an exception. 
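+ *
+ * A minimal commit sketch (hedged; `addedFiles` and the chosen operation are illustrative):
+ * {{{
+ *   val txn = new OptimisticTransaction(deltaLog, catalogTable = None)
+ *   val committedVersion =
+ *     txn.commit(addedFiles, DeltaOperations.Write(SaveMode.Append), Map("source" -> "demo"))
+ * }}}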
+ * + * @param actions Set of actions to commit + * @param op Details of operation that is performing this transactional commit + * @param tags Extra tags to set to the CommitInfo action + */ + def commit( + actions: Seq[Action], + op: DeltaOperations.Operation, + tags: Map[String, String]): Long = { + commitImpl(actions, op, canSkipEmptyCommits = false, tags = tags).getOrElse { + throw new SparkException(s"Unknown error while trying to commit for operation $op") + } + } + + @throws(classOf[ConcurrentModificationException]) + protected def commitImpl( + actions: Seq[Action], + op: DeltaOperations.Operation, + canSkipEmptyCommits: Boolean, + tags: Map[String, String] + ): Option[Long] = recordDeltaOperation(deltaLog, "delta.commit") { + commitStartNano = System.nanoTime() + + val (version, postCommitSnapshot, actualCommittedActions) = try { + // Check for CDC metadata columns + performCdcMetadataCheck() + + // Check for internal SetTransaction conflicts and dedup. + val finalActions = checkForSetTransactionConflictAndDedup(actions ++ this.actions.toSeq) + + // Try to commit at the next version. + val preparedActions = prepareCommit(finalActions, op) + + // Find the isolation level to use for this commit + val isolationLevelToUse = getIsolationLevelToUse(preparedActions, op) + + // Check for duplicated [[MetadataAction]] with the same domain names and validate the table + // feature is enabled if [[MetadataAction]] is submitted. + val domainMetadata = + DomainMetadataUtils.validateDomainMetadataSupportedAndNoDuplicate(finalActions, protocol) + + isBlindAppend = { + val dependsOnFiles = readPredicates.nonEmpty || readFiles.nonEmpty + val onlyAddFiles = + preparedActions.collect { case f: FileAction => f }.forall(_.isInstanceOf[AddFile]) + onlyAddFiles && !dependsOnFiles + } + + val readRowIdHighWatermark = + RowId.extractHighWatermark(snapshot).getOrElse(RowId.MISSING_HIGH_WATER_MARK) + + commitInfo = CommitInfo( + clock.getTimeMillis(), + op.name, + op.jsonEncodedValues, + Map.empty, + Some(readVersion).filter(_ >= 0), + Option(isolationLevelToUse.toString), + Some(isBlindAppend), + getOperationMetrics(op), + getUserMetadata(op), + tags = if (tags.nonEmpty) Some(tags) else None, + txnId = Some(txnId)) + + val currentTransactionInfo = CurrentTransactionInfo( + txnId = txnId, + readPredicates = readPredicates.toSeq, + readFiles = readFiles.toSet, + readWholeTable = readTheWholeTable, + readAppIds = readTxn.toSet, + metadata = metadata, + protocol = protocol, + actions = preparedActions, + readSnapshot = snapshot, + commitInfo = Option(commitInfo), + readRowIdHighWatermark = readRowIdHighWatermark, + domainMetadata = domainMetadata) + + // Register post-commit hooks if any + lazy val hasFileActions = preparedActions.exists { + case _: FileAction => true + case _ => false + } + if (DeltaConfigs.SYMLINK_FORMAT_MANIFEST_ENABLED.fromMetaData(metadata) && hasFileActions) { + registerPostCommitHook(GenerateSymlinkManifest) + } + + commitAttemptStartTime = clock.getTimeMillis() + if (preparedActions.isEmpty && canSkipEmptyCommits && + skipRecordingEmptyCommitAllowed(isolationLevelToUse)) { + return None + } + + val (commitVersion, postCommitSnapshot, updatedCurrentTransactionInfo) = + doCommitRetryIteratively( + getFirstAttemptVersion, currentTransactionInfo, isolationLevelToUse) + logInfo(s"Committed delta #$commitVersion to ${deltaLog.logPath}") + (commitVersion, postCommitSnapshot, updatedCurrentTransactionInfo.actions) + } catch { + case e: DeltaConcurrentModificationException => + 
recordDeltaEvent(deltaLog, "delta.commit.conflict." + e.conflictType) + throw e + case NonFatal(e) => + recordDeltaEvent( + deltaLog, "delta.commit.failure", data = Map("exception" -> Utils.exceptionString(e))) + throw e + } + + runPostCommitHooks(version, postCommitSnapshot, actualCommittedActions) + + Some(version) + } + + /** Whether to skip recording the commit in DeltaLog */ + protected def skipRecordingEmptyCommitAllowed(isolationLevelToUse: IsolationLevel): Boolean = { + if (!spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_SKIP_RECORDING_EMPTY_COMMITS)) { + return false + } + // Recording of empty commits in deltalog can be skipped only for SnapshotIsolation and + // Serializable mode. + Seq(SnapshotIsolation, Serializable).contains(isolationLevelToUse) + } + + /** + * Create a large commit on the Delta log by directly writing an iterator of FileActions to the + * LogStore. This function only commits the next possible version and will not check whether the + * commit is retry-able. If the next version has already been committed, then this function + * will fail. + * This bypasses all optimistic concurrency checks. We assume that transaction conflicts should be + * rare because this method is typically used to create new tables (e.g. CONVERT TO DELTA) or + * apply some commands which rarely receive other transactions (e.g. CLONE/RESTORE). + * In addition, the expectation is that the list of actions performed by the transaction + * remains an iterator and is never materialized, given the nature of a large commit potentially + * touching many files. + */ + def commitLarge( + spark: SparkSession, + actions: Iterator[Action], + op: DeltaOperations.Operation, + context: Map[String, String], + metrics: Map[String, String]): (Long, Snapshot) = { + assert(!committed, "Transaction already committed.") + commitStartNano = System.nanoTime() + val attemptVersion = getFirstAttemptVersion + try { + val tags = Map.empty[String, String] + val commitInfo = CommitInfo( + time = clock.getTimeMillis(), + operation = op.name, + operationParameters = op.jsonEncodedValues, + context, + readVersion = Some(readVersion), + isolationLevel = Some(Serializable.toString), + isBlindAppend = Some(false), + Some(metrics), + userMetadata = getUserMetadata(op), + tags = if (tags.nonEmpty) Some(tags) else None, + txnId = Some(txnId)) + + val extraActions = Seq(commitInfo, metadata) + // We don't expect commits to have more than 2 billion actions + var commitSize: Int = 0 + var numAbsolutePaths: Int = 0 + var numAddFiles: Int = 0 + var numRemoveFiles: Int = 0 + var numSetTransaction: Int = 0 + var bytesNew: Long = 0L + var numOfDomainMetadatas: Long = 0L + var addFilesHistogram: Option[FileSizeHistogram] = None + var removeFilesHistogram: Option[FileSizeHistogram] = None + val assertDeletionVectorWellFormed = getAssertDeletionVectorWellFormedFunc(spark, op) + // Initialize everything needed to maintain auto-compaction stats. 
+ partitionsAddedToOpt = Some(new mutable.HashSet[Map[String, String]]) + val acStatsCollector = createAutoCompactStatsCollector() + var allActions = (extraActions.toIterator ++ actions).map { action => + commitSize += 1 + action match { + case a: AddFile => + numAddFiles += 1 + if (a.pathAsUri.isAbsolute) numAbsolutePaths += 1 + assertDeletionVectorWellFormed(a) + partitionsAddedToOpt.get += a.partitionValues + acStatsCollector.collectPartitionStatsForAdd(a) + if (a.dataChange) bytesNew += a.size + addFilesHistogram.foreach(_.insert(a.size)) + case r: RemoveFile => + numRemoveFiles += 1 + acStatsCollector.collectPartitionStatsForRemove(r) + removeFilesHistogram.foreach(_.insert(r.getFileSize)) + case _: SetTransaction => + numSetTransaction += 1 + case m: Metadata => + assertMetadata(m) + case p: Protocol => + recordProtocolChanges( + "delta.protocol.change", + fromProtocol = snapshot.protocol, + toProtocol = p, + isCreatingNewTable) + DeltaTableV2.withEnrichedUnsupportedTableException(catalogTable) { + deltaLog.protocolWrite(p) + } + case d: DomainMetadata => + numOfDomainMetadatas += 1 + case _ => + } + action + } + + // Validate protocol support, specifically writer features. + DeltaTableV2.withEnrichedUnsupportedTableException(catalogTable) { + deltaLog.protocolWrite(snapshot.protocol) + } + + allActions = RowId.assignFreshRowIds(protocol, snapshot, allActions) + allActions = DefaultRowCommitVersion + .assignIfMissing(protocol, allActions, getFirstAttemptVersion) + + if (readVersion < 0) { + deltaLog.createLogDirectory() + } + val fsWriteStartNano = System.nanoTime() + val jsonActions = allActions.map(_.json) + deltaLog.store.write( + deltaFile(deltaLog.logPath, attemptVersion), + jsonActions, + overwrite = false, + deltaLog.newDeltaHadoopConf()) + + acStatsCollector.finalizeStats(deltaLog.tableId) + spark.sessionState.conf.setConf( + DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION, + Some(attemptVersion)) + commitEndNano = System.nanoTime() + committed = true + // NOTE: commitLarge cannot run postCommitHooks (such as the CheckpointHook). + // Instead, manually run any necessary actions in updateAndCheckpoint. 
+ val postCommitSnapshot = + updateAndCheckpoint(spark, deltaLog, commitSize, attemptVersion, txnId) + val postCommitReconstructionTime = System.nanoTime() + var stats = CommitStats( + startVersion = readVersion, + commitVersion = attemptVersion, + readVersion = postCommitSnapshot.version, + txnDurationMs = NANOSECONDS.toMillis(commitEndNano - txnStartTimeNs), + commitDurationMs = NANOSECONDS.toMillis(commitEndNano - commitStartNano), + fsWriteDurationMs = NANOSECONDS.toMillis(commitEndNano - fsWriteStartNano), + stateReconstructionDurationMs = + NANOSECONDS.toMillis(postCommitReconstructionTime - commitEndNano), + numAdd = numAddFiles, + numRemove = numRemoveFiles, + numSetTransaction = numSetTransaction, + bytesNew = bytesNew, + numFilesTotal = postCommitSnapshot.numOfFiles, + sizeInBytesTotal = postCommitSnapshot.sizeInBytes, + numCdcFiles = 0, + cdcBytesNew = 0, + protocol = postCommitSnapshot.protocol, + commitSizeBytes = jsonActions.map(_.size).sum, + checkpointSizeBytes = postCommitSnapshot.checkpointSizeInBytes(), + totalCommitsSizeSinceLastCheckpoint = 0L, + checkpointAttempt = true, + info = Option(commitInfo).map(_.copy(readVersion = None, isolationLevel = None)).orNull, + newMetadata = Some(metadata), + numAbsolutePathsInAdd = numAbsolutePaths, + numDistinctPartitionsInAdd = -1, // not tracking distinct partitions as of now + numPartitionColumnsInTable = postCommitSnapshot.metadata.partitionColumns.size, + isolationLevel = Serializable.toString, + numOfDomainMetadatas = numOfDomainMetadatas, + txnId = Some(txnId)) + + recordDeltaEvent(deltaLog, DeltaLogging.DELTA_COMMIT_STATS_OPTYPE, data = stats) + (attemptVersion, postCommitSnapshot) + } catch { + case e: java.nio.file.FileAlreadyExistsException => + recordDeltaEvent( + deltaLog, + "delta.commitLarge.failure", + data = Map("exception" -> Utils.exceptionString(e), "operation" -> op.name)) + // Actions of a commit which went in before ours + val logs = deltaLog.store.readAsIterator( + deltaFile(deltaLog.logPath, attemptVersion), + deltaLog.newDeltaHadoopConf()) + try { + val winningCommitActions = logs.map(Action.fromJson) + val commitInfo = winningCommitActions.collectFirst { case a: CommitInfo => a } + .map(ci => ci.copy(version = Some(attemptVersion))) + throw DeltaErrors.concurrentWriteException(commitInfo) + } finally { + logs.close() + } + + case NonFatal(e) => + recordDeltaEvent( + deltaLog, + "delta.commitLarge.failure", + data = Map("exception" -> Utils.exceptionString(e), "operation" -> op.name)) + throw e + } + } + + /** Update the table now that the commit has been made, and write a checkpoint. */ + protected def updateAndCheckpoint( + spark: SparkSession, + deltaLog: DeltaLog, + commitSize: Int, + attemptVersion: Long, + txnId: String): Snapshot = { + val currentSnapshot = deltaLog.update() + if (currentSnapshot.version != attemptVersion) { + throw DeltaErrors.invalidCommittedVersion(attemptVersion, currentSnapshot.version) + } + + logInfo(s"Committed delta #$attemptVersion to ${deltaLog.logPath}. Wrote $commitSize actions.") + + deltaLog.checkpoint(currentSnapshot) + currentSnapshot + } + + /** + * Prepare for a commit by doing all necessary pre-commit checks and modifications to the actions. + * @return The finalized set of actions. 
+ */ + protected def prepareCommit( + actions: Seq[Action], + op: DeltaOperations.Operation): Seq[Action] = { + + assert(!committed, "Transaction already committed.") + + val (metadatasAndProtocols, otherActions) = actions + .partition(a => a.isInstanceOf[Metadata] || a.isInstanceOf[Protocol]) + + // New metadata can come either from `newMetadata` or from the `actions` there. + val metadataChanges = + newMetadata.toSeq ++ metadatasAndProtocols.collect { case m: Metadata => m } + if (metadataChanges.length > 1) { + recordDeltaEvent(deltaLog, "delta.metadataCheck.multipleMetadataActions", data = Map( + "metadataChanges" -> metadataChanges + )) + assert( + metadataChanges.length <= 1, "Cannot change the metadata more than once in a transaction.") + } + // There be at most one metadata entry at this point. + metadataChanges.foreach { m => + assertMetadata(m) + setNewProtocolWithFeaturesEnabledByMetadata(m) + + // Also update `newMetadata` so that the behaviour later is consistent irrespective of whether + // metadata was set via `updateMetadata` or `actions`. + newMetadata = Some(m) + } + + // A protocol change can be *explicit*, i.e. specified as a Protocol action as part of the + // commit actions, or *implicit*. Implicit protocol changes are mostly caused by setting + // new table properties that enable features that require a protocol upgrade. These implicit + // changes are usually captured in newProtocol. In case there is more than one protocol action, + // it is likely that it is due to a mix of explicit and implicit changes. + val protocolChanges = + newProtocol.toSeq ++ metadatasAndProtocols.collect { case p: Protocol => p } + if (protocolChanges.length > 1) { + recordDeltaEvent(deltaLog, "delta.protocolCheck.multipleProtocolActions", data = Map( + "protocolChanges" -> protocolChanges + )) + assert(protocolChanges.length <= 1, "Cannot change the protocol more than once in a " + + "transaction. More than one protocol change in a transaction is likely due to an " + + "explicitly specified Protocol action and an implicit protocol upgrade triggered by " + + "a table property.") + } + // Update newProtocol so that the behaviour later is consistent irrespective of whether + // the protocol was set via update/verifyMetadata or actions. + // NOTE: There is at most one protocol change at this point. + protocolChanges.foreach { p => + newProtocol = Some(p) + recordProtocolChanges("delta.protocol.change", snapshot.protocol, p, isCreatingNewTable) + DeltaTableV2.withEnrichedUnsupportedTableException(catalogTable) { + deltaLog.protocolWrite(p) + } + } + + // Now, we know that there is at most 1 Metadata change (stored in newMetadata) and at most 1 + // Protocol change (stored in newProtocol) + + val (protocolUpdate1, metadataUpdate1) = + UniversalFormat.enforceInvariantsAndDependencies( + // Note: if this txn has no protocol or metadata updates, then `prev` will equal `newest`. 
+ snapshot, + newestProtocol = protocol, // Note: this will try to use `newProtocol` + newestMetadata = metadata, // Note: this will try to use `newMetadata` + isCreatingNewTable || op.isInstanceOf[DeltaOperations.UpgradeUniformProperties], + otherActions + ) + newProtocol = protocolUpdate1.orElse(newProtocol) + newMetadata = metadataUpdate1.orElse(newMetadata) + + var finalActions = newMetadata.toSeq ++ newProtocol.toSeq ++ otherActions + + // Block future cases of CDF + Column Mapping changes + file changes + // This check requires having called + // DeltaColumnMapping.checkColumnIdAndPhysicalNameAssignments which is done in the + // `assertMetadata` call above. + performCdcColumnMappingCheck(finalActions, op) + + if (snapshot.version == -1) { + deltaLog.ensureLogDirectoryExist() + // If this is the first commit and no protocol is specified, initialize the protocol version. + if (!finalActions.exists(_.isInstanceOf[Protocol])) { + finalActions = protocol +: finalActions + } + // If this is the first commit and no metadata is specified, throw an exception + if (!finalActions.exists(_.isInstanceOf[Metadata])) { + recordDeltaEvent( + deltaLog, + opType = "delta.metadataCheck.noMetadataInInitialCommit", + data = + Map("stacktrace" -> Thread.currentThread.getStackTrace.toSeq.take(20).mkString("\n\t")) + ) + if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_COMMIT_VALIDATION_ENABLED)) { + throw DeltaErrors.metadataAbsentException() + } + logWarning("Detected no metadata in initial commit but commit validation was turned off.") + } + } + + val partitionColumns = metadata.physicalPartitionSchema.fieldNames.toSet + finalActions = finalActions.map { + case newVersion: Protocol => + require(newVersion.minReaderVersion > 0, "The reader version needs to be greater than 0") + require(newVersion.minWriterVersion > 0, "The writer version needs to be greater than 0") + if (!canAssignAnyNewProtocol) { + val currentVersion = snapshot.protocol + if (!currentVersion.canTransitionTo(newVersion, op)) { + throw new ProtocolDowngradeException(currentVersion, newVersion) + } + } + newVersion + + case a: AddFile if partitionColumns != a.partitionValues.keySet => + // If the partitioning in metadata does not match the partitioning in the AddFile + recordDeltaEvent(deltaLog, "delta.metadataCheck.partitionMismatch", data = Map( + "tablePartitionColumns" -> metadata.partitionColumns, + "filePartitionValues" -> a.partitionValues + )) + if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_COMMIT_VALIDATION_ENABLED)) { + throw DeltaErrors.addFilePartitioningMismatchException( + a.partitionValues.keySet.toSeq, partitionColumns.toSeq) + } + logWarning( + s""" + |Detected mismatch in partition values between AddFile and table metadata but + |commit validation was turned off. + |To turn it back on set ${DeltaSQLConf.DELTA_COMMIT_VALIDATION_ENABLED} to "true" + """.stripMargin) + a + case other => other + } + + DeltaTableV2.withEnrichedUnsupportedTableException(catalogTable) { + newProtocol.foreach(deltaLog.protocolWrite) + deltaLog.protocolWrite(snapshot.protocol) + } + + finalActions = RowId.assignFreshRowIds(protocol, snapshot, finalActions.toIterator).toList + finalActions = DefaultRowCommitVersion + .assignIfMissing(protocol, finalActions.toIterator, getFirstAttemptVersion).toList + + // We make sure that this isn't an appendOnly table as we check if we need to delete + // files. 
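+ // (Illustrative: when the table property `delta.appendOnly` is true, DeltaLog.assertRemovable
+ // below rejects the commit if any RemoveFile carries dataChange = true.)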
+ val removes = actions.collect { case r: RemoveFile => r } + if (removes.exists(_.dataChange)) DeltaLog.assertRemovable(snapshot) + + val assertDeletionVectorWellFormed = getAssertDeletionVectorWellFormedFunc(spark, op) + actions.foreach(assertDeletionVectorWellFormed) + + // Make sure this operation does not include default column values if the corresponding table + // feature is not enabled. + if (!protocol.isFeatureSupported(AllowColumnDefaultsTableFeature)) { + checkNoColumnDefaults(op) + } + + finalActions + } + + // Returns the isolation level to use for committing the transaction + protected def getIsolationLevelToUse( + preparedActions: Seq[Action], op: DeltaOperations.Operation): IsolationLevel = { + val isolationLevelToUse = if (canDowngradeToSnapshotIsolation(preparedActions, op)) { + SnapshotIsolation + } else { + getDefaultIsolationLevel() + } + isolationLevelToUse + } + + protected def canDowngradeToSnapshotIsolation( + preparedActions: Seq[Action], op: DeltaOperations.Operation): Boolean = { + + var dataChanged = false + var hasIncompatibleActions = false + preparedActions.foreach { + case f: FileAction => + if (f.dataChange) { + dataChanged = true + } + // Row tracking is able to resolve write conflicts regardless of isolation level. + case d: DomainMetadata if RowTrackingMetadataDomain.isRowTrackingDomain(d) => + // Do nothing + case _ => + hasIncompatibleActions = true + } + val noDataChanged = !dataChanged + + if (hasIncompatibleActions) { + // if incompatible actions are present (e.g. METADATA etc.), then don't downgrade the + // isolation level to SnapshotIsolation. + return false + } + + val defaultIsolationLevel = getDefaultIsolationLevel() + // Note-1: For no-data-change transactions such as OPTIMIZE/Auto Compaction/ZorderBY, we can + // change the isolation level to SnapshotIsolation. SnapshotIsolation allows reduced conflict + // detection by skipping the + // [[ConflictChecker.checkForAddedFilesThatShouldHaveBeenReadByCurrentTxn]] check i.e. + // don't worry about concurrent appends. + // Note-2: + // We can also use SnapshotIsolation for empty transactions. e.g. consider a commit: + // t0 - Initial state of table + // t1 - Q1, Q2 starts + // t2 - Q1 commits + // t3 - Q2 is empty and wants to commit. + // In this scenario, we can always allow Q2 to commit without worrying about new files + // generated by Q1. + // The final order which satisfies both Serializability and WriteSerializability is: Q2, Q1 + // Note that Metadata only update transactions shouldn't be considered empty. If Q2 above has + // a Metadata update (say schema change/identity column high watermark update), then Q2 can't + // be moved above Q1 in the final SERIALIZABLE order. This is because if Q2 is moved above Q1, + // then Q1 should see the updates from Q2 - which actually didn't happen. + + val allowFallbackToSnapshotIsolation = defaultIsolationLevel match { + case Serializable => noDataChanged + case WriteSerializable => noDataChanged && !op.changesData + case _ => false // This case should never happen + } + allowFallbackToSnapshotIsolation + } + + /** Log protocol change events. 
*/ + private def recordProtocolChanges( + opType: String, + fromProtocol: Protocol, + toProtocol: Protocol, + isCreatingNewTable: Boolean): Unit = { + def extract(p: Protocol): Map[String, Any] = Map( + "minReaderVersion" -> p.minReaderVersion, // Number + "minWriterVersion" -> p.minWriterVersion, // Number + "supportedFeatures" -> + p.implicitlyAndExplicitlySupportedFeatures.map(_.name).toSeq.sorted // Array[String] + ) + + val payload = if (isCreatingNewTable) { + Map("toProtocol" -> extract(toProtocol)) + } else { + Map("fromProtocol" -> extract(fromProtocol), "toProtocol" -> extract(toProtocol)) + } + recordDeltaEvent(deltaLog, opType, data = payload) + } + + /** + * Default [[IsolationLevel]] as set in table metadata. + */ + private[delta] def getDefaultIsolationLevel(): IsolationLevel = { + DeltaConfigs.ISOLATION_LEVEL.fromMetaData(metadata) + } + + /** + * Sets needsCheckpoint if we should checkpoint the version that has just been committed. + */ + protected def setNeedsCheckpoint(committedVersion: Long, postCommitSnapshot: Snapshot): Unit = { + def checkpointInterval = deltaLog.checkpointInterval(postCommitSnapshot.metadata) + needsCheckpoint = committedVersion != 0 && committedVersion % checkpointInterval == 0 + } + + private[delta] def isCommitLockEnabled: Boolean = { + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_COMMIT_LOCK_ENABLED).getOrElse( + deltaLog.store.isPartialWriteVisible(deltaLog.logPath, deltaLog.newDeltaHadoopConf())) + } + + private def lockCommitIfEnabled[T](body: => T): T = { + if (isCommitLockEnabled) { + // We are borrowing the `snapshotLock` even for commits. Ideally we should be + // using a separate lock for this purpose, because multiple threads fighting over + // a commit shouldn't interfere with normal snapshot updates by readers. + deltaLog.withSnapshotLockInterruptibly(body) + } else { + body + } + } + + /** + * Commit the txn represented by `currentTransactionInfo` using `attemptVersion` version number. + * If there are any conflicts that are found, we will retry a fixed number of times. + * + * @return the real version that was committed, the postCommitSnapshot, and the txn info + * NOTE: The postCommitSnapshot may not be the same as the version committed if racing + * commits were written while we updated the snapshot. 
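+ *
+ * The number of retries is bounded by DeltaSQLConf.DELTA_MAX_RETRY_COMMIT_ATTEMPTS; once the
+ * budget is exhausted, a maxCommitRetriesExceededException is thrown.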
+ */ + protected def doCommitRetryIteratively( + attemptVersion: Long, + currentTransactionInfo: CurrentTransactionInfo, + isolationLevel: IsolationLevel) + : (Long, Snapshot, CurrentTransactionInfo) = lockCommitIfEnabled { + + var commitVersion = attemptVersion + var updatedCurrentTransactionInfo = currentTransactionInfo + val maxRetryAttempts = spark.conf.get(DeltaSQLConf.DELTA_MAX_RETRY_COMMIT_ATTEMPTS) + recordDeltaOperation(deltaLog, "delta.commit.allAttempts") { + for (attemptNumber <- 0 to maxRetryAttempts) { + try { + val postCommitSnapshot = if (attemptNumber == 0) { + doCommit(commitVersion, updatedCurrentTransactionInfo, attemptNumber, isolationLevel) + } else recordDeltaOperation(deltaLog, "delta.commit.retry") { + val (newCommitVersion, newCurrentTransactionInfo) = checkForConflicts( + commitVersion, updatedCurrentTransactionInfo, attemptNumber, isolationLevel) + commitVersion = newCommitVersion + updatedCurrentTransactionInfo = newCurrentTransactionInfo + doCommit(commitVersion, updatedCurrentTransactionInfo, attemptNumber, isolationLevel) + } + committed = true + return (commitVersion, postCommitSnapshot, updatedCurrentTransactionInfo) + } catch { + case _: FileAlreadyExistsException => // Do nothing, retry + } + } + } + // retries all failed + val totalCommitAttemptTime = clock.getTimeMillis() - commitAttemptStartTime + throw DeltaErrors.maxCommitRetriesExceededException( + maxRetryAttempts + 1, + commitVersion, + attemptVersion, + updatedCurrentTransactionInfo.finalActionsToCommit.length, + totalCommitAttemptTime) + } + + /** + * Commit `actions` using `attemptVersion` version number. Throws a FileAlreadyExistsException + * if any conflicts are detected. + * + * @return the post-commit snapshot of the deltaLog + */ + protected def doCommit( + attemptVersion: Long, + currentTransactionInfo: CurrentTransactionInfo, + attemptNumber: Int, + isolationLevel: IsolationLevel): Snapshot = { + val actions = currentTransactionInfo.finalActionsToCommit + logInfo( + s"Attempting to commit version $attemptVersion with ${actions.size} actions with " + + s"$isolationLevel isolation level") + + if (readVersion > -1 && metadata.id != snapshot.metadata.id) { + val msg = s"Change in the table id detected in txn. Table id for txn on table at " + + s"${deltaLog.dataPath} was ${snapshot.metadata.id} when the txn was created and " + + s"is now changed to ${metadata.id}." + logWarning(msg) + recordDeltaEvent(deltaLog, "delta.metadataCheck.commit", data = Map( + "readSnapshotVersion" -> snapshot.version, + "readSnapshotMetadata" -> snapshot.metadata, + "txnMetadata" -> metadata, + "commitAttemptVersion" -> attemptVersion, + "commitAttemptNumber" -> attemptNumber)) + } + + val fsWriteStartNano = System.nanoTime() + val jsonActions = actions.map(_.json) + + val newChecksumOpt = writeCommitFile( + attemptVersion, + jsonActions.toIterator, + currentTransactionInfo) + + spark.sessionState.conf.setConf( + DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION, + Some(attemptVersion)) + + commitEndNano = System.nanoTime() + + val postCommitSnapshot = deltaLog.updateAfterCommit( + attemptVersion, + newChecksumOpt, + preCommitLogSegment + ) + val postCommitReconstructionTime = System.nanoTime() + + // Post stats + // Here, we efficiently calculate various stats (number of each different action, number of + // bytes per action, etc.) by iterating over all actions, case matching by type, and updating + // variables. This is more efficient than a functional approach. 
+ var numAbsolutePaths = 0 + val distinctPartitions = new mutable.HashSet[Map[String, String]] + + var bytesNew: Long = 0L + var numAdd: Int = 0 + var numOfDomainMetadatas: Long = 0L + var numRemove: Int = 0 + var numSetTransaction: Int = 0 + var numCdcFiles: Int = 0 + var cdcBytesNew: Long = 0L + actions.foreach { + case a: AddFile => + numAdd += 1 + if (a.pathAsUri.isAbsolute) numAbsolutePaths += 1 + distinctPartitions += a.partitionValues + if (a.dataChange) bytesNew += a.size + case r: RemoveFile => + numRemove += 1 + case c: AddCDCFile => + numCdcFiles += 1 + cdcBytesNew += c.size + case _: SetTransaction => + numSetTransaction += 1 + case _: DomainMetadata => + numOfDomainMetadatas += 1 + case _ => + } + collectAutoOptimizeStats(numAdd, numRemove, actions.iterator) + val info = currentTransactionInfo.commitInfo + .map(_.copy(readVersion = None, isolationLevel = None)).orNull + setNeedsCheckpoint(attemptVersion, postCommitSnapshot) + val stats = CommitStats( + startVersion = snapshot.version, + commitVersion = attemptVersion, + readVersion = postCommitSnapshot.version, + txnDurationMs = NANOSECONDS.toMillis(commitEndNano - txnStartNano), + commitDurationMs = NANOSECONDS.toMillis(commitEndNano - commitStartNano), + fsWriteDurationMs = NANOSECONDS.toMillis(commitEndNano - fsWriteStartNano), + stateReconstructionDurationMs = + NANOSECONDS.toMillis(postCommitReconstructionTime - commitEndNano), + numAdd = numAdd, + numRemove = numRemove, + numSetTransaction = numSetTransaction, + bytesNew = bytesNew, + numFilesTotal = postCommitSnapshot.numOfFiles, + sizeInBytesTotal = postCommitSnapshot.sizeInBytes, + numCdcFiles = numCdcFiles, + cdcBytesNew = cdcBytesNew, + protocol = postCommitSnapshot.protocol, + commitSizeBytes = jsonActions.map(_.size).sum, + checkpointSizeBytes = postCommitSnapshot.checkpointSizeInBytes(), + totalCommitsSizeSinceLastCheckpoint = postCommitSnapshot.deltaFileSizeInBytes(), + checkpointAttempt = needsCheckpoint, + info = info, + newMetadata = newMetadata, + numAbsolutePathsInAdd = numAbsolutePaths, + numDistinctPartitionsInAdd = distinctPartitions.size, + numPartitionColumnsInTable = postCommitSnapshot.metadata.partitionColumns.size, + isolationLevel = isolationLevel.toString, + numOfDomainMetadatas = numOfDomainMetadatas, + txnId = Some(txnId)) + recordDeltaEvent(deltaLog, DeltaLogging.DELTA_COMMIT_STATS_OPTYPE, data = stats) + + partitionsAddedToOpt = Some(distinctPartitions) + postCommitSnapshot + } + + /** Writes the json actions provided to the commit file corresponding to attemptVersion */ + protected def writeCommitFile( + attemptVersion: Long, + jsonActions: Iterator[String], + currentTransactionInfo: CurrentTransactionInfo): Option[VersionChecksum] = { + deltaLog.store.write( + deltaFile(deltaLog.logPath, attemptVersion), + jsonActions, + overwrite = false, + deltaLog.newDeltaHadoopConf()) + None // No VersionChecksum available yet + } + + /** + * Looks at actions that have happened since the txn started and checks for logical + * conflicts with the read/writes. Resolve conflicts and returns a tuple representing + * the commit version to attempt next and the commit summary which we need to commit. 
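+   *
+   * Concretely: if this transaction attempted version `checkVersion` and `n` winning commits
+   * have landed since it started, each of those commits is examined in order via
+   * [[checkForConflictsAgainstVersion]]. If none of them logically conflicts, the version to
+   * attempt next is `checkVersion + n` (for example, attempting version 10 with two winning
+   * commits moves the next attempt to version 12), and the returned transaction info is the
+   * possibly rewritten state to commit at that version.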
+ */ + protected def checkForConflicts( + checkVersion: Long, + currentTransactionInfo: CurrentTransactionInfo, + attemptNumber: Int, + commitIsolationLevel: IsolationLevel) + : (Long, CurrentTransactionInfo) = recordDeltaOperation( + deltaLog, + "delta.commit.retry.conflictCheck", + tags = Map(TAG_LOG_STORE_CLASS -> deltaLog.store.getClass.getName)) { + + DeltaTableV2.withEnrichedUnsupportedTableException(catalogTable) { + val fileStatuses = getConflictingVersions(checkVersion) + val nextAttemptVersion = checkVersion + fileStatuses.size + + // validate that information about conflicting winning commit files is continuous and in the + // right order. + val expected = (checkVersion until nextAttemptVersion) + val found = fileStatuses.map(deltaVersion) + val mismatch = expected.zip(found).dropWhile{ case (v1, v2) => v1 == v2 }.take(10) + assert(mismatch.isEmpty, + s"Expected ${mismatch.map(_._1).mkString(",")} but got ${mismatch.map(_._2).mkString(",")}") + + val logPrefixStr = s"[attempt $attemptNumber]" + val txnDetailsLogStr = { + var adds = 0L + var removes = 0L + currentTransactionInfo.actions.foreach { + case _: AddFile => adds += 1 + case _: RemoveFile => removes += 1 + case _ => + } + s"$adds adds, $removes removes, ${readPredicates.size} read predicates, " + + s"${readFiles.size} read files" + } + + logInfo(s"$logPrefixStr Checking for conflicts with versions " + + s"[$checkVersion, $nextAttemptVersion) with current txn having $txnDetailsLogStr") + + var updatedCurrentTransactionInfo = currentTransactionInfo + (checkVersion until nextAttemptVersion) + .zip(fileStatuses) + .foreach { case (otherCommitVersion, otherCommitFileStatus) => + updatedCurrentTransactionInfo = checkForConflictsAgainstVersion( + updatedCurrentTransactionInfo, + otherCommitFileStatus, + commitIsolationLevel) + logInfo(s"$logPrefixStr No conflicts in version $otherCommitVersion, " + + s"${clock.getTimeMillis() - commitAttemptStartTime} ms since start") + } + + logInfo(s"$logPrefixStr No conflicts with versions [$checkVersion, $nextAttemptVersion) " + + s"with current txn having $txnDetailsLogStr, " + + s"${clock.getTimeMillis() - commitAttemptStartTime} ms since start") + (nextAttemptVersion, updatedCurrentTransactionInfo) + } + } + + protected def checkForConflictsAgainstVersion( + currentTransactionInfo: CurrentTransactionInfo, + otherCommitFileStatus: FileStatus, + commitIsolationLevel: IsolationLevel): CurrentTransactionInfo = { + + val conflictChecker = new ConflictChecker( + spark, + currentTransactionInfo, + otherCommitFileStatus, + commitIsolationLevel) + conflictChecker.checkConflicts() + } + + /** Returns the version that the first attempt will try to commit at. */ + protected def getFirstAttemptVersion: Long = readVersion + 1L + + /** Returns the conflicting commit information */ + protected def getConflictingVersions(previousAttemptVersion: Long): Seq[FileStatus] = { + assert(previousAttemptVersion == preCommitLogSegment.version + 1) + val (newPreCommitLogSegment, newCommitFileStatuses) = + deltaLog.getUpdatedLogSegment(preCommitLogSegment) + assert(preCommitLogSegment.version + newCommitFileStatuses.size == + newPreCommitLogSegment.version) + preCommitLogSegment = newPreCommitLogSegment + newCommitFileStatuses + } + + /** Register a hook that will be executed once a commit is successful. 
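+   * Registering the same hook instance twice is a no-op. Hooks only run after the commit has
+   * succeeded (see [[runPostCommitHooks]]); an exception thrown by a hook is logged, recorded
+   * as a `delta.commit.hook.failure` event and passed to the hook's own `handleError`.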
*/ + def registerPostCommitHook(hook: PostCommitHook): Unit = { + if (!postCommitHooks.contains(hook)) { + postCommitHooks.append(hook) + } + } + + def containsPostCommitHook(hook: PostCommitHook): Boolean = postCommitHooks.contains(hook) + + /** Executes the registered post commit hooks. */ + protected def runPostCommitHooks( + version: Long, + postCommitSnapshot: Snapshot, + committedActions: Seq[Action]): Unit = { + assert(committed, "Can't call post commit hooks before committing") + + // Keep track of the active txn because hooks may create more txns and overwrite the active one. + val activeCommit = OptimisticTransaction.getActive() + OptimisticTransaction.clearActive() + + try { + postCommitHooks.foreach(runPostCommitHook(_, version, postCommitSnapshot, committedActions)) + } finally { + activeCommit.foreach(OptimisticTransaction.setActive) + } + } + + protected def runPostCommitHook( + hook: PostCommitHook, + version: Long, + postCommitSnapshot: Snapshot, + committedActions: Seq[Action]): Unit = { + try { + hook.run(spark, this, version, postCommitSnapshot, committedActions) + } catch { + case NonFatal(e) => + logWarning(s"Error when executing post-commit hook ${hook.name} " + + s"for commit $version", e) + recordDeltaEvent(deltaLog, "delta.commit.hook.failure", data = Map( + "hook" -> hook.name, + "version" -> version, + "exception" -> e.toString + )) + hook.handleError(e, version) + } + } + + private[delta] def unregisterPostCommitHooksWhere(predicate: PostCommitHook => Boolean): Unit = + postCommitHooks --= postCommitHooks.filter(predicate) + + protected lazy val logPrefix: String = { + def truncate(uuid: String): String = uuid.split("-").head + s"[tableId=${truncate(snapshot.metadata.id)},txnId=${truncate(txnId)}] " + } + + override def logInfo(msg: => String): Unit = { + super.logInfo(logPrefix + msg) + } + + override def logWarning(msg: => String): Unit = { + super.logWarning(logPrefix + msg) + } + + override def logWarning(msg: => String, throwable: Throwable): Unit = { + super.logWarning(logPrefix + msg, throwable) + } + + override def logError(msg: => String): Unit = { + super.logError(logPrefix + msg) + } + + override def logError(msg: => String, throwable: Throwable): Unit = { + super.logError(logPrefix + msg, throwable) + } + + /** + * If the operation assigns or modifies column default values, this method checks that the + * corresponding table feature is enabled and throws an error if not. 
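+   *
+   * For illustration (hypothetical table and column names), a statement such as
+   * {{{
+   *   spark.sql("CREATE TABLE t (id INT, flag BOOLEAN DEFAULT false) USING delta")
+   * }}}
+   * would be rejected with `WRONG_COLUMN_DEFAULTS_FOR_DELTA_FEATURE_NOT_ENABLED` unless the
+   * table feature that allows column defaults is enabled on the table.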
+ */ + protected def checkNoColumnDefaults(op: DeltaOperations.Operation): Unit = { + def usesDefaults(column: StructField): Boolean = { + column.metadata.contains(ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY) || + column.metadata.contains(ResolveDefaultColumns.EXISTS_DEFAULT_COLUMN_METADATA_KEY) + } + + def throwError(errorClass: String, parameters: Array[String]): Unit = { + throw new DeltaAnalysisException( + errorClass = errorClass, + messageParameters = parameters) + } + + op match { + case change: ChangeColumn if usesDefaults(change.newColumn) => + throwError("WRONG_COLUMN_DEFAULTS_FOR_DELTA_FEATURE_NOT_ENABLED", + Array("ALTER TABLE")) + case create: CreateTable if create.metadata.schema.fields.exists(usesDefaults) => + throwError("WRONG_COLUMN_DEFAULTS_FOR_DELTA_FEATURE_NOT_ENABLED", + Array("CREATE TABLE")) + case replace: ReplaceColumns if replace.columns.exists(usesDefaults) => + throwError("WRONG_COLUMN_DEFAULTS_FOR_DELTA_FEATURE_NOT_ENABLED", + Array("CREATE TABLE")) + case replace: ReplaceTable if replace.metadata.schema.fields.exists(usesDefaults) => + throwError("WRONG_COLUMN_DEFAULTS_FOR_DELTA_FEATURE_NOT_ENABLED", + Array("CREATE TABLE")) + case update: UpdateSchema if update.newSchema.fields.exists(usesDefaults) => + throwError("WRONG_COLUMN_DEFAULTS_FOR_DELTA_FEATURE_NOT_ENABLED", + Array("ALTER TABLE")) + case _ => + } + } + +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/PostHocResolveUpCast.scala b/spark/src/main/scala/org/apache/spark/sql/delta/PostHocResolveUpCast.scala new file mode 100644 index 00000000000..d4f498ac03a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/PostHocResolveUpCast.scala @@ -0,0 +1,54 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} +import org.apache.spark.sql.internal.SQLConf + +/** + * Post-hoc resolution rules [[PreprocessTableMerge]] and [[PreprocessTableUpdate]] may introduce + * new unresolved UpCast expressions that won't be resolved by [[ResolveUpCast]] that ran in the + * previous resolution phase. This rule ensures these UpCast expressions get resolved in the + * Post-hoc resolution phase. + * + * Note: we can't inject [[ResolveUpCast]] directly because we need an initialized analyzer instance + * for that which is not available at the time Delta rules are injected. [[PostHocResolveUpCast]] is + * delaying the access to the analyzer until after it's initialized. + */ +case class PostHocResolveUpCast(spark: SparkSession) + extends Rule[LogicalPlan] { + + override def apply(plan: LogicalPlan): LogicalPlan = + if (!plan.resolved) PostHocUpCastResolver.execute(plan) else plan + + /** + * A rule executor that runs [[ResolveUpCast]] until all UpCast expressions have been resolved. 
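+   *
+   * The executor reuses the analyzer's fixed-point settings: it runs at most
+   * `conf.analyzerMaxIterations` iterations and, because `errorOnExceed` is set, fails with
+   * an error if the plan has still not converged by then instead of silently returning a
+   * plan that may contain unresolved UpCasts.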
+ */ + object PostHocUpCastResolver extends RuleExecutor[LogicalPlan] { + final override protected def batches: Seq[Batch] = Seq( + Batch( + "Post-hoc UpCast Resolution", + FixedPoint( + conf.analyzerMaxIterations, + errorOnExceed = true, + maxIterationsSetting = SQLConf.ANALYZER_MAX_ITERATIONS.key), + spark.sessionState.analyzer.ResolveUpCast) + ) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/PreDowngradeTableFeatureCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/PreDowngradeTableFeatureCommand.scala new file mode 100644 index 00000000000..8e48f5b7954 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/PreDowngradeTableFeatureCommand.scala @@ -0,0 +1,119 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta +import java.util.concurrent.TimeUnit + +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands.{AlterTableSetPropertiesDeltaCommand, AlterTableUnsetPropertiesDeltaCommand} +import org.apache.spark.sql.delta.metering.DeltaLogging + +/** + * A base class for implementing a preparation command for removing table features. + * Must implement a run method. Note, the run method must be implemented in a way that when + * it finishes, the table does not use the feature that is being removed, and nobody is + * allowed to start using it again implicitly. One way to achieve this is by + * disabling the feature on the table before proceeding to the actual removal. + * See [[RemovableFeature.preDowngradeCommand]]. + */ +sealed abstract class PreDowngradeTableFeatureCommand { + /** + * Returns true when it performs a cleaning action. When no action was required + * it returns false. + */ + def removeFeatureTracesIfNeeded(): Boolean +} + +case class TestWriterFeaturePreDowngradeCommand(table: DeltaTableV2) + extends PreDowngradeTableFeatureCommand + with DeltaLogging { + // To remove the feature we only need to remove the table property. + override def removeFeatureTracesIfNeeded(): Boolean = { + // Make sure feature data/metadata exist before proceeding. + if (TestRemovableWriterFeature.validateRemoval(table.initialSnapshot)) return false + + recordDeltaEvent(table.deltaLog, "delta.test.TestWriterFeaturePreDowngradeCommand") + val properties = Seq(TestRemovableWriterFeature.TABLE_PROP_KEY) + AlterTableUnsetPropertiesDeltaCommand(table, properties, ifExists = true).run(table.spark) + true + } +} + +case class TestReaderWriterFeaturePreDowngradeCommand(table: DeltaTableV2) + extends PreDowngradeTableFeatureCommand { + // To remove the feature we only need to remove the table property. + override def removeFeatureTracesIfNeeded(): Boolean = { + // Make sure feature data/metadata exist before proceeding. 
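+    // `validateRemoval` returning true means no traces of the feature are left to clean up,
+    // so per the contract of removeFeatureTracesIfNeeded we report that no action was needed.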
+ if (TestRemovableReaderWriterFeature.validateRemoval(table.initialSnapshot)) return false + + val properties = Seq(TestRemovableReaderWriterFeature.TABLE_PROP_KEY) + AlterTableUnsetPropertiesDeltaCommand(table, properties, ifExists = true).run(table.spark) + true + } +} + +case class TestLegacyWriterFeaturePreDowngradeCommand(table: DeltaTableV2) + extends PreDowngradeTableFeatureCommand { + /** Return true if we removed the property, false if no action was needed. */ + override def removeFeatureTracesIfNeeded(): Boolean = { + if (TestRemovableLegacyWriterFeature.validateRemoval(table.initialSnapshot)) return false + + val properties = Seq(TestRemovableLegacyWriterFeature.TABLE_PROP_KEY) + AlterTableUnsetPropertiesDeltaCommand(table, properties, ifExists = true).run(table.spark) + true + } +} + +case class TestLegacyReaderWriterFeaturePreDowngradeCommand(table: DeltaTableV2) + extends PreDowngradeTableFeatureCommand { + /** Return true if we removed the property, false if no action was needed. */ + override def removeFeatureTracesIfNeeded(): Boolean = { + if (TestRemovableLegacyReaderWriterFeature.validateRemoval(table.initialSnapshot)) return false + + val properties = Seq(TestRemovableLegacyReaderWriterFeature.TABLE_PROP_KEY) + AlterTableUnsetPropertiesDeltaCommand(table, properties, ifExists = true).run(table.spark) + true + } +} + +case class V2CheckpointPreDowngradeCommand(table: DeltaTableV2) + extends PreDowngradeTableFeatureCommand + with DeltaLogging { + /** + * We set the checkpoint policy to classic to prevent any transactions from creating + * v2 checkpoints. + * + * @return True if it changed checkpoint policy metadata property to classic. + * False otherwise. + */ + override def removeFeatureTracesIfNeeded(): Boolean = { + + if (V2CheckpointTableFeature.validateRemoval(table.initialSnapshot)) return false + + val startTimeNs = System.nanoTime() + val properties = Map(DeltaConfigs.CHECKPOINT_POLICY.key -> CheckpointPolicy.Classic.name) + AlterTableSetPropertiesDeltaCommand(table, properties).run(table.spark) + + recordDeltaEvent( + table.deltaLog, + opType = "delta.v2CheckpointFeatureRemovalMetrics", + data = + Map(("downgradeTimeMs", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNs))) + ) + + true + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableDelete.scala b/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableDelete.scala new file mode 100644 index 00000000000..5e243d0e5a5 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableDelete.scala @@ -0,0 +1,42 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.commands.DeleteCommand + +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.plans.logical.{DeltaDelete, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.internal.SQLConf + +/** + * Preprocess the [[DeltaDelete]] plan to convert to [[DeleteCommand]]. + */ +case class PreprocessTableDelete(sqlConf: SQLConf) extends Rule[LogicalPlan] { + + override def apply(plan: LogicalPlan): LogicalPlan = { + plan.resolveOperators { + case d: DeltaDelete if d.resolved => + d.condition.foreach { cond => + if (SubqueryExpression.hasSubquery(cond)) { + throw DeltaErrors.subqueryNotSupportedException("DELETE", cond) + } + } + DeleteCommand(d) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableMerge.scala b/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableMerge.scala new file mode 100644 index 00000000000..ab68dddc428 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableMerge.scala @@ -0,0 +1,451 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.time.{Instant, LocalDateTime} +import java.util.Locale + +import scala.collection.mutable +import scala.reflect.ClassTag + +import org.apache.spark.sql.delta.commands.MergeIntoCommand +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTime +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.CURRENT_LIKE +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{instantToMicros, localDateTimeToMicros} +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{DataType, DateType, StringType, StructField, StructType, TimestampNTZType, TimestampType} + +case class PreprocessTableMerge(override val conf: SQLConf) + extends Rule[LogicalPlan] with UpdateExpressionsSupport { + + private var trackHighWaterMarks = Set[String]() + + def getTrackHighWaterMarks: Set[String] = trackHighWaterMarks + + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + case m: DeltaMergeInto if m.resolved => apply(m, true) + } + + def apply(mergeInto: DeltaMergeInto, transformToCommand: Boolean): LogicalPlan = { + val DeltaMergeInto( + target, + source, + condition, + matched, + notMatched, + notMatchedBySource, + 
migrateSchema, + finalSchemaOpt) = mergeInto + + if (finalSchemaOpt.isEmpty) { + throw DeltaErrors.targetTableFinalSchemaEmptyException() + } + + val finalSchema = finalSchemaOpt.get + + def checkCondition(cond: Expression, conditionName: String): Unit = { + if (!cond.deterministic) { + throw DeltaErrors.nonDeterministicNotSupportedException( + s"$conditionName condition of MERGE operation", cond) + } + if (cond.find(_.isInstanceOf[AggregateExpression]).isDefined) { + throw DeltaErrors.aggsNotSupportedException( + s"$conditionName condition of MERGE operation", cond) + } + if (SubqueryExpression.hasSubquery(cond)) { + throw DeltaErrors.subqueryNotSupportedException( + s"$conditionName condition of MERGE operation", cond) + } + } + + checkCondition(condition, "search") + (matched ++ notMatched ++ notMatchedBySource).filter(_.condition.nonEmpty).foreach { clause => + checkCondition(clause.condition.get, clause.clauseType.toUpperCase(Locale.ROOT)) + } + + val deltaLogicalPlan = EliminateSubqueryAliases(target) + val tahoeFileIndex = deltaLogicalPlan match { + case DeltaFullTable(_, index) => index + case o => throw DeltaErrors.notADeltaSourceException("MERGE", Some(o)) + } + val generatedColumns = GeneratedColumn.getGeneratedColumns( + tahoeFileIndex.snapshotAtAnalysis) + if (generatedColumns.nonEmpty && !deltaLogicalPlan.isInstanceOf[LogicalRelation]) { + throw DeltaErrors.operationOnTempViewWithGenerateColsNotSupported("MERGE INTO") + } + // Additional columns with default expressions. + var additionalColumns = Seq[StructField]() + + val processedMatched = matched.map { + case m: DeltaMergeIntoMatchedUpdateClause => + val alignedActions = alignUpdateActions( + target, + m.resolvedActions, + whenClauses = matched ++ notMatched ++ notMatchedBySource, + identityColumns = additionalColumns, + generatedColumns = generatedColumns, + allowStructEvolution = migrateSchema, + finalSchema = finalSchema) + m.copy(m.condition, alignedActions) + case m: DeltaMergeIntoMatchedDeleteClause => m // Delete does not need reordering + } + val processedNotMatchedBySource = notMatchedBySource.map { + case m: DeltaMergeIntoNotMatchedBySourceUpdateClause => + val alignedActions = alignUpdateActions( + target, + m.resolvedActions, + whenClauses = matched ++ notMatched ++ notMatchedBySource, + identityColumns = additionalColumns, + generatedColumns, + migrateSchema, + finalSchema) + m.copy(m.condition, alignedActions) + case m: DeltaMergeIntoNotMatchedBySourceDeleteClause => m // Delete does not need reordering + } + + val processedNotMatched = notMatched.map { case m: DeltaMergeIntoNotMatchedInsertClause => + // Check if columns are distinct. All actions should have targetColNameParts.size = 1. + m.resolvedActions.foreach { a => + if (a.targetColNameParts.size > 1) { + throw DeltaErrors.nestedFieldNotSupported( + "INSERT clause of MERGE operation", + a.targetColNameParts.mkString("`", "`.`", "`") + ) + } + } + + + val targetColNames = m.resolvedActions.map(_.targetColNameParts.head) + if (targetColNames.distinct.size < targetColNames.size) { + throw DeltaErrors.duplicateColumnOnInsert() + } + + // Generate actions for columns that are not explicitly inserted. They might come from + // the original schema of target table or the schema evolved columns. In either case they are + // covered by `finalSchema`. 
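+      // Illustrative example (hypothetical column names): with a target schema of
+      // (key, value, extra) and an INSERT clause that only sets key and value, an implicit
+      // action is generated for `extra`, using its declared default value if it has one and
+      // NULL otherwise.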
+ val implicitActions = finalSchema.filterNot { col => + m.resolvedActions.exists { insertAct => + conf.resolver(insertAct.targetColNameParts.head, col.name) + } + }.map { col => + import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.getDefaultValueExprOrNullLit + val defaultValue: Expression = + getDefaultValueExprOrNullLit(col, conf.useNullsForMissingDefaultColumnValues) + .getOrElse(Literal(null, col.dataType)) + DeltaMergeAction(Seq(col.name), defaultValue, targetColNameResolved = true) + } + + val actions = m.resolvedActions ++ implicitActions + val (actionsWithGeneratedColumns, trackFromInsert) = resolveImplicitColumns( + m.resolvedActions, + actions, + source, + generatedColumns.map(f => (f, true)) ++ additionalColumns.map(f => (f, false)), + finalSchema) + + trackHighWaterMarks ++= trackFromInsert + + val alignedActions: Seq[DeltaMergeAction] = finalSchema.map { targetAttrib => + actionsWithGeneratedColumns.find { a => + conf.resolver(targetAttrib.name, a.targetColNameParts.head) + }.map { a => + DeltaMergeAction( + Seq(targetAttrib.name), + castIfNeeded( + a.expr, + targetAttrib.dataType, + allowStructEvolution = migrateSchema, + targetAttrib.name), + targetColNameResolved = true) + }.getOrElse { + // If a target table column was not found in the INSERT columns and expressions, + // then throw exception as there must be an expression to set every target column. + throw DeltaErrors.columnOfTargetTableNotFoundInMergeException( + targetAttrib.name, targetColNames.mkString(", ")) + } + } + + m.copy(m.condition, alignedActions) + } + + if (transformToCommand) { + val (relation, tahoeFileIndex) = EliminateSubqueryAliases(target) match { + case DeltaFullTable(rel, index) => rel -> index + case o => throw DeltaErrors.notADeltaSourceException("MERGE", Some(o)) + } + + /** + * Because source and target are not children of MergeIntoCommand they are not processed when + * invoking the [[ComputeCurrentTime]] rule. This is why they need special handling. + */ + val now = Instant.now() + // Transform timestamps for the MergeIntoCommand, source, and target using the same instant. + // Called explicitly because source and target are not children of MergeIntoCommand. 
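+      // Evaluating all three plans against the single `now` instant keeps current_timestamp(),
+      // current_date() and localtimestamp() consistent between the source, the target and the
+      // command itself within this MERGE.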
+ transformTimestamps( + MergeIntoCommand( + transformTimestamps(source, now), + transformTimestamps(target, now), + relation.catalogTable, + tahoeFileIndex, + condition, + processedMatched, + processedNotMatched, + processedNotMatchedBySource, + finalSchemaOpt), + now) + } else { + DeltaMergeInto( + source, + target, + condition, + processedMatched, + processedNotMatched, + processedNotMatchedBySource, + migrateSchema, + finalSchemaOpt) + } + } + + private def transformTimestamps(plan: LogicalPlan, instant: Instant): LogicalPlan = { + import org.apache.spark.sql.delta.implicits._ + + val currentTimestampMicros = instantToMicros(instant) + val currentTime = Literal.create(currentTimestampMicros, TimestampType) + val timezone = Literal.create(conf.sessionLocalTimeZone, StringType) + + plan.transformUpWithSubqueries { + case subQuery => + subQuery.transformAllExpressionsUpWithPruning(_.containsPattern(CURRENT_LIKE)) { + case cd: CurrentDate => + Literal.create(DateTimeUtils.microsToDays(currentTimestampMicros, cd.zoneId), DateType) + case CurrentTimestamp() | Now() => currentTime + case CurrentTimeZone() => timezone + case localTimestamp: LocalTimestamp => + val asDateTime = LocalDateTime.ofInstant(instant, localTimestamp.zoneId) + Literal.create(localDateTimeToMicros(asDateTime), TimestampNTZType) + } + } + } + + /** + * Generates update expressions for columns that are not present in the target table and are + * introduced by one of the update or insert merge clauses. The generated update expressions and + * the update expressions for the existing columns are aligned to match the order in the + * target output schema. + * + * @param target Logical plan node of the target table of merge. + * @param resolvedActions Merge actions of the update clause being processed. + * @param whenClauses All merge clauses of the merge operation. + * @param identityColumns Additional identity columns present in the table. + * @param generatedColumns List of the generated columns in the table. See + * [[UpdateExpressionsSupport]]. + * @param allowStructEvolution Whether to allow structs to evolve. See + * [[UpdateExpressionsSupport]]. + * @param finalSchema The schema of the target table after the merge operation. + * @return Update actions aligned on the target output schema `finalSchema`. + */ + private def alignUpdateActions( + target: LogicalPlan, + resolvedActions: Seq[DeltaMergeAction], + whenClauses: Seq[DeltaMergeIntoClause], + identityColumns: Seq[StructField], + generatedColumns: Seq[StructField], + allowStructEvolution: Boolean, + finalSchema: StructType) + : Seq[DeltaMergeAction] = { + // Get the operations for columns that already exist... + val existingUpdateOps = resolvedActions.map { a => + UpdateOperation(a.targetColNameParts, a.expr) + } + + // And construct operations for columns that the insert/update clauses will add. + val newUpdateOps = generateUpdateOpsForNewTargetFields(target, finalSchema, resolvedActions) + + // Get expressions for the final schema for alignment. Note that attributes which already + // exist in the target need to use the same expression ID, even if the schema will evolve. 
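+    // Columns that resolve against `target` keep their existing expression ID; columns that
+    // only exist after schema evolution get a fresh AttributeReference.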
+ val finalSchemaExprs = + finalSchema.map { field => + target.resolve(Seq(field.name), conf.resolver).map { r => + AttributeReference(field.name, field.dataType)(r.exprId) + }.getOrElse { + AttributeReference(field.name, field.dataType)() + } + } + + // Use the helper methods for in UpdateExpressionsSupport to generate expressions such + // that nested fields can be updated (only for existing columns). + val alignedExprs = generateUpdateExpressions( + finalSchemaExprs, + existingUpdateOps ++ newUpdateOps, + conf.resolver, + allowStructEvolution = allowStructEvolution, + generatedColumns = generatedColumns) + + val alignedExprsWithGenerationExprs = + if (alignedExprs.forall(_.nonEmpty)) { + alignedExprs.map(_.get) + } else { + generateUpdateExprsForGeneratedColumns(target, generatedColumns, alignedExprs, + Some(finalSchemaExprs)) + } + + alignedExprsWithGenerationExprs + .zip(finalSchemaExprs) + .map { case (expr, attrib) => + DeltaMergeAction(Seq(attrib.name), expr, targetColNameResolved = true) + } + } + + /** + * Generate expressions to set to null the new (potentially nested) fields that are added to the + * target table by schema evolution and are not already set by any of the `resolvedActions` from + * the merge clause. + * + * @param target Logical plan node of the target table of merge. + * @param finalSchema The schema of the target table after the merge operation. + * @param resolvedActions Merge actions of the update clause being processed. + * @return List of update operations + */ + private def generateUpdateOpsForNewTargetFields( + target: LogicalPlan, + finalSchema: StructType, + resolvedActions: Seq[DeltaMergeAction]) + : Seq[UpdateOperation] = { + // Collect all fields in the final schema that were added by schema evolution. + // `SchemaPruning.pruneSchema` only prunes nested fields, we then filter out top-level fields + // ourself. + val targetSchemaBeforeEvolution = + target.schema.map(SchemaPruning.RootField(_, derivedFromAtt = false)) + val newTargetFields = + StructType(SchemaPruning.pruneSchema(finalSchema, targetSchemaBeforeEvolution) + .filterNot { topLevelField => target.schema.exists(_.name == topLevelField.name) }) + + /** + * Remove the field corresponding to `pathFilter` (if any) from `schema`. + */ + def filterSchema(schema: StructType, pathFilter: Seq[String]) + : Seq[StructField] = schema.flatMap { + case StructField(name, struct: StructType, _, _) + if name == pathFilter.head && pathFilter.length > 1 => + Some(StructField(name, StructType(filterSchema(struct, pathFilter.drop(1))))) + case f: StructField if f.name == pathFilter.head => None + case f => Some(f) + } + // Then filter out fields that are set by one of the merge actions. + val newTargetFieldsWithoutAssignment = resolvedActions + .map(_.targetColNameParts) + .foldRight(newTargetFields) { + (pathFilter, schema) => StructType(filterSchema(schema, pathFilter)) + } + + /** + * Generate the list of all leaf fields and their corresponding data type from `schema`. + */ + def leafFields(schema: StructType, prefix: Seq[String] = Seq.empty) + : Seq[(Seq[String], DataType)] = schema.flatMap { field => + val name = prefix :+ field.name.toLowerCase(Locale.ROOT) + field.dataType match { + case struct: StructType => leafFields(struct, name) + case dataType => Seq((name, dataType)) + } + } + // Finally, generate an update operation for each remaining field to set it to null. 
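+    // e.g. (illustrative field names) a schema-evolved nested field `address.zip` that no
+    // clause assigns becomes UpdateOperation(Seq("address", "zip"), Literal(null, <zip's type>)).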
+ leafFields(newTargetFieldsWithoutAssignment).map { + case (name, dataType) => UpdateOperation(name, Literal(null, dataType)) + } + } + + /** + * Resolves any non explicitly inserted generated columns in `allActions` to its + * corresponding generated expression. + * + * For each action, if it's a generated column that is not explicitly inserted, we will + * use its generated expression to calculate its value by resolving to a fake project of all the + * inserted values. Note that this fake project is created after we set all non explicitly + * inserted columns to nulls. This guarantees that all columns referenced by the generated + * column, regardless of whether they are explicitly inserted or not, will have a + * corresponding expression in the fake project and hence the generated expression can + * always be resolved. + * + * @param explicitActions Actions explicitly specified by users. + * @param allActions Actions with non explicitly specified columns added with nulls. + * @param sourcePlan Logical plan node of the source table of merge. + * @param columnWithDefaultExpr All the generated columns in the target table. + * @return `allActions` with expression for non explicitly inserted generated columns expression + * resolved. + */ + private def resolveImplicitColumns( + explicitActions: Seq[DeltaMergeAction], + allActions: Seq[DeltaMergeAction], + sourcePlan: LogicalPlan, + columnWithDefaultExpr: Seq[(StructField, Boolean)], + finalSchema: StructType): (Seq[DeltaMergeAction], Set[String]) = { + val implicitColumns = columnWithDefaultExpr.filter { + case (field, _) => + !explicitActions.exists { insertAct => + conf.resolver(insertAct.targetColNameParts.head, field.name) + } + } + if (implicitColumns.isEmpty) { + return (allActions, Set[String]()) + } + assert(finalSchema.size == allActions.size, + "Invalid number of columns in INSERT clause with generated columns. Expected schema: " + + s"$finalSchema, INSERT actions: $allActions") + + val track = mutable.Set[String]() + + // Fake projection used to resolve generated column expressions. + val fakeProjectMap = allActions.map { + action => { + val exprForProject = Alias(action.expr, action.targetColNameParts.head)() + exprForProject.exprId -> exprForProject + } + }.toMap + val fakeProject = Project(fakeProjectMap.values.toArray[Alias], sourcePlan) + + val resolvedActions = allActions.map { action => + val colName = action.targetColNameParts.head + implicitColumns.find { + case (field, _) => conf.resolver(field.name, colName) + } match { + case Some((field, true)) => + val expr = GeneratedColumn.getGenerationExpression(field).get + val resolvedExpr = resolveReferencesForExpressions(SparkSession.active, expr :: Nil, + fakeProject).head + // Replace references to fakeProject with original expression. + val transformedExpr = resolvedExpr.transform { + case a: AttributeReference if fakeProjectMap.contains(a.exprId) => + fakeProjectMap(a.exprId).child + } + action.copy(expr = transformedExpr) + case _ => action + } + } + (resolvedActions, track.toSet) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableUpdate.scala b/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableUpdate.scala new file mode 100644 index 00000000000..0eb5c01ef33 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableUpdate.scala @@ -0,0 +1,90 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.commands.UpdateCommand + +import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.internal.SQLConf + +/** + * Preprocesses the [[DeltaUpdateTable]] logical plan before converting it to [[UpdateCommand]]. + * - Adjusts the column order, which could be out of order, based on the destination table + * - Generates expressions to compute the value of all target columns in Delta table, while taking + * into account that the specified SET clause may only update some columns or nested fields of + * columns. + */ +case class PreprocessTableUpdate(sqlConf: SQLConf) + extends Rule[LogicalPlan] with UpdateExpressionsSupport { + + override def conf: SQLConf = sqlConf + + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + case u: DeltaUpdateTable if u.resolved => + u.condition.foreach { cond => + if (SubqueryExpression.hasSubquery(cond)) { + throw DeltaErrors.subqueryNotSupportedException("UPDATE", cond) + } + } + toCommand(u) + } + + def toCommand(update: DeltaUpdateTable): UpdateCommand = { + val deltaLogicalNode = EliminateSubqueryAliases(update.child) + val (relation, index) = deltaLogicalNode match { + case DeltaFullTable(rel, tahoeFileIndex) => rel -> tahoeFileIndex + case o => + throw DeltaErrors.notADeltaSourceException("UPDATE", Some(o)) + } + + val generatedColumns = GeneratedColumn.getGeneratedColumns(index.snapshotAtAnalysis) + if (generatedColumns.nonEmpty && !deltaLogicalNode.isInstanceOf[LogicalRelation]) { + // Disallow temp views referring to a Delta table that contains generated columns. When the + // user doesn't provide expressions for generated columns, we need to create update + // expressions for them automatically. Currently, we assume `update.child.output` is the same + // as the table schema when checking whether a column in `update.child.output` is a generated + // column in the table. + throw DeltaErrors.operationOnTempViewWithGenerateColsNotSupported("UPDATE") + } + + val targetColNameParts = update.updateColumns.map(DeltaUpdateTable.getTargetColNameParts(_)) + val alignedUpdateExprs = generateUpdateExpressions( + update.child.output, + targetColNameParts, + update.updateExpressions, + conf.resolver, + generatedColumns) + val alignedUpdateExprsAfterAddingGenerationExprs = + if (alignedUpdateExprs.forall(_.nonEmpty)) { + alignedUpdateExprs.map(_.get) + } else { + // Some expressions for generated columns are not specified by the user, so we need to + // create them based on the generation expressions. 
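+        // For example (illustrative schema): with a column defined as
+        // `part GENERATED ALWAYS AS (id % 10)`, an UPDATE that only sets `id` still needs an
+        // aligned expression for `part`, which is derived here from its generation expression.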
+ generateUpdateExprsForGeneratedColumns(update.child, generatedColumns, alignedUpdateExprs) + } + UpdateCommand( + index, + relation.catalogTable, + update.child, + alignedUpdateExprsAfterAddingGenerationExprs, + update.condition) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala b/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala new file mode 100644 index 00000000000..55e40c7ac5f --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala @@ -0,0 +1,198 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.net.URI + +import org.apache.spark.sql.delta.{RowIndexFilter, RowIndexFilterType} +import org.apache.spark.sql.delta.DeltaParquetFileFormat._ +import org.apache.spark.sql.delta.commands.DeletionVectorUtils.deletionVectorsReadable +import org.apache.spark.sql.delta.files.{TahoeFileIndex, TahoeLogFileIndex} +import org.apache.spark.sql.delta.util.DeltaFileOperations.absolutePath + +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.sql.{Column, SparkSession} +import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.SerializableConfiguration + +/** + * Plan transformer to inject a filter that removes the rows marked as deleted according to + * deletion vectors. For tables with no deletion vectors, this transformation has no effect. + * + * It modifies for plan for tables with deletion vectors as follows: + * Before rule: -> Delta Scan (key, value). + * - Here we are reading `key`, `value`` columns from the Delta table + * After rule: + * -> + * Project(key, value) -> + * Filter (udf(__skip_row == 0) -> + * Delta Scan (key, value, __skip_row) + * - Here we insert a new column `__skip_row` in Delta scan. This value is populated by the + * Parquet reader using the DV corresponding to the Parquet file read + * (See [[DeltaParquetFileFormat]]) and it contains 0 if we want to keep the row. + * The scan created also disables Parquet file splitting and filter pushdowns, because + * in order to generate the __skip_row, we need to read the rows in a file consecutively + * to generate the row index. This is a cost we need to pay until we upgrade to latest + * Apache Spark which contains Parquet reader changes that automatically generate the + * row_index irrespective of the file splitting and filter pushdowns. + * - The scan created also contains a broadcast variable of Parquet File -> DV File map. 
+ * The Parquet reader created uses this map to find the DV file corresponding to the data file. + * - Filter created filters out rows with __skip_row equals to 0 + * - And at the end we have a Project to keep the plan node output same as before the rule is + * applied. + */ +trait PreprocessTableWithDVs extends SubqueryTransformerHelper { + def preprocessTablesWithDVs(plan: LogicalPlan): LogicalPlan = { + transformWithSubqueries(plan) { + case ScanWithDeletionVectors(dvScan) => dvScan + } + } +} + +object ScanWithDeletionVectors { + def unapply(a: LogicalRelation): Option[LogicalPlan] = a match { + case scan @ LogicalRelation( + relation @ HadoopFsRelation( + index: TahoeFileIndex, _, _, _, format: DeltaParquetFileFormat, _), _, _, _) => + dvEnabledScanFor(scan, relation, format, index) + case _ => None + } + + def dvEnabledScanFor( + scan: LogicalRelation, + hadoopRelation: HadoopFsRelation, + fileFormat: DeltaParquetFileFormat, + index: TahoeFileIndex): Option[LogicalPlan] = { + // If the table has no DVs enabled, no change needed + if (!deletionVectorsReadable(index.protocol, index.metadata)) return None + + require(!index.isInstanceOf[TahoeLogFileIndex], + "Cannot work with a non-pinned table snapshot of the TahoeFileIndex") + + // If the table has no DVs enabled, no change needed + if (!deletionVectorsReadable(index.protocol, index.metadata)) return None + + // See if the relation is already modified to include DV reads as part of + // a previous invocation of this rule on this table + if (fileFormat.hasDeletionVectorMap) return None + + // See if any files actually have a DV + val spark = SparkSession.getActiveSession.get + val filePathToDVBroadcastMap = createBroadcastDVMap(spark, index) + if (filePathToDVBroadcastMap.value.isEmpty) return None + + // Get the list of columns in the output of the `LogicalRelation` we are + // trying to modify. At the end of the plan, we need to return a + // `LogicalRelation` that has the same output as this `LogicalRelation` + val planOutput = scan.output + + val newScan = createScanWithSkipRowColumn( + spark, scan, fileFormat, index, filePathToDVBroadcastMap, hadoopRelation) + + // On top of the scan add a filter that filters out the rows which have + // skip row column value non-zero + val rowIndexFilter = createRowIndexFilterNode(newScan) + + // Now add a project on top of the row index filter node to + // remove the skip row column + Some(Project(planOutput, rowIndexFilter)) + } + /** + * Helper method that creates a new `LogicalRelation` for existing scan that outputs + * an extra column which indicates whether the row needs to be skipped or not. + */ + private def createScanWithSkipRowColumn( + spark: SparkSession, + inputScan: LogicalRelation, + fileFormat: DeltaParquetFileFormat, + tahoeFileIndex: TahoeFileIndex, + filePathToDVBroadcastMap: Broadcast[Map[URI, DeletionVectorDescriptorWithFilterType]], + hadoopFsRelation: HadoopFsRelation): LogicalRelation = { + // Create a new `LogicalRelation` that has modified `DeltaFileFormat` and output with an extra + // column to indicate whether to skip the row or not + + // Add a column for SKIP_ROW to the base output. Value of 0 means the row needs be kept, any + // other values mean the row needs be skipped. + val skipRowField = IS_ROW_DELETED_STRUCT_FIELD + val newScanOutput = inputScan.output :+ + AttributeReference(skipRowField.name, skipRowField.dataType)() + + // Data schema and scan schema could be different. 
The scan schema may contain additional + // columns such as `_metadata.file_path` (metadata columns) which are populated in Spark scan + // operator after the data is read from the underlying file reader. + val newDataSchema = hadoopFsRelation.dataSchema.add(skipRowField) + + val hadoopConfBroadcast = spark.sparkContext.broadcast( + new SerializableConfiguration(tahoeFileIndex.deltaLog.newDeltaHadoopConf())) + + val newFileFormat = fileFormat.copyWithDVInfo( + tahoeFileIndex.path.toString, filePathToDVBroadcastMap, hadoopConfBroadcast) + val newRelation = hadoopFsRelation.copy( + fileFormat = newFileFormat, + dataSchema = newDataSchema)(hadoopFsRelation.sparkSession) + + // Create a new scan LogicalRelation + inputScan.copy(relation = newRelation, output = newScanOutput) + } + + private def createRowIndexFilterNode(newScan: LogicalRelation): Filter = { + val skipRowColumnRefs = newScan.output.filter(_.name == IS_ROW_DELETED_COLUMN_NAME) + require(skipRowColumnRefs.size == 1, + s"Expected only one column with name=$IS_ROW_DELETED_COLUMN_NAME") + val skipRowColumnRef = skipRowColumnRefs.head + + val keepRow = DeltaUDF.booleanFromByte( _ == RowIndexFilter.KEEP_ROW_VALUE) + .asNondeterministic() // To avoid constant folding the filter based on stats. + + val filterExp = keepRow(new Column(skipRowColumnRef)).expr + Filter(filterExp, newScan) + } + + private def createBroadcastDVMap( + spark: SparkSession, + tahoeFileIndex: TahoeFileIndex) + : Broadcast[Map[URI, DeletionVectorDescriptorWithFilterType]] = { + val filterTypes = tahoeFileIndex.rowIndexFilters.getOrElse(Map.empty) + + // Given there is no way to find the final filters, just select all files in the + // file index and create the DV map. + val filesWithDVs = tahoeFileIndex + .matchingFiles(partitionFilters = Seq(TrueLiteral), dataFilters = Seq(TrueLiteral)) + .filter(_.deletionVector != null) + // Attach filter types to FileActions, so that later [[DeltaParquetFileFormat]] could pick it up + // to decide which kind of rows should be filtered out. This info is necessary for reading CDC + // rows that have been deleted (marked in DV), in which case marked rows must be kept rather + // than filtered out. In such a case, the `filterTypes` map will be populated by [[CDCReader]] + // to indicate IF_NOT_CONTAINED filter should be used. In other cases, `filterTypes` will be + // empty, so we generate IF_CONTAINED as the default DV behavior. + val filePathToDVMap = filesWithDVs.map { addFile => + val key = absolutePath(tahoeFileIndex.path.toString, addFile.path).toUri + val filterType = + filterTypes.getOrElse(addFile.path, RowIndexFilterType.IF_CONTAINED) + val value = + DeletionVectorDescriptorWithFilterType(addFile.deletionVector, filterType) + key -> value + }.toMap + + spark.sparkContext.broadcast(filePathToDVMap) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTimeTravel.scala b/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTimeTravel.scala new file mode 100644 index 00000000000..74f615a3227 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/PreprocessTimeTravel.scala @@ -0,0 +1,85 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.catalyst.TimeTravel +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, ResolvedTable, UnresolvedRelation} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.internal.SQLConf + +/** + * Resolves the [[UnresolvedRelation]] in command 's child [[TimeTravel]]. + * Currently Delta depends on Spark 3.2 which does not resolve the [[UnresolvedRelation]] + * in [[TimeTravel]]. Once Delta upgrades to Spark 3.3, this code can be removed. + * + * TODO: refactoring this analysis using Spark's native [[TimeTravelRelation]] logical plan + */ +case class PreprocessTimeTravel(sparkSession: SparkSession) extends Rule[LogicalPlan] { + + override def conf: SQLConf = sparkSession.sessionState.conf + + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + case _ @ RestoreTableStatement(tt @ TimeTravel(ur @ UnresolvedRelation(_, _, _), _, _, _)) => + val sourceRelation = resolveTimeTravelTable(sparkSession, ur, "RESTORE") + return RestoreTableStatement( + TimeTravel( + sourceRelation, + tt.timestamp, + tt.version, + tt.creationSource)) + + case ct @ CloneTableStatement( + tt @ TimeTravel(ur: UnresolvedRelation, _, _, _), _, + _, _, _, _, _) => + val sourceRelation = resolveTimeTravelTable(sparkSession, ur, "CLONE TABLE") + ct.copy(source = TimeTravel( + sourceRelation, + tt.timestamp, + tt.version, + tt.creationSource)) + } + + /** + * Helper to resolve a [[TimeTravel]] logical plan to Delta DSv2 relation. + */ + private def resolveTimeTravelTable( + sparkSession: SparkSession, + ur: UnresolvedRelation, + commandName: String): LogicalPlan = { + // Since TimeTravel is a leaf node, the table relation within TimeTravel won't be resolved + // automatically by the Apache Spark analyzer rule `ResolveRelations`. + // Thus, we need to explicitly use the rule `ResolveRelations` to table resolution here. + EliminateSubqueryAliases(sparkSession.sessionState.analyzer.ResolveRelations(ur)) match { + case _: View => + // If the identifier is a view, throw not supported error + throw DeltaErrors.notADeltaTableException(commandName) + case tableRelation if tableRelation.resolved => + tableRelation + case _ => + // If the identifier doesn't exist as a table, try resolving it as a path table. 
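+        // e.g. RESTORE TABLE delta.`/some/path` TO VERSION AS OF 1, where the identifier is a
+        // filesystem path rather than a catalog table (path shown for illustration only).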
+ ResolveDeltaPathTable.resolveAsPathTableRelation(sparkSession, ur).getOrElse { + ur.tableNotFound(ur.multipartIdentifier) + } + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/ProvidesUniFormConverters.scala b/spark/src/main/scala/org/apache/spark/sql/delta/ProvidesUniFormConverters.scala new file mode 100644 index 00000000000..6803d1a996c --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/ProvidesUniFormConverters.scala @@ -0,0 +1,52 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.lang.reflect.InvocationTargetException + +import org.apache.commons.lang3.exception.ExceptionUtils + +import org.apache.spark.sql.SparkSession +import org.apache.spark.util.Utils + +trait ProvidesUniFormConverters { self: DeltaLog => + /** + * Helper trait to instantiate the icebergConverter member variable of the [[DeltaLog]]. We do + * this through reflection so that delta-spark doesn't have a compile-time dependency on the + * shaded iceberg module. + */ + protected lazy val _icebergConverter: UniversalFormatConverter = try { + val clazz = + Utils.classForName("org.apache.spark.sql.delta.icebergShaded.IcebergConverter") + val constructor = clazz.getConstructor(classOf[SparkSession]) + constructor.newInstance(spark) + } catch { + case e: ClassNotFoundException => + logError(s"Failed to find Iceberg converter class", e) + throw DeltaErrors.icebergClassMissing(spark.sparkContext.getConf, e) + case e: InvocationTargetException => + logError(s"Got error when creating an Iceberg converter", e) + // The better error is within the cause + throw ExceptionUtils.getRootCause(e) + } + + /** Visible for tests (to be able to mock). */ + private[delta] var testIcebergConverter: Option[UniversalFormatConverter] = None + + def icebergConverter: UniversalFormatConverter = testIcebergConverter.getOrElse(_icebergConverter) +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/ResolveDeltaPathTable.scala b/spark/src/main/scala/org/apache/spark/sql/delta/ResolveDeltaPathTable.scala new file mode 100644 index 00000000000..96602045cde --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/ResolveDeltaPathTable.scala @@ -0,0 +1,80 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.catalyst.TimeTravel +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.util.AnalysisHelper +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.{ResolvedTable, UnresolvedRelation, UnresolvedTable} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.{CatalogHelper, MultipartIdentifierHelper} +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation + +/** + * Replaces [[UnresolvedTable]]s if the plan is for direct query on files. + */ +case class ResolveDeltaPathTable(sparkSession: SparkSession) extends Rule[LogicalPlan] { + + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + case u: UnresolvedTable => + ResolveDeltaPathTable.resolveAsPathTable(sparkSession, u.multipartIdentifier, Map.empty) + .getOrElse(u) + } +} + +object ResolveDeltaPathTable +{ + + /** + * Try resolving the input table as a Path table. + * If the path table exists, return a [[DataSourceV2Relation]] instance. Otherwise, return None. + */ + def resolveAsPathTableRelation( + sparkSession: SparkSession, + u: UnresolvedRelation) : Option[DataSourceV2Relation] = { + resolveAsPathTable(sparkSession, u.multipartIdentifier, Map.empty).map { resolvedTable => + DataSourceV2Relation.create( + resolvedTable.table, Some(resolvedTable.catalog), Some(resolvedTable.identifier)) + } + } + + /** + * Try resolving the input table as a Path table. + * If the path table exists, return a [[ResolvedTable]] instance. Otherwise, return None. + */ + def resolveAsPathTable( + sparkSession: SparkSession, + multipartIdentifier: Seq[String], + options: Map[String, String]): Option[ResolvedTable] = { + val sessionState = sparkSession.sessionState + if (!sessionState.conf.runSQLonFile || multipartIdentifier.size != 2) { + return None + } + val tableId = multipartIdentifier.asTableIdentifier + if (DeltaTableUtils.isValidPath(tableId)) { + val deltaTableV2 = DeltaTableV2(sparkSession, new Path(tableId.table), options = options) + val sessionCatalog = sessionState.catalogManager.v2SessionCatalog.asTableCatalog + Some(ResolvedTable.create(sessionCatalog, multipartIdentifier.asIdentifier, deltaTableV2)) + } else { + None + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/RowId.scala b/spark/src/main/scala/org/apache/spark/sql/delta/RowId.scala new file mode 100644 index 00000000000..8dc1a3bbc9a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/RowId.scala @@ -0,0 +1,130 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql.delta
+
+import org.apache.spark.sql.delta.actions.{Action, AddFile, DomainMetadata, Metadata, Protocol}
+import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils.propertyKey
+
+/**
+ * Collection of helpers to handle Row IDs.
+ */
+object RowId {
+  /**
+   * Metadata domain for the high water mark stored using a [[DomainMetadata]] action.
+   */
+  case class RowTrackingMetadataDomain(rowIdHighWaterMark: Long)
+    extends JsonMetadataDomain[RowTrackingMetadataDomain] {
+    override val domainName: String = RowTrackingMetadataDomain.domainName
+  }
+
+  object RowTrackingMetadataDomain extends JsonMetadataDomainUtils[RowTrackingMetadataDomain] {
+    override protected val domainName = "delta.rowTracking"
+
+    def unapply(action: Action): Option[RowTrackingMetadataDomain] = action match {
+      case d: DomainMetadata if d.domain == domainName => Some(fromJsonConfiguration(d))
+      case _ => None
+    }
+
+    def isRowTrackingDomain(d: DomainMetadata): Boolean = d.domain == domainName
+  }
+
+  val MISSING_HIGH_WATER_MARK: Long = -1L
+
+  /**
+   * Returns whether the protocol version supports the Row ID table feature. Whenever Row IDs are
+   * supported, fresh Row IDs must be assigned to all newly committed files, even when Row IDs are
+   * disabled in the current table version.
+   */
+  def isSupported(protocol: Protocol): Boolean = RowTracking.isSupported(protocol)
+
+  /**
+   * Returns whether Row IDs are enabled on this table version. Checks that Row IDs are supported,
+   * which is a pre-requisite for enabling Row IDs, and throws an error if not.
+   */
+  def isEnabled(protocol: Protocol, metadata: Metadata): Boolean = {
+    val isEnabled = DeltaConfigs.ROW_TRACKING_ENABLED.fromMetaData(metadata)
+    if (isEnabled && !isSupported(protocol)) {
+      throw new IllegalStateException(
+        s"Table property '${DeltaConfigs.ROW_TRACKING_ENABLED.key}' is " +
+          s"set on the table but this table version doesn't support table feature " +
+          s"'${propertyKey(RowTrackingFeature)}'.")
+    }
+    isEnabled
+  }
+
+  /**
+   * Verifies that Row IDs are only enabled when a new table is created.
+   */
+  private[delta] def verifyMetadata(
+      oldProtocol: Protocol,
+      newProtocol: Protocol,
+      oldMetadata: Metadata,
+      newMetadata: Metadata,
+      isCreatingNewTable: Boolean): Unit = {
+
+    val rowIdsEnabledBefore = isEnabled(oldProtocol, oldMetadata)
+    val rowIdsEnabledAfter = isEnabled(newProtocol, newMetadata)
+
+    if (rowIdsEnabledAfter && !rowIdsEnabledBefore && !isCreatingNewTable) {
+      throw new UnsupportedOperationException(
+        "Cannot enable Row IDs on an existing table.")
+    }
+  }
+
+  /**
+   * Assigns fresh row IDs to all AddFiles inside `actions` that do not have row IDs yet and emits
+   * a [[RowTrackingMetadataDomain]] action with the new high-water mark.
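+   *
+   * Illustrative example (numbers are only for illustration): with a current high-water mark
+   * of 9, an AddFile with 5 physical records is assigned baseRowId 10 (moving the mark to 14),
+   * a second AddFile with 3 records is assigned baseRowId 15, and the emitted metadata domain
+   * records the new high-water mark 17.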
+   */
+  private[delta] def assignFreshRowIds(
+      protocol: Protocol,
+      snapshot: Snapshot,
+      actions: Iterator[Action]): Iterator[Action] = {
+    if (!isSupported(protocol)) return actions
+
+    val oldHighWatermark = extractHighWatermark(snapshot).getOrElse(MISSING_HIGH_WATER_MARK)
+
+    var newHighWatermark = oldHighWatermark
+
+    val actionsWithFreshRowIds = actions.map {
+      case a: AddFile if a.baseRowId.isEmpty =>
+        val baseRowId = newHighWatermark + 1L
+        newHighWatermark += a.numPhysicalRecords.getOrElse {
+          throw DeltaErrors.rowIdAssignmentWithoutStats
+        }
+        a.copy(baseRowId = Some(baseRowId))
+      case d: DomainMetadata if RowTrackingMetadataDomain.isRowTrackingDomain(d) =>
+        throw new IllegalStateException(
+          "Manually setting the Row ID high water mark is not allowed")
+      case other => other
+    }
+
+    val newHighWatermarkAction: Iterator[Action] = new Iterator[Action] {
+      // Iterators are lazy, so the first call to `hasNext` won't happen until after we
+      // exhaust the remapped actions iterator. At that point, the watermark (changed or not)
+      // decides whether the iterator is empty or infinite; take(1) below to bound it.
+      override def hasNext: Boolean = newHighWatermark != oldHighWatermark
+      override def next(): Action = RowTrackingMetadataDomain(newHighWatermark).toDomainMetadata
+    }
+    actionsWithFreshRowIds ++ newHighWatermarkAction.take(1)
+  }
+
+  /**
+   * Extracts the high watermark of row IDs from a snapshot.
+   */
+  private[delta] def extractHighWatermark(snapshot: Snapshot): Option[Long] =
+    RowTrackingMetadataDomain.fromSnapshot(snapshot).map(_.rowIdHighWaterMark)
+}
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/RowTracking.scala b/spark/src/main/scala/org/apache/spark/sql/delta/RowTracking.scala
new file mode 100644
index 00000000000..3a28c4bbe88
--- /dev/null
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/RowTracking.scala
@@ -0,0 +1,91 @@
+/*
+ * Copyright (2021) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta
+
+import org.apache.spark.sql.delta.actions.{Metadata, Protocol, TableFeatureProtocolUtils}
+
+
+/**
+ * Utility functions for Row Tracking that are shared between Row IDs and Row Commit Versions.
+ */
+object RowTracking {
+  /**
+   * Returns whether the protocol version supports the Row Tracking table feature. Whenever Row
+   * Tracking is supported, fresh Row IDs and Row Commit Versions must be assigned to all newly
+   * committed files, even when Row IDs are disabled in the current table version.
+   */
+  def isSupported(protocol: Protocol): Boolean = protocol.isFeatureSupported(RowTrackingFeature)
+
+  /**
+   * Returns whether Row Tracking is enabled on this table version. Checks that Row Tracking is
+   * supported, which is a pre-requisite for enabling Row Tracking, and throws an error if not.
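+   *
+   * (Usage sketch, assuming a `snapshot` value is in scope:
+   * `RowTracking.isEnabled(snapshot.protocol, snapshot.metadata)`; both arguments should come
+   * from the same snapshot so that the support check and the property lookup stay consistent.)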
+ */ + def isEnabled(protocol: Protocol, metadata: Metadata): Boolean = { + val isEnabled = DeltaConfigs.ROW_TRACKING_ENABLED.fromMetaData(metadata) + if (isEnabled && !isSupported(protocol)) { + throw new IllegalStateException( + s"Table property '${DeltaConfigs.ROW_TRACKING_ENABLED.key}' is " + + s"set on the table but this table version doesn't support table feature " + + s"'${TableFeatureProtocolUtils.propertyKey(RowTrackingFeature)}'.") + } + isEnabled + } + + /** + * Checks whether CONVERT TO DELTA collects statistics if row tracking is supported. If it does + * not collect statistics, we cannot assign fresh row IDs, hence we throw an error to either rerun + * the command without enabling the row tracking table feature, or to enable the necessary + * flags to collect statistics. + */ + private[delta] def checkStatsCollectedIfRowTrackingSupported( + protocol: Protocol, + convertToDeltaShouldCollectStats: Boolean, + statsCollectionEnabled: Boolean): Unit = { + if (!isSupported(protocol)) return + if (!convertToDeltaShouldCollectStats || !statsCollectionEnabled) { + throw DeltaErrors.convertToDeltaRowTrackingEnabledWithoutStatsCollection + } + } + + /** + * Returns the sourceMetadata with the row tracking property coming from the targetMetadata. + */ + private[delta] def takeRowTrackingPropertyFromTarget( + targetMetadata: Metadata, + sourceMetadata: Metadata): Metadata = { + var newConfig = sourceMetadata.configuration - DeltaConfigs.ROW_TRACKING_ENABLED.key + targetMetadata.configuration.get(DeltaConfigs.ROW_TRACKING_ENABLED.key).foreach { v => + newConfig += DeltaConfigs.ROW_TRACKING_ENABLED.key -> v + } + sourceMetadata.copy(configuration = newConfig) + } + + /** + * Removes the row tracking property from the metadata. + */ + private[delta] def removeRowTrackingProperty(metadata: Metadata): Metadata = { + metadata.copy(configuration = metadata.configuration - DeltaConfigs.ROW_TRACKING_ENABLED.key) + } + + /** + * Removes the row tracking table feature from the protocol. + */ + private[delta] def removeRowTrackingTableFeature(protocol: Protocol): Protocol = { + val writerFeaturesWithoutRowTracking = protocol.writerFeatures.map(_ - RowTrackingFeature.name) + protocol.copy(writerFeatures = writerFeaturesWithoutRowTracking) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/Snapshot.scala b/spark/src/main/scala/org/apache/spark/sql/delta/Snapshot.scala new file mode 100644 index 00000000000..0b177a3d89a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/Snapshot.scala @@ -0,0 +1,481 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import scala.collection.mutable + +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.actions.Action.logSchema +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.DataSkippingReader +import org.apache.spark.sql.delta.stats.DeltaStatsColumnSpec +import org.apache.spark.sql.delta.stats.StatisticsCollection +import org.apache.spark.sql.delta.util.StateCache +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.sql._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.Utils + +/** + * A description of a Delta [[Snapshot]], including basic information such its [[DeltaLog]] + * metadata, protocol, and version. + */ +trait SnapshotDescriptor { + def deltaLog: DeltaLog + def version: Long + def metadata: Metadata + def protocol: Protocol + + def schema: StructType = metadata.schema + + protected[delta] def numOfFilesIfKnown: Option[Long] + protected[delta] def sizeInBytesIfKnown: Option[Long] +} + +/** + * An immutable snapshot of the state of the log at some delta version. Internally + * this class manages the replay of actions stored in checkpoint or delta files. + * + * After resolving any new actions, it caches the result and collects the + * following basic information to the driver: + * - Protocol Version + * - Metadata + * - Transaction state + * + * @param timestamp The timestamp of the latest commit in milliseconds. Can also be set to -1 if the + * timestamp of the commit is unknown or the table has not been initialized, i.e. + * `version = -1`. + * + */ +class Snapshot( + val path: Path, + override val version: Long, + val logSegment: LogSegment, + override val deltaLog: DeltaLog, + val timestamp: Long, + val checksumOpt: Option[VersionChecksum] + ) + extends SnapshotDescriptor + with SnapshotStateManager + with StateCache + with StatisticsCollection + with DataSkippingReader + with DeltaLogging { + + import Snapshot._ + import DeltaLogFileIndex.COMMIT_VERSION_COLUMN + // For implicits which re-use Encoder: + import org.apache.spark.sql.delta.implicits._ + + protected def spark = SparkSession.active + + /** Snapshot to scan by the DeltaScanGenerator for metadata query optimizations */ + override val snapshotToScan: Snapshot = this + + override def columnMappingMode: DeltaColumnMappingMode = metadata.columnMappingMode + + + private[delta] lazy val nonFileActions: Seq[Action] = { + Seq(protocol, metadata) ++ + setTransactions ++ + domainMetadata + } + + @volatile private[delta] var stateReconstructionTriggered = false + + /** + * Use [[stateReconstruction]] to create a representation of the actions in this table. + * Cache the resultant output. + */ + private lazy val cachedState = recordFrameProfile("Delta", "snapshot.cachedState") { + stateReconstructionTriggered = true + cacheDS(stateReconstruction, s"Delta Table State #$version - $redactedPath") + } + + /** + * Given the list of files from `LogSegment`, create respective file indices to help create + * a DataFrame and short-circuit the many file existence and partition schema inference checks + * that exist in DataSource.resolveRelation(). 
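+   *
+   * (Clarifying note: this index covers only the JSON commit files in the [[LogSegment]];
+   * checkpoint files are indexed separately via the checkpoint provider, and the two sets are
+   * combined in `fileIndices` below.)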
+ */ + protected[delta] lazy val deltaFileIndexOpt: Option[DeltaLogFileIndex] = { + assertLogFilesBelongToTable(path, logSegment.deltas) + DeltaLogFileIndex(DeltaLogFileIndex.COMMIT_FILE_FORMAT, logSegment.deltas) + } + + protected lazy val fileIndices: Seq[DeltaLogFileIndex] = { + val checkpointFileIndexes = checkpointProvider.allActionsFileIndexes() + checkpointFileIndexes ++ deltaFileIndexOpt.toSeq + } + + /** + * Generate the protocol and metadata for this snapshot. This is usually cheaper than a + * full state reconstruction, but still only compute it when necessary. + */ + private lazy val (_protocol, _metadata): (Protocol, Metadata) = { + // Should be small. At most 'checkpointInterval' rows, unless new commits are coming + // in before a checkpoint can be written + var protocol: Protocol = null + var metadata: Metadata = null + protocolAndMetadataReconstruction().foreach { + case (p: Protocol, _) => protocol = p + case (_, m: Metadata) => metadata = m + } + + if (protocol == null) { + recordDeltaEvent( + deltaLog, + opType = "delta.assertions.missingAction", + data = Map( + "version" -> version.toString, "action" -> "Protocol", "source" -> "Snapshot")) + throw DeltaErrors.actionNotFoundException("protocol", version) + } + + if (metadata == null) { + recordDeltaEvent( + deltaLog, + opType = "delta.assertions.missingAction", + data = Map( + "version" -> version.toString, "action" -> "Metadata", "source" -> "Snapshot")) + throw DeltaErrors.actionNotFoundException("metadata", version) + } + + protocol -> metadata + } + + /** Number of columns to collect stats on for data skipping */ + override lazy val statsColumnSpec: DeltaStatsColumnSpec = + StatisticsCollection.configuredDeltaStatsColumnSpec(metadata) + + /** Performs validations during initialization */ + protected def init(): Unit = { + deltaLog.protocolRead(protocol) + deltaLog.assertTableFeaturesMatchMetadata(protocol, metadata) + SchemaUtils.recordUndefinedTypes(deltaLog, metadata.schema) + } + + /** The current set of actions in this [[Snapshot]] as plain Rows */ + def stateDF: DataFrame = recordFrameProfile("Delta", "stateDF") { + cachedState.getDF + } + + /** The current set of actions in this [[Snapshot]] as a typed Dataset. */ + def stateDS: Dataset[SingleAction] = recordFrameProfile("Delta", "stateDS") { + cachedState.getDS + } + + private[delta] def allFilesViaStateReconstruction: Dataset[AddFile] = { + stateDS.where("add IS NOT NULL").select(col("add").as[AddFile]) + } + + // Here we need to bypass the ACL checks for SELECT anonymous function permissions. + /** All of the files present in this [[Snapshot]]. */ + def allFiles: Dataset[AddFile] = allFilesViaStateReconstruction + + /** All unexpired tombstones. */ + def tombstones: Dataset[RemoveFile] = { + stateDS.where("remove IS NOT NULL").select(col("remove").as[RemoveFile]) + } + + def deltaFileSizeInBytes(): Long = deltaFileIndexOpt.map(_.sizeInBytes).getOrElse(0L) + + def checkpointSizeInBytes(): Long = checkpointProvider.effectiveCheckpointSizeInBytes() + + override def metadata: Metadata = _metadata + + override def protocol: Protocol = _protocol + + /** + * Pulls the protocol and metadata of the table from the files that are used to compute the + * Snapshot directly--without triggering a full state reconstruction. This is important, because + * state reconstruction depends on protocol and metadata for correctness. + * + * Also this method should only access methods defined in [[UninitializedCheckpointProvider]] + * which are not present in [[CheckpointProvider]]. 
This is because initialization of + * [[Snapshot.checkpointProvider]] depends on [[Snapshot.protocolAndMetadataReconstruction()]] + * and so if [[Snapshot.protocolAndMetadataReconstruction()]] starts depending on + * [[Snapshot.checkpointProvider]] then there will be cyclic dependency. + */ + protected def protocolAndMetadataReconstruction(): Array[(Protocol, Metadata)] = { + import implicits._ + + val schemaToUse = Action.logSchema(Set("protocol", "metaData")) + val checkpointOpt = checkpointProvider.topLevelFileIndex.map { index => + deltaLog.loadIndex(index, schemaToUse) + .withColumn(COMMIT_VERSION_COLUMN, lit(checkpointProvider.version)) + } + (checkpointOpt ++ deltaFileIndexOpt.map(deltaLog.loadIndex(_, schemaToUse)).toSeq) + .reduceOption(_.union(_)).getOrElse(emptyDF) + .select("protocol", "metaData", COMMIT_VERSION_COLUMN) + .where("protocol.minReaderVersion is not null or metaData.id is not null") + .as[(Protocol, Metadata, Long)] + .collect() + .sortBy(_._3) + .map { case (p, m, _) => p -> m } + } + + // Reconstruct the state by applying deltas in order to the checkpoint. + // We partition by path as it is likely the bulk of the data is add/remove. + // Non-path based actions will be collocated to a single partition. + protected def stateReconstruction: Dataset[SingleAction] = { + recordFrameProfile("Delta", "snapshot.stateReconstruction") { + // for serializability + val localMinFileRetentionTimestamp = minFileRetentionTimestamp + val localMinSetTransactionRetentionTimestamp = minSetTransactionRetentionTimestamp + + val canonicalPath = deltaLog.getCanonicalPathUdf() + + // Canonicalize the paths so we can repartition the actions correctly, but only rewrite the + // add/remove actions themselves after partitioning and sorting are complete. Otherwise, the + // optimizer can generate a really bad plan that re-evaluates _EVERY_ field of the rewritten + // struct(...) projection every time we touch _ANY_ field of the rewritten struct. + // + // NOTE: We sort by [[COMMIT_VERSION_COLUMN]] (provided by [[loadActions]]), to ensure that + // actions are presented to InMemoryLogReplay in the ascending version order it expects. 
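+      // The two canonical-path columns computed below feed the repartition: coalescing them
+      // places the AddFile and RemoveFile entries for the same path in the same partition, so
+      // the per-partition InMemoryLogReplay in mapPartitions below can reconcile them.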
+ val ADD_PATH_CANONICAL_COL_NAME = "add_path_canonical" + val REMOVE_PATH_CANONICAL_COL_NAME = "remove_path_canonical" + loadActions + .withColumn(ADD_PATH_CANONICAL_COL_NAME, when( + col("add.path").isNotNull, canonicalPath(col("add.path")))) + .withColumn(REMOVE_PATH_CANONICAL_COL_NAME, when( + col("remove.path").isNotNull, canonicalPath(col("remove.path")))) + .repartition( + getNumPartitions, + coalesce(col(ADD_PATH_CANONICAL_COL_NAME), col(REMOVE_PATH_CANONICAL_COL_NAME))) + .sortWithinPartitions(COMMIT_VERSION_COLUMN) + .withColumn("add", when( + col("add.path").isNotNull, + struct( + col(ADD_PATH_CANONICAL_COL_NAME).as("path"), + col("add.partitionValues"), + col("add.size"), + col("add.modificationTime"), + col("add.dataChange"), + col(ADD_STATS_TO_USE_COL_NAME).as("stats"), + col("add.tags"), + col("add.deletionVector"), + col("add.baseRowId"), + col("add.defaultRowCommitVersion"), + col("add.clusteringProvider") + ))) + .withColumn("remove", when( + col("remove.path").isNotNull, + col("remove").withField("path", col(REMOVE_PATH_CANONICAL_COL_NAME)))) + .as[SingleAction] + .mapPartitions { iter => + val state: LogReplay = + new InMemoryLogReplay( + localMinFileRetentionTimestamp, + localMinSetTransactionRetentionTimestamp) + state.append(0, iter.map(_.unwrap)) + state.checkpoint.map(_.wrap) + } + } + } + + /** + * Loads the file indices into a DataFrame that can be used for LogReplay. + * + * In addition to the usual nested columns provided by the SingleAction schema, it should provide + * two additional columns to simplify the log replay process: [[COMMIT_VERSION_COLUMN]] (which, + * when sorted in ascending order, will order older actions before newer ones, as required by + * [[InMemoryLogReplay]]); and [[ADD_STATS_TO_USE_COL_NAME]] (to handle certain combinations of + * config settings for delta.checkpoint.writeStatsAsJson and delta.checkpoint.writeStatsAsStruct). + */ + protected def loadActions: DataFrame = { + fileIndices.map(deltaLog.loadIndex(_)) + .reduceOption(_.union(_)).getOrElse(emptyDF) + .withColumn(ADD_STATS_TO_USE_COL_NAME, col("add.stats")) + } + + /** + * Tombstones before the [[minFileRetentionTimestamp]] timestamp will be dropped from the + * checkpoint. + */ + private[delta] def minFileRetentionTimestamp: Long = { + deltaLog.clock.getTimeMillis() - DeltaLog.tombstoneRetentionMillis(metadata) + } + + /** + * [[SetTransaction]]s before [[minSetTransactionRetentionTimestamp]] will be considered expired + * and dropped from the snapshot. + */ + private[delta] def minSetTransactionRetentionTimestamp: Option[Long] = { + DeltaLog.minSetTransactionRetentionInterval(metadata).map(deltaLog.clock.getTimeMillis() - _) + } + + private[delta] def getNumPartitions: Int = { + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_SNAPSHOT_PARTITIONS) + .getOrElse(Snapshot.defaultNumSnapshotPartitions) + } + + /** + * Computes all the information that is needed by the checksum for the current snapshot. + * May kick off state reconstruction if needed by any of the underlying fields. + * Note that it's safe to set txnId to none, since the snapshot doesn't always have a txn + * attached. E.g. if a snapshot is created by reading a checkpoint, then no txnId is present. 
+ */ + def computeChecksum: VersionChecksum = VersionChecksum( + txnId = None, + tableSizeBytes = sizeInBytes, + numFiles = numOfFiles, + numMetadata = numOfMetadata, + numProtocol = numOfProtocol, + setTransactions = checksumOpt.flatMap(_.setTransactions), + domainMetadata = domainMetadatasIfKnown, + metadata = metadata, + protocol = protocol, + histogramOpt = fileSizeHistogram, + allFiles = checksumOpt.flatMap(_.allFiles)) + + /** Returns the data schema of the table, used for reading stats */ + def tableSchema: StructType = metadata.dataSchema + + def outputTableStatsSchema: StructType = metadata.dataSchema + + def outputAttributeSchema: StructType = metadata.dataSchema + + /** Returns the schema of the columns written out to file (overridden in write path) */ + def dataSchema: StructType = metadata.dataSchema + + /** Return the set of properties of the table. */ + def getProperties: mutable.Map[String, String] = { + val base = new mutable.LinkedHashMap[String, String]() + metadata.configuration.foreach { case (k, v) => + if (k != "path") { + base.put(k, v) + } + } + base.put(Protocol.MIN_READER_VERSION_PROP, protocol.minReaderVersion.toString) + base.put(Protocol.MIN_WRITER_VERSION_PROP, protocol.minWriterVersion.toString) + if (protocol.supportsReaderFeatures || protocol.supportsWriterFeatures) { + val features = protocol.readerAndWriterFeatureNames.map(name => + s"${TableFeatureProtocolUtils.FEATURE_PROP_PREFIX}$name" -> + TableFeatureProtocolUtils.FEATURE_PROP_SUPPORTED) + base ++ features.toSeq.sorted + } else { + base + } + } + + /** The [[CheckpointProvider]] for the underlying checkpoint */ + lazy val checkpointProvider: CheckpointProvider = logSegment.checkpointProvider match { + case cp: CheckpointProvider => cp + case uninitializedProvider: UninitializedCheckpointProvider => + CheckpointProvider(spark, this, checksumOpt, uninitializedProvider) + case o => throw new IllegalStateException(s"Unknown checkpoint provider: ${o.getClass.getName}") + } + + def redactedPath: String = + Utils.redact(spark.sessionState.conf.stringRedactionPattern, path.toUri.toString) + + + protected def emptyDF: DataFrame = + spark.createDataFrame(spark.sparkContext.emptyRDD[Row], logSchema) + + + override def logInfo(msg: => String): Unit = { + super.logInfo(s"[tableId=${deltaLog.tableId}] " + msg) + } + + override def logWarning(msg: => String): Unit = { + super.logWarning(s"[tableId=${deltaLog.tableId}] " + msg) + } + + override def logWarning(msg: => String, throwable: Throwable): Unit = { + super.logWarning(s"[tableId=${deltaLog.tableId}] " + msg, throwable) + } + + override def logError(msg: => String): Unit = { + super.logError(s"[tableId=${deltaLog.tableId}] " + msg) + } + + override def logError(msg: => String, throwable: Throwable): Unit = { + super.logError(s"[tableId=${deltaLog.tableId}] " + msg, throwable) + } + + override def toString: String = + s"${getClass.getSimpleName}(path=$path, version=$version, metadata=$metadata, " + + s"logSegment=$logSegment, checksumOpt=$checksumOpt)" + + logInfo(s"Created snapshot $this") + init() +} + +object Snapshot extends DeltaLogging { + + // Used by [[loadActions]] and [[stateReconstruction]] + val ADD_STATS_TO_USE_COL_NAME = "add_stats_to_use" + + private val defaultNumSnapshotPartitions: Int = 50 + + /** Verifies that a set of delta or checkpoint files to be read actually belongs to this table. 
*/ + private def assertLogFilesBelongToTable(logBasePath: Path, files: Seq[FileStatus]): Unit = { + files.map(_.getPath).foreach { filePath => + if (new Path(filePath.toUri).getParent != new Path(logBasePath.toUri)) { + // scalastyle:off throwerror + throw new AssertionError(s"File ($filePath) doesn't belong in the " + + s"transaction log at $logBasePath.") + // scalastyle:on throwerror + } + } + } +} + +/** + * An initial snapshot with only metadata specified. Useful for creating a DataFrame from an + * existing parquet table during its conversion to delta. + * + * @param logPath the path to transaction log + * @param deltaLog the delta log object + * @param metadata the metadata of the table + */ +class InitialSnapshot( + val logPath: Path, + override val deltaLog: DeltaLog, + override val metadata: Metadata) + extends Snapshot( + path = logPath, + version = -1, + logSegment = LogSegment.empty(logPath), + deltaLog = deltaLog, + timestamp = -1, + checksumOpt = None + ) { + + def this(logPath: Path, deltaLog: DeltaLog) = this( + logPath, + deltaLog, + Metadata( + configuration = DeltaConfigs.mergeGlobalConfigs( + sqlConfs = SparkSession.active.sessionState.conf, + tableConf = Map.empty, + ignoreProtocolConfsOpt = Some( + DeltaConfigs.ignoreProtocolDefaultsIsSet( + sqlConfs = SparkSession.active.sessionState.conf, + tableConf = deltaLog.allOptions))), + createdTime = Some(System.currentTimeMillis()))) + + override def stateDS: Dataset[SingleAction] = emptyDF.as[SingleAction] + override def stateDF: DataFrame = emptyDF + override protected lazy val computedState: SnapshotState = initialState(metadata) + override def protocol: Protocol = computedState.protocol +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/SnapshotManagement.scala b/spark/src/main/scala/org/apache/spark/sql/delta/SnapshotManagement.scala new file mode 100644 index 00000000000..577e6fa6aed --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/SnapshotManagement.scala @@ -0,0 +1,1126 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql.delta
+
+import java.io.FileNotFoundException
+import java.util.Objects
+import java.util.concurrent.Future
+import java.util.concurrent.locks.ReentrantLock
+
+import scala.collection.mutable
+import scala.util.control.NonFatal
+
+// scalastyle:off import.ordering.noEmptyLine
+
+import com.databricks.spark.util.TagDefinitions.TAG_ASYNC
+import org.apache.spark.sql.delta.actions.Metadata
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+import org.apache.spark.sql.delta.util.FileNames._
+import org.apache.spark.sql.delta.util.JsonUtils
+import org.apache.spark.sql.delta.util.threads.DeltaThreadPool
+import com.fasterxml.jackson.annotation.JsonIgnore
+import org.apache.hadoop.fs.{BlockLocation, FileStatus, LocatedFileStatus, Path}
+
+import org.apache.spark.{SparkContext, SparkException}
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.util.{ThreadUtils, Utils}
+
+/**
+ * Wraps the most recently updated snapshot along with the timestamp at which the update was
+ * started. Defined outside the class since it's used in tests.
+ */
+case class CapturedSnapshot(snapshot: Snapshot, updateTimestamp: Long)
+
+
+/**
+ * Manages the creation, computation, and access of Snapshots for Delta tables. Responsibilities
+ * include:
+ *  - Figuring out the set of files that are required to compute a specific version of a table
+ *  - Updating and exposing the latest snapshot of the Delta table in a thread-safe manner
+ */
+trait SnapshotManagement { self: DeltaLog =>
+  import SnapshotManagement.verifyDeltaVersions
+
+  @volatile private[delta] var asyncUpdateTask: Future[Unit] = _
+
+  @volatile protected var currentSnapshot: CapturedSnapshot = getSnapshotAtInit
+
+  /** Use ReentrantLock to allow us to call `lockInterruptibly` */
+  protected val snapshotLock = new ReentrantLock()
+
+  /**
+   * Run `body` inside the `snapshotLock` lock using `lockInterruptibly` so that the thread
+   * can be interrupted when waiting for the lock.
+   */
+  def withSnapshotLockInterruptibly[T](body: => T): T = {
+    snapshotLock.lockInterruptibly()
+    try {
+      body
+    } finally {
+      snapshotLock.unlock()
+    }
+  }
+
+  /**
+   * Get the LogSegment that will help in computing the Snapshot of the table at DeltaLog
+   * initialization, or None if the directory was empty/missing.
+   *
+   * @param startingCheckpoint A checkpoint that we can start our listing from
+   */
+  protected def getLogSegmentFrom(
+      startingCheckpoint: Option[LastCheckpointInfo]): Option[LogSegment] = {
+    getLogSegmentForVersion(
+      versionToLoad = None,
+      lastCheckpointInfo = startingCheckpoint
+    )
+  }
+
+  /** Get an iterator of files in the _delta_log directory starting with the startVersion. */
+  private[delta] def listFrom(startVersion: Long): Iterator[FileStatus] = {
+    store.listFrom(listingPrefix(logPath, startVersion), newDeltaHadoopConf())
+  }
+
+  /** Returns true if the path is a delta log file. Delta log files can be delta commit files
+   * (e.g., 000000000.json) or checkpoint files (e.g., 000000001.checkpoint.00001.00003.parquet).
+   * @param path Path of a file
+   * @return Boolean Whether the file is a delta log file
+   */
+  protected def isDeltaCommitOrCheckpointFile(path: Path): Boolean = {
+    isCheckpointFile(path) || isDeltaFile(path)
+  }
+
+  /** Returns an iterator containing a list of files found from the provided path */
+  protected def listFromOrNone(startVersion: Long): Option[Iterator[FileStatus]] = {
+    // LIST the directory, starting from the provided lower bound (treat missing dir as empty).
+ // NOTE: "empty/missing" is _NOT_ equivalent to "contains no useful commit files." + try { + Some(listFrom(startVersion)).filterNot(_.isEmpty) + } catch { + case _: FileNotFoundException => None + } + } + + /** + * Returns the delta files and checkpoint files starting from the given `startVersion`. + * `versionToLoad` is an optional parameter to set the max bound. It's usually used to load a + * table snapshot for a specific version. + * + * @param startVersion the version to start. Inclusive. + * @param versionToLoad the optional parameter to set the max version we should return. Inclusive. + * @return Some array of files found (possibly empty, if no usable commit files are present), or + * None if the listing returned no files at all. + */ + protected final def listDeltaCompactedDeltaAndCheckpointFiles( + startVersion: Long, + versionToLoad: Option[Long], + includeMinorCompactions: Boolean): Option[Array[FileStatus]] = + recordDeltaOperation(self, "delta.deltaLog.listDeltaAndCheckpointFiles") { + listFromOrNone(startVersion).map { _ + .collect { + case DeltaFile(f, fileVersion) => + (f, fileVersion) + case CompactedDeltaFile(f, startVersion, endVersion) + if includeMinorCompactions && versionToLoad.forall(endVersion <= _) => + (f, startVersion) + case CheckpointFile(f, fileVersion) if f.getLen > 0 => + (f, fileVersion) + } + // take files until the version we want to load + .takeWhile { case (_, fileVersion) => versionToLoad.forall(fileVersion <= _) } + .map(_._1).toArray + } + } + + /** + * Get a list of files that can be used to compute a Snapshot at version `versionToLoad`, If + * `versionToLoad` is not provided, will generate the list of files that are needed to load the + * latest version of the Delta table. This method also performs checks to ensure that the delta + * files are contiguous. + * + * @param versionToLoad A specific version to load. Typically used with time travel and the + * Delta streaming source. If not provided, we will try to load the latest + * version of the table. + * @param oldCheckpointProviderOpt The [[CheckpointProvider]] from the previous snapshot. This is + * used as a start version for the listing when `startCheckpoint` is + * unavailable. This is also used to initialize the [[LogSegment]]. + * @param lastCheckpointInfo [[LastCheckpointInfo]] from the _last_checkpoint. This could be + * used to initialize the Snapshot's [[LogSegment]]. + * @return Some LogSegment to build a Snapshot if files do exist after the given + * startCheckpoint. None, if the directory was missing or empty. + */ + protected def getLogSegmentForVersion( + versionToLoad: Option[Long] = None, + oldCheckpointProviderOpt: Option[UninitializedCheckpointProvider] = None, + lastCheckpointInfo: Option[LastCheckpointInfo] = None): Option[LogSegment] = { + // List based on the last known checkpoint version. 
+ // if that is -1, list from version 0L + val lastCheckpointVersion = getCheckpointVersion(lastCheckpointInfo, oldCheckpointProviderOpt) + val listingStartVersion = Math.max(0L, lastCheckpointVersion) + val includeMinorCompactions = + spark.conf.get(DeltaSQLConf.DELTALOG_MINOR_COMPACTION_USE_FOR_READS) + val newFiles = listDeltaCompactedDeltaAndCheckpointFiles( + startVersion = listingStartVersion, + versionToLoad = versionToLoad, + includeMinorCompactions = includeMinorCompactions) + getLogSegmentForVersion( + versionToLoad, + newFiles, + validateLogSegmentWithoutCompactedDeltas = true, + oldCheckpointProviderOpt = oldCheckpointProviderOpt, + lastCheckpointInfo = lastCheckpointInfo + ) + } + + /** + * Returns the last known checkpoint version based on [[LastCheckpointInfo]] or + * [[CheckpointProvider]]. + * Returns -1 if both the info is not available. + */ + protected def getCheckpointVersion( + lastCheckpointInfoOpt: Option[LastCheckpointInfo], + oldCheckpointProviderOpt: Option[UninitializedCheckpointProvider]): Long = { + lastCheckpointInfoOpt.map(_.version) + .orElse(oldCheckpointProviderOpt.map(_.version)) + .getOrElse(-1) + } + + /** + * Helper method to validate that selected deltas are contiguous from checkpoint version till + * the required `versionToLoad`. + * @param selectedDeltas - deltas selected for snapshot creation. + * @param checkpointVersion - checkpoint version selected for snapshot creation. Should be `-1` if + * no checkpoint is selected. + * @param versionToLoad - version for which we want to create the Snapshot. + */ + private def validateDeltaVersions( + selectedDeltas: Array[FileStatus], + checkpointVersion: Long, + versionToLoad: Option[Long]): Unit = { + // checkpointVersion should be passed as -1 if no checkpoint is needed for the LogSegment. + + // We may just be getting a checkpoint file. + selectedDeltas.headOption.foreach { headDelta => + val headDeltaVersion = deltaVersion(headDelta) + val lastDeltaVersion = selectedDeltas.last match { + case CompactedDeltaFile(_, _, endV) => endV + case DeltaFile(_, v) => v + } + + if (headDeltaVersion != checkpointVersion + 1) { + throw DeltaErrors.logFileNotFoundException( + deltaFile(logPath, checkpointVersion + 1), + lastDeltaVersion, + unsafeVolatileMetadata) // metadata is best-effort only + } + val deltaVersions = selectedDeltas.flatMap { + case CompactedDeltaFile(_, startV, endV) => (startV to endV) + case DeltaFile(_, v) => Seq(v) + } + verifyDeltaVersions(spark, deltaVersions, Some(checkpointVersion + 1), versionToLoad) + } + } + + /** + * Helper function for the getLogSegmentForVersion above. Called with a provided files list, + * and will then try to construct a new LogSegment using that. + */ + protected def getLogSegmentForVersion( + versionToLoad: Option[Long], + files: Option[Array[FileStatus]], + validateLogSegmentWithoutCompactedDeltas: Boolean, + oldCheckpointProviderOpt: Option[UninitializedCheckpointProvider], + lastCheckpointInfo: Option[LastCheckpointInfo]): Option[LogSegment] = { + recordFrameProfile("Delta", "SnapshotManagement.getLogSegmentForVersion") { + val lastCheckpointVersion = getCheckpointVersion(lastCheckpointInfo, oldCheckpointProviderOpt) + val newFiles = files.filterNot(_.isEmpty) + .getOrElse { + // No files found even when listing from 0 => empty directory => table does not exist yet. + if (lastCheckpointVersion < 0) return None + // We always write the commit and checkpoint files before updating _last_checkpoint. 
+ // If the listing came up empty, then we either encountered a list-after-put + // inconsistency in the underlying log store, or somebody corrupted the table by + // deleting files. Either way, we can't safely continue. + // + // For now, we preserve existing behavior by returning Array.empty, which will trigger a + // recursive call to [[getLogSegmentForVersion]] below. + Array.empty[FileStatus] + } + + if (newFiles.isEmpty && lastCheckpointVersion < 0) { + // We can't construct a snapshot because the directory contained no usable commit + // files... but we can't return None either, because it was not truly empty. + throw DeltaErrors.emptyDirectoryException(logPath.toString) + } else if (newFiles.isEmpty) { + // The directory may be deleted and recreated and we may have stale state in our DeltaLog + // singleton, so try listing from the first version + return getLogSegmentForVersion(versionToLoad = versionToLoad) + } + val (checkpoints, deltasAndCompactedDeltas) = newFiles.partition(isCheckpointFile) + val (deltas, compactedDeltas) = deltasAndCompactedDeltas.partition(isDeltaFile) + // Find the latest checkpoint in the listing that is not older than the versionToLoad + val checkpointFiles = checkpoints.map(f => CheckpointInstance(f.getPath)) + val newCheckpoint = getLatestCompleteCheckpointFromList(checkpointFiles, versionToLoad) + val newCheckpointVersion = newCheckpoint.map(_.version).getOrElse { + // If we do not have any checkpoint, pass new checkpoint version as -1 so that first + // delta version can be 0. + if (lastCheckpointVersion >= 0) { + // `startCheckpoint` was given but no checkpoint found on delta log. This means that the + // last checkpoint we thought should exist (the `_last_checkpoint` file) no longer exists. + // Try to look up another valid checkpoint and create `LogSegment` from it. + // This case can arise if the user deleted the table (all commits and checkpoints) but + // left the _last_checkpoint intact. + recordDeltaEvent(this, "delta.checkpoint.error.partial") + val snapshotVersion = versionToLoad.getOrElse(deltaVersion(deltas.last)) + getLogSegmentWithMaxExclusiveCheckpointVersion(snapshotVersion, lastCheckpointVersion) + .foreach { alternativeLogSegment => return Some(alternativeLogSegment) } + + // No alternative found, but the directory contains files so we cannot return None. + throw DeltaErrors.missingPartFilesException( + lastCheckpointVersion, new FileNotFoundException( + s"Checkpoint file to load version: $lastCheckpointVersion is missing.")) + } + -1L + } + + // If there is a new checkpoint, start new lineage there. If `newCheckpointVersion` is -1, + // it will list all existing delta files. + val deltasAfterCheckpoint = deltas.filter { file => + deltaVersion(file) > newCheckpointVersion + } + + // Here we validate that we are able to create a valid LogSegment by just using commit deltas + // and without considering minor-compacted deltas. We want to fail early if log is messed up + // i.e. some commit deltas are missing (although compacted-deltas are present). + // We should not do this validation when we want to update the logSegment after a conflict + // via the [[SnapshotManagement.getUpdatedLogSegment]] method. In that specific flow, we just + // list from the committed version and reuse existing pre-commit logsegment together with + // listing result to create the new pre-commit logsegment. Because of this, we don't have info + // about all the delta files (e.g. 
when minor compactions are used in existing preCommit log + // segment) and hence the validation if attempted will fail. So we need to set + // `validateLogSegmentWithoutCompactedDeltas` to false in that case. + if (validateLogSegmentWithoutCompactedDeltas) { + validateDeltaVersions(deltasAfterCheckpoint, newCheckpointVersion, versionToLoad) + } + + val newVersion = + deltasAfterCheckpoint.lastOption.map(deltaVersion).getOrElse(newCheckpoint.get.version) + // reuse the oldCheckpointProvider if it is same as what we are looking for. + val checkpointProviderOpt = newCheckpoint.map { ci => + oldCheckpointProviderOpt + .collect { case cp if cp.version == ci.version => cp } + .getOrElse(ci.getCheckpointProvider(this, checkpoints, lastCheckpointInfo)) + } + // In the case where `deltasAfterCheckpoint` is empty, `deltas` should still not be empty, + // they may just be before the checkpoint version unless we have a bug in log cleanup. + if (deltas.isEmpty) { + throw new IllegalStateException(s"Could not find any delta files for version $newVersion") + } + if (versionToLoad.exists(_ != newVersion)) { + throw new IllegalStateException( + s"Trying to load a non-existent version ${versionToLoad.get}") + } + val lastCommitTimestamp = deltas.last.getModificationTime + + val deltasAndCompactedDeltasForLogSegment = useCompactedDeltasForLogSegment( + deltasAndCompactedDeltas, + deltasAfterCheckpoint, + latestCommitVersion = newVersion, + checkpointVersionToUse = newCheckpointVersion) + + validateDeltaVersions( + deltasAndCompactedDeltasForLogSegment, newCheckpointVersion, versionToLoad) + + Some(LogSegment( + logPath, + newVersion, + deltasAndCompactedDeltasForLogSegment, + checkpointProviderOpt, + lastCommitTimestamp)) + } + } + + /** + * @param deltasAndCompactedDeltas - all deltas or compacted deltas which could be used + * @param deltasAfterCheckpoint - deltas after the last checkpoint file + * @param latestCommitVersion - commit version for which we are trying to create Snapshot for + * @param checkpointVersionToUse - underlying checkpoint version to use in Snapshot, -1 if no + * checkpoint is used. + * @return Returns a list of deltas/compacted-deltas which can be used to construct the + * [[LogSegment]] instead of `deltasAfterCheckpoint`. + */ + protected def useCompactedDeltasForLogSegment( + deltasAndCompactedDeltas: Seq[FileStatus], + deltasAfterCheckpoint: Array[FileStatus], + latestCommitVersion: Long, + checkpointVersionToUse: Long): Array[FileStatus] = { + + val selectedDeltas = mutable.ArrayBuffer.empty[FileStatus] + var highestVersionSeen = checkpointVersionToUse + val commitRangeCovered = mutable.ArrayBuffer.empty[Long] + // track if there is at least 1 compacted delta in `deltasAndCompactedDeltas` + var hasCompactedDeltas = false + for (file <- deltasAndCompactedDeltas) { + val (startVersion, endVersion) = file match { + case CompactedDeltaFile(_, startVersion, endVersion) => + hasCompactedDeltas = true + (startVersion, endVersion) + case DeltaFile(_, version) => + (version, version) + } + + // select the compacted delta if the startVersion doesn't straddle `highestVersionSeen` and + // the endVersion doesn't cross the latestCommitVersion. + if (highestVersionSeen < startVersion && endVersion <= latestCommitVersion) { + commitRangeCovered.appendAll(startVersion to endVersion) + selectedDeltas += file + highestVersionSeen = endVersion + } + } + // If there are no compacted deltas in the `deltasAndCompactedDeltas` list, return from this + // method. 
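+    // (Illustrative example of the selection above, assuming the compacted file sorts before
+    // its constituent deltas in the listing: with a checkpoint at version 10, a compacted delta
+    // covering versions 11-13, and single-commit deltas 11 through 15, the selected list becomes
+    // [compacted 11-13, delta 14, delta 15].)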
+ if (!hasCompactedDeltas) return deltasAfterCheckpoint + // Validation-1: Commits represented by `compactedDeltasToUse` should be unique and there must + // not be any duplicates. + val coveredCommits = commitRangeCovered.toSet + val hasDuplicates = (commitRangeCovered.size != coveredCommits.size) + + // Validation-2: All commits from (CheckpointVersion + 1) to latestCommitVersion should be + // either represented by compacted delta or by the delta. + val requiredCommits = (checkpointVersionToUse + 1) to latestCommitVersion + val missingCommits = requiredCommits.toSet -- coveredCommits + if (!hasDuplicates && missingCommits.isEmpty) return selectedDeltas.toArray + + // If the above check failed, that means the compacted delta validation failed. + // Just record that event and return just the deltas (deltasAfterCheckpoint). + val eventData = Map( + "deltasAndCompactedDeltas" -> deltasAndCompactedDeltas.map(_.getPath.getName), + "deltasAfterCheckpoint" -> deltasAfterCheckpoint.map(_.getPath.getName), + "latestCommitVersion" -> latestCommitVersion, + "checkpointVersionToUse" -> checkpointVersionToUse, + "hasDuplicates" -> hasDuplicates, + "missingCommits" -> missingCommits + ) + recordDeltaEvent( + deltaLog = this, + opType = "delta.getLogSegmentForVersion.compactedDeltaValidationFailed", + data = eventData) + if (Utils.isTesting) { + assert(false, s"Validation around Compacted deltas failed while creating Snapshot. " + + s"[${JsonUtils.toJson(eventData)}]") + } + deltasAfterCheckpoint + } + + /** + * Load the Snapshot for this Delta table at initialization. This method uses the `lastCheckpoint` + * file as a hint on where to start listing the transaction log directory. If the _delta_log + * directory doesn't exist, this method will return an `InitialSnapshot`. + */ + protected def getSnapshotAtInit: CapturedSnapshot = { + recordFrameProfile("Delta", "SnapshotManagement.getSnapshotAtInit") { + val currentTimestamp = clock.getTimeMillis() + val lastCheckpointOpt = readLastCheckpointFile() + createSnapshotAtInitInternal( + initSegment = getLogSegmentFrom(lastCheckpointOpt), + timestamp = currentTimestamp + ) + } + } + + protected def createSnapshotAtInitInternal( + initSegment: Option[LogSegment], + timestamp: Long): CapturedSnapshot = { + val snapshot = initSegment.map { segment => + val snapshot = createSnapshot( + initSegment = segment, + checksumOpt = None) + snapshot + }.getOrElse { + logInfo(s"Creating initial snapshot without metadata, because the directory is empty") + new InitialSnapshot(logPath, this) + } + CapturedSnapshot(snapshot, timestamp) + } + + /** + * Returns the current snapshot. This does not automatically `update()`. + * + * WARNING: This is not guaranteed to give you the latest snapshot of the log, nor stay + * consistent across multiple accesses. If you need the latest snapshot, it is recommended + * to fetch it using `deltaLog.update()`; and save the returned snapshot so it does not + * unexpectedly change from under you. See how [[OptimisticTransaction]] and [[DeltaScan]] + * use the snapshot as examples for write/read paths respectively. + * This API should only be used in scenarios where any recent snapshot will suffice and an + * update is undesired, or by internal code that holds the DeltaLog lock to prevent races. + */ + def unsafeVolatileSnapshot: Snapshot = Option(currentSnapshot).map(_.snapshot).orNull + + /** + * WARNING: This API is unsafe and deprecated. It will be removed in future versions. 
+ * Use the above unsafeVolatileSnapshot to get the most recently cached snapshot on + * the cluster. + */ + @deprecated("This method is deprecated and will be removed in future versions. " + + "Use unsafeVolatileSnapshot instead", "12.0") + def snapshot: Snapshot = unsafeVolatileSnapshot + + /** + * Unsafe due to thread races that can change it at any time without notice, even between two + * calls in the same method. Like [[unsafeVolatileSnapshot]] it depends on, this method should be + * used only with extreme care in production code (or by unit tests where no races are possible). + */ + private[delta] def unsafeVolatileMetadata = + Option(unsafeVolatileSnapshot).map(_.metadata).getOrElse(Metadata()) + + protected def createSnapshot( + initSegment: LogSegment, + checksumOpt: Option[VersionChecksum]): Snapshot = { + val startingFrom = if (!initSegment.checkpointProvider.isEmpty) { + s" starting from checkpoint version ${initSegment.checkpointProvider.version}." + } else "." + logInfo(s"Loading version ${initSegment.version}$startingFrom") + createSnapshotFromGivenOrEquivalentLogSegment(initSegment) { segment => + new Snapshot( + path = logPath, + version = segment.version, + logSegment = segment, + deltaLog = this, + timestamp = segment.lastCommitTimestamp, + checksumOpt = checksumOpt.orElse(readChecksum(segment.version)) + ) + } + } + + /** + * Returns a [[LogSegment]] for reading `snapshotVersion` such that the segment's checkpoint + * version (if checkpoint present) is LESS THAN `maxExclusiveCheckpointVersion`. + * This is useful when trying to skip a bad checkpoint. Returns `None` when we are not able to + * construct such [[LogSegment]], for example, no checkpoint can be used but we don't have the + * entire history from version 0 to version `snapshotVersion`. + */ + private def getLogSegmentWithMaxExclusiveCheckpointVersion( + snapshotVersion: Long, + maxExclusiveCheckpointVersion: Long): Option[LogSegment] = { + assert( + snapshotVersion >= maxExclusiveCheckpointVersion, + s"snapshotVersion($snapshotVersion) is less than " + + s"maxExclusiveCheckpointVersion($maxExclusiveCheckpointVersion)") + val upperBoundVersion = math.min(snapshotVersion + 1, maxExclusiveCheckpointVersion) + val previousCp = + if (upperBoundVersion > 0) findLastCompleteCheckpointBefore(upperBoundVersion) else None + previousCp match { + case Some(cp) => + val filesSinceCheckpointVersion = listDeltaCompactedDeltaAndCheckpointFiles( + startVersion = cp.version, + versionToLoad = Some(snapshotVersion), + includeMinorCompactions = false + ).getOrElse(Array.empty) + val (checkpoints, deltas) = filesSinceCheckpointVersion.partition(isCheckpointFile) + if (deltas.isEmpty) { + // We cannot find any delta files. Returns None as we cannot construct a `LogSegment` only + // from checkpoint files. This is because in order to create a `LogSegment`, we need to + // set `LogSegment.lastCommitTimestamp`, and it must be read from the file modification + // time of the delta file for `snapshotVersion`. It cannot be the file modification time + // of a checkpoint file because it should be deterministic regardless how we construct the + // Snapshot, and only delta json log files can ensure that. + return None + } + // `checkpoints` may contain multiple checkpoints for different part sizes, we need to + // search `FileStatus`s of the checkpoint files for `cp`. 
+ val checkpointProvider = + cp.getCheckpointProvider(this, checkpoints, lastCheckpointInfoHint = None) + // Create the list of `FileStatus`s for delta files after `cp.version`. + val deltasAfterCheckpoint = deltas.filter { file => + deltaVersion(file) > cp.version + } + val deltaVersions = deltasAfterCheckpoint.map(deltaVersion) + // `deltaVersions` should not be empty and `verifyDeltaVersions` will verify it + try { + verifyDeltaVersions(spark, deltaVersions, Some(cp.version + 1), Some(snapshotVersion)) + } catch { + case NonFatal(e) => + logWarning(s"Failed to find a valid LogSegment for $snapshotVersion", e) + return None + } + Some(LogSegment( + logPath, + snapshotVersion, + deltas, + Some(checkpointProvider), + deltas.last.getModificationTime)) + case None => + val listFromResult = + listDeltaCompactedDeltaAndCheckpointFiles( + startVersion = 0, + versionToLoad = Some(snapshotVersion), + includeMinorCompactions = false) + val (deltas, deltaVersions) = + listFromResult + .getOrElse(Array.empty) + .flatMap(DeltaFile.unapply(_)) + .unzip + try { + verifyDeltaVersions(spark, deltaVersions, Some(0), Some(snapshotVersion)) + } catch { + case NonFatal(e) => + logWarning(s"Failed to find a valid LogSegment for $snapshotVersion", e) + return None + } + Some(LogSegment( + logPath = logPath, + version = snapshotVersion, + deltas = deltas, + checkpointProviderOpt = None, + lastCommitTimestamp = deltas.last.getModificationTime)) + } + } + + /** Used to compute the LogSegment after a commit */ + protected[delta] def getLogSegmentAfterCommit( + oldCheckpointProvider: UninitializedCheckpointProvider): LogSegment = { + /** + * We can't specify `versionToLoad = committedVersion` for the call below. + * If there are a lot of concurrent commits to the table on the same cluster, each + * would generate a different snapshot, and thus each would trigger a new state + * reconstruction. The last commit would get stuck waiting for each of the previous + * jobs to finish to grab the update lock. + * Instead, just do a general update to the latest available version. The racing commits + * can then use the version check short-circuit to avoid constructing a new snapshot. + */ + getLogSegmentForVersion(oldCheckpointProviderOpt = Some(oldCheckpointProvider)).getOrElse { + // This shouldn't be possible right after a commit + logError(s"No delta log found for the Delta table at $logPath") + throw DeltaErrors.emptyDirectoryException(logPath.toString) + } + } + + /** + * Create a [[Snapshot]] from the given [[LogSegment]]. If failing to create the snapshot, we will + * search an equivalent [[LogSegment]] using a different checkpoint and retry up to + * [[DeltaSQLConf.DELTA_SNAPSHOT_LOADING_MAX_RETRIES]] times. + */ + protected def createSnapshotFromGivenOrEquivalentLogSegment( + initSegment: LogSegment)(snapshotCreator: LogSegment => Snapshot): Snapshot = { + val numRetries = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_SNAPSHOT_LOADING_MAX_RETRIES) + var attempt = 0 + var segment = initSegment + // Remember the first error we hit. If all retries fail, we will throw the first error to + // provide the root cause. We catch `SparkException` because corrupt checkpoint files are + // detected in the executor side when a task is trying to read them. 
+ var firstError: SparkException = null + while (true) { + try { + return snapshotCreator(segment) + } catch { + case e: SparkException if attempt < numRetries && !segment.checkpointProvider.isEmpty => + if (firstError == null) { + firstError = e + } + logWarning(s"Failed to create a snapshot from log segment: $segment. " + + s"Trying a different checkpoint.", e) + segment = getLogSegmentWithMaxExclusiveCheckpointVersion( + segment.version, + segment.checkpointProvider.version).getOrElse { + // Throw the first error if we cannot find an equivalent `LogSegment`. + throw firstError + } + attempt += 1 + case e: SparkException if firstError != null => + logWarning(s"Failed to create a snapshot from log segment: $segment", e) + throw firstError + } + } + throw new IllegalStateException("should not happen") + } + + /** Checks if the given timestamp is outside the current staleness window */ + protected def isCurrentlyStale: Long => Boolean = { + val limit = spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_ASYNC_UPDATE_STALENESS_TIME_LIMIT) + val cutoffOpt = if (limit > 0) Some(math.max(0, clock.getTimeMillis() - limit)) else None + timestamp => cutoffOpt.forall(timestamp < _) + } + + /** + * Get the newest logSegment, using the previous logSegment as a hint. This is faster than + * doing a full update, but it won't work if the table's log directory was replaced. + */ + def getUpdatedLogSegment(oldLogSegment: LogSegment): (LogSegment, Seq[FileStatus]) = { + val newFilesOpt = listDeltaCompactedDeltaAndCheckpointFiles( + startVersion = oldLogSegment.version + 1, + versionToLoad = None, + includeMinorCompactions = spark.conf.get(DeltaSQLConf.DELTALOG_MINOR_COMPACTION_USE_FOR_READS) + ) + val newFiles = newFilesOpt.getOrElse { + // An empty listing likely implies a list-after-write inconsistency or that somebody clobbered + // the Delta log. + return (oldLogSegment, Nil) + } + val allFiles = ( + oldLogSegment.checkpointProvider.topLevelFiles ++ + oldLogSegment.deltas ++ + newFiles + ).toArray + val lastCheckpointInfo = Option.empty[LastCheckpointInfo] + val newLogSegment = getLogSegmentForVersion( + versionToLoad = None, + files = Some(allFiles), + validateLogSegmentWithoutCompactedDeltas = false, + lastCheckpointInfo = lastCheckpointInfo, + oldCheckpointProviderOpt = Some(oldLogSegment.checkpointProvider) + ).getOrElse(oldLogSegment) + val fileStatusesOfConflictingCommits = newFiles.collect { + case DeltaFile(f, v) if v <= newLogSegment.version => f + } + (newLogSegment, fileStatusesOfConflictingCommits) + } + + /** + * Returns the snapshot, if it has been updated since the specified timestamp. + * + * Note that this should be used differently from isSnapshotStale. Staleness is + * used to allow async updates if the table has been updated within the staleness + * window, which allows for better perf in exchange for possibly using a slightly older + * view of the table. For eg, if a table is queried multiple times in quick succession. + * + * On the other hand, getSnapshotIfFresh is used to identify duplicate updates within a + * single transaction. For eg, if a table isn't cached and the snapshot was fetched from the + * logstore, then updating the snapshot again in the same transaction is superfluous. We can + * use this function to detect and skip such an update. 
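+   *
+   * For example (illustrative timestamps): if the cached snapshot was captured at t=1000 and the
+   * caller passes `checkIfUpdatedSinceTs = Some(900)`, the cached snapshot is already fresh
+   * enough and is returned; with `Some(1100)` it is not, so `None` is returned and the caller
+   * proceeds with a real update.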
+ */ + private def getSnapshotIfFresh( + capturedSnapshot: CapturedSnapshot, + checkIfUpdatedSinceTs: Option[Long]): Option[Snapshot] = { + checkIfUpdatedSinceTs.collect { + case ts if ts <= capturedSnapshot.updateTimestamp => capturedSnapshot.snapshot + } + } + + /** + * Update ActionLog by applying the new delta files if any. + * + * @param stalenessAcceptable Whether we can accept working with a stale version of the table. If + * the table has surpassed our staleness tolerance, we will update to + * the latest state of the table synchronously. If staleness is + * acceptable, and the table hasn't passed the staleness tolerance, we + * will kick off a job in the background to update the table state, + * and can return a stale snapshot in the meantime. + * @param checkIfUpdatedSinceTs Skip the update if we've already updated the snapshot since the + * specified timestamp. + */ + def update( + stalenessAcceptable: Boolean = false, + checkIfUpdatedSinceTs: Option[Long] = None): Snapshot = { + val startTimeMs = System.currentTimeMillis() + // currentSnapshot is volatile. Make a local copy of it at the start of the update call, so + // that there's no chance of a race condition changing the snapshot partway through the update. + val capturedSnapshot = currentSnapshot + val oldVersion = capturedSnapshot.snapshot.version + def sendEvent( + newSnapshot: Snapshot, + snapshotAlreadyUpdatedAfterRequiredTimestamp: Boolean = false + ): Unit = { + recordDeltaEvent( + this, + opType = "deltaLog.update", + data = Map( + "snapshotAlreadyUpdatedAfterRequiredTimestamp" -> + snapshotAlreadyUpdatedAfterRequiredTimestamp, + "newVersion" -> newSnapshot.version, + "oldVersion" -> oldVersion, + "timeTakenMs" -> (System.currentTimeMillis() - startTimeMs) + ) + ) + } + // Eagerly exit if the snapshot is already new enough to satisfy the caller + getSnapshotIfFresh(capturedSnapshot, checkIfUpdatedSinceTs).foreach { snapshot => + sendEvent(snapshot, snapshotAlreadyUpdatedAfterRequiredTimestamp = true) + return snapshot + } + val doAsync = stalenessAcceptable && !isCurrentlyStale(capturedSnapshot.updateTimestamp) + if (!doAsync) { + recordFrameProfile("Delta", "SnapshotManagement.update") { + withSnapshotLockInterruptibly { + val newSnapshot = updateInternal(isAsync = false) + sendEvent(newSnapshot = capturedSnapshot.snapshot) + newSnapshot + } + } + } else { + // Kick off an async update, if one is not already obviously running. Intentionally racy. + if (Option(asyncUpdateTask).forall(_.isDone)) { + try { + val jobGroup = spark.sparkContext.getLocalProperty(SparkContext.SPARK_JOB_GROUP_ID) + asyncUpdateTask = SnapshotManagement.deltaLogAsyncUpdateThreadPool.submit(spark) { + spark.sparkContext.setLocalProperty("spark.scheduler.pool", "deltaStateUpdatePool") + spark.sparkContext.setJobGroup( + jobGroup, + s"Updating state of Delta table at ${capturedSnapshot.snapshot.path}", + interruptOnCancel = true) + tryUpdate(isAsync = true) + } + } catch { + case NonFatal(e) if !Utils.isTesting => + // Failed to schedule the future -- fail in testing, but just log it in prod. + recordDeltaEvent(this, "delta.snapshot.asyncUpdateFailed", data = Map("exception" -> e)) + } + } + currentSnapshot.snapshot + } + } + + /** + * Try to update ActionLog. If another thread is updating ActionLog, then this method returns + * at once and return the current snapshot. The return snapshot may be stale. 
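+   *
+   * In other words, at most one thread performs the listing and snapshot installation at a time;
+   * a thread that loses the `tryLock` race simply returns whatever snapshot is currently
+   * installed.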
+ */ + private def tryUpdate(isAsync: Boolean): Snapshot = { + if (snapshotLock.tryLock()) { + try { + updateInternal(isAsync) + } finally { + snapshotLock.unlock() + } + } else { + currentSnapshot.snapshot + } + } + + /** + * Queries the store for new delta files and applies them to the current state. + * Note: the caller should hold `snapshotLock` before calling this method. + */ + protected def updateInternal(isAsync: Boolean): Snapshot = + recordDeltaOperation(this, "delta.log.update", Map(TAG_ASYNC -> isAsync.toString)) { + val updateTimestamp = clock.getTimeMillis() + val previousSnapshot = currentSnapshot.snapshot + val segmentOpt = getLogSegmentForVersion( + oldCheckpointProviderOpt = Some(previousSnapshot.checkpointProvider)) + installLogSegmentInternal(previousSnapshot, segmentOpt, updateTimestamp, isAsync) + } + + /** Install the provided segmentOpt as the currentSnapshot on the cluster */ + protected def installLogSegmentInternal( + previousSnapshot: Snapshot, + segmentOpt: Option[LogSegment], + updateTimestamp: Long, + isAsync: Boolean): Snapshot = { + segmentOpt.map { segment => + if (segment == previousSnapshot.logSegment) { + // If no changes were detected, just refresh the timestamp + val timestampToUse = math.max(updateTimestamp, currentSnapshot.updateTimestamp) + currentSnapshot = currentSnapshot.copy(updateTimestamp = timestampToUse) + } else { + val newSnapshot = createSnapshot( + initSegment = segment, + checksumOpt = None) + logMetadataTableIdChange(previousSnapshot, newSnapshot) + logInfo(s"Updated snapshot to $newSnapshot") + replaceSnapshot(newSnapshot, updateTimestamp) + } + }.getOrElse { + logInfo(s"No delta log found for the Delta table at $logPath") + replaceSnapshot(new InitialSnapshot(logPath, this), updateTimestamp) + } + currentSnapshot.snapshot + } + + /** Replace the given snapshot with the provided one. */ + protected def replaceSnapshot(newSnapshot: Snapshot, updateTimestamp: Long): Unit = { + if (!snapshotLock.isHeldByCurrentThread) { + recordDeltaEvent(this, "delta.update.unsafeReplace") + } + val oldSnapshot = currentSnapshot.snapshot + currentSnapshot = CapturedSnapshot(newSnapshot, updateTimestamp) + oldSnapshot.uncache() + } + + /** Log a change in the metadata's table id whenever we install a newer version of a snapshot */ + private def logMetadataTableIdChange(previousSnapshot: Snapshot, newSnapshot: Snapshot): Unit = { + if (previousSnapshot.version > -1 && + previousSnapshot.metadata.id != newSnapshot.metadata.id) { + val msg = s"Change in the table id detected while updating snapshot. " + + s"\nPrevious snapshot = $previousSnapshot\nNew snapshot = $newSnapshot." + logWarning(msg) + recordDeltaEvent(self, "delta.metadataCheck.update", data = Map( + "prevSnapshotVersion" -> previousSnapshot.version, + "prevSnapshotMetadata" -> previousSnapshot.metadata, + "nextSnapshotVersion" -> newSnapshot.version, + "nextSnapshotMetadata" -> newSnapshot.metadata)) + } + } + + /** + * Creates a snapshot for a new delta commit. + */ + protected def createSnapshotAfterCommit( + initSegment: LogSegment, + newChecksumOpt: Option[VersionChecksum], + committedVersion: Long): Snapshot = { + logInfo(s"Creating a new snapshot v${initSegment.version} for commit version $committedVersion") + createSnapshot( + initSegment, + checksumOpt = newChecksumOpt + ) + } + + /** + * Called after committing a transaction and updating the state of the table. 
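+   * Runs while holding the snapshot lock; if another thread has already installed a snapshot at
+   * or beyond `committedVersion`, that snapshot is returned unchanged.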
+ * + * @param committedVersion the version that was committed + * @param newChecksumOpt the checksum for the new commit, if available. + * Usually None, since the commit would have just finished. + * @param preCommitLogSegment the log segment of the table prior to commit + */ + def updateAfterCommit( + committedVersion: Long, + newChecksumOpt: Option[VersionChecksum], + preCommitLogSegment: LogSegment): Snapshot = withSnapshotLockInterruptibly { + recordDeltaOperation(this, "delta.log.updateAfterCommit") { + val updateTimestamp = clock.getTimeMillis() + val previousSnapshot = currentSnapshot.snapshot + // Somebody else could have already updated the snapshot while we waited for the lock + if (committedVersion <= previousSnapshot.version) return previousSnapshot + val segment = getLogSegmentAfterCommit( + previousSnapshot.checkpointProvider) + + // This likely implies a list-after-write inconsistency + if (segment.version < committedVersion) { + recordDeltaEvent(this, "delta.commit.inconsistentList", data = Map( + "committedVersion" -> committedVersion, + "currentVersion" -> segment.version + )) + throw DeltaErrors.invalidCommittedVersion(committedVersion, segment.version) + } + + val newSnapshot = createSnapshotAfterCommit( + segment, + newChecksumOpt, + committedVersion) + logMetadataTableIdChange(previousSnapshot, newSnapshot) + logInfo(s"Updated snapshot to $newSnapshot") + replaceSnapshot(newSnapshot, updateTimestamp) + currentSnapshot.snapshot + } + } + + /** + * Get the snapshot at `version` using the given `lastCheckpointProvider` hint + * as the listing hint. + */ + private[delta] def getSnapshotAt( + version: Long, + lastCheckpointProvider: CheckpointProvider): Snapshot = { + // See if the version currently cached on the cluster satisfies the requirement + val current = unsafeVolatileSnapshot + if (current.version == version) { + return current + } + if (lastCheckpointProvider.version > version) { + // if the provided lastCheckpointProvider's version is greater than the snapshot that we are + // trying to create => we can't use the provider. + // fallback to the other overload. + return getSnapshotAt(version) + } + val segment = getLogSegmentForVersion( + versionToLoad = Some(version), + oldCheckpointProviderOpt = Some(lastCheckpointProvider) + ).getOrElse { + // We can't return InitialSnapshot because our caller asked for a specific snapshot version. + throw DeltaErrors.emptyDirectoryException(logPath.toString) + } + createSnapshot( + initSegment = segment, + checksumOpt = None) + } + + /** Get the snapshot at `version`. */ + def getSnapshotAt( + version: Long, + lastCheckpointHint: Option[CheckpointInstance] = None): Snapshot = { + // See if the version currently cached on the cluster satisfies the requirement + val current = unsafeVolatileSnapshot + if (current.version == version) { + return current + } + + // Do not use the hint if the version we're asking for is smaller than the last checkpoint hint + val lastCheckpointInfoHint = + lastCheckpointHint + .collect { case ci if ci.version <= version => ci } + .orElse(findLastCompleteCheckpointBefore(version)) + .map(manuallyLoadCheckpoint) + getLogSegmentForVersion( + versionToLoad = Some(version), + lastCheckpointInfo = lastCheckpointInfoHint + ).map { segment => + createSnapshot( + initSegment = segment, + checksumOpt = None) + }.getOrElse { + // We can't return InitialSnapshot because our caller asked for a specific snapshot version. 
+      throw DeltaErrors.emptyDirectoryException(logPath.toString)
+    }
+  }
+}
+
+object SnapshotManagement {
+  // A thread pool for reading checkpoint files and collecting checkpoint v2 actions like
+  // checkpointMetadata, sidecarFiles.
+  private[delta] lazy val checkpointV2ThreadPool = {
+    val numThreads = SparkSession.active.sessionState.conf.getConf(
+      DeltaSQLConf.CHECKPOINT_V2_DRIVER_THREADPOOL_PARALLELISM)
+    DeltaThreadPool("checkpointV2-threadpool", numThreads)
+  }
+
+  protected[delta] lazy val deltaLogAsyncUpdateThreadPool = {
+    val tpe = ThreadUtils.newDaemonCachedThreadPool("delta-state-update", 8)
+    new DeltaThreadPool(tpe)
+  }
+
+  /**
+   * - Verify the versions are contiguous.
+   * - Verify the versions start with `expectedStartVersion` if it's specified.
+   * - Verify the versions end with `expectedEndVersion` if it's specified.
+   */
+  def verifyDeltaVersions(
+      spark: SparkSession,
+      versions: Array[Long],
+      expectedStartVersion: Option[Long],
+      expectedEndVersion: Option[Long]): Unit = {
+    if (versions.nonEmpty) {
+      // Turn this into a vector so that we can compare it with a range.
+      val deltaVersions = versions.toVector
+      if ((deltaVersions.head to deltaVersions.last) != deltaVersions) {
+        throw DeltaErrors.deltaVersionsNotContiguousException(spark, deltaVersions)
+      }
+    }
+    expectedStartVersion.foreach { v =>
+      require(versions.nonEmpty && versions.head == v, "Did not get the first delta " +
+        s"file version: $v to compute Snapshot")
+    }
+    expectedEndVersion.foreach { v =>
+      require(versions.nonEmpty && versions.last == v, "Did not get the last delta " +
+        s"file version: $v to compute Snapshot")
+    }
+  }
+
+  def appendCommitToLogSegment(
+      oldLogSegment: LogSegment,
+      commitFileStatus: FileStatus,
+      committedVersion: Long): LogSegment = {
+    require(oldLogSegment.version + 1 == committedVersion)
+    oldLogSegment.copy(
+      version = committedVersion,
+      deltas = oldLogSegment.deltas :+ commitFileStatus,
+      lastCommitTimestamp = commitFileStatus.getModificationTime)
+  }
+}
+
+/** A serializable variant of HDFS's FileStatus. */
+case class SerializableFileStatus(
+    path: String,
+    length: Long,
+    isDir: Boolean,
+    modificationTime: Long) {
+
+  // Important note! This is very expensive to compute, but we don't want to cache it
+  // as a `val` because Paths internally contain URIs and therefore consume lots of memory.
+  @JsonIgnore
+  def getHadoopPath: Path = new Path(path)
+
+  def toFileStatus: FileStatus = {
+    new FileStatus(length, isDir, 0, 0, modificationTime, new Path(path))
+  }
+
+  override def equals(obj: Any): Boolean = obj match {
+    // We only compare the paths to stay consistent with FileStatus.equals.
+    case other: SerializableFileStatus => Objects.equals(path, other.path)
+    case _ => false
+  }
+
+  // We only use the path to stay consistent with FileStatus.hashCode.
+  override def hashCode(): Int = Objects.hashCode(path)
+}
+
+object SerializableFileStatus {
+  def fromStatus(status: FileStatus): SerializableFileStatus = {
+    SerializableFileStatus(
+      Option(status.getPath).map(_.toString).orNull,
+      status.getLen,
+      status.isDirectory,
+      status.getModificationTime)
+  }
+
+  val EMPTY: SerializableFileStatus = fromStatus(new FileStatus())
+}
+
+/**
+ * Provides information about which files in the transaction log need to be read to create
+ * the given version of the log.
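+ *
+ * For example (hypothetical layout): reading version 12 of a table that has a checkpoint at
+ * version 10 requires the version-10 checkpoint files (via `checkpointProvider`) plus the delta
+ * files `00000000000000000011.json` and `00000000000000000012.json`.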
+ * + * @param logPath The path to the _delta_log directory + * @param version The Snapshot version to generate + * @param deltas The delta commit files (.json) to read + * @param checkpointProvider provider to give information about Checkpoint files. + * @param lastCommitTimestamp The "unadjusted" timestamp of the last commit within this segment. By + * unadjusted, we mean that the commit timestamps may not necessarily be + * monotonically increasing for the commits within this segment. + */ +case class LogSegment( + logPath: Path, + version: Long, + deltas: Seq[FileStatus], + checkpointProvider: UninitializedCheckpointProvider, + lastCommitTimestamp: Long) { + + override def hashCode(): Int = logPath.hashCode() * 31 + (lastCommitTimestamp % 10000).toInt + + /** + * An efficient way to check if a cached Snapshot's contents actually correspond to a new + * segment returned through file listing. + */ + override def equals(obj: Any): Boolean = { + obj match { + case other: LogSegment => + version == other.version && lastCommitTimestamp == other.lastCommitTimestamp && + logPath == other.logPath && checkpointProvider.version == other.checkpointProvider.version + case _ => false + } + } +} + +object LogSegment { + + def apply( + logPath: Path, + version: Long, + deltas: Seq[FileStatus], + checkpointProviderOpt: Option[UninitializedCheckpointProvider], + lastCommitTimestamp: Long): LogSegment = { + val checkpointProvider = checkpointProviderOpt.getOrElse(EmptyCheckpointProvider) + LogSegment(logPath, version, deltas, checkpointProvider, lastCommitTimestamp) + } + + /** The LogSegment for an empty transaction log directory. */ + def empty(path: Path): LogSegment = LogSegment( + logPath = path, + version = -1L, + deltas = Nil, + checkpointProviderOpt = None, + lastCommitTimestamp = -1L) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/SnapshotState.scala b/spark/src/main/scala/org/apache/spark/sql/delta/SnapshotState.scala new file mode 100644 index 00000000000..1c0e2334065 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/SnapshotState.scala @@ -0,0 +1,188 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.actions.{Metadata, Protocol, SetTransaction} +import org.apache.spark.sql.delta.actions.DomainMetadata +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.FileSizeHistogram + +import org.apache.spark.sql.{Column, DataFrame} +import org.apache.spark.sql.functions.{coalesce, col, collect_set, count, last, lit, sum} +import org.apache.spark.util.Utils + + +/** + * Metrics and metadata computed around the Delta table. + * + * @param sizeInBytes The total size of the table (of active files, not including tombstones). 
+ * @param numOfSetTransactions Number of streams writing to this table. + * @param numOfFiles The number of files in this table. + * @param numOfRemoves The number of tombstones in the state. + * @param numOfMetadata The number of metadata actions in the state. Should be 1. + * @param numOfProtocol The number of protocol actions in the state. Should be 1. + * @param setTransactions The streaming queries writing to this table. + * @param metadata The metadata of the table. + * @param protocol The protocol version of the Delta table. + * @param fileSizeHistogram A Histogram class tracking the file counts and total bytes + * in different size ranges. + */ +case class SnapshotState( + sizeInBytes: Long, + numOfSetTransactions: Long, + numOfFiles: Long, + numOfRemoves: Long, + numOfMetadata: Long, + numOfProtocol: Long, + setTransactions: Seq[SetTransaction], + domainMetadata: Seq[DomainMetadata], + metadata: Metadata, + protocol: Protocol, + fileSizeHistogram: Option[FileSizeHistogram] = None +) + +/** + * A helper class that manages the SnapshotState for a given snapshot. Will generate it only + * when necessary. + */ +trait SnapshotStateManager extends DeltaLogging { self: Snapshot => + + // For implicits which re-use Encoder: + import implicits._ + + /** A map to look up transaction version by appId. */ + lazy val transactions: Map[String, Long] = setTransactions.map(t => t.appId -> t.version).toMap + + /** + * Compute the SnapshotState of a table. Uses the stateDF from the Snapshot to extract + * the necessary stats. + */ + protected lazy val computedState: SnapshotState = { + withStatusCode("DELTA", s"Compute snapshot for version: $version") { + recordFrameProfile("Delta", "snapshot.computedState") { + val startTime = System.nanoTime() + val _computedState = extractComputedState(stateDF) + if (_computedState.protocol == null) { + recordDeltaEvent( + deltaLog, + opType = "delta.assertions.missingAction", + data = Map( + "version" -> version.toString, "action" -> "Protocol", "source" -> "Snapshot")) + throw DeltaErrors.actionNotFoundException("protocol", version) + } else if (_computedState.protocol != protocol) { + recordDeltaEvent( + deltaLog, + opType = "delta.assertions.mismatchedAction", + data = Map( + "version" -> version.toString, "action" -> "Protocol", "source" -> "Snapshot", + "computedState.protocol" -> _computedState.protocol, + "extracted.protocol" -> protocol)) + throw DeltaErrors.actionNotFoundException("protocol", version) + } + + if (_computedState.metadata == null) { + recordDeltaEvent( + deltaLog, + opType = "delta.assertions.missingAction", + data = Map( + "version" -> version.toString, "action" -> "Metadata", "source" -> "Metadata")) + throw DeltaErrors.actionNotFoundException("metadata", version) + } else if (_computedState.metadata != metadata) { + recordDeltaEvent( + deltaLog, + opType = "delta.assertions.mismatchedAction", + data = Map( + "version" -> version.toString, "action" -> "Metadata", "source" -> "Snapshot", + "computedState.metadata" -> _computedState.metadata, + "extracted.metadata" -> metadata)) + throw DeltaErrors.actionNotFoundException("metadata", version) + } + + _computedState + } + } + } + + /** + * Extract the SnapshotState from the provided dataframe of actions. Requires that the dataframe + * has already been deduplicated (either through logReplay or some other method). 
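+   *
+   * The entire [[SnapshotState]] is produced by a single aggregation pass over the actions (see
+   * [[aggregationsToComputeState]]) rather than by one query per statistic.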
+ */ + protected def extractComputedState(stateDF: DataFrame): SnapshotState = { + recordFrameProfile("Delta", "snapshot.computedState.aggregations") { + val aggregations = + aggregationsToComputeState.map { case (alias, agg) => agg.as(alias) }.toSeq + stateDF.select(aggregations: _*).as[SnapshotState].first() + } + } + + /** + * A Map of alias to aggregations which needs to be done to calculate the `computedState` + */ + protected def aggregationsToComputeState: Map[String, Column] = { + Map( + // sum may return null for empty data set. + "sizeInBytes" -> coalesce(sum(col("add.size")), lit(0L)), + "numOfSetTransactions" -> count(col("txn")), + "numOfFiles" -> count(col("add")), + "numOfRemoves" -> count(col("remove")), + "numOfMetadata" -> count(col("metaData")), + "numOfProtocol" -> count(col("protocol")), + "setTransactions" -> collect_set(col("txn")), + "domainMetadata" -> collect_set(col("domainMetadata")), + "metadata" -> last(col("metaData"), ignoreNulls = true), + "protocol" -> last(col("protocol"), ignoreNulls = true), + "fileSizeHistogram" -> lit(null).cast(FileSizeHistogram.schema) + ) + } + + /** + * The following is a list of convenience methods for accessing the computedState. + */ + def sizeInBytes: Long = computedState.sizeInBytes + def numOfSetTransactions: Long = computedState.numOfSetTransactions + def numOfFiles: Long = computedState.numOfFiles + def numOfRemoves: Long = computedState.numOfRemoves + def numOfMetadata: Long = computedState.numOfMetadata + def numOfProtocol: Long = computedState.numOfProtocol + def setTransactions: Seq[SetTransaction] = computedState.setTransactions + def fileSizeHistogram: Option[FileSizeHistogram] = computedState.fileSizeHistogram + def domainMetadata: Seq[DomainMetadata] = computedState.domainMetadata + protected[delta] def sizeInBytesIfKnown: Option[Long] = Some(sizeInBytes) + protected[delta] def setTransactionsIfKnown: Option[Seq[SetTransaction]] = Some(setTransactions) + protected[delta] def numOfFilesIfKnown: Option[Long] = Some(numOfFiles) + protected[delta] def domainMetadatasIfKnown: Option[Seq[DomainMetadata]] = Some(domainMetadata) + + /** Generate a default SnapshotState of a new table, given the table metadata */ + protected def initialState(metadata: Metadata): SnapshotState = { + val protocol = Protocol.forNewTable(spark, Some(metadata)) + + SnapshotState( + sizeInBytes = 0L, + numOfSetTransactions = 0L, + numOfFiles = 0L, + numOfRemoves = 0L, + numOfMetadata = 1L, + numOfProtocol = 1L, + setTransactions = Nil, + domainMetadata = Nil, + metadata = metadata, + protocol = protocol + ) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/SubqueryTransformerHelper.scala b/spark/src/main/scala/org/apache/spark/sql/delta/SubqueryTransformerHelper.scala new file mode 100644 index 00000000000..534536e10c8 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/SubqueryTransformerHelper.scala @@ -0,0 +1,61 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta
+
+import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery, SupportsSubquery}
+
+/**
+ * Trait to allow processing a special transformation of [[SubqueryExpression]]
+ * instances in a query plan.
+ */
+trait SubqueryTransformerHelper {
+
+  /**
+   * Transform all nodes matched by the rule in the query plan rooted at the given `plan`.
+   * It traverses the tree starting from the leaves; whenever a [[SubqueryExpression]] is
+   * encountered, the given [[rule]] is applied to the subquery plan inside that
+   * [[SubqueryExpression]], from its root down to its leaves.
+   *
+   * This is slightly different behavior compared to [[QueryPlan.transformUpWithSubqueries]]
+   * or [[QueryPlan.transformDownWithSubqueries]].
+   *
+   * It requires that the given plan has already gone through [[OptimizeSubqueries]] and that the
+   * root node denoting a subquery has been removed and optimized appropriately.
+   */
+  def transformWithSubqueries(plan: LogicalPlan)
+      (rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = {
+    require(!isSubqueryRoot(plan))
+    transformSubqueries(plan, rule) transform (rule)
+  }
+
+  /** Is the given plan a subquery root? */
+  def isSubqueryRoot(plan: LogicalPlan): Boolean = {
+    plan.isInstanceOf[Subquery] || plan.isInstanceOf[SupportsSubquery]
+  }
+
+  private def transformSubqueries(
+      plan: LogicalPlan,
+      rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = {
+    import org.apache.spark.sql.delta.implicits._
+
+    plan transformAllExpressionsUp {
+      case subquery: SubqueryExpression =>
+        subquery.withNewPlan(transformWithSubqueries(subquery.plan)(rule))
+    }
+  }
+}
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/SupportedGenerationExpressions.scala b/spark/src/main/scala/org/apache/spark/sql/delta/SupportedGenerationExpressions.scala
new file mode 100644
index 00000000000..088b5a22bc3
--- /dev/null
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/SupportedGenerationExpressions.scala
@@ -0,0 +1,334 @@
+/*
+ * Copyright (2021) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.xml._
+
+/** This class defines the list of expressions that can be used in a generated column. */
+object SupportedGenerationExpressions {
+
+  /**
+   * This method has the same signature as `FunctionRegistry.expression` so that we can define the
+   * list in the same format as `FunctionRegistry.expressions`, which makes it easy to diff.
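+   *
+   * Note that only the runtime class is recorded; the `name` and `setAlias` arguments are ignored
+   * here and exist only so that entries can be copied verbatim from `FunctionRegistry`.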
+ */ + private def expression[T <: Expression : ClassTag]( + name: String, + setAlias: Boolean = false): Class[_] = { + implicitly[ClassTag[T]].runtimeClass + } + + // scalastyle:off + /** + * The white list is copied from `FunctionRegistry.expressions()` except the following types of + * functions: + * - explode functions. In other words, generate multiple rows from one row. + * - aggerate functions. + * - window functions. + * - grouping sets. + * - non deterministic functions. + * - deterministic functions in one query but non deterministic in multiple queries, + * such as, current_timestamp, rand, etc. + * + * To review the difference, you can run + * `diff sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala sql/core/src/main/scala/com/databricks/sql/transaction/SupportedGenerationExpression.scala` + */ + // scalastyle:on + val expressions: Set[Class[_]] = Set( + // misc non-aggregate functions + expression[Abs]("abs"), + expression[Coalesce]("coalesce"), + expression[Greatest]("greatest"), + expression[If]("if"), + expression[If]("iff", true), + expression[IsNaN]("isnan"), + expression[Nvl]("ifnull", true), + expression[IsNull]("isnull"), + expression[IsNotNull]("isnotnull"), + expression[Least]("least"), + expression[NaNvl]("nanvl"), + expression[NullIf]("nullif"), + expression[Nvl]("nvl"), + expression[Nvl2]("nvl2"), + expression[CaseWhen]("when"), + + // math functions + expression[Acos]("acos"), + expression[Acosh]("acosh"), + expression[Asin]("asin"), + expression[Asinh]("asinh"), + expression[Atan]("atan"), + expression[Atan2]("atan2"), + expression[Atanh]("atanh"), + expression[Bin]("bin"), + expression[BRound]("bround"), + expression[Cbrt]("cbrt"), + expression[Ceil]("ceil"), + expression[Ceil]("ceiling", true), + expression[Cos]("cos"), + expression[Cosh]("cosh"), + expression[Conv]("conv"), + expression[ToDegrees]("degrees"), + expression[EulerNumber]("e"), + expression[Exp]("exp"), + expression[Expm1]("expm1"), + expression[Floor]("floor"), + expression[Factorial]("factorial"), + expression[Hex]("hex"), + expression[Hypot]("hypot"), + expression[Logarithm]("log"), + expression[Log10]("log10"), + expression[Log1p]("log1p"), + expression[Log2]("log2"), + expression[Log]("ln"), + expression[Remainder]("mod", true), + expression[UnaryMinus]("negative", true), + expression[Pi]("pi"), + expression[Pmod]("pmod"), + expression[UnaryPositive]("positive"), + expression[Pow]("pow", true), + expression[Pow]("power"), + expression[ToRadians]("radians"), + expression[Rint]("rint"), + expression[Round]("round"), + expression[ShiftLeft]("shiftleft"), + expression[ShiftRight]("shiftright"), + expression[ShiftRightUnsigned]("shiftrightunsigned"), + expression[Signum]("sign", true), + expression[Signum]("signum"), + expression[Sin]("sin"), + expression[Sinh]("sinh"), + expression[StringToMap]("str_to_map"), + expression[Sqrt]("sqrt"), + expression[Tan]("tan"), + expression[Cot]("cot"), + expression[Tanh]("tanh"), + + expression[Add]("+"), + expression[Subtract]("-"), + expression[Multiply]("*"), + expression[Divide]("/"), + expression[IntegralDivide]("div"), + expression[Remainder]("%"), + + // string functions + expression[Ascii]("ascii"), + expression[Chr]("char", true), + expression[Chr]("chr"), + expression[Base64]("base64"), + expression[BitLength]("bit_length"), + expression[Length]("char_length", true), + expression[Length]("character_length", true), + expression[ConcatWs]("concat_ws"), + expression[Decode]("decode"), + expression[Elt]("elt"), + 
expression[Encode]("encode"), + expression[FindInSet]("find_in_set"), + expression[FormatNumber]("format_number"), + expression[FormatString]("format_string"), + expression[GetJsonObject]("get_json_object"), + expression[InitCap]("initcap"), + expression[StringInstr]("instr"), + expression[Lower]("lcase", true), + expression[Length]("length"), + expression[Levenshtein]("levenshtein"), + expression[Like]("like"), + expression[Lower]("lower"), + expression[OctetLength]("octet_length"), + expression[StringLocate]("locate"), + expression[StringLPad]("lpad"), + expression[StringTrimLeft]("ltrim"), + expression[JsonTuple]("json_tuple"), + expression[ParseUrl]("parse_url"), + expression[StringLocate]("position", true), + expression[StringLocate]("charindex", true), + expression[FormatString]("printf", true), + expression[RegExpExtract]("regexp_extract"), + expression[RegExpReplace]("regexp_replace"), + expression[RLike]("regexp_like", true), + expression[StringRepeat]("repeat"), + expression[StringReplace]("replace"), + expression[Overlay]("overlay"), + expression[RLike]("rlike"), + expression[StringRPad]("rpad"), + expression[StringTrimRight]("rtrim"), + expression[Sentences]("sentences"), + expression[SoundEx]("soundex"), + expression[StringSpace]("space"), + expression[StringSplit]("split"), + expression[Substring]("substr", true), + expression[Substring]("substring"), + expression[Left]("left"), + expression[Right]("right"), + expression[SubstringIndex]("substring_index"), + expression[StringTranslate]("translate"), + expression[StringTrim]("trim"), + expression[Upper]("ucase", true), + expression[UnBase64]("unbase64"), + expression[Unhex]("unhex"), + expression[Upper]("upper"), + expression[XPathList]("xpath"), + expression[XPathBoolean]("xpath_boolean"), + expression[XPathDouble]("xpath_double"), + expression[XPathDouble]("xpath_number", true), + expression[XPathFloat]("xpath_float"), + expression[XPathInt]("xpath_int"), + expression[XPathLong]("xpath_long"), + expression[XPathShort]("xpath_short"), + expression[XPathString]("xpath_string"), + + // datetime functions + expression[AddMonths]("add_months"), + expression[DateDiff]("datediff"), + expression[DateAdd]("date_add"), + expression[DateFormatClass]("date_format"), + expression[DateSub]("date_sub"), + expression[DayOfMonth]("day", true), + expression[DayOfYear]("dayofyear"), + expression[DayOfMonth]("dayofmonth"), + expression[FromUnixTime]("from_unixtime"), + expression[FromUTCTimestamp]("from_utc_timestamp"), + expression[Hour]("hour"), + expression[LastDay]("last_day"), + expression[Minute]("minute"), + expression[Month]("month"), + expression[MonthsBetween]("months_between"), + expression[NextDay]("next_day"), + expression[Now]("now"), + expression[Quarter]("quarter"), + expression[Second]("second"), + expression[ParseToTimestamp]("to_timestamp"), + expression[ParseToDate]("to_date"), + // `gettimestamp` is not a Spark built-in class but `ParseToDate` will refer to + // `gettimestamp` when a format is given, so it needs to be on the allowed list + expression[GetTimestamp]("gettimestamp"), + expression[ToUnixTimestamp]("to_unix_timestamp"), + expression[ToUTCTimestamp]("to_utc_timestamp"), + expression[TruncDate]("trunc"), + expression[TruncTimestamp]("date_trunc"), + expression[UnixTimestamp]("unix_timestamp"), + expression[DayOfWeek]("dayofweek"), + expression[WeekDay]("weekday"), + expression[WeekOfYear]("weekofyear"), + expression[Year]("year"), + expression[TimeWindow]("window"), + expression[MakeDate]("make_date"), + 
expression[MakeTimestamp]("make_timestamp"), + expression[MakeInterval]("make_interval"), + expression[Extract]("date_part", setAlias = true), + expression[Extract]("extract"), + + // collection functions + expression[CreateArray]("array"), + expression[ArrayContains]("array_contains"), + expression[ArraysOverlap]("arrays_overlap"), + expression[ArrayIntersect]("array_intersect"), + expression[ArrayJoin]("array_join"), + expression[ArrayPosition]("array_position"), + expression[ArraySort]("array_sort"), + expression[ArrayExcept]("array_except"), + expression[ArrayUnion]("array_union"), + expression[CreateMap]("map"), + expression[CreateNamedStruct]("named_struct"), + expression[ElementAt]("element_at"), + expression[MapFromArrays]("map_from_arrays"), + expression[MapKeys]("map_keys"), + expression[MapValues]("map_values"), + expression[MapEntries]("map_entries"), + expression[MapFromEntries]("map_from_entries"), + expression[MapConcat]("map_concat"), + expression[Size]("size"), + expression[Slice]("slice"), + expression[Size]("cardinality", true), + expression[ArraysZip]("arrays_zip"), + expression[SortArray]("sort_array"), + expression[ArrayMin]("array_min"), + expression[ArrayMax]("array_max"), + expression[Reverse]("reverse"), + expression[Concat]("concat"), + expression[Flatten]("flatten"), + expression[Sequence]("sequence"), + expression[ArrayRepeat]("array_repeat"), + expression[ArrayRemove]("array_remove"), + expression[ArrayDistinct]("array_distinct"), + expression[ArrayTransform]("transform"), + expression[MapFilter]("map_filter"), + expression[ArrayFilter]("filter"), + expression[ArrayExists]("exists"), + expression[ArrayForAll]("forall"), + expression[ArrayAggregate]("aggregate"), + expression[ArrayAggregate]("reduce"), + expression[TransformValues]("transform_values"), + expression[TransformKeys]("transform_keys"), + expression[MapZipWith]("map_zip_with"), + expression[ZipWith]("zip_with"), + + // misc functions + expression[AssertTrue]("assert_true"), + expression[Crc32]("crc32"), + expression[Md5]("md5"), + expression[Murmur3Hash]("hash"), + expression[XxHash64]("xxhash64"), + expression[Sha1]("sha", true), + expression[Sha1]("sha1"), + expression[Sha2]("sha2"), + expression[TypeOf]("typeof"), + + // predicates + expression[And]("and"), + expression[In]("in"), + expression[Not]("not"), + expression[Or]("or"), + + // comparison operators + expression[EqualNullSafe]("<=>"), + expression[EqualTo]("="), + expression[EqualTo]("=="), + expression[GreaterThan](">"), + expression[GreaterThanOrEqual](">="), + expression[LessThan]("<"), + expression[LessThanOrEqual]("<="), + expression[Not]("!"), + + // bitwise + expression[BitwiseAnd]("&"), + expression[BitwiseNot]("~"), + expression[BitwiseOr]("|"), + expression[BitwiseXor]("^"), + expression[BitwiseCount]("bit_count"), + + // json + expression[StructsToJson]("to_json"), + expression[JsonToStructs]("from_json"), + expression[SchemaOfJson]("schema_of_json"), + + // cast + expression[Cast]("cast"), + // We don't need to define `castAlias` since they will use the same `Cast` expression. + + // csv + expression[CsvToStructs]("from_csv"), + expression[SchemaOfCsv]("schema_of_csv"), + expression[StructsToCsv]("to_csv"), + + // Special expressions that are not built-in expressions. 
+ expression[AttributeReference]("col"), + expression[Literal]("lit") + ) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/TableFeature.scala b/spark/src/main/scala/org/apache/spark/sql/delta/TableFeature.scala new file mode 100644 index 00000000000..0f997688237 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/TableFeature.scala @@ -0,0 +1,787 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.util.Locale + +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.constraints.{Constraints, Invariants} +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.{Utils => DeltaUtils} +import org.apache.spark.sql.delta.util.FileNames + +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.types.TimestampNTZType + +/* --------------------------------------- * + | Table features base class definitions | + * --------------------------------------- */ + +/** + * A base class for all table features. + * + * A feature can be explicitly supported by a table's protocol when the protocol contains a + * feature's `name`. Writers (for writer-only features) or readers and writers (for reader-writer + * features) must recognize supported features and must handle them appropriately. + * + * A table feature that released before Delta Table Features (reader version 3 and writer version + * 7) is considered as a legacy feature. Legacy features are implicitly supported + * when (a) the protocol does not support table features, i.e., has reader version less than 3 or + * writer version less than 7 and (b) the feature's minimum reader/writer version is less than or + * equal to the current protocol's reader/writer version. + * + * Separately, a feature can be automatically supported by a table's metadata when certain + * feature-specific table properties are set. For example, `changeDataFeed` is automatically + * supported when there's a table property `delta.enableChangeDataFeed=true`. This is independent + * of the table's enabled features. When a feature is supported (explicitly or implicitly) by the + * table protocol but its metadata requirements are not satisfied, then clients still have to + * understand the feature (at least to the extent that they can read and preserve the existing + * data in the table that uses the feature). See the documentation of + * [[FeatureAutomaticallyEnabledByMetadata]] for more information. + * + * @param name + * a globally-unique string indicator to represent the feature. All characters must be letters + * (a-z, A-Z), digits (0-9), '-', or '_'. Words must be in camelCase. + * @param minReaderVersion + * the minimum reader version this feature requires. 
For a feature that can only be explicitly
+ *   supported, this is either `0` or `3` (the reader protocol version that supports table
+ *   features), depending on whether the feature is writer-only or reader-writer. For a legacy
+ *   feature that can be implicitly supported, this is the first protocol version in which the
+ *   feature was introduced.
+ * @param minWriterVersion
+ *   the minimum writer version this feature requires. For a feature that can only be explicitly
+ *   supported, this is the writer protocol `7` that supports table features. For a legacy feature
+ *   that can be implicitly supported, this is the first protocol version in which the feature was
+ *   introduced.
+ */
+// @TODO: distinguish Delta and 3rd-party features and give appropriate error messages
+sealed abstract class TableFeature(
+    val name: String,
+    val minReaderVersion: Int,
+    val minWriterVersion: Int) extends java.io.Serializable {
+
+  require(name.forall(c => c.isLetterOrDigit || c == '-' || c == '_'))
+
+  /**
+   * Get a [[Protocol]] object stating the minimum reader and writer versions this feature
+   * requires. For a feature that can only be explicitly supported, this method returns a protocol
+   * version that supports table features, either `(0,7)` or `(3,7)` depending on whether the
+   * feature is writer-only or reader-writer. For a legacy feature that can be implicitly
+   * supported, this method returns the first protocol version that introduced the feature.
+   *
+   * For all features, if the table's protocol version does not support table features, then the
+   * minimum protocol version is enough. However, if the protocol version supports table features
+   * for the feature type (writer-only or reader-writer), then the minimum protocol version is not
+   * enough to support a feature. In this case the feature must also be explicitly listed in the
+   * appropriate feature sets in the [[Protocol]].
+   */
+  def minProtocolVersion: Protocol = Protocol(minReaderVersion, minWriterVersion)
+
+  /** Determine if this feature applies to both readers and writers. */
+  def isReaderWriterFeature: Boolean = this.isInstanceOf[ReaderWriterFeatureType]
+
+  /**
+   * Determine if this feature is a legacy feature. See the documentation of [[TableFeature]] for
+   * more information.
+   */
+  def isLegacyFeature: Boolean = this.isInstanceOf[LegacyFeatureType]
+
+  /**
+   * True if this feature can be removed.
+   */
+  def isRemovable: Boolean = this.isInstanceOf[RemovableFeature]
+
+  /**
+   * Set of table features that this table feature depends on, i.e., the set of features that need
+   * to be enabled if this table feature is enabled.
+   */
+  def requiredFeatures: Set[TableFeature] = Set.empty
+}
+
+/** A trait to indicate a feature applies to readers and writers. */
+sealed trait ReaderWriterFeatureType
+
+/** A trait to indicate a feature is legacy, i.e., released before Table Features. */
+sealed trait LegacyFeatureType
+
+/**
+ * A trait indicating this feature can be automatically enabled via a change in a table's
+ * metadata, e.g., through setting particular values of certain feature-specific table properties.
+ *
+ * When the feature's metadata requirements are satisfied for new tables, or for
+ * existing tables when [[automaticallyUpdateProtocolOfExistingTables]] is set to `true`, the
+ * client will silently add the feature to the protocol's `readerFeatures` and/or
+ * `writerFeatures`. Otherwise, a proper protocol version bump must be present in the same
+ * transaction.
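+ *
+ * For example, per the [[TableFeature]] documentation above, setting the table property
+ * `delta.enableChangeDataFeed=true` satisfies the metadata requirements of `changeDataFeed`; a
+ * writer that observes this property must then also ensure the feature is supported by the
+ * table's protocol, upgrading the protocol automatically where this trait allows it.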
+ */ +sealed trait FeatureAutomaticallyEnabledByMetadata { this: TableFeature => + + /** + * Whether the feature can automatically update the protocol of an existing table when the + * metadata requirements are satisfied. As a rule of thumb, a table feature that requires + * explicit operations (e.g., turning on a table property) should set this flag to `true`, while + * features that are used implicitly (e.g., when using a new data type) should set this flag to + * `false`. + */ + def automaticallyUpdateProtocolOfExistingTables: Boolean = this.isLegacyFeature + + /** + * Determine whether the feature must be supported and enabled because its metadata requirements + * are satisfied. + */ + def metadataRequiresFeatureToBeEnabled(metadata: Metadata, spark: SparkSession): Boolean + + require( + !this.isLegacyFeature || automaticallyUpdateProtocolOfExistingTables, + "Legacy feature must be auto-update capable.") +} + +/** + * A trait indicating a feature can be removed. Classes that extend the trait need to + * implement the following three functions: + * + * a) preDowngradeCommand. This is where all required actions for removing the feature are + * implemented. For example, to remove the DVs feature we need to remove metadata config + * and purge all DVs from table. This action takes place before the protocol downgrade in + * separate commit(s). Note, the command needs to be implemented in a way concurrent + * transactions do not nullify the effect. For example, disabling DVs on a table before + * purging will stop concurrent transactions from adding DVs. During protocol downgrade + * we perform a validation in [[validateRemoval]] to make sure all invariants still hold. + * + * b) validateRemoval. Add any feature-specific checks before proceeding to the protocol + * downgrade. This function is guaranteed to be called at the latest version before the + * protocol downgrade is committed to the table. When the protocol downgrade txn conflicts, + * the validation is repeated against the winning txn snapshot. As soon as the protocol + * downgrade succeeds, all subsequent interleaved txns are aborted. + * + * c) actionUsesFeature. For reader+writer features we check whether past versions contain any + * traces of the removed feature. This is achieved by calling [[actionUsesFeature]] for + * every action of every reachable commit version in the log. Note, a feature may leave traces + * in both data and metadata. Depending on the feature, we need to check several types of + * actions such as Metadata, AddFile, RemoveFile etc. + * Writer features should directly return false. + * + * WARNING: actionUsesFeature should not check Protocol actions for the feature being removed, + * because at the time actionUsesFeature is invoked the protocol downgrade did not happen yet. + * Thus, the feature-to-remove is still active. As a result, any unrelated operations that + * produce a protocol action (while we are waiting for the retention period to expire) will + * "carry" the feature-to-remove. Checking protocol for that feature would result in an + * unnecessary failure during the history validation of the next DROP FEATURE call. Note, + * while the feature-to-remove is supported in the protocol we cannot generate a legit protocol + * action that adds support for that feature since it is already supported. 
+ */ +sealed trait RemovableFeature { self: TableFeature => + def preDowngradeCommand(table: DeltaTableV2): PreDowngradeTableFeatureCommand + def validateRemoval(snapshot: Snapshot): Boolean + def actionUsesFeature(action: Action): Boolean + + /** + * Examines all historical commits for traces of the removableFeature. + * This is achieved as follows: + * + * 1) We find the earliest valid checkpoint, recreate a snapshot at that version and we check + * whether there any traces of the feature-to-remove. + * 2) We check all commits that exist between version 0 and the current version. + * This includes the versions we validated the snapshots. This is because a commit + * might include information that is not available in the snapshot. Examples include + * CommitInfo, CDCInfo etc. Note, there can still be valid log commit files with + * versions prior the earliest checkpoint version. + * 3) We do not need to recreate a snapshot at the current version because this is already being + * handled by validateRemoval. + * + * Note, this is a slow process. + * + * @param spark The SparkSession. + * @param downgradeTxnReadSnapshot The read snapshot of the protocol downgrade transaction. + * @return True if the history contains any trace of the feature. + */ + def historyContainsFeature( + spark: SparkSession, + downgradeTxnReadSnapshot: Snapshot): Boolean = { + require(isReaderWriterFeature) + val deltaLog = downgradeTxnReadSnapshot.deltaLog + val earliestCheckpointVersion = deltaLog.findEarliestReliableCheckpoint.getOrElse(0L) + val toVersion = downgradeTxnReadSnapshot.version + + // Use the snapshot at earliestCheckpointVersion to validate the checkpoint identified by + // findEarliestReliableCheckpoint. + val earliestSnapshot = deltaLog.getSnapshotAt(earliestCheckpointVersion) + + // Tombstones may contain traces of the removed feature. The earliest snapshot will include + // all tombstones within the tombstoneRetentionPeriod. This may disallow protocol downgrade + // because the log retention period is not aligned with the tombstoneRetentionPeriod. + // To resolve this issue, we filter out all tombstones from the earliest checkpoint. + // Tombstones at the earliest checkpoint should be irrelevant and should not be an + // issue for readers that do not support the feature. + if (containsFeatureTraces(earliestSnapshot.stateDS.filter("remove is null"))) { + return true + } + + // Check if commits between 0 version and toVersion contain any traces of the feature. + val allHistoricalDeltaFiles = deltaLog + .listFrom(0L) + .takeWhile(file => FileNames.getFileVersionOpt(file.getPath).forall(_ <= toVersion)) + .filter(FileNames.isDeltaFile) + .toSeq + DeltaLogFileIndex(DeltaLogFileIndex.COMMIT_FILE_FORMAT, allHistoricalDeltaFiles) + .exists(i => containsFeatureTraces(deltaLog.loadIndex(i, Action.logSchema).as[SingleAction])) + } + + /** Returns whether a dataset of actions contains any trace of this feature. */ + private def containsFeatureTraces(ds: Dataset[SingleAction]): Boolean = { + import org.apache.spark.sql.delta.implicits._ + ds.mapPartitions { actions => + actions + .map(_.unwrap) + .collectFirst { case a if actionUsesFeature(a) => true } + .toIterator + }.take(1).nonEmpty + } +} + +/** + * A base class for all writer-only table features that can only be explicitly supported. + * + * @param name + * a globally-unique string indicator to represent the feature. All characters must be letters + * (a-z, A-Z), digits (0-9), '-', or '_'. Words must be in camelCase. 
+ */ +sealed abstract class WriterFeature(name: String) + extends TableFeature( + name, + minReaderVersion = 0, + minWriterVersion = TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION) + +/** + * A base class for all reader-writer table features that can only be explicitly supported. + * + * @param name + * a globally-unique string indicator to represent the feature. All characters must be letters + * (a-z, A-Z), digits (0-9), '-', or '_'. Words must be in camelCase. + */ +sealed abstract class ReaderWriterFeature(name: String) + extends WriterFeature(name) + with ReaderWriterFeatureType { + override val minReaderVersion: Int = TableFeatureProtocolUtils.TABLE_FEATURES_MIN_READER_VERSION +} + +/** + * A base class for all table legacy writer-only features. + * + * @param name + * a globally-unique string indicator to represent the feature. Allowed characters are letters + * (a-z, A-Z), digits (0-9), '-', and '_'. Words must be in camelCase. + * @param minWriterVersion + * the minimum writer protocol version that supports this feature. + */ +sealed abstract class LegacyWriterFeature(name: String, minWriterVersion: Int) + extends TableFeature(name, minReaderVersion = 0, minWriterVersion = minWriterVersion) + with LegacyFeatureType + +/** + * A base class for all legacy writer-only table features. + * + * @param name + * a globally-unique string indicator to represent the feature. Allowed characters are letters + * (a-z, A-Z), digits (0-9), '-', and '_'. Words must be in camelCase. + * @param minReaderVersion + * the minimum reader protocol version that supports this feature. + * @param minWriterVersion + * the minimum writer protocol version that supports this feature. + */ +sealed abstract class LegacyReaderWriterFeature( + name: String, + override val minReaderVersion: Int, + minWriterVersion: Int) + extends LegacyWriterFeature(name, minWriterVersion) + with ReaderWriterFeatureType + +object TableFeature { + /** + * All table features recognized by this client. Update this set when you added a new Table + * Feature. + * + * Warning: Do not call `get` on this Map to get a specific feature because keys in this map are + * in lower cases. Use [[featureNameToFeature]] instead. + */ + private[delta] val allSupportedFeaturesMap: Map[String, TableFeature] = { + var features: Set[TableFeature] = Set( + AllowColumnDefaultsTableFeature, + AppendOnlyTableFeature, + ChangeDataFeedTableFeature, + CheckConstraintsTableFeature, + ClusteringTableFeature, + DomainMetadataTableFeature, + GeneratedColumnsTableFeature, + InvariantsTableFeature, + ColumnMappingTableFeature, + TimestampNTZTableFeature, + IcebergCompatV1TableFeature, + IcebergCompatV2TableFeature, + DeletionVectorsTableFeature, + V2CheckpointTableFeature) + if (DeltaUtils.isTesting) { + features ++= Set( + TestLegacyWriterFeature, + TestLegacyReaderWriterFeature, + TestWriterFeature, + TestWriterMetadataNoAutoUpdateFeature, + TestReaderWriterFeature, + TestReaderWriterMetadataAutoUpdateFeature, + TestReaderWriterMetadataNoAutoUpdateFeature, + TestRemovableWriterFeature, + TestRemovableLegacyWriterFeature, + TestRemovableReaderWriterFeature, + TestRemovableLegacyReaderWriterFeature, + TestFeatureWithDependency, + TestFeatureWithTransitiveDependency, + TestWriterFeatureWithTransitiveDependency, + // Row IDs are still under development and only available in testing. 
+ RowTrackingFeature) + } + val featureMap = features.map(f => f.name.toLowerCase(Locale.ROOT) -> f).toMap + require(features.size == featureMap.size, "Lowercase feature names must not duplicate.") + featureMap + } + + /** Get a [[TableFeature]] object by its name. */ + def featureNameToFeature(featureName: String): Option[TableFeature] = + allSupportedFeaturesMap.get(featureName.toLowerCase(Locale.ROOT)) + + /** + * Extracts the removed (explicit) feature names by comparing new and old protocols. + * Returns None if there are no removed (explicit) features. + */ + protected def getDroppedExplicitFeatureNames( + newProtocol: Protocol, + oldProtocol: Protocol): Option[Set[String]] = { + val newFeatureNames = newProtocol.readerAndWriterFeatureNames + val oldFeatureNames = oldProtocol.readerAndWriterFeatureNames + Option(oldFeatureNames -- newFeatureNames).filter(_.nonEmpty) + } + + /** + * Identifies whether there was any feature removal between two protocols. + */ + def isProtocolRemovingExplicitFeatures(newProtocol: Protocol, oldProtocol: Protocol): Boolean = { + getDroppedExplicitFeatureNames(newProtocol = newProtocol, oldProtocol = oldProtocol).isDefined + } + + /** + * Validates whether all requirements of a removed feature hold against the provided snapshot. + */ + def validateFeatureRemovalAtSnapshot( + newProtocol: Protocol, + oldProtocol: Protocol, + snapshot: Snapshot): Boolean = { + val droppedFeatureNamesOpt = TableFeature.getDroppedExplicitFeatureNames( + newProtocol = newProtocol, + oldProtocol = oldProtocol) + val droppedFeatureName = droppedFeatureNamesOpt match { + case Some(f) if f.size == 1 => f.head + // We do not support dropping more than one features at a time so we have to reject + // the validation. + case Some(_) => return false + case None => return true + } + + TableFeature.featureNameToFeature(droppedFeatureName) match { + case Some(feature: RemovableFeature) => feature.validateRemoval(snapshot) + case _ => throw DeltaErrors.dropTableFeatureFeatureNotSupportedByClient(droppedFeatureName) + } + } +} + +/* ---------------------------------------- * + | All table features known to the client | + * ---------------------------------------- */ + +object AppendOnlyTableFeature + extends LegacyWriterFeature(name = "appendOnly", minWriterVersion = 2) + with FeatureAutomaticallyEnabledByMetadata { + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + DeltaConfigs.IS_APPEND_ONLY.fromMetaData(metadata) + } +} + +object InvariantsTableFeature + extends LegacyWriterFeature(name = "invariants", minWriterVersion = 2) + with FeatureAutomaticallyEnabledByMetadata { + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + Invariants.getFromSchema(metadata.schema, spark).nonEmpty + } +} + +object CheckConstraintsTableFeature + extends LegacyWriterFeature(name = "checkConstraints", minWriterVersion = 3) + with FeatureAutomaticallyEnabledByMetadata { + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + Constraints.getCheckConstraints(metadata, spark).nonEmpty + } +} + +object ChangeDataFeedTableFeature + extends LegacyWriterFeature(name = "changeDataFeed", minWriterVersion = 4) + with FeatureAutomaticallyEnabledByMetadata { + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + DeltaConfigs.CHANGE_DATA_FEED.fromMetaData(metadata) + } +} + +object 
GeneratedColumnsTableFeature + extends LegacyWriterFeature(name = "generatedColumns", minWriterVersion = 4) + with FeatureAutomaticallyEnabledByMetadata { + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + GeneratedColumn.hasGeneratedColumns(metadata.schema) + } +} + +object ColumnMappingTableFeature + extends LegacyReaderWriterFeature( + name = "columnMapping", + minReaderVersion = 2, + minWriterVersion = 5) + with FeatureAutomaticallyEnabledByMetadata { + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + metadata.columnMappingMode match { + case NoMapping => false + case _ => true + } + } +} + +object TimestampNTZTableFeature extends ReaderWriterFeature(name = "timestampNtz") + with FeatureAutomaticallyEnabledByMetadata { + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, spark: SparkSession): Boolean = { + SchemaUtils.checkForTimestampNTZColumnsRecursively(metadata.schema) + } +} + +object DeletionVectorsTableFeature + extends ReaderWriterFeature(name = "deletionVectors") + with FeatureAutomaticallyEnabledByMetadata { + override def automaticallyUpdateProtocolOfExistingTables: Boolean = true + + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.fromMetaData(metadata) + } +} + +object RowTrackingFeature extends WriterFeature(name = "rowTracking") + with FeatureAutomaticallyEnabledByMetadata { + override def automaticallyUpdateProtocolOfExistingTables: Boolean = true + + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = DeltaConfigs.ROW_TRACKING_ENABLED.fromMetaData(metadata) + + override def requiredFeatures: Set[TableFeature] = Set(DomainMetadataTableFeature) +} + +object DomainMetadataTableFeature extends WriterFeature(name = "domainMetadata") + +object IcebergCompatV1TableFeature extends WriterFeature(name = "icebergCompatV1") + with FeatureAutomaticallyEnabledByMetadata { + + override def automaticallyUpdateProtocolOfExistingTables: Boolean = true + + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = IcebergCompatV1.isEnabled(metadata) + + override def requiredFeatures: Set[TableFeature] = Set(ColumnMappingTableFeature) +} + +object IcebergCompatV2TableFeature extends WriterFeature(name = "icebergCompatV2") + with FeatureAutomaticallyEnabledByMetadata { + + override def automaticallyUpdateProtocolOfExistingTables: Boolean = true + + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = IcebergCompatV2.isEnabled(metadata) + + override def requiredFeatures: Set[TableFeature] = Set(ColumnMappingTableFeature) +} + +/** + * Clustering table feature is enabled when a table is created with CLUSTER BY clause. + */ +object ClusteringTableFeature extends WriterFeature("clustering") { + override val requiredFeatures: Set[TableFeature] = Set(DomainMetadataTableFeature) +} + +/** + * This table feature represents support for column DEFAULT values for Delta Lake. With this + * feature, it is possible to assign default values to columns either at table creation time or + * later by using commands of the form: ALTER TABLE t ALTER COLUMN c SET DEFAULT v. 
Thereafter, + * queries from the table will return the specified default value instead of NULL when the + * corresponding field is not present in storage. + * + * We create this as a writer-only feature rather than a reader/writer feature in order to simplify + * the query execution implementation for scanning Delta tables. This means that commands of the + * following form are not allowed: ALTER TABLE t ADD COLUMN c DEFAULT v. The reason is that when + * commands of that form execute (such as for other data sources like CSV or JSON), then the data + * source scan implementation must take responsibility to return the supplied default value for all + * rows, including those previously present in the table before the command executed. We choose to + * avoid this complexity for Delta table scans, so we make this a writer-only feature instead. + * Therefore, the analyzer can take care of the entire job when processing commands that introduce + * new rows into the table by injecting the column default value (if present) into the corresponding + * query plan. This comes at the expense of preventing ourselves from easily adding a default value + * to an existing non-empty table, because all data files would need to be rewritten to include the + * new column value in an expensive backfill. + */ +object AllowColumnDefaultsTableFeature extends WriterFeature(name = "allowColumnDefaults") + + +/** + * V2 Checkpoint table feature is for checkpoints with sidecars and the new format and + * file naming scheme. + */ +object V2CheckpointTableFeature + extends ReaderWriterFeature(name = "v2Checkpoint") + with RemovableFeature + with FeatureAutomaticallyEnabledByMetadata { + + override def automaticallyUpdateProtocolOfExistingTables: Boolean = true + + private def isV2CheckpointSupportNeededByMetadata(metadata: Metadata): Boolean = + DeltaConfigs.CHECKPOINT_POLICY.fromMetaData(metadata).needsV2CheckpointSupport + + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = isV2CheckpointSupportNeededByMetadata(metadata) + + override def validateRemoval(snapshot: Snapshot): Boolean = { + // Fail validation if v2 checkpoints are still enabled in the current snapshot + if (isV2CheckpointSupportNeededByMetadata(snapshot.metadata)) return false + + // Validation also fails if the current snapshot might depend on a v2 checkpoint. + // NOTE: Empty and preloaded checkpoint providers never reference v2 checkpoints. + snapshot.checkpointProvider match { + case p if p.isEmpty => true + case _: PreloadedCheckpointProvider => true + case lazyProvider: LazyCompleteCheckpointProvider => + lazyProvider.underlyingCheckpointProvider.isInstanceOf[PreloadedCheckpointProvider] + case _ => false + } + } + + override def actionUsesFeature(action: Action): Boolean = action match { + case m: Metadata => isV2CheckpointSupportNeededByMetadata(m) + case _: CheckpointMetadata => true + case _: SidecarFile => true + case _ => false + } + + override def preDowngradeCommand(table: DeltaTableV2): PreDowngradeTableFeatureCommand = + V2CheckpointPreDowngradeCommand(table) +} + +/** + * Features below are for testing only, and are being registered to the system only in the testing + * environment. See [[TableFeature.allSupportedFeaturesMap]] for the registration. 
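+ *
+ * Most metadata-triggered test features below are toggled through a table property named by
+ * their `TABLE_PROP_KEY`; the trigger check is simply (sketch, mirroring the objects below):
+ * {{{
+ *   metadata.configuration.get(TABLE_PROP_KEY).exists(_.toBoolean)
+ * }}}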
+ */ + +object TestLegacyWriterFeature + extends LegacyWriterFeature(name = "testLegacyWriter", minWriterVersion = 5) + +object TestWriterFeature extends WriterFeature(name = "testWriter") + +object TestWriterMetadataNoAutoUpdateFeature + extends WriterFeature(name = "testWriterMetadataNoAutoUpdate") + with FeatureAutomaticallyEnabledByMetadata { + val TABLE_PROP_KEY = "_123testWriterMetadataNoAutoUpdate321_" + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + metadata.configuration.get(TABLE_PROP_KEY).exists(_.toBoolean) + } +} + +object TestLegacyReaderWriterFeature + extends LegacyReaderWriterFeature( + name = "testLegacyReaderWriter", + minReaderVersion = 2, + minWriterVersion = 5) + +object TestReaderWriterFeature extends ReaderWriterFeature(name = "testReaderWriter") + +object TestReaderWriterMetadataNoAutoUpdateFeature + extends ReaderWriterFeature(name = "testReaderWriterMetadataNoAutoUpdate") + with FeatureAutomaticallyEnabledByMetadata { + val TABLE_PROP_KEY = "_123testReaderWriterMetadataNoAutoUpdate321_" + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + metadata.configuration.get(TABLE_PROP_KEY).exists(_.toBoolean) + } +} + +object TestReaderWriterMetadataAutoUpdateFeature + extends ReaderWriterFeature(name = "testReaderWriterMetadataAutoUpdate") + with FeatureAutomaticallyEnabledByMetadata { + val TABLE_PROP_KEY = "_123testReaderWriterMetadataAutoUpdate321_" + + override def automaticallyUpdateProtocolOfExistingTables: Boolean = true + + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + metadata.configuration.get(TABLE_PROP_KEY).exists(_.toBoolean) + } +} + +private[sql] object TestRemovableWriterFeature + extends WriterFeature(name = "testRemovableWriter") + with FeatureAutomaticallyEnabledByMetadata + with RemovableFeature { + + val TABLE_PROP_KEY = "_123TestRemovableWriter321_" + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + metadata.configuration.get(TABLE_PROP_KEY).exists(_.toBoolean) + } + + override def validateRemoval(snapshot: Snapshot): Boolean = + !snapshot.metadata.configuration.contains(TABLE_PROP_KEY) + + override def preDowngradeCommand(table: DeltaTableV2): PreDowngradeTableFeatureCommand = + TestWriterFeaturePreDowngradeCommand(table) + + override def actionUsesFeature(action: Action): Boolean = false +} + +private[sql] object TestRemovableReaderWriterFeature + extends ReaderWriterFeature(name = "testRemovableReaderWriter") + with FeatureAutomaticallyEnabledByMetadata + with RemovableFeature { + + val TABLE_PROP_KEY = "_123TestRemovableReaderWriter321_" + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + metadata.configuration.get(TABLE_PROP_KEY).exists(_.toBoolean) + } + + override def validateRemoval(snapshot: Snapshot): Boolean = + !snapshot.metadata.configuration.contains(TABLE_PROP_KEY) + + override def actionUsesFeature(action: Action): Boolean = action match { + case m: Metadata => m.configuration.contains(TABLE_PROP_KEY) + case _ => false + } + + override def preDowngradeCommand(table: DeltaTableV2): PreDowngradeTableFeatureCommand = + TestReaderWriterFeaturePreDowngradeCommand(table) +} + +object TestRemovableLegacyWriterFeature + extends LegacyWriterFeature(name = "testRemovableLegacyWriter", minWriterVersion = 5) + with 
FeatureAutomaticallyEnabledByMetadata + with RemovableFeature { + + val TABLE_PROP_KEY = "_123TestRemovableLegacyWriter321_" + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + metadata.configuration.get(TABLE_PROP_KEY).exists(_.toBoolean) + } + + override def validateRemoval(snapshot: Snapshot): Boolean = { + !snapshot.metadata.configuration.contains(TABLE_PROP_KEY) + } + + override def preDowngradeCommand(table: DeltaTableV2): PreDowngradeTableFeatureCommand = + TestLegacyWriterFeaturePreDowngradeCommand(table) + + override def actionUsesFeature(action: Action): Boolean = false +} + +object TestRemovableLegacyReaderWriterFeature + extends LegacyReaderWriterFeature( + name = "testRemovableLegacyReaderWriter", minReaderVersion = 2, minWriterVersion = 5) + with FeatureAutomaticallyEnabledByMetadata + with RemovableFeature { + + val TABLE_PROP_KEY = "_123TestRemovableLegacyReaderWriter321_" + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + metadata.configuration.get(TABLE_PROP_KEY).exists(_.toBoolean) + } + + override def validateRemoval(snapshot: Snapshot): Boolean = { + !snapshot.metadata.configuration.contains(TABLE_PROP_KEY) + } + + override def actionUsesFeature(action: Action): Boolean = { + action match { + case m: Metadata => m.configuration.contains(TABLE_PROP_KEY) + case _ => false + } + } + + override def preDowngradeCommand(table: DeltaTableV2): PreDowngradeTableFeatureCommand = + TestLegacyReaderWriterFeaturePreDowngradeCommand(table) +} + +object TestFeatureWithDependency + extends ReaderWriterFeature(name = "testFeatureWithDependency") + with FeatureAutomaticallyEnabledByMetadata { + + val TABLE_PROP_KEY = "_123testFeatureWithDependency321_" + + override def automaticallyUpdateProtocolOfExistingTables: Boolean = true + + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, spark: SparkSession): Boolean = { + metadata.configuration.get(TABLE_PROP_KEY).exists(_.toBoolean) + } + + override def requiredFeatures: Set[TableFeature] = Set(TestReaderWriterFeature) +} + +object TestFeatureWithTransitiveDependency + extends ReaderWriterFeature(name = "testFeatureWithTransitiveDependency") { + + override def requiredFeatures: Set[TableFeature] = Set(TestFeatureWithDependency) +} + +object TestWriterFeatureWithTransitiveDependency + extends WriterFeature(name = "testWriterFeatureWithTransitiveDependency") { + + override def requiredFeatures: Set[TableFeature] = Set(TestFeatureWithDependency) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/UniversalFormat.scala b/spark/src/main/scala/org/apache/spark/sql/delta/UniversalFormat.scala new file mode 100644 index 00000000000..ffa8e3f4a28 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/UniversalFormat.scala @@ -0,0 +1,264 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.actions.{Action, Metadata, Protocol} +import org.apache.spark.sql.delta.commands.WriteIntoDelta +import org.apache.spark.sql.delta.metering.DeltaLogging + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.catalog.CatalogTable + +/** + * Utils to validate the Universal Format (UniForm) Delta feature (NOT a table feature). + * + * The UniForm Delta feature governs and implements the actual conversion of Delta metadata into + * other formats. + * + * Currently, UniForm only supports Iceberg. When `delta.universalFormat.enabledFormats` contains + * "iceberg", we say that Universal Format (Iceberg) is enabled. + * + * [[enforceInvariantsAndDependencies]] ensures that all of UniForm's requirements for the + * specified format are met (e.g. for 'iceberg' that IcebergCompatV1 or V2 is enabled). + * It doesn't verify that its nested requirements are met (e.g. IcebergCompat's requirements, + * like Column Mapping). That is the responsibility of format-specific validations such as + * [[IcebergCompatV1.enforceInvariantsAndDependencies]] + * and [[IcebergCompatV2.enforceInvariantsAndDependencies]]. + * + * + * Note that UniForm (Iceberg) depends on IcebergCompat, but IcebergCompat does not + * depend on or require UniForm (Iceberg). It is perfectly valid for a Delta table to have + * IcebergCompatV1 or V2 enabled but UniForm (Iceberg) not enabled. + */ +object UniversalFormat extends DeltaLogging { + + val ICEBERG_FORMAT = "iceberg" + val SUPPORTED_FORMATS = Set(ICEBERG_FORMAT) + + def icebergEnabled(metadata: Metadata): Boolean = { + DeltaConfigs.UNIVERSAL_FORMAT_ENABLED_FORMATS.fromMetaData(metadata).contains(ICEBERG_FORMAT) + } + + def icebergEnabled(properties: Map[String, String]): Boolean = { + properties.get(DeltaConfigs.UNIVERSAL_FORMAT_ENABLED_FORMATS.key) + .exists(value => value.contains(ICEBERG_FORMAT)) + } + + /** + * Expected to be called after the newest metadata and protocol have been ~ finalized. + * + * @return tuple of options of (updatedProtocol, updatedMetadata). For either action, if no + * updates need to be applied, will return None. + */ + def enforceInvariantsAndDependencies( + snapshot: Snapshot, + newestProtocol: Protocol, + newestMetadata: Metadata, + isCreatingOrReorgTable: Boolean, + actions: Seq[Action]): (Option[Protocol], Option[Metadata]) = { + enforceIcebergInvariantsAndDependencies( + snapshot, newestProtocol, newestMetadata, isCreatingOrReorgTable, actions) + } + + /** + * If you are enabling Universal Format (Iceberg), this method ensures that at least one of + * IcebergCompat is enabled. If you are disabling Universal Format (Iceberg), this method + * will leave the current IcebergCompat version untouched. + * + * @return tuple of options of (updatedProtocol, updatedMetadata). For either action, if no + * updates need to be applied, will return None. 
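+   *
+   * Illustrative call (a sketch; the variable names are hypothetical):
+   * {{{
+   *   val (protocolOpt, metadataOpt) = UniversalFormat.enforceIcebergInvariantsAndDependencies(
+   *     snapshot, newestProtocol, newestMetadata, isCreatingOrReorg = true, actions = Seq.empty)
+   * }}}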
+ */ + def enforceIcebergInvariantsAndDependencies( + snapshot: Snapshot, + newestProtocol: Protocol, + newestMetadata: Metadata, + isCreatingOrReorg: Boolean, + actions: Seq[Action]): (Option[Protocol], Option[Metadata]) = { + + val prevMetadata = snapshot.metadata + val uniformIcebergWasEnabled = UniversalFormat.icebergEnabled(prevMetadata) + val uniformIcebergIsEnabled = UniversalFormat.icebergEnabled(newestMetadata) + val tableId = newestMetadata.id + var changed = false + + val (uniformProtocol, uniformMetadata) = + (uniformIcebergWasEnabled, uniformIcebergIsEnabled) match { + case (_, false) => (None, None) // Ignore + case (_, true) => // Enabling now or already-enabled + val icebergCompatWasEnabled = IcebergCompat.isAnyEnabled(prevMetadata) + val icebergCompatIsEnabled = IcebergCompat.isAnyEnabled(newestMetadata) + + if (icebergCompatIsEnabled) { + (None, None) + } else if (icebergCompatWasEnabled) { + // IcebergCompat is being disabled. We need to also disable Universal Format (Iceberg) + val remainingSupportedFormats = DeltaConfigs.UNIVERSAL_FORMAT_ENABLED_FORMATS + .fromMetaData(newestMetadata) + .filterNot(_ == UniversalFormat.ICEBERG_FORMAT) + + val newConfiguration = if (remainingSupportedFormats.isEmpty) { + newestMetadata.configuration - DeltaConfigs.UNIVERSAL_FORMAT_ENABLED_FORMATS.key + } else { + newestMetadata.configuration ++ + Map(DeltaConfigs.UNIVERSAL_FORMAT_ENABLED_FORMATS.key -> + remainingSupportedFormats.mkString(",")) + } + + logInfo(s"[tableId=$tableId] IcebergCompat is being disabled. Auto-disabling " + + "Universal Format (Iceberg), too.") + + (None, Some(newestMetadata.copy(configuration = newConfiguration))) + } else { + throw DeltaErrors.uniFormIcebergRequiresIcebergCompat() + } + } + + var protocolToCheck = uniformProtocol.getOrElse(newestProtocol) + var metadataToCheck = uniformMetadata.getOrElse(newestMetadata) + changed = uniformProtocol.nonEmpty || uniformMetadata.nonEmpty + + val (v1protocolUpdate, v1metadataUpdate) = IcebergCompatV1.enforceInvariantsAndDependencies( + snapshot, + newestProtocol = protocolToCheck, + newestMetadata = metadataToCheck, + isCreatingOrReorg, + actions + ) + protocolToCheck = v1protocolUpdate.getOrElse(protocolToCheck) + metadataToCheck = v1metadataUpdate.getOrElse(metadataToCheck) + changed ||= v1protocolUpdate.nonEmpty || v1metadataUpdate.nonEmpty + + val (v2protocolUpdate, v2metadataUpdate) = IcebergCompatV2.enforceInvariantsAndDependencies( + snapshot, + newestProtocol = protocolToCheck, + newestMetadata = metadataToCheck, + isCreatingOrReorg, + actions + ) + changed ||= v2protocolUpdate.nonEmpty || v2metadataUpdate.nonEmpty + + if (changed) { + ( + v2protocolUpdate.orElse(Some(protocolToCheck)), + v2metadataUpdate.orElse(Some(metadataToCheck)) + ) + } else { + (None, None) + } + } + + /** + * This method is used to build UniForm metadata dependencies closure. + * It checks configuration conflicts and adds missing properties. + * It will call [[enforceIcebergInvariantsAndDependencies]] to perform the actual check. + * @param configuration the original metadata configuration. + * @return updated configuration if any changes are required, + * otherwise the original configuration. 
+ */ + def enforceDependenciesInConfiguration( + configuration: Map[String, String], + snapshot: Snapshot): Map[String, String] = { + var metadata = Metadata(configuration = configuration) + + // Check UniversalFormat related property dependencies + val (_, universalMetadata) = UniversalFormat.enforceInvariantsAndDependencies( + snapshot, + newestProtocol = snapshot.protocol, + newestMetadata = metadata, + isCreatingOrReorgTable = true, + actions = Seq() + ) + + universalMetadata match { + case Some(valid) => valid.configuration + case _ => configuration + } + } + + val ICEBERG_TABLE_TYPE_KEY = "table_type" + + /** + * Update CatalogTable to mark it readable by other table readers (iceberg for now). + * This method ensures 'table_type' = 'ICEBERG' when uniform is enabled, + * and ensure table_type is not 'ICEBERG' when uniform is not enabled + * If the key has other values than 'ICEBERG', this method will not touch it for compatibility + * + * @param table catalogTable before change + * @param metadata snapshot metadata + * @return the converted catalog, or None if no change is made + */ + def enforceSupportInCatalog(table: CatalogTable, metadata: Metadata): Option[CatalogTable] = { + val icebergInCatalog = table.properties.get(ICEBERG_TABLE_TYPE_KEY) match { + case Some(value) => value.equalsIgnoreCase(ICEBERG_FORMAT) + case _ => false + } + + (icebergEnabled(metadata), icebergInCatalog) match { + case (true, false) => + Some(table.copy(properties = table.properties + + (ICEBERG_TABLE_TYPE_KEY -> ICEBERG_FORMAT))) + case (false, true) => + Some(table.copy(properties = + table.properties - ICEBERG_TABLE_TYPE_KEY)) + case _ => None + } + } +} +/** Class to facilitate the conversion of Delta into other table formats. */ +abstract class UniversalFormatConverter(spark: SparkSession) { + /** + * Perform an asynchronous conversion. + * + * This will start an async job to run the conversion, unless there already is an async conversion + * running for this table. In that case, it will queue up the provided snapshot to be run after + * the existing job completes. + */ + def enqueueSnapshotForConversion( + snapshotToConvert: Snapshot, + txn: OptimisticTransactionImpl): Unit + + /** + * Perform a blocking conversion when performing an OptimisticTransaction + * on a delta table. + * + * @param snapshotToConvert the snapshot that needs to be converted to Iceberg + * @param txn the transaction that triggers the conversion. Used as a hint to + * avoid recomputing old metadata. It must contain the catalogTable + * this conversion targets. + * @return Converted Delta version and commit timestamp + */ + def convertSnapshot( + snapshotToConvert: Snapshot, txn: OptimisticTransactionImpl): Option[(Long, Long)] + + /** + * Perform a blocking conversion for the given catalogTable + * + * @param snapshotToConvert the snapshot that needs to be converted to Iceberg + * @param catalogTable the catalogTable this conversion targets. + * @return Converted Delta version and commit timestamp + */ + def convertSnapshot( + snapshotToConvert: Snapshot, catalogTable: CatalogTable): Option[(Long, Long)] + + /** + * Fetch the delta version corresponding to the latest conversion. 
+ * @param snapshot the snapshot to be converted + * @param table the catalogTable with info of previous conversions + * @return None if no previous conversion found + */ + def loadLastDeltaVersionConverted(snapshot: Snapshot, table: CatalogTable): Option[Long] +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/UpdateExpressionsSupport.scala b/spark/src/main/scala/org/apache/spark/sql/delta/UpdateExpressionsSupport.scala new file mode 100644 index 00000000000..501967b08a2 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/UpdateExpressionsSupport.scala @@ -0,0 +1,516 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.AnalysisHelper + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper} +import org.apache.spark.sql.catalyst.analysis.Resolver +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +/** + * Trait with helper functions to generate expressions to update target columns, even if they are + * nested fields. + */ +trait UpdateExpressionsSupport extends SQLConfHelper with AnalysisHelper { + /** + * Specifies an operation that updates a target column with the given expression. + * The target column may or may not be a nested field and it is specified as a full quoted name + * or as a sequence of split into parts. + */ + case class UpdateOperation(targetColNameParts: Seq[String], updateExpr: Expression) + + /** + * Add a cast to the child expression if it differs from the specified data type. Note that + * structs here are cast by name, rather than the Spark SQL default of casting by position. + * + * @param fromExpression the expression to cast + * @param dataType The data type to cast to. + * @param allowStructEvolution Whether to allow structs to evolve. When this is false (default), + * struct casting will throw an error if there's any mismatch between + * column names. For example, (b, c, a) -> (a, b, c) is always a valid + * cast, but (a, b) -> (a, b, c) is valid only with this flag set. + * @param columnName The name of the column written to. It is used for the error message. + */ + protected def castIfNeeded( + fromExpression: Expression, + dataType: DataType, + allowStructEvolution: Boolean, + columnName: String): Expression = { + + fromExpression match { + // Need to deal with NullType here, as some types cannot be casted from NullType, e.g., + // StructType. 
+ case Literal(nul, NullType) => Literal(nul, dataType) + case otherExpr => + val resolveStructsByName = + conf.getConf(DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME) + + (fromExpression.dataType, dataType) match { + case (ArrayType(_: StructType, _), ArrayType(toEt: StructType, toContainsNull)) => + fromExpression match { + // If fromExpression is an array function returning an array, cast the + // underlying array first and then perform the function on the transformed array. + case ArrayUnion(leftExpression, rightExpression) => + val castedLeft = + castIfNeeded(leftExpression, dataType, allowStructEvolution, columnName) + val castedRight = + castIfNeeded(rightExpression, dataType, allowStructEvolution, columnName) + ArrayUnion(castedLeft, castedRight) + + case ArrayIntersect(leftExpression, rightExpression) => + val castedLeft = + castIfNeeded(leftExpression, dataType, allowStructEvolution, columnName) + val castedRight = + castIfNeeded(rightExpression, dataType, allowStructEvolution, columnName) + ArrayIntersect(castedLeft, castedRight) + + case ArrayExcept(leftExpression, rightExpression) => + val castedLeft = + castIfNeeded(leftExpression, dataType, allowStructEvolution, columnName) + val castedRight = + castIfNeeded(rightExpression, dataType, allowStructEvolution, columnName) + ArrayExcept(castedLeft, castedRight) + + case ArrayRemove(leftExpression, rightExpression) => + val castedLeft = + castIfNeeded(leftExpression, dataType, allowStructEvolution, columnName) + // ArrayRemove removes all elements that equal to element from the given array. + // In this case, the element to be removed also needs to be casted into the target + // array's element type. + val castedRight = + castIfNeeded(rightExpression, toEt, allowStructEvolution, columnName) + ArrayRemove(castedLeft, castedRight) + + case ArrayDistinct(expression) => + val castedExpr = + castIfNeeded(expression, dataType, allowStructEvolution, columnName) + ArrayDistinct(castedExpr) + + case _ => + // generate a lambda function to cast each array item into to element struct type. + val structConverter: (Expression, Expression) => Expression = (_, i) => + castIfNeeded( + GetArrayItem(fromExpression, i), toEt, allowStructEvolution, columnName) + val transformLambdaFunc = { + val elementVar = NamedLambdaVariable("elementVar", toEt, toContainsNull) + val indexVar = NamedLambdaVariable("indexVar", IntegerType, false) + LambdaFunction(structConverter(elementVar, indexVar), Seq(elementVar, indexVar)) + } + // Transforms every element in the array using the lambda function. + // Because castIfNeeded is called recursively for array elements, which + // generates nullable expression, ArrayTransform will generate an ArrayType with + // containsNull as true. Thus, the ArrayType to be casted to need to have + // containsNull as true to avoid casting failures. + cast( + ArrayTransform(fromExpression, transformLambdaFunc), + ArrayType(toEt, containsNull = true), + columnName + ) + } + case (from: MapType, to: MapType) if !Cast.canCast(from, to) => + // Manually convert map keys and values if the types are not compatible to allow schema + // evolution. This is slower than direct cast so we only do it when required. 
+ def createMapConverter(convert: (Expression, Expression) => Expression): Expression = { + val keyVar = NamedLambdaVariable("keyVar", from.keyType, nullable = false) + val valueVar = + NamedLambdaVariable("valueVar", from.valueType, from.valueContainsNull) + LambdaFunction(convert(keyVar, valueVar), Seq(keyVar, valueVar)) + } + + var transformedKeysAndValues = fromExpression + if (from.keyType != to.keyType) { + transformedKeysAndValues = + TransformKeys(transformedKeysAndValues, createMapConverter { + (key, _) => castIfNeeded(key, to.keyType, allowStructEvolution, columnName) + }) + } + + if (from.valueType != to.valueType) { + transformedKeysAndValues = + TransformValues(transformedKeysAndValues, createMapConverter { + (_, value) => castIfNeeded(value, to.valueType, allowStructEvolution, columnName) + }) + } + cast(transformedKeysAndValues, to, columnName) + case (from: StructType, to: StructType) + if !DataType.equalsIgnoreCaseAndNullability(from, to) && resolveStructsByName => + // All from fields must be present in the final schema, or we'll silently lose data. + if (from.exists { f => !to.exists(_.name.equalsIgnoreCase(f.name))}) { + throw DeltaErrors.updateSchemaMismatchExpression(from, to) + } + + // If struct evolution isn't allowed, the field count also has to match, since we can't + // add columns. + if (from.length != to.length && !allowStructEvolution) { + throw DeltaErrors.updateSchemaMismatchExpression(from, to) + } + + val nameMappedStruct = CreateNamedStruct(to.flatMap { field => + val fieldNameLit = Literal(field.name) + val extractedField = from + .find { f => SchemaUtils.DELTA_COL_RESOLVER(f.name, field.name) } + .map { _ => + ExtractValue(fromExpression, fieldNameLit, SchemaUtils.DELTA_COL_RESOLVER) + }.getOrElse { + // This shouldn't be possible - if all columns aren't present when struct + // evolution is disabled, we should have thrown an error earlier. + if (!allowStructEvolution) { + throw DeltaErrors.extractReferencesFieldNotFound(s"$field", + DeltaErrors.updateSchemaMismatchExpression(from, to)) + } + Literal(null) + } + Seq(fieldNameLit, + castIfNeeded(extractedField, field.dataType, allowStructEvolution, field.name)) + }) + + cast(nameMappedStruct, to.asNullable, columnName) + + case (from, to) if (from != to) => cast(fromExpression, dataType, columnName) + case _ => fromExpression + } + } + } + + /** + * Given a list of target-column expressions and a set of update operations, generate a list + * of update expressions, which are aligned with given target-column expressions. + * + * For update operations to nested struct fields, this method recursively walks down schema tree + * and apply the update expressions along the way. + * For example, assume table `target` has two attributes a and z, where a is of struct type + * with 3 fields: b, c and d, and z is of integer type. + * + * Given an update command: + * + * - UPDATE target SET a.b = 1, a.c = 2, z = 3 + * + * this method works as follows: + * + * generateUpdateExpressions(targetCols=[a,z], updateOps=[(a.b, 1), (a.c, 2), (z, 3)]) + * generateUpdateExpressions(targetCols=[b,c,d], updateOps=[(b, 1),(c, 2)], pathPrefix=["a"]) + * end-of-recursion + * -> returns (1, 2, d) + * -> return ((1, 2, d), 3) + * + * @param targetCols a list of expressions to read named columns; these named columns can be + * either the top-level attributes of a table, or the nested fields of a + * StructType column. + * @param updateOps a set of update operations. 
+ * @param pathPrefix the path from root to the current (nested) column. Only used for printing out + * full column path in error messages. + * @param allowStructEvolution Whether to allow structs to evolve. When this is false (default), + * struct casting will throw an error if there's any mismatch between + * column names. For example, (b, c, a) -> (a, b, c) is always a valid + * cast, but (a, b) -> (a, b, c) is valid only with this flag set. + * @param generatedColumns the list of the generated columns in the table. When a column is a + * generated column and the user doesn't provide a update expression, its + * update expression in the return result will be None. + * If `generatedColumns` is empty, any of the options in the return result + * must be non-empty. + * @return a sequence of expression options. The elements in the sequence are options because + * when a column is a generated column but the user doesn't provide an update expression + * for this column, we need to generate the update expression according to the generated + * column definition. But this method doesn't have enough context to do that. Hence, we + * return a `None` for this case so that the caller knows it should generate the update + * expression for such column. For other cases, we will always return Some(expr). + */ + protected def generateUpdateExpressions( + targetCols: Seq[NamedExpression], + updateOps: Seq[UpdateOperation], + resolver: Resolver, + pathPrefix: Seq[String] = Nil, + allowStructEvolution: Boolean = false, + generatedColumns: Seq[StructField] = Nil): Seq[Option[Expression]] = { + // Check that the head of nameParts in each update operation can match a target col. This avoids + // silently ignoring invalid column names specified in update operations. + updateOps.foreach { u => + if (!targetCols.exists(f => resolver(f.name, u.targetColNameParts.head))) { + throw DeltaErrors.updateSetColumnNotFoundException( + (pathPrefix :+ u.targetColNameParts.head).mkString("."), + targetCols.map(col => (pathPrefix :+ col.name).mkString("."))) + } + } + + // Transform each targetCol to a possibly updated expression + targetCols.map { targetCol => + // The prefix of a update path matches the current targetCol path. + val prefixMatchedOps = + updateOps.filter(u => resolver(u.targetColNameParts.head, targetCol.name)) + // No prefix matches this target column, return its original expression. + if (prefixMatchedOps.isEmpty) { + // Check whether it's a generated column or not. If so, we will return `None` so that the + // caller will generate an expression for this column. We cannot generate an expression at + // this moment because a generated column may use other columns which we don't know their + // update expressions yet. + if (generatedColumns.find(f => resolver(f.name, targetCol.name)).nonEmpty) { + None + } else { + Some(targetCol) + } + } else { + // The update operation whose path exactly matches the current targetCol path. + val fullyMatchedOp = prefixMatchedOps.find(_.targetColNameParts.size == 1) + if (fullyMatchedOp.isDefined) { + // If a full match is found, then it should be the ONLY prefix match. Any other match + // would be a conflict, whether it is a full match or prefix-only. For example, + // when users are updating a nested column a.b, they can't simultaneously update a + // descendant of a.b, such as a.b.c. 
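+          // e.g. `UPDATE target SET a.b = 1, a.b.c = 2`: at the recursion level for struct `a`,
+          // `b` is a full match while `b.c` is an additional prefix match, so the check below
+          // rejects the command with a conflict error.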
+ if (prefixMatchedOps.size > 1) { + throw DeltaErrors.updateSetConflictException( + prefixMatchedOps.map(op => (pathPrefix ++ op.targetColNameParts).mkString("."))) + } + // For an exact match, return the updateExpr from the update operation. + Some(castIfNeeded( + fullyMatchedOp.get.updateExpr, + targetCol.dataType, + allowStructEvolution, + targetCol.name)) + } else { + // So there are prefix-matched update operations, but none of them is a full match. Then + // that means targetCol is a complex data type, so we recursively pass along the update + // operations to its children. + targetCol.dataType match { + case StructType(fields) => + val fieldExpr = targetCol + val childExprs = fields.zipWithIndex.map { case (field, ordinal) => + Alias(GetStructField(fieldExpr, ordinal, Some(field.name)), field.name)() + } + // Recursively apply update operations to the children + val targetExprs = generateUpdateExpressions( + childExprs, + prefixMatchedOps.map(u => u.copy(targetColNameParts = u.targetColNameParts.tail)), + resolver, + pathPrefix :+ targetCol.name, + allowStructEvolution, + // Set `generatedColumns` to Nil because they are only valid in the top level. + generatedColumns = Nil) + .map(_.getOrElse { + // Should not happen + throw DeltaErrors.cannotGenerateUpdateExpressions() + }) + // Reconstruct the expression for targetCol using its possibly updated children + val namedStructExprs = fields + .zip(targetExprs) + .flatMap { case (field, expr) => Seq(Literal(field.name), expr) } + Some(CreateNamedStruct(namedStructExprs)) + + case otherType => + throw DeltaErrors.updateNonStructTypeFieldNotSupportedException( + (pathPrefix :+ targetCol.name).mkString("."), otherType) + } + } + } + } + } + + /** See docs on overloaded method. */ + protected def generateUpdateExpressions( + targetCols: Seq[NamedExpression], + nameParts: Seq[Seq[String]], + updateExprs: Seq[Expression], + resolver: Resolver, + generatedColumns: Seq[StructField]): Seq[Option[Expression]] = { + assert(nameParts.size == updateExprs.size) + val updateOps = nameParts.zip(updateExprs).map { + case (nameParts, expr) => UpdateOperation(nameParts, expr) + } + generateUpdateExpressions(targetCols, updateOps, resolver, generatedColumns = generatedColumns) + } + + /** + * Generate update expressions for generated columns that the user doesn't provide a update + * expression. For each item in `updateExprs` that's None, we will find its generation expression + * from `generatedColumns`. In order to resolve this generation expression, we will create a + * fake Project which contains all update expressions and resolve the generation expression with + * this project. Source columns of a generation expression will also be replaced with their + * corresponding update expressions. + * + * For example, given a table that has a generated column `g` defined as `c1 + 10`. For the + * following update command: + * + * UPDATE target SET c1 = c2 + 100, c2 = 1000 + * + * We will generate the update expression `(c2 + 100) + 10`` for column `g`. Note: in this update + * expression, we should use the old `c2` attribute rather than its new value 1000. + * + * @param updateTarget The logical plan of the table to be updated. + * @param generatedColumns A list of generated columns. + * @param updateExprs The aligned (with `finalSchemaExprs` if not None, or `updateTarget.output` + * otherwise) update actions. + * @param finalSchemaExprs In case of UPDATE in MERGE when schema evolution happened, this is + * the final schema of the target table. 
This might not be the same as + * the output of `updateTarget`. + * @return a sequence of update expressions for all of columns in the table. + */ + protected def generateUpdateExprsForGeneratedColumns( + updateTarget: LogicalPlan, + generatedColumns: Seq[StructField], + updateExprs: Seq[Option[Expression]], + finalSchemaExprs: Option[Seq[Attribute]] = None): Seq[Expression] = { + val targetExprs = finalSchemaExprs.getOrElse(updateTarget.output) + assert( + targetExprs.size == updateExprs.length, + s"'generateUpdateExpressions' should return expressions that are aligned with the column " + + s"list. Expected size: ${updateTarget.output.size}, actual size: ${updateExprs.length}") + val attrsWithExprs = targetExprs.zip(updateExprs) + val exprsForProject = attrsWithExprs.flatMap { + case (attr, Some(expr)) => + // Create a named expression so that we can use it in Project + val exprForProject = Alias(expr, attr.name)() + Some(exprForProject.exprId -> exprForProject) + case (_, None) => None + }.toMap + // Create a fake Project to resolve the generation expressions + val fakePlan = Project(exprsForProject.values.toArray[NamedExpression], updateTarget) + attrsWithExprs.map { + case (_, Some(expr)) => expr + case (targetCol, None) => + // `targetCol` is a generated column and the user doesn't provide a update expression. + val resolvedExpr = + generatedColumns.find(f => conf.resolver(f.name, targetCol.name)) match { + case Some(field) => + val expr = GeneratedColumn.getGenerationExpression(field).get + resolveReferencesForExpressions(SparkSession.active, expr :: Nil, fakePlan).head + case None => + // Should not happen + throw DeltaErrors.nonGeneratedColumnMissingUpdateExpression(targetCol) + } + // As `resolvedExpr` will refer to attributes in `fakePlan`, we need to manually replace + // these attributes with their update expressions. + resolvedExpr.transform { + case a: AttributeReference if exprsForProject.contains(a.exprId) => + exprsForProject(a.exprId).child + } + } + } + + /** + * Replaces 'CastSupport.cast'. Selects a cast based on 'spark.sql.storeAssignmentPolicy' if + * 'spark.databricks.delta.updateAndMergeCastingFollowsAnsiEnabledFlag. is false, and based on + * 'spark.sql.ansi.enabled' otherwise. 
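+   *
+   * Behavioural sketch: under the STRICT store-assignment policy this produces an UpCast; under
+   * ANSI it produces an ANSI Cast, additionally wrapped in CheckOverflowInTableWrite when the
+   * cast can overflow; under LEGACY it produces a non-ANSI Cast.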
+ */ + private def cast(child: Expression, dataType: DataType, columnName: String): Expression = { + if (conf.getConf(DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG)) { + return Cast(child, dataType, Option(conf.sessionLocalTimeZone)) + } + + conf.storeAssignmentPolicy match { + case SQLConf.StoreAssignmentPolicy.LEGACY => + Cast(child, dataType, Some(conf.sessionLocalTimeZone), ansiEnabled = false) + case SQLConf.StoreAssignmentPolicy.ANSI => + val cast = Cast(child, dataType, Some(conf.sessionLocalTimeZone), ansiEnabled = true) + if (canCauseCastOverflow(cast)) { + CheckOverflowInTableWrite(cast, columnName) + } else { + cast + } + case SQLConf.StoreAssignmentPolicy.STRICT => + UpCast(child, dataType) + } + } + + private def containsIntegralOrDecimalType(dt: DataType): Boolean = dt match { + case _: IntegralType | _: DecimalType => true + case a: ArrayType => containsIntegralOrDecimalType(a.elementType) + case m: MapType => + containsIntegralOrDecimalType(m.keyType) || containsIntegralOrDecimalType(m.valueType) + case s: StructType => + s.fields.exists(sf => containsIntegralOrDecimalType(sf.dataType)) + case _ => false + } + + private def canCauseCastOverflow(cast: Cast): Boolean = { + containsIntegralOrDecimalType(cast.dataType) && + !Cast.canUpCast(cast.child.dataType, cast.dataType) + } +} + +case class CheckOverflowInTableWrite(child: Expression, columnName: String) + extends UnaryExpression { + override protected def withNewChildInternal(newChild: Expression): Expression = { + copy(child = newChild) + } + + private def getCast: Option[Cast] = child match { + case c: Cast => Some(c) + case ExpressionProxy(c: Cast, _, _) => Some(c) + case _ => None + } + + override def eval(input: InternalRow): Any = try { + child.eval(input) + } catch { + case e: ArithmeticException => + getCast match { + case Some(cast) => + throw DeltaErrors.castingCauseOverflowErrorInTableWrite( + cast.child.dataType, + cast.dataType, + columnName) + case None => throw e + } + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + getCast match { + case Some(child) => doGenCodeWithBetterErrorMsg(ctx, ev, child) + case None => child.genCode(ctx) + } + } + + def doGenCodeWithBetterErrorMsg(ctx: CodegenContext, ev: ExprCode, child: Cast): ExprCode = { + val childGen = child.genCode(ctx) + val exceptionClass = classOf[ArithmeticException].getCanonicalName + assert(child.isInstanceOf[Cast]) + val cast = child.asInstanceOf[Cast] + val fromDt = + ctx.addReferenceObj("from", cast.child.dataType, cast.child.dataType.getClass.getName) + val toDt = ctx.addReferenceObj("to", child.dataType, child.dataType.getClass.getName) + val col = ctx.addReferenceObj("colName", columnName, "java.lang.String") + // scalastyle:off line.size.limit + ev.copy(code = + code""" + boolean ${ev.isNull} = true; + ${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + try { + ${childGen.code} + ${ev.isNull} = ${childGen.isNull}; + ${ev.value} = ${childGen.value}; + } catch ($exceptionClass e) { + throw org.apache.spark.sql.delta.DeltaErrors + .castingCauseOverflowErrorInTableWrite($fromDt, $toDt, $col); + }""" + ) + // scalastyle:on line.size.limit + } + + override def dataType: DataType = child.dataType + + override def sql: String = child.sql + + override def toString: String = child.toString +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/actions/DeletionVectorDescriptor.scala 
b/spark/src/main/scala/org/apache/spark/sql/delta/actions/DeletionVectorDescriptor.scala new file mode 100644 index 00000000000..32a3da153fe --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/actions/DeletionVectorDescriptor.scala @@ -0,0 +1,276 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.actions + +import java.net.URI +import java.util.UUID + +import org.apache.spark.sql.delta.DeltaErrors +import org.apache.spark.sql.delta.util.{Codec, DeltaEncoder, JsonUtils} +import com.fasterxml.jackson.annotation.JsonIgnore +import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{Column, Encoder} +import org.apache.spark.sql.functions.{concat, lit, when} +import org.apache.spark.sql.types._ + +/** Information about a deletion vector attached to a file action. */ +case class DeletionVectorDescriptor( + /** + * Indicates how the DV is stored. + * Should be a single letter (see [[pathOrInlineDv]] below.) + */ + storageType: String, + + /** + * Contains the actual data that allows accessing the DV. + * + * Three options are currently supported: + * - `storageType="u"` format: `` + * The deletion vector is stored in a file with a path relative to + * the data directory of this Delta Table, and the file name can be + * reconstructed from the UUID. + * The encoded UUID is always exactly 20 characters, so the random + * prefix length can be determined any characters exceeding 20. + * - `storageType="i"` format: `` + * The deletion vector is stored inline in the log. + * - `storageType="p"` format: `` + * The DV is stored in a file with an absolute path given by this + * url. + */ + pathOrInlineDv: String, + /** + * Start of the data for this DV in number of bytes from the beginning of the file it is stored + * in. + * + * Always None when storageType = "i". + */ + @JsonDeserialize(contentAs = classOf[java.lang.Integer]) + offset: Option[Int] = None, + /** Size of the serialized DV in bytes (raw data size, i.e. before base85 encoding). */ + sizeInBytes: Int, + /** Number of rows the DV logically removes from the file. */ + cardinality: Long, + /** + * Transient property that is used to validate DV correctness. + * It is not stored in the log. 
+ */ + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + maxRowIndex: Option[Long] = None) { + + import DeletionVectorDescriptor._ + + @JsonIgnore + @transient + lazy val uniqueId: String = { + offset match { + case Some(offset) => s"$uniqueFileId@$offset" + case None => uniqueFileId + } + } + + @JsonIgnore + @transient + lazy val uniqueFileId: String = s"$storageType$pathOrInlineDv" + + @JsonIgnore + protected[delta] def isOnDisk: Boolean = !isInline + + @JsonIgnore + protected[delta] def isInline: Boolean = storageType == INLINE_DV_MARKER + + @JsonIgnore + protected[delta] def isRelative: Boolean = storageType == UUID_DV_MARKER + + @JsonIgnore + protected[delta] def isAbsolute: Boolean = storageType == PATH_DV_MARKER + + @JsonIgnore + protected[delta] def isEmpty: Boolean = cardinality == 0 + + def absolutePath(tableLocation: Path): Path = { + require(isOnDisk, "Can't get a path for an inline deletion vector") + storageType match { + case UUID_DV_MARKER => + // If the file was written with a random prefix, we have to extract that, + // before decoding the UUID. + val randomPrefixLength = pathOrInlineDv.length - Codec.Base85Codec.ENCODED_UUID_LENGTH + val (randomPrefix, encodedUuid) = pathOrInlineDv.splitAt(randomPrefixLength) + val uuid = Codec.Base85Codec.decodeUUID(encodedUuid) + assembleDeletionVectorPath(tableLocation, uuid, randomPrefix) + case PATH_DV_MARKER => + // Since there is no need for legacy support for relative paths for DVs, + // relative DVs should *always* use the UUID variant. + val parsedUri = new URI(pathOrInlineDv) + assert(parsedUri.isAbsolute, "Relative URIs are not supported for DVs") + new Path(parsedUri) + case _ => throw DeltaErrors.cannotReconstructPathFromURI(pathOrInlineDv) + } + } + + /** + * Produce a copy of this DV, but using an absolute path. + * + * If the DV already has an absolute path or is inline, then this is just a normal copy. + */ + def copyWithAbsolutePath(tableLocation: Path): DeletionVectorDescriptor = { + storageType match { + case UUID_DV_MARKER => + val absolutePath = this.absolutePath(tableLocation) + this.copy(storageType = PATH_DV_MARKER, pathOrInlineDv = absolutePath.toString) + case PATH_DV_MARKER | INLINE_DV_MARKER => this.copy() + } + } + + /** + * Produce a copy of this DV, with `pathOrInlineDv` replaced by a relative path based on `id` + * and `randomPrefix`. + * + * If the DV already has a relative path or is inline, then this is just a normal copy. + */ + def copyWithNewRelativePath(id: UUID, randomPrefix: String): DeletionVectorDescriptor = { + storageType match { + case PATH_DV_MARKER => + this.copy(storageType = UUID_DV_MARKER, pathOrInlineDv = encodeUUID(id, randomPrefix)) + case UUID_DV_MARKER | INLINE_DV_MARKER => this.copy() + } + } + + @JsonIgnore + def inlineData: Array[Byte] = { + require(isInline, "Can't get data for an on-disk DV from the log.") + // The sizeInBytes is used to remove any padding that might have been added during encoding. + Codec.Base85Codec.decodeBytes(pathOrInlineDv, sizeInBytes) + } + + /** Returns the estimated number of bytes required to serialize this object. 
*/ + @JsonIgnore + protected[delta] lazy val estimatedSerializedSize: Int = { + // (cardinality(8) + sizeInBytes(4)) + storageType + pathOrInlineDv + option[offset(4)] + 12 + storageType.length + pathOrInlineDv.length + (if (offset.isDefined) 4 else 0) + } +} + +object DeletionVectorDescriptor { + + /** String that is used in all file names generated by deletion vector store */ + val DELETION_VECTOR_FILE_NAME_CORE = "deletion_vector" + + // Markers to separate different kinds of DV storage. + final val PATH_DV_MARKER: String = "p" + final val INLINE_DV_MARKER: String = "i" + final val UUID_DV_MARKER: String = "u" + + final lazy val STRUCT_TYPE: StructType = + Action.addFileSchema("deletionVector").dataType.asInstanceOf[StructType] + + private lazy val _encoder = new DeltaEncoder[DeletionVectorDescriptor] + implicit def encoder: Encoder[DeletionVectorDescriptor] = _encoder.get + + /** Utility method to create an on-disk [[DeletionVectorDescriptor]] */ + def onDiskWithRelativePath( + id: UUID, + randomPrefix: String = "", + sizeInBytes: Int, + cardinality: Long, + offset: Option[Int] = None, + maxRowIndex: Option[Long] = None): DeletionVectorDescriptor = + DeletionVectorDescriptor( + storageType = UUID_DV_MARKER, + pathOrInlineDv = encodeUUID(id, randomPrefix), + offset = offset, + sizeInBytes = sizeInBytes, + cardinality = cardinality, + maxRowIndex = maxRowIndex) + + /** Utility method to create an on-disk [[DeletionVectorDescriptor]] */ + def onDiskWithAbsolutePath( + path: String, + sizeInBytes: Int, + cardinality: Long, + offset: Option[Int] = None, + maxRowIndex: Option[Long] = None): DeletionVectorDescriptor = + DeletionVectorDescriptor( + storageType = PATH_DV_MARKER, + pathOrInlineDv = path, + offset = offset, + sizeInBytes = sizeInBytes, + cardinality = cardinality, + maxRowIndex = maxRowIndex) + + /** Utility method to create an inline [[DeletionVectorDescriptor]] */ + def inlineInLog( + data: Array[Byte], + cardinality: Long): DeletionVectorDescriptor = + DeletionVectorDescriptor( + storageType = INLINE_DV_MARKER, + pathOrInlineDv = encodeData(data), + sizeInBytes = data.length, + cardinality = cardinality) + + /** + * This produces the same output as [[DeletionVectorDescriptor.uniqueId]] but as a column + * expression, so it can be used directly in a Spark query. + */ + def uniqueIdExpression(deletionVectorCol: Column): Column = { + when(deletionVectorCol("offset").isNotNull, + concat( + deletionVectorCol("storageType"), + deletionVectorCol("pathOrInlineDv"), + lit('@'), + deletionVectorCol("offset"))) + .otherwise(concat( + deletionVectorCol("storageType"), + deletionVectorCol("pathOrInlineDv"))) + } + + /** + * Return the unique path under `parentPath` that is based on `id`. + * + * Optionally, prepend a `prefix` to the name. + */ + def assembleDeletionVectorPath(targetParentPath: Path, id: UUID, prefix: String = ""): Path = { + val fileName = s"${DELETION_VECTOR_FILE_NAME_CORE}_${id}.bin" + if (prefix.nonEmpty) { + new Path(new Path(targetParentPath, prefix), fileName) + } else { + new Path(targetParentPath, fileName) + } + } + + /** Descriptor for an empty stored bitmap. 
*/ + val EMPTY: DeletionVectorDescriptor = DeletionVectorDescriptor( + storageType = INLINE_DV_MARKER, + pathOrInlineDv = "", + sizeInBytes = 0, + cardinality = 0) + + private[delta] def fromJson(jsonString: String): DeletionVectorDescriptor = { + JsonUtils.fromJson[DeletionVectorDescriptor](jsonString) + } + + private[delta] def encodeUUID(id: UUID, randomPrefix: String): String = { + val uuidData = Codec.Base85Codec.encodeUUID(id) + // This should always be true and we are relying on it for separating out the + // prefix again later without having to spend an extra character as a separator. + assert(uuidData.length == 20) + s"$randomPrefix$uuidData" + } + + def encodeData(bytes: Array[Byte]): String = Codec.Base85Codec.encodeBytes(bytes) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/actions/InMemoryLogReplay.scala b/spark/src/main/scala/org/apache/spark/sql/delta/actions/InMemoryLogReplay.scala new file mode 100644 index 00000000000..99d51324b5f --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/actions/InMemoryLogReplay.scala @@ -0,0 +1,117 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.actions + +import java.net.URI + + +/** + * Replays a history of actions, resolving them to produce the current state + * of the table. The protocol for resolution is as follows: + * - The most recent [[AddFile]] and accompanying metadata for any `(path, dv id)` tuple wins. + * - [[RemoveFile]] deletes a corresponding [[AddFile]] and is retained as a + * tombstone until `minFileRetentionTimestamp` has passed. + * A [[RemoveFile]] "corresponds" to the [[AddFile]] that matches both the parquet file URI + * *and* the deletion vector's URI (if any). + * - The most recent version for any `appId` in a [[SetTransaction]] wins. + * - The most recent [[Metadata]] wins. + * - The most recent [[Protocol]] version wins. + * - For each `(path, dv id)` tuple, this class should always output only one [[FileAction]] + * (either [[AddFile]] or [[RemoveFile]]) + * + * This class is not thread safe. 
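// Illustrative sketch of the replay rules described above, using the InMemoryLogReplay
// class that follows. File names are made-up examples and the AddFile/RemoveFile
// constructors are reduced to their required fields.
import org.apache.spark.sql.delta.actions.{AddFile, InMemoryLogReplay, RemoveFile}

val replay = new InMemoryLogReplay(
  minFileRetentionTimestamp = 0L,
  minSetTransactionRetentionTimestamp = None)

// Version 0 adds two files; version 1 removes one of them.
replay.append(0, Iterator(
  AddFile("part-00000.parquet", Map.empty, size = 1L, modificationTime = 0L, dataChange = true),
  AddFile("part-00001.parquet", Map.empty, size = 1L, modificationTime = 0L, dataChange = true)))
replay.append(1, Iterator(
  RemoveFile("part-00001.parquet", deletionTimestamp = Some(1L))))

// Only part-00000 remains active; part-00001 survives solely as a tombstone
// until minFileRetentionTimestamp passes.
val activePaths = replay.checkpoint.collect { case a: AddFile => a.path }.toList
assert(activePaths == List("part-00000.parquet"))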
+ */ +class InMemoryLogReplay( + minFileRetentionTimestamp: Long, + minSetTransactionRetentionTimestamp: Option[Long]) extends LogReplay { + + import InMemoryLogReplay._ + + private var currentProtocolVersion: Protocol = null + private var currentVersion: Long = -1 + private var currentMetaData: Metadata = null + private val transactions = new scala.collection.mutable.HashMap[String, SetTransaction]() + private val domainMetadatas = collection.mutable.Map.empty[String, DomainMetadata] + private val activeFiles = new scala.collection.mutable.HashMap[UniqueFileActionTuple, AddFile]() + private val tombstones = new scala.collection.mutable.HashMap[UniqueFileActionTuple, RemoveFile]() + + override def append(version: Long, actions: Iterator[Action]): Unit = { + assert(currentVersion == -1 || version == currentVersion + 1, + s"Attempted to replay version $version, but state is at $currentVersion") + currentVersion = version + actions.foreach { + case a: SetTransaction => + transactions(a.appId) = a + case a: DomainMetadata if a.removed => + domainMetadatas.remove(a.domain) + case a: DomainMetadata if !a.removed => + domainMetadatas(a.domain) = a + case _: CheckpointOnlyAction => // Ignore this while doing LogReplay + case a: Metadata => + currentMetaData = a + case a: Protocol => + currentProtocolVersion = a + case add: AddFile => + val uniquePath = UniqueFileActionTuple(add.pathAsUri, add.getDeletionVectorUniqueId) + activeFiles(uniquePath) = add.copy(dataChange = false) + // Remove the tombstone to make sure we only output one `FileAction`. + tombstones.remove(uniquePath) + case remove: RemoveFile => + val uniquePath = UniqueFileActionTuple(remove.pathAsUri, remove.getDeletionVectorUniqueId) + activeFiles.remove(uniquePath) + tombstones(uniquePath) = remove.copy(dataChange = false) + case _: CommitInfo => // do nothing + case _: AddCDCFile => // do nothing + case null => // Some crazy future feature. Ignore + } + } + + private def getTombstones: Iterable[FileAction] = { + tombstones.values.filter(_.delTimestamp > minFileRetentionTimestamp) + } + + private[delta] def getTransactions: Iterable[SetTransaction] = { + if (minSetTransactionRetentionTimestamp.isEmpty) { + transactions.values + } else { + transactions.values.filter { txn => + txn.lastUpdated.exists(_ > minSetTransactionRetentionTimestamp.get) + } + } + } + + private[delta] def getDomainMetadatas: Iterable[DomainMetadata] = domainMetadatas.values + + /** Returns the current state of the Table as an iterator of actions. */ + override def checkpoint: Iterator[Action] = { + val fileActions = (activeFiles.values ++ getTombstones).toSeq.sortBy(_.path) + + Option(currentProtocolVersion).toIterator ++ + Option(currentMetaData).toIterator ++ + getDomainMetadatas ++ + getTransactions ++ + fileActions.toIterator + } + + /** Returns all [[AddFile]] actions after the Log Replay */ + private[delta] def allFiles: Seq[AddFile] = activeFiles.values.toSeq +} + +object InMemoryLogReplay{ + /** The unit of path uniqueness in delta log actions is the tuple `(parquet file, dv)`. */ + final case class UniqueFileActionTuple(fileURI: URI, deletionVectorURI: Option[String]) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/actions/LogReplay.scala b/spark/src/main/scala/org/apache/spark/sql/delta/actions/LogReplay.scala new file mode 100644 index 00000000000..af603c26785 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/actions/LogReplay.scala @@ -0,0 +1,29 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.actions + +/** + * Replays a history of actions, resolving them to produce the current state + * of the table. + */ +trait LogReplay { + /** Append these `actions` to the state. Must only be called in ascending order of `version`. */ + def append(version: Long, actions: Iterator[Action]): Unit + + /** Returns the current state of the Table as an iterator of actions. */ + def checkpoint: Iterator[Action] +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/actions/TableFeatureSupport.scala b/spark/src/main/scala/org/apache/spark/sql/delta/actions/TableFeatureSupport.scala new file mode 100644 index 00000000000..c9c84547aae --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/actions/TableFeatureSupport.scala @@ -0,0 +1,501 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.actions + +import java.util.Locale + +import scala.collection.mutable + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.DeltaOperations.Operation +import com.fasterxml.jackson.annotation.JsonIgnore + +import org.apache.spark.sql.SparkSession + +/** + * Trait to be mixed into the [[Protocol]] case class to enable Table Features. + * + * Protocol reader version 3 and writer version 7 start to support reader and writer table + * features. Reader version 3 supports only reader-writer features in an explicit way, + * by adding its name to `readerFeatures`. Similarly, writer version 7 supports only writer-only + * or reader-writer features in an explicit way, by adding its name to `writerFeatures`. + * When reading or writing a table, clients MUST respect all supported features. + * + * See also the document of [[TableFeature]] for feature-specific terminologies. + */ +trait TableFeatureSupport { this: Protocol => + + /** Check if this protocol is capable of adding features into its `readerFeatures` field. */ + def supportsReaderFeatures: Boolean = + TableFeatureProtocolUtils.supportsReaderFeatures(minReaderVersion) + + /** Check if this protocol is capable of adding features into its `writerFeatures` field. */ + def supportsWriterFeatures: Boolean = + TableFeatureProtocolUtils.supportsWriterFeatures(minWriterVersion) + + /** + * Get a new Protocol object that has `feature` supported. 
Writer-only features will be added to + * `writerFeatures` field, and reader-writer features will be added to `readerFeatures` and + * `writerFeatures` fields. + * + * If `feature` is already implicitly supported in the current protocol's legacy reader or + * writer protocol version, the new protocol will not modify the original protocol version, + * i.e., the feature will not be explicitly added to the protocol's `readerFeatures` or + * `writerFeatures`. This is to avoid unnecessary protocol upgrade for feature that it already + * supports. + */ + def withFeature(feature: TableFeature): Protocol = { + def shouldAddRead: Boolean = { + if (supportsReaderFeatures) return true + if (feature.minReaderVersion <= minReaderVersion) return false + + throw DeltaErrors.tableFeatureRequiresHigherReaderProtocolVersion( + feature.name, + minReaderVersion, + feature.minReaderVersion) + } + + def shouldAddWrite: Boolean = { + if (supportsWriterFeatures) return true + if (feature.minWriterVersion <= minWriterVersion) return false + + throw DeltaErrors.tableFeatureRequiresHigherWriterProtocolVersion( + feature.name, + minWriterVersion, + feature.minWriterVersion) + } + + var shouldAddToReaderFeatures = feature.isReaderWriterFeature + var shouldAddToWriterFeatures = true + if (feature.isLegacyFeature) { + if (feature.isReaderWriterFeature) { + shouldAddToReaderFeatures = shouldAddRead + } + shouldAddToWriterFeatures = shouldAddWrite + } + + val protocolWithDependencies = withFeatures(feature.requiredFeatures) + protocolWithDependencies.withFeature( + feature.name, + addToReaderFeatures = shouldAddToReaderFeatures, + addToWriterFeatures = shouldAddToWriterFeatures) + } + + /** + * Get a new Protocol object with multiple features supported. + * + * See the documentation of [[withFeature]] for more information. + */ + def withFeatures(features: Iterable[TableFeature]): Protocol = { + features.foldLeft(this)(_.withFeature(_)) + } + + /** + * Get a new Protocol object with an additional feature descriptor. If `addToReaderFeatures` is + * set to `true`, the descriptor will be added to the protocol's `readerFeatures` field. If + * `addToWriterFeatures` is set to `true`, the descriptor will be added to the protocol's + * `writerFeatures` field. + * + * The method does not require the feature to be recognized by the client, therefore will not + * try keeping the protocol's `readerFeatures` and `writerFeatures` in sync. Use with caution. + */ + private[actions] def withFeature( + name: String, + addToReaderFeatures: Boolean, + addToWriterFeatures: Boolean): Protocol = { + if (addToReaderFeatures && !supportsReaderFeatures) { + throw DeltaErrors.tableFeatureRequiresHigherReaderProtocolVersion( + name, + currentVersion = minReaderVersion, + requiredVersion = TableFeatureProtocolUtils.TABLE_FEATURES_MIN_READER_VERSION) + } + if (addToWriterFeatures && !supportsWriterFeatures) { + throw DeltaErrors.tableFeatureRequiresHigherWriterProtocolVersion( + name, + currentVersion = minWriterVersion, + requiredVersion = TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION) + } + + val addedReaderFeatureOpt = if (addToReaderFeatures) Some(name) else None + val addedWriterFeatureOpt = if (addToWriterFeatures) Some(name) else None + + copy( + readerFeatures = this.readerFeatures.map(_ ++ addedReaderFeatureOpt), + writerFeatures = this.writerFeatures.map(_ ++ addedWriterFeatureOpt)) + } + + /** + * Get a new Protocol object with additional feature descriptors added to the protocol's + * `readerFeatures` field. 
+ * + * The method does not require the features to be recognized by the client, therefore will not + * try keeping the protocol's `readerFeatures` and `writerFeatures` in sync. Use with caution. + */ + private[delta] def withReaderFeatures(names: Iterable[String]): Protocol = { + names.foldLeft(this)( + _.withFeature(_, addToReaderFeatures = true, addToWriterFeatures = false)) + } + + /** + * Get a new Protocol object with additional feature descriptors added to the protocol's + * `writerFeatures` field. + * + * The method does not require the features to be recognized by the client, therefore will not + * try keeping the protocol's `readerFeatures` and `writerFeatures` in sync. Use with caution. + */ + private[delta] def withWriterFeatures(names: Iterable[String]): Protocol = { + names.foldLeft(this)( + _.withFeature(_, addToReaderFeatures = false, addToWriterFeatures = true)) + } + + /** + * Get all feature names in this protocol's `readerFeatures` field. Returns an empty set when + * this protocol does not support reader features. + */ + def readerFeatureNames: Set[String] = this.readerFeatures.getOrElse(Set()) + + /** + * Get a set of all feature names in this protocol's `writerFeatures` field. Returns an empty + * set when this protocol does not support writer features. + */ + def writerFeatureNames: Set[String] = this.writerFeatures.getOrElse(Set()) + + /** + * Get a set of all feature names in this protocol's `readerFeatures` and `writerFeatures` + * field. Returns an empty set when this protocol supports none of reader and writer features. + */ + @JsonIgnore + lazy val readerAndWriterFeatureNames: Set[String] = readerFeatureNames ++ writerFeatureNames + + /** + * Same as above but returns a sequence of [[TableFeature]] instead of a set of feature names. + */ + @JsonIgnore + lazy val readerAndWriterFeatures: Seq[TableFeature] = + readerAndWriterFeatureNames.toSeq.flatMap(TableFeature.featureNameToFeature) + + /** + * Get all features that are implicitly supported by this protocol, for example, `Protocol(1,2)` + * implicitly supports `appendOnly` and `invariants`. When this protocol is capable of requiring + * writer features, no feature can be implicitly supported. + */ + @JsonIgnore + lazy val implicitlySupportedFeatures: Set[TableFeature] = { + if (supportsReaderFeatures && supportsWriterFeatures) { + // this protocol uses both reader and writer features, no feature can be implicitly supported + Set() + } else { + TableFeature.allSupportedFeaturesMap.values + .filter(_.isLegacyFeature) + .filterNot(supportsReaderFeatures || this.minReaderVersion < _.minReaderVersion) + .filterNot(supportsWriterFeatures || this.minWriterVersion < _.minWriterVersion) + .toSet + } + } + + /** + * Get all features that are supported by this protocol, implicitly and explicitly. When the + * protocol supports table features, this method returns the same set of features as + * [[readerAndWriterFeatureNames]]; when the protocol does not support table features, this + * method becomes equivalent to [[implicitlySupportedFeatures]]. + */ + @JsonIgnore + lazy val implicitlyAndExplicitlySupportedFeatures: Set[TableFeature] = { + readerAndWriterFeatureNames.flatMap(TableFeature.featureNameToFeature) ++ + implicitlySupportedFeatures + } + + /** + * Determine whether this protocol can be safely upgraded to a new protocol `to`. This means: + * - this protocol has reader protocol version less than or equals to `to`. + * - this protocol has writer protocol version less than or equals to `to`. 
+ * - all features supported by this protocol are supported by `to`. + * + * Examples regarding feature status: + * - from `[appendOnly]` to `[appendOnly]` => allowed + * - from `[appendOnly, changeDataFeed]` to `[appendOnly]` => not allowed + * - from `[appendOnly]` to `[appendOnly, changeDataFeed]` => allowed + */ + def canUpgradeTo(to: Protocol): Boolean = { + if (to.minReaderVersion < this.minReaderVersion) return false + if (to.minWriterVersion < this.minWriterVersion) return false + + val thisFeatures = + this.readerAndWriterFeatureNames ++ this.implicitlySupportedFeatures.map(_.name) + val toFeatures = to.readerAndWriterFeatureNames ++ to.implicitlySupportedFeatures.map(_.name) + // all features supported by `this` are supported by `to` + thisFeatures.subsetOf(toFeatures) + } + + /** + * Determine whether this protocol can be safely downgraded to a new protocol `to`. This + * includes the following: + * - The current protocol needs to support at least writer features. This is because protocol + * downgrade is only supported with table features. + * - The protocol version can only be downgraded when there are no non-legacy table features. + * - We can only remove one feature at a time. + * - When downgrading protocol versions, the resulting versions must support exactly the same + * set of legacy features supported by the current protocol. + * + * Note, this not an exhaustive list of downgrade rules. Rather, we check the most important + * downgrade invariants. We also perform checks during feature removal at + * [[AlterTableDropFeatureDeltaCommand]]. + */ + def canDowngradeTo(to: Protocol, droppedFeatureName: String): Boolean = { + if (!supportsWriterFeatures) return false + + // When `to` protocol does not have any features version downgrades are possible. However, + // the current protocol needs to contain one non-legacy feature. We also allow downgrade when + // there are only legacy features. This is to accommodate the case when the user attempts to + // remove a legacy feature in a table that only contains legacy features. + if (to.readerAndWriterFeatureNames.isEmpty) { + val featureNames = readerAndWriterFeatureNames - droppedFeatureName + val sameLegacyFeaturesSupported = featureNames == to.implicitlySupportedFeatures.map(_.name) + val minRequiredVersions = TableFeatureProtocolUtils.minimumRequiredVersions( + featureNames.flatMap(TableFeature.featureNameToFeature).toSeq) + + return sameLegacyFeaturesSupported && + (to.minReaderVersion, to.minWriterVersion) == minRequiredVersions && + readerAndWriterFeatures.filterNot(_.isLegacyFeature).size <= 1 + } + + // When `to` protocol contains table features we cannot downgrade the protocol version. + if (to.minReaderVersion != this.minReaderVersion) return false + if (to.minWriterVersion != this.minWriterVersion) return false + + // Can only remove a maximum of one feature at a time. + (this.readerAndWriterFeatureNames -- to.readerAndWriterFeatureNames).size == 1 + } + + /** + * True if this protocol can be upgraded or downgraded to the 'to' protocol. + */ + def canTransitionTo(to: Protocol, op: Operation): Boolean = { + op match { + case drop: DeltaOperations.DropTableFeature => canDowngradeTo(to, drop.featureName) + case _ => canUpgradeTo(to) + } + } + + /** + * Merge this protocol with multiple `protocols` to have the highest reader and writer versions + * plus all explicitly and implicitly supported features. 
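// Illustrative sketch of the upgrade rule implemented above: the target protocol must
// have versions at least as high as the current ones and must support every feature the
// current protocol supports (legacy versions imply their legacy features).
import org.apache.spark.sql.delta.actions.Protocol

val basic = Protocol(1, 2)   // implicitly supports appendOnly and invariants
val richer = Protocol(1, 4)  // higher legacy writer version, a superset of legacy features

assert(basic.canUpgradeTo(richer))   // gaining features is allowed
assert(!richer.canUpgradeTo(basic))  // dropping features or versions is not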
+ */ + def merge(others: Protocol*): Protocol = { + val protocols = this +: others + val mergedReaderVersion = protocols.map(_.minReaderVersion).max + val mergedWriterVersion = protocols.map(_.minWriterVersion).max + val mergedReaderFeatures = protocols.flatMap(_.readerFeatureNames) + val mergedWriterFeatures = protocols.flatMap(_.writerFeatureNames) + val mergedImplicitFeatures = protocols.flatMap(_.implicitlySupportedFeatures) + + val mergedProtocol = Protocol(mergedReaderVersion, mergedWriterVersion) + .withReaderFeatures(mergedReaderFeatures) + .withWriterFeatures(mergedWriterFeatures) + + if (mergedProtocol.supportsReaderFeatures || mergedProtocol.supportsWriterFeatures) { + mergedProtocol.withFeatures(mergedImplicitFeatures) + } else { + mergedProtocol + } + } + + /** + * Remove writer feature from protocol. To remove a writer feature we only need to + * remove it from the writerFeatures set. + */ + private[delta] def removeWriterFeature(targetWriterFeature: TableFeature): Protocol = { + require(targetWriterFeature.isRemovable) + require(!targetWriterFeature.isReaderWriterFeature) + copy(writerFeatures = writerFeatures.map(_ - targetWriterFeature.name)) + } + + /** + * Remove reader+writer feature from protocol. To remove a reader+writer feature we need to + * remove it from the readerFeatures set and the writerFeatures set. + */ + private[delta] def removeReaderWriterFeature( + targetReaderWriterFeature: TableFeature): Protocol = { + require(targetReaderWriterFeature.isRemovable) + require(targetReaderWriterFeature.isReaderWriterFeature) + val newReaderFeatures = readerFeatures.map(_ - targetReaderWriterFeature.name) + val newWriterFeatures = writerFeatures.map(_ - targetReaderWriterFeature.name) + copy(readerFeatures = newReaderFeatures, writerFeatures = newWriterFeatures) + } + + /** + * Remove feature wrapper for removing either Reader/Writer or Writer features. We assume + * the feature exists in the protocol. There is a relevant validation at + * [[AlterTableDropFeatureDeltaCommand]]. We also require targetFeature is removable. + * + * When the feature to remove is the last explicit table feature of the table we also remove the + * TableFeatures feature and downgrade the protocol. + */ + def removeFeature(targetFeature: TableFeature): Protocol = { + require(targetFeature.isRemovable) + val newProtocol = targetFeature match { + case f@(_: ReaderWriterFeature | _: LegacyReaderWriterFeature) => + removeReaderWriterFeature(f) + case f@(_: WriterFeature | _: LegacyWriterFeature) => + removeWriterFeature(f) + case f => + throw DeltaErrors.dropTableFeatureNonRemovableFeature(f.name) + } + newProtocol.downgradeProtocolVersionsIfNeeded + } + + /** + * If the current protocol does not contain any non-legacy table features and the remaining + * set of legacy table features exactly matches a legacy protocol version, it downgrades the + * protocol to the minimum reader/writer versions required to support the protocol's legacy + * features. + * + * Note, when a table is initialized with table features (3, 7), by default there are no legacy + * features. After we remove the last native feature we downgrade the protocol to (1, 1). 
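// Illustrative sketch of merge for plain legacy protocols: reader and writer versions
// are max'ed and, since none of the inputs uses table features, the result stays a
// legacy protocol with no explicit feature sets.
import org.apache.spark.sql.delta.actions.Protocol

val merged = Protocol(1, 2).merge(Protocol(1, 4), Protocol(2, 5))
assert(merged.minReaderVersion == 2 && merged.minWriterVersion == 5)
assert(merged.readerFeatures.isEmpty && merged.writerFeatures.isEmpty)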
+ */ + def downgradeProtocolVersionsIfNeeded: Protocol = { + if (!readerAndWriterFeatures.forall(_.isLegacyFeature)) return this + + val (minReaderVersion, minWriterVersion) = + TableFeatureProtocolUtils.minimumRequiredVersions(readerAndWriterFeatures) + val newProtocol = Protocol(minReaderVersion, minWriterVersion) + + require( + !newProtocol.supportsReaderFeatures && !newProtocol.supportsWriterFeatures, + s"Downgraded protocol should not support table features, but got $newProtocol.") + + // Ensure the legacy protocol supports features exactly as the current protocol. + if (this.implicitlyAndExplicitlySupportedFeatures == + newProtocol.implicitlyAndExplicitlySupportedFeatures) { + newProtocol + } else { + this + } + } + + /** + * Check if a `feature` is supported by this protocol. This means either (a) the protocol does + * not support table features and implicitly supports the feature, or (b) the protocol supports + * table features and references the feature. + */ + def isFeatureSupported(feature: TableFeature): Boolean = { + // legacy feature + legacy protocol + (feature.isLegacyFeature && this.implicitlySupportedFeatures.contains(feature)) || + // new protocol + readerAndWriterFeatureNames.contains(feature.name) + } +} + +object TableFeatureProtocolUtils { + + /** Prop prefix in table properties. */ + val FEATURE_PROP_PREFIX = "delta.feature." + + /** Prop prefix in Spark sessions configs. */ + val DEFAULT_FEATURE_PROP_PREFIX = "spark.databricks.delta.properties.defaults.feature." + + /** + * The string constant "enabled" for uses in table properties. + * @deprecated + * This value is deprecated to avoid confusion with features that are actually enabled by + * table metadata. Use [[FEATURE_PROP_SUPPORTED]] instead. + */ + val FEATURE_PROP_ENABLED = "enabled" + + /** The string constant "supported" for uses in table properties. */ + val FEATURE_PROP_SUPPORTED = "supported" + + /** Min reader version that supports reader features. */ + val TABLE_FEATURES_MIN_READER_VERSION = 3 + + /** Min reader version that supports writer features. */ + val TABLE_FEATURES_MIN_WRITER_VERSION = 7 + + /** Get the table property config key for the `feature`. */ + def propertyKey(feature: TableFeature): String = propertyKey(feature.name) + + /** Get the table property config key for the `featureName`. */ + def propertyKey(featureName: String): String = + s"$FEATURE_PROP_PREFIX$featureName" + + /** Get the session default config key for the `feature`. */ + def defaultPropertyKey(feature: TableFeature): String = defaultPropertyKey(feature.name) + + /** Get the session default config key for the `featureName`. */ + def defaultPropertyKey(featureName: String): String = + s"$DEFAULT_FEATURE_PROP_PREFIX$featureName" + + /** + * Determine whether a [[Protocol]] with the given reader protocol version is capable of adding + * features into its `readerFeatures` field. + */ + def supportsReaderFeatures(readerVersion: Int): Boolean = { + readerVersion >= TABLE_FEATURES_MIN_READER_VERSION + } + + /** + * Determine whether a [[Protocol]] with the given writer protocol version is capable of adding + * features into its `writerFeatures` field. + */ + def supportsWriterFeatures(writerVersion: Int): Boolean = { + writerVersion >= TABLE_FEATURES_MIN_WRITER_VERSION + } + + /** + * Get a set of [[TableFeature]]s representing supported features set in a table properties map. 
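// Illustrative sketch of the key-derivation and version-gate helpers above;
// "deletionVectors" stands in for any feature name here.
import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils._

assert(propertyKey("deletionVectors") == "delta.feature.deletionVectors")
assert(defaultPropertyKey("deletionVectors") ==
  "spark.databricks.delta.properties.defaults.feature.deletionVectors")

// Table features become available at reader version 3 and writer version 7.
assert(supportsReaderFeatures(3) && !supportsReaderFeatures(2))
assert(supportsWriterFeatures(7) && !supportsWriterFeatures(6))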
+ */ + def getSupportedFeaturesFromTableConfigs(configs: Map[String, String]): Set[TableFeature] = { + val featureConfigs = configs.filterKeys(_.startsWith(FEATURE_PROP_PREFIX)) + val unsupportedFeatureConfigs = mutable.Set.empty[String] + val collectedFeatures = featureConfigs.flatMap { case (key, value) => + // Feature name is lower cased in table properties but not in Spark session configs. + // Feature status is not lower cased in any case. + val name = key.stripPrefix(FEATURE_PROP_PREFIX).toLowerCase(Locale.ROOT) + val status = value.toLowerCase(Locale.ROOT) + if (status != FEATURE_PROP_SUPPORTED && status != FEATURE_PROP_ENABLED) { + throw DeltaErrors.unsupportedTableFeatureStatusException(name, status) + } + val featureOpt = TableFeature.featureNameToFeature(name) + if (!featureOpt.isDefined) { + unsupportedFeatureConfigs += key + } + featureOpt + }.toSet + if (unsupportedFeatureConfigs.nonEmpty) { + throw DeltaErrors.unsupportedTableFeatureConfigsException(unsupportedFeatureConfigs) + } + collectedFeatures + } + + /** + * Checks if the the given table property key is a Table Protocol property, i.e., + * `delta.minReaderVersion`, `delta.minWriterVersion`, ``delta.ignoreProtocolDefaults``, or + * anything that starts with `delta.feature.` + */ + def isTableProtocolProperty(key: String): Boolean = { + key == Protocol.MIN_READER_VERSION_PROP || + key == Protocol.MIN_WRITER_VERSION_PROP || + key == DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.key || + key.startsWith(TableFeatureProtocolUtils.FEATURE_PROP_PREFIX) + } + + /** + * Returns the minimum reader/writer versions required to support all provided features. + */ + def minimumRequiredVersions(features: Seq[TableFeature]): (Int, Int) = + ((features.map(_.minReaderVersion) :+ 1).max, (features.map(_.minWriterVersion) :+ 1).max) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala b/spark/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala new file mode 100644 index 00000000000..a5d017ffedf --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala @@ -0,0 +1,1306 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.actions + +// scalastyle:off import.ordering.noEmptyLine +import java.net.URI +import java.sql.Timestamp +import java.util.Locale +import java.util.concurrent.TimeUnit + +import scala.annotation.tailrec +import scala.collection.mutable +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.commands.DeletionVectorUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.{JsonUtils, Utils => DeltaUtils} +import org.apache.spark.sql.delta.util.FileNames +import com.fasterxml.jackson.annotation._ +import com.fasterxml.jackson.annotation.JsonInclude.Include +import com.fasterxml.jackson.core.JsonGenerator +import com.fasterxml.jackson.databind._ +import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonSerialize} +import com.fasterxml.jackson.databind.node.ObjectNode + +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{Column, Encoder, SparkSession} +import org.apache.spark.sql.catalyst.ScalaReflection +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.util.Utils + +object Action { + /** + * The maximum version of the protocol that this version of Delta understands by default. + * + * Use [[supportedProtocolVersion()]] instead, except to define new feature-gated versions. + */ + private[actions] val readerVersion = TableFeatureProtocolUtils.TABLE_FEATURES_MIN_READER_VERSION + private[actions] val writerVersion = TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION + private[actions] val protocolVersion: Protocol = Protocol(readerVersion, writerVersion) + + /** + * The maximum protocol version we are currently allowed to use, with or without all recognized + * features. Optionally, some features can be excluded using `featuresToExclude`. + */ + private[delta] def supportedProtocolVersion( + withAllFeatures: Boolean = true, + featuresToExclude: Seq[TableFeature] = Seq.empty): Protocol = { + if (withAllFeatures) { + val featuresToAdd = TableFeature.allSupportedFeaturesMap.values.toSet -- featuresToExclude + protocolVersion.withFeatures(featuresToAdd) + } else { + protocolVersion + } + } + + /** All reader protocol version numbers supported by the system. */ + private[delta] lazy val supportedReaderVersionNumbers: Set[Int] = { + val allVersions = + supportedProtocolVersion().implicitlyAndExplicitlySupportedFeatures.map(_.minReaderVersion) + + 1 // Version 1 does not introduce new feature, it's always supported. + if (DeltaUtils.isTesting) { + allVersions + 0 // Allow Version 0 in tests + } else { + allVersions - 0 // Delete 0 produced by writer-only features + } + } + + /** All writer protocol version numbers supported by the system. */ + private[delta] lazy val supportedWriterVersionNumbers: Set[Int] = { + val allVersions = + supportedProtocolVersion().implicitlyAndExplicitlySupportedFeatures.map(_.minWriterVersion) + + 1 // Version 1 does not introduce new feature, it's always supported. 
+ if (DeltaUtils.isTesting) { + allVersions + 0 // Allow Version 0 in tests + } else { + allVersions - 0 // Delete 0 produced by reader-only features - we don't have any - for safety + } + } + + def fromJson(json: String): Action = { + JsonUtils.mapper.readValue[SingleAction](json).unwrap + } + + lazy val logSchema = ExpressionEncoder[SingleAction]().schema + lazy val addFileSchema = logSchema("add").dataType.asInstanceOf[StructType] +} + +/** + * Represents a single change to the state of a Delta table. An order sequence + * of actions can be replayed using [[InMemoryLogReplay]] to derive the state + * of the table at a given point in time. + */ +sealed trait Action { + def wrap: SingleAction + def json: String = JsonUtils.toJson(wrap) +} + +/** + * Used to block older clients from reading or writing the log when backwards incompatible changes + * are made to the protocol. Readers and writers are responsible for checking that they meet the + * minimum versions before performing any other operations. + * + * This action allows us to explicitly block older clients in the case of a breaking change to the + * protocol. Absent a protocol change, Clients MUST silently ignore messages and fields that they + * do not understand. + * + * Note: Please initialize this class using the companion object's `apply` method, which will + * assign correct values (`Set()` vs `None`) to [[readerFeatures]] and [[writerFeatures]]. + */ +case class Protocol private ( + minReaderVersion: Int, + minWriterVersion: Int, + @JsonInclude(Include.NON_ABSENT) // write to JSON only when the field is not `None` + readerFeatures: Option[Set[String]], + @JsonInclude(Include.NON_ABSENT) + writerFeatures: Option[Set[String]]) + extends Action + with TableFeatureSupport { + // Correctness check + // Reader and writer versions must match the status of reader and writer features + require( + supportsReaderFeatures == readerFeatures.isDefined, + "Mismatched minReaderVersion and readerFeatures.") + require( + supportsWriterFeatures == writerFeatures.isDefined, + "Mismatched minWriterVersion and writerFeatures.") + + // When reader is on table features, writer must be on table features too + if (supportsReaderFeatures && !supportsWriterFeatures) { + throw DeltaErrors.tableFeatureReadRequiresWriteException( + TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION) + } + + override def wrap: SingleAction = SingleAction(protocol = this) + + /** + * Return a reader-friendly string representation of this Protocol. + * + * Returns the protocol versions and referenced features when the protocol does support table + * features, such as `3,7,{},{appendOnly}` and `2,7,None,{appendOnly}`. Otherwise returns only + * the protocol version such as `2,6`. + */ + @JsonIgnore + lazy val simpleString: String = { + if (!supportsReaderFeatures && !supportsWriterFeatures) { + s"$minReaderVersion,$minWriterVersion" + } else { + val readerFeaturesStr = readerFeatures + .map(_.toSeq.sorted.mkString("[", ",", "]")) + .getOrElse("None") + val writerFeaturesStr = writerFeatures + .map(_.toSeq.sorted.mkString("[", ",", "]")) + .getOrElse("None") + s"$minReaderVersion,$minWriterVersion,$readerFeaturesStr,$writerFeaturesStr" + } + } + + override def toString: String = s"Protocol($simpleString)" +} + +object Protocol { + import TableFeatureProtocolUtils._ + + val MIN_READER_VERSION_PROP = "delta.minReaderVersion" + val MIN_WRITER_VERSION_PROP = "delta.minWriterVersion" + + /** + * Construct a [[Protocol]] case class of the given reader and writer versions. 
This method will + * initialize table features fields when reader and writer versions are capable. + */ + def apply( + minReaderVersion: Int = Action.readerVersion, + minWriterVersion: Int = Action.writerVersion): Protocol = { + new Protocol( + minReaderVersion = minReaderVersion, + minWriterVersion = minWriterVersion, + readerFeatures = if (supportsReaderFeatures(minReaderVersion)) Some(Set()) else None, + writerFeatures = if (supportsWriterFeatures(minWriterVersion)) Some(Set()) else None) + } + + def forTableFeature(tf: TableFeature): Protocol = { + val writerFeatures = Some(Set(tf.name)) // every table feature is a writer feature + val readerFeatures = if (tf.isReaderWriterFeature) writerFeatures else None + val minReaderVersion = if (readerFeatures.isDefined) TABLE_FEATURES_MIN_READER_VERSION else 1 + val minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION + + new Protocol(minReaderVersion, minWriterVersion, readerFeatures, writerFeatures) + } + + /** + * Picks the protocol version for a new table given the Delta table metadata. The result + * satisfies all active features in the metadata and protocol-related configs in table + * properties, i.e., configs with keys [[MIN_READER_VERSION_PROP]], [[MIN_WRITER_VERSION_PROP]], + * and [[FEATURE_PROP_PREFIX]]. This method will also consider protocol-related configs: default + * reader version, default writer version, and features enabled by + * [[DEFAULT_FEATURE_PROP_PREFIX]]. + */ + def forNewTable(spark: SparkSession, metadataOpt: Option[Metadata]): Protocol = { + // `minProtocolComponentsFromMetadata` does not consider sessions defaults, + // so we must copy sessions defaults to table metadata. + val conf = spark.sessionState.conf + val ignoreProtocolDefaults = DeltaConfigs.ignoreProtocolDefaultsIsSet( + sqlConfs = conf, + tableConf = metadataOpt.map(_.configuration).getOrElse(Map.empty)) + val defaultGlobalConf = if (ignoreProtocolDefaults) { + Map(MIN_READER_VERSION_PROP -> 1.toString, MIN_WRITER_VERSION_PROP -> 1.toString) + } else { + Map( + MIN_READER_VERSION_PROP -> + conf.getConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION).toString, + MIN_WRITER_VERSION_PROP -> + conf.getConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION).toString) + } + val overrideGlobalConf = DeltaConfigs + .mergeGlobalConfigs( + sqlConfs = spark.sessionState.conf, + tableConf = Map.empty, + ignoreProtocolConfsOpt = Some(ignoreProtocolDefaults)) + // We care only about protocol related stuff + .filter { case (k, _) => TableFeatureProtocolUtils.isTableProtocolProperty(k) } + var metadata = metadataOpt.getOrElse(Metadata()) + // Priority: user-provided > override of session defaults > session defaults + metadata = metadata.copy(configuration = + defaultGlobalConf ++ overrideGlobalConf ++ metadata.configuration) + + val (readerVersion, writerVersion, enabledFeatures) = + minProtocolComponentsFromMetadata(spark, metadata) + Protocol(readerVersion, writerVersion).withFeatures(enabledFeatures) + } + + /** + * Returns the smallest set of table features that contains `features` and that also contains + * all dependencies of all features in the returned set. + */ + @tailrec + private def getDependencyClosure(features: Set[TableFeature]): Set[TableFeature] = { + val requiredFeatures = features ++ features.flatMap(_.requiredFeatures) + if (features == requiredFeatures) { + features + } else { + getDependencyClosure(requiredFeatures) + } + } + + /** + * Extracts all table features that are enabled by the given metadata and the optional protocol. 
+ * This includes all already enabled features (if a protocol is provided), the features enabled + * directly by metadata, and all of their (transitive) dependencies. + */ + def extractAutomaticallyEnabledFeatures( + spark: SparkSession, + metadata: Metadata, + protocol: Option[Protocol] = None): Set[TableFeature] = { + val protocolEnabledFeatures = protocol + .map(_.writerFeatureNames) + .getOrElse(Set.empty) + .flatMap(TableFeature.featureNameToFeature) + val metadataEnabledFeatures = TableFeature + .allSupportedFeaturesMap.values + .collect { + case f: TableFeature with FeatureAutomaticallyEnabledByMetadata + if f.metadataRequiresFeatureToBeEnabled(metadata, spark) => + f.asInstanceOf[TableFeature] + } + .toSet + + getDependencyClosure(protocolEnabledFeatures ++ metadataEnabledFeatures) + } + + /** + * Given the Delta table metadata, returns the minimum required reader and writer version that + * satisfies all enabled features in the metadata and protocol-related configs in table + * properties, i.e., configs with keys [[MIN_READER_VERSION_PROP]], [[MIN_WRITER_VERSION_PROP]], + * and [[FEATURE_PROP_PREFIX]]. + * + * This function returns the protocol versions and features individually instead of a + * [[Protocol]], so the caller can identify the features that caused the protocol version. For + * example, if the return values are (2, 5, columnMapping), the caller can safely ignore all + * other features required by the protocol with a reader and writer version of 2 and 5. + * + * Note that this method does not consider protocol versions and features configured in session + * defaults. To make them effective, copy them to `metadata` using + * [[DeltaConfigs.mergeGlobalConfigs]]. + */ + def minProtocolComponentsFromMetadata( + spark: SparkSession, + metadata: Metadata): (Int, Int, Set[TableFeature]) = { + val tableConf = metadata.configuration + // There might be features enabled by the table properties aka + // `CREATE TABLE ... TBLPROPERTIES ...`. + val tablePropEnabledFeatures = getSupportedFeaturesFromTableConfigs(tableConf) + // To enable features that are being dependent by `tablePropEnabledFeatures`, we pass it here to + // let [[getDependencyClosure]] collect them. + val metaEnabledFeatures = + extractAutomaticallyEnabledFeatures( + spark, metadata, Some(Protocol().withFeatures(tablePropEnabledFeatures))) + val allEnabledFeatures = tablePropEnabledFeatures ++ metaEnabledFeatures + + // Determine the min reader and writer version required by features in table properties or + // metadata. + // If any table property is specified: + // we start from (3, 7) or (0, 7) depending on the existence of any writer-only feature. + // If there's no table property: + // if no feature is enabled or all features are legacy, we start from (0, 0); + // if any feature is native and is reader-writer, we start from (3, 7); + // otherwise we start from (0, 7) because there must exist a native writer-only feature. 
+ var (readerVersionFromFeatures, writerVersionFromFeatures) = { + if (tablePropEnabledFeatures.exists(_.isReaderWriterFeature)) { + (TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + } else if (tablePropEnabledFeatures.nonEmpty) { + (0, TABLE_FEATURES_MIN_WRITER_VERSION) + } else if (metaEnabledFeatures.forall(_.isLegacyFeature)) { // also true for empty set + (0, 0) + } else if (metaEnabledFeatures.exists(f => !f.isLegacyFeature && f.isReaderWriterFeature)) { + (TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + } else { + (0, TABLE_FEATURES_MIN_WRITER_VERSION) + } + } + allEnabledFeatures.foreach { feature => + readerVersionFromFeatures = math.max(readerVersionFromFeatures, feature.minReaderVersion) + writerVersionFromFeatures = math.max(writerVersionFromFeatures, feature.minWriterVersion) + } + + // Protocol version provided in table properties can upgrade the protocol, but only when they + // are higher than which required by the enabled features. + val (readerVersionFromTableConfOpt, writerVersionFromTableConfOpt) = + getProtocolVersionsFromTableConf(tableConf) + + // Decide the final protocol version: + // a. 1, aka the lowest version possible + // b. version required by manually enabled features and metadata features + // c. version defined as table properties + val finalReaderVersion = + Seq(1, readerVersionFromFeatures, readerVersionFromTableConfOpt.getOrElse(0)).max + val finalWriterVersion = + Seq(1, writerVersionFromFeatures, writerVersionFromTableConfOpt.getOrElse(0)).max + + (finalReaderVersion, finalWriterVersion, allEnabledFeatures) + } + + /** + * Given the Delta table metadata, returns the minimum required reader and writer version + * that satisfies all enabled table features in the metadata plus all enabled features as a set. + * + * This function returns the protocol versions and features individually instead of a + * [[Protocol]], so the caller can identify the features that caused the protocol version. For + * example, if the return values are (2, 5, columnMapping), the caller can safely ignore all + * other features required by the protocol with a reader and writer version of 2 and 5. + * + * This method does not process protocol-related configs in table properties or session + * defaults, i.e., configs with keys [[MIN_READER_VERSION_PROP]], [[MIN_WRITER_VERSION_PROP]], + * and [[FEATURE_PROP_PREFIX]]. + */ + def minProtocolComponentsFromAutomaticallyEnabledFeatures( + spark: SparkSession, + metadata: Metadata): (Int, Int, Set[TableFeature]) = { + val enabledFeatures = extractAutomaticallyEnabledFeatures(spark, metadata) + var (readerVersion, writerVersion) = (0, 0) + enabledFeatures.foreach { feature => + readerVersion = math.max(readerVersion, feature.minReaderVersion) + writerVersion = math.max(writerVersion, feature.minWriterVersion) + } + + (readerVersion, writerVersion, enabledFeatures) + } + + /** Cast the table property for the protocol version to an integer. 
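// Illustrative sketch of the table-property helpers defined just below; the map mimics
// the delta.minReaderVersion / delta.minWriterVersion entries of a table's configuration.
import org.apache.spark.sql.delta.actions.Protocol

val tableConf = Map(
  "delta.minReaderVersion" -> "2",
  "delta.minWriterVersion" -> "5")

assert(Protocol.getProtocolVersionsFromTableConf(tableConf) == (Some(2), Some(5)))
// A non-numeric value surfaces DeltaErrors.protocolPropNotIntException instead.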
*/ + private def tryCastProtocolVersionToInt(key: String, value: String): Int = { + try value.toInt + catch { + case _: NumberFormatException => + throw DeltaErrors.protocolPropNotIntException(key, value) + } + } + + def getReaderVersionFromTableConf(conf: Map[String, String]): Option[Int] = { + conf.get(MIN_READER_VERSION_PROP).map(tryCastProtocolVersionToInt(MIN_READER_VERSION_PROP, _)) + } + + def getWriterVersionFromTableConf(conf: Map[String, String]): Option[Int] = { + conf.get(MIN_WRITER_VERSION_PROP).map(tryCastProtocolVersionToInt(MIN_WRITER_VERSION_PROP, _)) + } + + def getProtocolVersionsFromTableConf(conf: Map[String, String]): (Option[Int], Option[Int]) = { + (getReaderVersionFromTableConf(conf), getWriterVersionFromTableConf(conf)) + } + + /** Assert a table metadata contains no protocol-related table properties. */ + private def assertMetadataContainsNoProtocolProps(metadata: Metadata): Unit = { + assert( + !metadata.configuration.contains(MIN_READER_VERSION_PROP), + "Should not have the " + + s"protocol version ($MIN_READER_VERSION_PROP) as part of table properties") + assert( + !metadata.configuration.contains(MIN_WRITER_VERSION_PROP), + "Should not have the " + + s"protocol version ($MIN_WRITER_VERSION_PROP) as part of table properties") + assert( + !metadata.configuration.keys.exists(_.startsWith(FEATURE_PROP_PREFIX)), + "Should not have " + + s"table features (starts with '$FEATURE_PROP_PREFIX') as part of table properties") + assert( + !metadata.configuration.contains(DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.key), + "Should not have the table property " + + s"${DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.key} stored in table metadata") + } + + /** + * Upgrade the current protocol to satisfy all auto-update capable features required by the table + * metadata. An Delta error will be thrown if a non-auto-update capable feature is required by + * the metadata and not in the resulting protocol, in such a case the user must run `ALTER TABLE` + * to add support for this feature beforehand using the `delta.feature.featureName` table + * property. + * + * Refer to [[FeatureAutomaticallyEnabledByMetadata.automaticallyUpdateProtocolOfExistingTables]] + * to know more about "auto-update capable" features. + * + * Note: this method only considers metadata-enabled features. To avoid confusion, the caller + * must apply and remove protocol-related table properties from the metadata before calling this + * method. + */ + def upgradeProtocolFromMetadataForExistingTable( + spark: SparkSession, + metadata: Metadata, + current: Protocol): Option[Protocol] = { + assertMetadataContainsNoProtocolProps(metadata) + + val (readerVersion, writerVersion, minRequiredFeatures) = + minProtocolComponentsFromAutomaticallyEnabledFeatures(spark, metadata) + + // Increment the reader and writer version to accurately add enabled legacy table features + // either to the implicitly enabled table features or the table feature lists + val required = Protocol( + readerVersion.max(current.minReaderVersion), writerVersion.max(current.minWriterVersion)) + .withFeatures(minRequiredFeatures) + if (!required.canUpgradeTo(current)) { + // When the current protocol does not satisfy metadata requirement, some additional features + // must be supported by the protocol. We assert those features can actually perform the + // auto-update. 
+ assertMetadataTableFeaturesAutomaticallySupported( + current.implicitlyAndExplicitlySupportedFeatures, + required.implicitlyAndExplicitlySupportedFeatures) + Some(required.merge(current)) + } else { + None + } + } + + /** + * Ensure all features listed in `currentFeatures` are also listed in `requiredFeatures`, or, if + * one is not listed, it must be capable to auto-update a protocol. + * + * Refer to [[FeatureAutomaticallyEnabledByMetadata.automaticallyUpdateProtocolOfExistingTables]] + * to know more about "auto-update capable" features. + * + * Note: Caller must make sure `requiredFeatures` is obtained from a min protocol that satisfies + * a table metadata. + */ + private def assertMetadataTableFeaturesAutomaticallySupported( + currentFeatures: Set[TableFeature], + requiredFeatures: Set[TableFeature]): Unit = { + val (autoUpdateCapableFeatures, nonAutoUpdateCapableFeatures) = + requiredFeatures.diff(currentFeatures) + .collect { case f: FeatureAutomaticallyEnabledByMetadata => f } + .partition(_.automaticallyUpdateProtocolOfExistingTables) + if (nonAutoUpdateCapableFeatures.nonEmpty) { + // The "current features" we give the user are which from the original protocol, plus + // features newly supported by table properties in the current transaction, plus + // metadata-enabled features that are auto-update capable. The first two are provided by + // `currentFeatures`. + throw DeltaErrors.tableFeaturesRequireManualEnablementException( + nonAutoUpdateCapableFeatures, + currentFeatures ++ autoUpdateCapableFeatures) + } + } + + /** + * Verify that the table properties satisfy legality constraints. Throw an exception if not. + */ + def assertTablePropertyConstraintsSatisfied( + spark: SparkSession, + metadata: Metadata, + snapshot: Snapshot): Unit = { + import DeltaTablePropertyValidationFailedSubClass._ + + val tableName = if (metadata.name != null) metadata.name else metadata.id + + val configs = metadata.configuration.map { case (k, v) => k.toLowerCase(Locale.ROOT) -> v } + val dvsEnabled = { + val lowerCaseKey = DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key.toLowerCase(Locale.ROOT) + configs.get(lowerCaseKey).exists(_.toBoolean) + } + if (dvsEnabled && metadata.format.provider != "parquet") { + // DVs only work with parquet-based delta tables. + throw new DeltaTablePropertyValidationFailedException( + table = tableName, + subClass = PersistentDeletionVectorsInNonParquetTable) + } + val manifestGenerationEnabled = { + val lowerCaseKey = DeltaConfigs.SYMLINK_FORMAT_MANIFEST_ENABLED.key.toLowerCase(Locale.ROOT) + configs.get(lowerCaseKey).exists(_.toBoolean) + } + if (dvsEnabled && manifestGenerationEnabled) { + throw new DeltaTablePropertyValidationFailedException( + table = tableName, + subClass = PersistentDeletionVectorsWithIncrementalManifestGeneration) + } + if (manifestGenerationEnabled) { + // Only allow enabling this, if there are no DVs present. + if (!DeletionVectorUtils.isTableDVFree(snapshot)) { + throw new DeltaTablePropertyValidationFailedException( + table = tableName, + subClass = ExistingDeletionVectorsWithIncrementalManifestGeneration) + } + } + } +} + +/** + * Sets the committed version for a given application. Used to make operations + * like streaming append idempotent. 
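// Illustrative sketch of the idempotent-append marker described above, using the
// SetTransaction case class defined below; the appId is a hypothetical query id.
import org.apache.spark.sql.delta.actions.SetTransaction

val txn = SetTransaction(
  appId = "streaming-query-42",
  version = 7L,
  lastUpdated = Some(System.currentTimeMillis()))

// During log replay the most recent version per appId wins, so a writer can skip
// re-committing any batch whose version is <= the replayed SetTransaction version.
val asJson = txn.json  // serialized through the wrapping SingleAction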
+ */ +case class SetTransaction( + appId: String, + version: Long, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + lastUpdated: Option[Long]) extends Action { + override def wrap: SingleAction = SingleAction(txn = this) +} + +/** + * The domain metadata action contains a configuration (string-string map) for a named metadata + * domain. Two overlapping transactions conflict if they both contain a domain metadata action for + * the same metadata domain. + * + * [[domain]]: A string used to identify a specific feature. + * [[configuration]]: A string containing configuration options for the conflict domain. + * [[removed]]: If it is true it serves as a tombstone to logically delete a [[DomainMetadata]] + * action. + */ +case class DomainMetadata( + domain: String, + configuration: String, + removed: Boolean) extends Action { + override def wrap: SingleAction = SingleAction(domainMetadata = this) +} + +/** Actions pertaining to the addition and removal of files. */ +sealed trait FileAction extends Action { + val path: String + val dataChange: Boolean + @JsonIgnore + val tags: Map[String, String] + @JsonIgnore + lazy val pathAsUri: URI = new URI(path) + @JsonIgnore + def numLogicalRecords: Option[Long] + @JsonIgnore + val partitionValues: Map[String, String] + @JsonIgnore + def getFileSize: Long + def stats: String + def deletionVector: DeletionVectorDescriptor + + /** Returns the approx size of the remaining records after excluding the deleted ones. */ + @JsonIgnore + def estLogicalFileSize: Option[Long] + + /** + * Return tag value if tags is not null and the tag present. + */ + @JsonIgnore + def getTag(tagName: String): Option[String] = Option(tags).flatMap(_.get(tagName)) + + + def toPath: Path = new Path(pathAsUri) +} + +case class ParsedStatsFields( + numLogicalRecords: Option[Long], + tightBounds: Option[Boolean]) + +/** + * Common trait for AddFile and RemoveFile actions providing methods for the computation of + * logical, physical and deleted number of records based on the statistics and the Deletion Vector + * of the file. + */ +trait HasNumRecords { + this: FileAction => + + @JsonIgnore + @transient + protected lazy val parsedStatsFields: Option[ParsedStatsFields] = Option(stats).collect { + case stats if stats.nonEmpty => + val node = new ObjectMapper().readTree(stats) + val numLogicalRecords = if (node.has("numRecords")) { + Some(node.get("numRecords")).filterNot(_.isNull).map(_.asLong()) + .map(_ - numDeletedRecords) + } else None + val tightBounds = if (node.has("tightBounds")) { + Some(node.get("tightBounds")).filterNot(_.isNull).map(_.asBoolean()) + } else None + + ParsedStatsFields(numLogicalRecords, tightBounds) + } + + /** Returns the number of logical records, which do not include those marked as deleted. */ + @JsonIgnore + @transient + override lazy val numLogicalRecords: Option[Long] = parsedStatsFields.flatMap(_.numLogicalRecords) + + /** Returns the number of records marked as deleted. */ + @JsonIgnore + def numDeletedRecords: Long = deletionVector match { + case dv: DeletionVectorDescriptor => dv.cardinality + case _ => 0L + } + + /** Returns the total number of records, including those marked as deleted. */ + @JsonIgnore + def numPhysicalRecords: Option[Long] = numLogicalRecords.map(_ + numDeletedRecords) + + /** Returns the estimated size of the logical records in the file. 
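// Illustrative sketch of the record-count bookkeeping above: `numRecords` in the stats
// is the physical row count, and the deletion vector's cardinality is subtracted to
// obtain the logical count. Paths and numbers are made-up examples.
import org.apache.spark.sql.delta.actions.{AddFile, DeletionVectorDescriptor}

val dv = DeletionVectorDescriptor.onDiskWithAbsolutePath(
  path = "s3://bucket/table/deletion_vector_example.bin",
  sizeInBytes = 40,
  cardinality = 10)

val add = AddFile(
  path = "part-00000.parquet",
  partitionValues = Map.empty,
  size = 1024L,
  modificationTime = 0L,
  dataChange = true,
  stats = """{"numRecords":100,"tightBounds":false}""",
  deletionVector = dv)

assert(add.numDeletedRecords == 10L)
assert(add.numLogicalRecords == Some(90L))   // 100 physical - 10 deleted
assert(add.numPhysicalRecords == Some(100L))
assert(add.tightBounds == Some(false))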
*/ + @JsonIgnore + override def estLogicalFileSize: Option[Long] = + logicalToPhysicalRecordsRatio.map(n => (n * getFileSize).toLong) + + /** Returns the ratio of the logical number of records to the total number of records. */ + @JsonIgnore + def logicalToPhysicalRecordsRatio: Option[Double] = numLogicalRecords.map { numLogicalRecords => + numLogicalRecords.toDouble / (numLogicalRecords + numDeletedRecords) + } + + /** Returns the ratio of number of deleted records to the total number of records. */ + @JsonIgnore + def deletedToPhysicalRecordsRatio: Option[Double] = logicalToPhysicalRecordsRatio.map(1.0d - _) + + /** Returns whether the statistics are tight or wide. */ + @JsonIgnore + @transient + lazy val tightBounds: Option[Boolean] = parsedStatsFields.flatMap(_.tightBounds) +} + +/** + * Adds a new file to the table. When multiple [[AddFile]] file actions + * are seen with the same `path` only the metadata from the last one is + * kept. + * + * [[path]] is URL-encoded. + */ +case class AddFile( + override val path: String, + @JsonInclude(JsonInclude.Include.ALWAYS) + partitionValues: Map[String, String], + size: Long, + modificationTime: Long, + override val dataChange: Boolean, + override val stats: String = null, + override val tags: Map[String, String] = null, + override val deletionVector: DeletionVectorDescriptor = null, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + baseRowId: Option[Long] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + defaultRowCommitVersion: Option[Long] = None, + clusteringProvider: Option[String] = None +) extends FileAction with HasNumRecords { + require(path.nonEmpty) + + override def wrap: SingleAction = SingleAction(add = this) + + def remove: RemoveFile = removeWithTimestamp() + + def removeWithTimestamp( + timestamp: Long = System.currentTimeMillis(), + dataChange: Boolean = true + ): RemoveFile = { + var newTags = tags + // scalastyle:off + RemoveFile( + path, Some(timestamp), dataChange, + extendedFileMetadata = Some(true), partitionValues, Some(size), newTags, + deletionVector = deletionVector, + baseRowId = baseRowId, + defaultRowCommitVersion = defaultRowCommitVersion, + stats = stats + ) + // scalastyle:on + } + + /** + * Logically remove rows by associating a `deletionVector` with the file. + * @param deletionVector: The descriptor of the DV that marks rows as deleted. + * @param dataChange: When false, the actions are marked as no-data-change actions. + */ + def removeRows( + deletionVector: DeletionVectorDescriptor, + updateStats: Boolean, + dataChange: Boolean = true): (AddFile, RemoveFile) = { + // Verify DV does not contain any invalid row indexes. Note, maxRowIndex is optional + // and not all commands may set it when updating DVs. + (numPhysicalRecords, deletionVector.maxRowIndex) match { + case (Some(numPhysicalRecords), Some(maxRowIndex)) + if (maxRowIndex + 1 > numPhysicalRecords) => + throw DeltaErrors.deletionVectorInvalidRowIndex() + case _ => // Nothing to check. + } + // We make sure maxRowIndex is not stored in the log. + val dvDescriptorWithoutMaxRowIndex = deletionVector.maxRowIndex match { + case Some(_) => deletionVector.copy(maxRowIndex = None) + case _ => deletionVector + } + var addFileWithNewDv = + this.copy(deletionVector = dvDescriptorWithoutMaxRowIndex, dataChange = dataChange) + if (updateStats) { + addFileWithNewDv = addFileWithNewDv.withoutTightBoundStats + } + val removeFileWithOldDv = this.removeWithTimestamp(dataChange = dataChange) + + // Sanity check for incremental DV updates. 
+ if (addFileWithNewDv.numDeletedRecords < removeFileWithOldDv.numDeletedRecords) { + throw DeltaErrors.deletionVectorSizeMismatch() + } + + (addFileWithNewDv, removeFileWithOldDv) + } + + /** + * Return the unique id of the deletion vector, if present, or `None` if there's no DV. + * + * The unique id differentiates DVs, even if there are multiple in the same file + * or the DV is stored inline. + */ + @JsonIgnore + def getDeletionVectorUniqueId: Option[String] = Option(deletionVector).map(_.uniqueId) + + /** Update stats to have tightBounds = false, if file has any stats. */ + def withoutTightBoundStats: AddFile = { + if (stats == null || stats.isEmpty) { + this + } else { + val node = JsonUtils.mapper.readTree(stats).asInstanceOf[ObjectNode] + if (node.has("tightBounds") && + !node.get("tightBounds").asBoolean(true)) { + this + } else { + node.put("tightBounds", false) + val newStatsString = JsonUtils.mapper.writer.writeValueAsString(node) + this.copy(stats = newStatsString) + } + } + } + + @JsonIgnore + lazy val insertionTime: Long = tag(AddFile.Tags.INSERTION_TIME).map(_.toLong) + // From modification time in milliseconds to microseconds. + .getOrElse(TimeUnit.MICROSECONDS.convert(modificationTime, TimeUnit.MILLISECONDS)) + + + def copyWithTags(newTags: Map[String, String]): AddFile = + copy(tags = Option(tags).getOrElse(Map.empty) ++ newTags) + + + def tag(tag: AddFile.Tags.KeyType): Option[String] = getTag(tag.name) + + def copyWithTag(tag: AddFile.Tags.KeyType, value: String): AddFile = + copy(tags = Option(tags).getOrElse(Map.empty) + (tag.name -> value)) + + def copyWithoutTag(tag: AddFile.Tags.KeyType): AddFile = { + if (tags == null) { + this + } else { + copy(tags = tags - tag.name) + } + } + + @JsonIgnore + override def getFileSize: Long = size + + /** + * Before serializing make sure deletionVector.maxRowIndex is not defined. + * This is only a transient property and it is not intended to be stored in the log. + */ + override def json: String = { + if (deletionVector != null) assert(!deletionVector.maxRowIndex.isDefined) + super.json + } + +} + +object AddFile { + /** + * Misc file-level metadata. + * + * The convention is that clients may safely ignore any/all of these tags and this should never + * have an impact on correctness. + * + * Otherwise, the information should go as a field of the AddFile action itself and the Delta + * protocol version should be bumped. + */ + object Tags { + sealed abstract class KeyType(val name: String) + + /** [[ZCUBE_ID]]: identifier of the OPTIMIZE ZORDER BY job that this file was produced by */ + object ZCUBE_ID extends AddFile.Tags.KeyType("ZCUBE_ID") + + /** [[ZCUBE_ZORDER_BY]]: ZOrdering of the corresponding ZCube */ + object ZCUBE_ZORDER_BY extends AddFile.Tags.KeyType("ZCUBE_ZORDER_BY") + + /** [[ZCUBE_ZORDER_CURVE]]: Clustering strategy of the corresponding ZCube */ + object ZCUBE_ZORDER_CURVE extends AddFile.Tags.KeyType("ZCUBE_ZORDER_CURVE") + + /** + * [[INSERTION_TIME]]: the latest timestamp in micro seconds when the data in the file + * was inserted + */ + object INSERTION_TIME extends AddFile.Tags.KeyType("INSERTION_TIME") + + + /** [[PARTITION_ID]]: rdd partition id that has written the file, will not be stored in the + physical log, only used for communication */ + object PARTITION_ID extends AddFile.Tags.KeyType("PARTITION_ID") + + /** [[OPTIMIZE_TARGET_SIZE]]: target file size the file was optimized to. 
*/ + object OPTIMIZE_TARGET_SIZE extends AddFile.Tags.KeyType("OPTIMIZE_TARGET_SIZE") + + + /** [[ICEBERG_COMPAT_VERSION]]: IcebergCompat version */ + object ICEBERG_COMPAT_VERSION extends AddFile.Tags.KeyType("ICEBERG_COMPAT_VERSION") + } + + /** Convert a [[Tags.KeyType]] to a string to be used in the AddMap.tags Map[String, String]. */ + def tag(tagKey: Tags.KeyType): String = tagKey.name +} + +/** + * Logical removal of a given file from the reservoir. Acts as a tombstone before a file is + * deleted permanently. + * + * Note that for protocol compatibility reasons, the fields `partitionValues`, `size`, and `tags` + * are only present when the extendedFileMetadata flag is true. New writers should generally be + * setting this flag, but old writers (and FSCK) won't, so readers must check this flag before + * attempting to consume those values. + * + * Since old tables would not have `extendedFileMetadata` and `size` field, we should make them + * nullable by setting their type Option. + */ +// scalastyle:off +case class RemoveFile( + override val path: String, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + deletionTimestamp: Option[Long], + override val dataChange: Boolean = true, + extendedFileMetadata: Option[Boolean] = None, + partitionValues: Map[String, String] = null, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + size: Option[Long] = None, + override val tags: Map[String, String] = null, + override val deletionVector: DeletionVectorDescriptor = null, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + baseRowId: Option[Long] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + defaultRowCommitVersion: Option[Long] = None, + override val stats: String = null +) extends FileAction with HasNumRecords { + override def wrap: SingleAction = SingleAction(remove = this) + + @JsonIgnore + val delTimestamp: Long = deletionTimestamp.getOrElse(0L) + + /** + * Return the unique id of the deletion vector, if present, or `None` if there's no DV. + * + * The unique id differentiates DVs, even if there are multiple in the same file + * or the DV is stored inline. + */ + @JsonIgnore + def getDeletionVectorUniqueId: Option[String] = Option(deletionVector).map(_.uniqueId) + + /** + * Create a copy with the new tag. `extendedFileMetadata` is copied unchanged. + */ + def copyWithTag(tag: String, value: String): RemoveFile = copy( + tags = Option(tags).getOrElse(Map.empty) + (tag -> value)) + + /** + * Create a copy without the tag. + */ + def copyWithoutTag(tag: String): RemoveFile = + copy(tags = Option(tags).getOrElse(Map.empty) - tag) + + @JsonIgnore + override def getFileSize: Long = size.getOrElse(0L) + +} +// scalastyle:on + +/** + * A change file containing CDC data for the Delta version it's within. Non-CDC readers should + * ignore this, CDC readers should scan all ChangeFiles in a version rather than computing + * changes from AddFile and RemoveFile actions. 
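+ * For illustration, a CDF read that consumes these files (option names as exposed by the Delta
+ * data source) could look like:
+ * {{{
+ *   spark.read.format("delta")
+ *     .option("readChangeFeed", "true")
+ *     .option("startingVersion", 1)
+ *     .load("/path/to/table")
+ * }}}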
+ */ +case class AddCDCFile( + override val path: String, + @JsonInclude(JsonInclude.Include.ALWAYS) + partitionValues: Map[String, String], + size: Long, + override val tags: Map[String, String] = null) extends FileAction { + override val dataChange = false + @JsonIgnore + override val stats: String = null + @JsonIgnore + override val deletionVector: DeletionVectorDescriptor = null + + override def wrap: SingleAction = SingleAction(cdc = this) + + @JsonIgnore + override def getFileSize: Long = size + + @JsonIgnore + override def estLogicalFileSize: Option[Long] = None + + @JsonIgnore + override def numLogicalRecords: Option[Long] = None +} + + +case class Format( + provider: String = "parquet", + // If we support `options` in future, we should not store any file system options since they may + // contain credentials. + options: Map[String, String] = Map.empty) + +/** + * Updates the metadata of the table. Only the last update to the [[Metadata]] + * of a table is kept. It is the responsibility of the writer to ensure that + * any data already present in the table is still valid after any change. + */ +case class Metadata( + id: String = if (Utils.isTesting) "testId" else java.util.UUID.randomUUID().toString, + name: String = null, + description: String = null, + format: Format = Format(), + schemaString: String = null, + partitionColumns: Seq[String] = Nil, + configuration: Map[String, String] = Map.empty, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + createdTime: Option[Long] = None) extends Action { + + // The `schema` and `partitionSchema` methods should be vals or lazy vals, NOT + // defs, because parsing StructTypes from JSON is extremely expensive and has + // caused perf. problems here in the past: + + /** + * Column mapping mode for this table + */ + @JsonIgnore + lazy val columnMappingMode: DeltaColumnMappingMode = + DeltaConfigs.COLUMN_MAPPING_MODE.fromMetaData(this) + + /** + * Column mapping max id for this table + */ + @JsonIgnore + lazy val columnMappingMaxId: Long = + DeltaConfigs.COLUMN_MAPPING_MAX_ID.fromMetaData(this) + + /** Returns the schema as a [[StructType]] */ + @JsonIgnore + lazy val schema: StructType = Option(schemaString) + .map(DataType.fromJson(_).asInstanceOf[StructType]) + .getOrElse(StructType.apply(Nil)) + + /** Returns the partitionSchema as a [[StructType]] */ + @JsonIgnore + lazy val partitionSchema: StructType = + new StructType(partitionColumns.map(c => schema(c)).toArray) + + /** Partition value keys in the AddFile map. */ + @JsonIgnore + lazy val physicalPartitionSchema: StructType = + DeltaColumnMapping.renameColumns(partitionSchema) + + /** Columns written out to files. */ + @JsonIgnore + lazy val dataSchema: StructType = { + val partitions = partitionColumns.toSet + StructType(schema.filterNot(f => partitions.contains(f.name))) + } + + /** Partition value written out to files */ + @JsonIgnore + lazy val physicalPartitionColumns: Seq[String] = physicalPartitionSchema.fieldNames.toSeq + + /** + * Columns whose type should never be changed. For example, if a column is used by a generated + * column, changing its type may break the constraint defined by the generation expression. Hence, + * we should never change its type. 
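+   * For example (illustrative DDL), with a column defined as
+   * `eventDate DATE GENERATED ALWAYS AS (CAST(eventTime AS DATE))`, the type of `eventTime` must
+   * stay fixed because changing it could invalidate the generation expression.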
+ */ + @JsonIgnore + lazy val fixedTypeColumns: Set[String] = + GeneratedColumn.getGeneratedColumnsAndColumnsUsedByGeneratedColumns(schema) + + /** + * Store non-partition columns and their corresponding [[OptimizablePartitionExpression]] which + * can be used to create partition filters from data filters of these non-partition columns. + */ + @JsonIgnore + lazy val optimizablePartitionExpressions: Map[String, Seq[OptimizablePartitionExpression]] + = GeneratedColumn.getOptimizablePartitionExpressions(schema, partitionSchema) + + override def wrap: SingleAction = SingleAction(metaData = this) +} + +/** + * Interface for objects that represents the information for a commit. Commits can be referred to + * using a version and timestamp. The timestamp of a commit comes from the remote storage + * `lastModifiedTime`, and can be adjusted for clock skew. Hence we have the method `withTimestamp`. + */ +trait CommitMarker { + /** Get the timestamp of the commit as millis after the epoch. */ + def getTimestamp: Long + /** Return a copy object of this object with the given timestamp. */ + def withTimestamp(timestamp: Long): CommitMarker + /** Get the version of the commit. */ + def getVersion: Long +} + +/** + * Holds provenance information about changes to the table. This [[Action]] + * is not stored in the checkpoint and has reduced compatibility guarantees. + * Information stored in it is best effort (i.e. can be falsified by the writer). + * + * @param isBlindAppend Whether this commit has blindly appended without caring about existing files + * @param engineInfo The information for the engine that makes the commit. + * If a commit is made by Delta Lake 1.1.0 or above, it will be + * `Apache-Spark/x.y.z Delta-Lake/x.y.z`. + */ +case class CommitInfo( + // The commit version should be left unfilled during commit(). When reading a delta file, we can + // infer the commit version from the file name and fill in this field then. 
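+    // For example (illustrative file name), a CommitInfo read back from
+    // 00000000000000000010.json would carry version = Some(10).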
+ @JsonDeserialize(contentAs = classOf[java.lang.Long]) + version: Option[Long], + timestamp: Timestamp, + userId: Option[String], + userName: Option[String], + operation: String, + @JsonSerialize(using = classOf[JsonMapSerializer]) + operationParameters: Map[String, String], + job: Option[JobInfo], + notebook: Option[NotebookInfo], + clusterId: Option[String], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + readVersion: Option[Long], + isolationLevel: Option[String], + isBlindAppend: Option[Boolean], + operationMetrics: Option[Map[String, String]], + userMetadata: Option[String], + tags: Option[Map[String, String]], + engineInfo: Option[String], + txnId: Option[String]) extends Action with CommitMarker { + override def wrap: SingleAction = SingleAction(commitInfo = this) + + override def withTimestamp(timestamp: Long): CommitInfo = { + this.copy(timestamp = new Timestamp(timestamp)) + } + + override def getTimestamp: Long = timestamp.getTime + @JsonIgnore + override def getVersion: Long = version.get + +} + +case class JobInfo( + jobId: String, + jobName: String, + jobRunId: String, + runId: String, + jobOwnerId: String, + triggerType: String) + +object JobInfo { + def fromContext(context: Map[String, String]): Option[JobInfo] = { + context.get("jobId").map { jobId => + JobInfo( + jobId, + context.get("jobName").orNull, + context.get("multitaskParentRunId").orNull, + context.get("runId").orNull, + context.get("jobOwnerId").orNull, + context.get("jobTriggerType").orNull) + } + } +} + +case class NotebookInfo(notebookId: String) + +object NotebookInfo { + def fromContext(context: Map[String, String]): Option[NotebookInfo] = { + context.get("notebookId").map { nbId => NotebookInfo(nbId) } + } +} + +object CommitInfo { + def empty(version: Option[Long] = None): CommitInfo = { + CommitInfo(version, null, None, None, null, null, None, None, + None, None, None, None, None, None, None, None, None) + } + + // scalastyle:off argcount + def apply( + time: Long, + operation: String, + operationParameters: Map[String, String], + commandContext: Map[String, String], + readVersion: Option[Long], + isolationLevel: Option[String], + isBlindAppend: Option[Boolean], + operationMetrics: Option[Map[String, String]], + userMetadata: Option[String], + tags: Option[Map[String, String]], + txnId: Option[String]): CommitInfo = { + + val getUserName = commandContext.get("user").flatMap { + case "unknown" => None + case other => Option(other) + } + + CommitInfo( + None, + new Timestamp(time), + commandContext.get("userId"), + getUserName, + operation, + operationParameters, + JobInfo.fromContext(commandContext), + NotebookInfo.fromContext(commandContext), + commandContext.get("clusterId"), + readVersion, + isolationLevel, + isBlindAppend, + operationMetrics, + userMetadata, + tags, + getEngineInfo, + txnId) + } + // scalastyle:on argcount + + private def getEngineInfo: Option[String] = { + Some(s"Apache-Spark/${org.apache.spark.SPARK_VERSION} Delta-Lake/${io.delta.VERSION}") + } + +} + +/** A trait to represent actions which can only be part of Checkpoint */ +sealed trait CheckpointOnlyAction extends Action + +/** + * An [[Action]] containing the information about a sidecar file. + * + * @param path - sidecar path relative to `_delta_log/_sidecar` directory + * @param sizeInBytes - size in bytes for the sidecar file + * @param modificationTime - modification time of the sidecar file + * @param tags - attributes of the sidecar file, defaults to null (which is semantically same as an + * empty Map). 
This is kept null to ensure that the field is not present in the + * generated json. + */ +case class SidecarFile( + path: String, + sizeInBytes: Long, + modificationTime: Long, + tags: Map[String, String] = null) + extends Action with CheckpointOnlyAction { + + override def wrap: SingleAction = SingleAction(sidecar = this) + + def toFileStatus(logPath: Path): FileStatus = { + val partFilePath = new Path(FileNames.sidecarDirPath(logPath), path) + new FileStatus(sizeInBytes, false, 0, 0, modificationTime, partFilePath) + } +} + +object SidecarFile { + def apply(fileStatus: SerializableFileStatus): SidecarFile = { + SidecarFile(fileStatus.getHadoopPath.getName, fileStatus.length, fileStatus.modificationTime) + } + + def apply(fileStatus: FileStatus): SidecarFile = { + SidecarFile(fileStatus.getPath.getName, fileStatus.getLen, fileStatus.getModificationTime) + } +} + +/** + * Holds information about the Delta Checkpoint. This action will only be part of checkpoints. + * + * @param version version of the checkpoint + * @param tags attributes of the checkpoint, defaults to null (which is semantically same as an + * empty Map). This is kept null to ensure that the field is not present in the + * generated json. + */ +case class CheckpointMetadata( + version: Long, + tags: Map[String, String] = null) + extends Action with CheckpointOnlyAction { + + override def wrap: SingleAction = SingleAction(checkpointMetadata = this) +} + + +/** A serialization helper to create a common action envelope. */ +case class SingleAction( + txn: SetTransaction = null, + add: AddFile = null, + remove: RemoveFile = null, + metaData: Metadata = null, + protocol: Protocol = null, + cdc: AddCDCFile = null, + checkpointMetadata: CheckpointMetadata = null, + sidecar: SidecarFile = null, + domainMetadata: DomainMetadata = null, + commitInfo: CommitInfo = null) { + + def unwrap: Action = { + if (add != null) { + add + } else if (remove != null) { + remove + } else if (metaData != null) { + metaData + } else if (txn != null) { + txn + } else if (protocol != null) { + protocol + } else if (cdc != null) { + cdc + } else if (sidecar != null) { + sidecar + } else if (checkpointMetadata != null) { + checkpointMetadata + } else if (domainMetadata != null) { + domainMetadata + } else if (commitInfo != null) { + commitInfo + } else { + null + } + } +} + +object SingleAction extends Logging { + implicit def encoder: Encoder[SingleAction] = + org.apache.spark.sql.delta.implicits.singleActionEncoder + + implicit def addFileEncoder: Encoder[AddFile] = + org.apache.spark.sql.delta.implicits.addFileEncoder + + lazy val nullLitForRemoveFile: Column = + new Column(Literal(null, ScalaReflection.schemaFor[RemoveFile].dataType)) + + lazy val nullLitForAddCDCFile: Column = + new Column(Literal(null, ScalaReflection.schemaFor[AddCDCFile].dataType)) + + lazy val nullLitForMetadataAction: Column = + new Column(Literal(null, ScalaReflection.schemaFor[Metadata].dataType)) +} + +/** Serializes Maps containing JSON strings without extra escaping. 
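+ * For example (illustrative entry), a "predicate" parameter whose value is already the JSON text
+ * ["(id > 5)"] is written out verbatim instead of being re-escaped into a quoted JSON string.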
*/ +class JsonMapSerializer extends JsonSerializer[Map[String, String]] { + def serialize( + parameters: Map[String, String], + jgen: JsonGenerator, + provider: SerializerProvider): Unit = { + jgen.writeStartObject() + parameters.foreach { case (key, value) => + if (value == null) { + jgen.writeNullField(key) + } else { + jgen.writeFieldName(key) + // Write value as raw data, since it's already JSON text + jgen.writeRawValue(value) + } + } + jgen.writeEndObject() + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaCatalog.scala b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaCatalog.scala new file mode 100644 index 00000000000..15927e7e248 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaCatalog.scala @@ -0,0 +1,880 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.catalog + +// scalastyle:off import.ordering.noEmptyLine +import java.sql.Timestamp +import java.util +import java.util.Locale + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.sql.delta.skipping.clustering.ClusteredTableUtils +import org.apache.spark.sql.delta.skipping.clustering.temp.ClusterBySpec +import org.apache.spark.sql.delta.skipping.clustering.temp.{ClusterByTransform => TempClusterByTransform} +import org.apache.spark.sql.delta.{DeltaConfigs, DeltaErrors, DeltaTableUtils} +import org.apache.spark.sql.delta.{DeltaLog, DeltaOptions} +import org.apache.spark.sql.delta.DeltaTableIdentifier.gluePermissionError +import org.apache.spark.sql.delta.commands._ +import org.apache.spark.sql.delta.constraints.{AddConstraint, DropConstraint} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.{DeltaDataSource, DeltaSourceUtils, DeltaSQLConf} +import org.apache.spark.sql.delta.stats.StatisticsCollection +import org.apache.spark.sql.delta.tablefeatures.DropFeature +import org.apache.spark.sql.delta.util.PartitionUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, UnresolvedAttribute, UnresolvedFieldName, UnresolvedFieldPosition} +import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogTableType, CatalogUtils, SessionCatalog} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, QualifiedColType} +import org.apache.spark.sql.connector.catalog.{DelegatingCatalogExtension, Identifier, StagedTable, StagingTableCatalog, SupportsWrite, Table, TableCapability, TableCatalog, TableChange, V1Table} +import 
org.apache.spark.sql.connector.catalog.TableCapability._ +import org.apache.spark.sql.connector.catalog.TableChange._ +import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Literal, NamedReference, Transform} +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, V1Write, WriteBuilder} +import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.InsertableRelation +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} + + +/** + * A Catalog extension which can properly handle the interaction between the HiveMetaStore and + * Delta tables. It delegates all operations DataSources other than Delta to the SparkCatalog. + */ +class DeltaCatalog extends DelegatingCatalogExtension + with StagingTableCatalog + with SupportsPathIdentifier + with DeltaLogging { + + + val spark = SparkSession.active + + /** + * Creates a Delta table + * + * @param ident The identifier of the table + * @param schema The schema of the table + * @param partitions The partition transforms for the table + * @param allTableProperties The table properties that configure the behavior of the table or + * provide information about the table + * @param writeOptions Options specific to the write during table creation or replacement + * @param sourceQuery A query if this CREATE request came from a CTAS or RTAS + * @param operation The specific table creation mode, whether this is a Create/Replace/Create or + * Replace + */ + private def createDeltaTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + allTableProperties: util.Map[String, String], + writeOptions: Map[String, String], + sourceQuery: Option[DataFrame], + operation: TableCreationModes.CreationMode + ): Table = recordFrameProfile( + "DeltaCatalog", "createDeltaTable") { + // These two keys are tableProperties in data source v2 but not in v1, so we have to filter + // them out. Otherwise property consistency checks will fail. + val tableProperties = allTableProperties.asScala.filterKeys { + case TableCatalog.PROP_LOCATION => false + case TableCatalog.PROP_PROVIDER => false + case TableCatalog.PROP_COMMENT => false + case TableCatalog.PROP_OWNER => false + case TableCatalog.PROP_EXTERNAL => false + case "path" => false + case "option.path" => false + case _ => true + }.toMap + val (partitionColumns, maybeBucketSpec, maybeClusterBySpec) = convertTransforms(partitions) + validateClusterBySpec(maybeClusterBySpec, schema) + var newSchema = schema + var newPartitionColumns = partitionColumns + var newBucketSpec = maybeBucketSpec + val conf = spark.sessionState.conf + allTableProperties.asScala + .get(DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS.key) + .foreach(StatisticsCollection.validateDeltaStatsColumns(schema, partitionColumns, _)) + val isByPath = isPathIdentifier(ident) + if (isByPath && !conf.getConf(DeltaSQLConf.DELTA_LEGACY_ALLOW_AMBIGUOUS_PATHS) + && allTableProperties.containsKey("location") + // The location property can be qualified and different from the path in the identifier, so + // we check `endsWith` here. 
+ && Option(allTableProperties.get("location")).exists(!_.endsWith(ident.name())) + ) { + throw DeltaErrors.ambiguousPathsInCreateTableException( + ident.name(), allTableProperties.get("location")) + } + val location = if (isByPath) { + Option(ident.name()) + } else { + Option(allTableProperties.get("location")) + } + val id = { + TableIdentifier(ident.name(), ident.namespace().lastOption) + } + var locUriOpt = location.map(CatalogUtils.stringToURI) + val existingTableOpt = getExistingTableIfExists(id) + val loc = locUriOpt + .orElse(existingTableOpt.flatMap(_.storage.locationUri)) + .getOrElse(spark.sessionState.catalog.defaultTablePath(id)) + val storage = DataSource.buildStorageFormatFromOptions(writeOptions) + .copy(locationUri = Option(loc)) + val tableType = + if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED + val commentOpt = Option(allTableProperties.get("comment")) + + + var tableDesc = new CatalogTable( + identifier = id, + tableType = tableType, + storage = storage, + schema = newSchema, + provider = Some(DeltaSourceUtils.ALT_NAME), + partitionColumnNames = newPartitionColumns, + bucketSpec = newBucketSpec, + properties = tableProperties, + comment = commentOpt + ) + + val withDb = + verifyTableAndSolidify( + tableDesc, + None, + maybeClusterBySpec + ) + + val writer = sourceQuery.map { df => + WriteIntoDelta( + DeltaLog.forTable(spark, new Path(loc)), + operation.mode, + new DeltaOptions(withDb.storage.properties, spark.sessionState.conf), + withDb.partitionColumnNames, + withDb.properties ++ commentOpt.map("comment" -> _), + df, + Some(tableDesc), + schemaInCatalog = if (newSchema != schema) Some(newSchema) else None) + } + + CreateDeltaTableCommand( + withDb, + existingTableOpt, + operation.mode, + writer, + operation, + tableByPath = isByPath).run(spark) + + loadTable(ident) + } + + override def loadTable(ident: Identifier): Table = recordFrameProfile( + "DeltaCatalog", "loadTable") { + try { + super.loadTable(ident) match { + case v1: V1Table if DeltaTableUtils.isDeltaTable(v1.catalogTable) => + DeltaTableV2( + spark, + new Path(v1.catalogTable.location), + catalogTable = Some(v1.catalogTable), + tableIdentifier = Some(ident.toString)) + case o => o + } + } catch { + case e @ ( + _: NoSuchDatabaseException | _: NoSuchNamespaceException | _: NoSuchTableException) => + if (isPathIdentifier(ident)) { + newDeltaPathTable(ident) + } else if (isIcebergPathIdentifier(ident)) { + newIcebergPathTable(ident) + } else { + throw e + } + case e: AnalysisException if gluePermissionError(e) && isPathIdentifier(ident) => + logWarning("Received an access denied error from Glue. Assuming this " + + s"identifier ($ident) is path based.", e) + newDeltaPathTable(ident) + } + } + + override def loadTable(ident: Identifier, timestamp: Long): Table = { + loadTableWithTimeTravel(ident, version = None, Some(timestamp)) + } + + override def loadTable(ident: Identifier, version: String): Table = { + loadTableWithTimeTravel(ident, Some(version), timestamp = None) + } + + /** + * Helper method which loads a Delta table with given time travel parameters. + * Exactly one of the timetravel parameters (version or timestamp) must be present. 
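+   * These overloads typically back SQL time travel such as `SELECT * FROM tbl VERSION AS OF 3`
+   * or `SELECT * FROM tbl TIMESTAMP AS OF '2024-01-01'` (illustrative queries).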
+ * + * @param version The table version to load + * @param timestamp The timestamp for the table to load, in microseconds + */ + private def loadTableWithTimeTravel( + ident: Identifier, + version: Option[String], + timestamp: Option[Long]): Table = { + assert(version.isEmpty ^ timestamp.isEmpty, + "Either the version or timestamp should be provided for time travel") + val table = loadTable(ident) + table match { + case deltaTable: DeltaTableV2 => + val ttOpts = Map(DeltaDataSource.TIME_TRAVEL_SOURCE_KEY -> "SQL") ++ + (if (version.isDefined) { + Map(DeltaDataSource.TIME_TRAVEL_VERSION_KEY -> version.get) + } else { + val timestampMs = timestamp.get / 1000 + Map(DeltaDataSource.TIME_TRAVEL_TIMESTAMP_KEY -> new Timestamp(timestampMs).toString) + }) + + deltaTable.withOptions(ttOpts) + // punt this problem up to the parent + case _ if version.isDefined => super.loadTable(ident, version.get) + case _ if timestamp.isDefined => super.loadTable(ident, timestamp.get) + } + } + + // Perform checks on ClusterBySpec. + def validateClusterBySpec( + maybeClusterBySpec: Option[ClusterBySpec], schema: StructType): Unit = { + // Validate that the preview is enabled if we are creating a clustered table. + ClusteredTableUtils.validatePreviewEnabled(maybeClusterBySpec) + maybeClusterBySpec.foreach { clusterBy => + // Check if the specified cluster by columns exists in the table. + val resolver = spark.sessionState.conf.resolver + clusterBy.columnNames.foreach { column => + // This is the same check as in rules.scala, to keep the behaviour consistent. + SchemaUtils.findColumnPosition(column.fieldNames(), schema, resolver) + } + // Check that columns are not duplicated in the cluster by statement. + PartitionUtils.checkColumnNameDuplication( + clusterBy.columnNames.map(_.toString), "in CLUSTER BY", resolver) + // Check number of clustering columns is within allowed range. 
+ ClusteredTableUtils.validateNumClusteringColumns( + clusterBy.columnNames.map(_.fieldNames.toSeq)) + } + } + + protected def newDeltaPathTable(ident: Identifier): DeltaTableV2 = { + DeltaTableV2(spark, new Path(ident.name())) + } + + private def getProvider(properties: util.Map[String, String]): String = { + Option(properties.get("provider")) + .getOrElse(spark.sessionState.conf.getConf(SQLConf.DEFAULT_DATA_SOURCE_NAME)) + } + + private def createCatalogTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String] + ): Table = { + super.createTable(ident, schema, partitions, properties) + } + + + override def createTable( + ident: Identifier, + columns: Array[org.apache.spark.sql.connector.catalog.Column], + partitions: Array[Transform], + properties: util.Map[String, String]): Table = { + createTable( + ident, + org.apache.spark.sql.connector.catalog.CatalogV2Util.v2ColumnsToStructType(columns), + partitions, + properties) + } + + override def createTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]) : Table = + recordFrameProfile("DeltaCatalog", "createTable") { + if (DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))) { + createDeltaTable( + ident, + schema, + partitions, + properties, + Map.empty, + sourceQuery = None, + TableCreationModes.Create + ) + } else { + createCatalogTable(ident, schema, partitions, properties + ) + } + } + + override def stageReplace( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): StagedTable = + recordFrameProfile("DeltaCatalog", "stageReplace") { + if (DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))) { + new StagedDeltaTableV2( + ident, + schema, + partitions, + properties, + TableCreationModes.Replace + ) + } else { + super.dropTable(ident) + val table = createCatalogTable(ident, schema, partitions, properties + ) + BestEffortStagedTable(ident, table, this) + } + } + + override def stageCreateOrReplace( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): StagedTable = + recordFrameProfile("DeltaCatalog", "stageCreateOrReplace") { + if (DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))) { + new StagedDeltaTableV2( + ident, + schema, + partitions, + properties, + TableCreationModes.CreateOrReplace + ) + } else { + try super.dropTable(ident) + catch { + case _: NoSuchDatabaseException => // this is fine + case _: NoSuchTableException => // this is fine + } + val table = createCatalogTable(ident, schema, partitions, properties + ) + BestEffortStagedTable(ident, table, this) + } + } + + override def stageCreate( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): StagedTable = + recordFrameProfile("DeltaCatalog", "stageCreate") { + if (DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))) { + new StagedDeltaTableV2( + ident, + schema, + partitions, + properties, + TableCreationModes.Create + ) + } else { + val table = createCatalogTable(ident, schema, partitions, properties + ) + BestEffortStagedTable(ident, table, this) + } + } + + // Copy of V2SessionCatalog.convertTransforms, which is private. 
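+  // Illustrative mapping: PARTITIONED BY (col) arrives as an IdentityTransform and is collected
+  // into identityCols; CLUSTERED BY (...) INTO n BUCKETS arrives as a bucket transform and becomes
+  // a BucketSpec; CLUSTER BY (a, b) arrives as a single TempClusterByTransform and becomes a
+  // ClusterBySpec.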
+ private def convertTransforms( + partitions: Seq[Transform]): (Seq[String], Option[BucketSpec], Option[ClusterBySpec]) = { + val identityCols = new mutable.ArrayBuffer[String] + var bucketSpec = Option.empty[BucketSpec] + var clusterBySpec = Option.empty[ClusterBySpec] + + partitions.map { + case IdentityTransform(FieldReference(Seq(col))) => + identityCols += col + + case BucketTransform(numBuckets, bucketCols, sortCols) => + bucketSpec = Some(BucketSpec( + numBuckets, bucketCols.map(_.fieldNames.head), sortCols.map(_.fieldNames.head))) + case TempClusterByTransform(columnNames) => + if (clusterBySpec.nonEmpty) { + // Parser guarantees that it only passes down one TempClusterByTransform. + throw SparkException.internalError("Cannot have multiple cluster by transforms.") + } + clusterBySpec = Some(ClusterBySpec(columnNames)) + + case transform => + throw DeltaErrors.operationNotSupportedException(s"Partitioning by expressions") + } + // Parser guarantees that partition and cluster by can't both exist. + assert(!(identityCols.toSeq.nonEmpty && clusterBySpec.nonEmpty)) + // Parser guarantees that bucketing and cluster by can't both exist. + assert(!(bucketSpec.nonEmpty && clusterBySpec.nonEmpty)) + + (identityCols.toSeq, bucketSpec, clusterBySpec) + } + + /** Performs checks on the parameters provided for table creation for a Delta table. */ + def verifyTableAndSolidify( + tableDesc: CatalogTable, + query: Option[LogicalPlan], + maybeClusterBySpec: Option[ClusterBySpec] = None): CatalogTable = { + if (tableDesc.bucketSpec.isDefined) { + throw DeltaErrors.operationNotSupportedException("Bucketing", tableDesc.identifier) + } + + val schema = query.map { plan => + assert(tableDesc.schema.isEmpty, "Can't specify table schema in CTAS.") + plan.schema.asNullable + }.getOrElse(tableDesc.schema) + + PartitioningUtils.validatePartitionColumn( + schema, + tableDesc.partitionColumnNames, + caseSensitive = false) // Delta is case insensitive + + var validatedConfigurations = + DeltaConfigs.validateConfigurations(tableDesc.properties) + ClusteredTableUtils.validateExistingTableFeatureProperties(validatedConfigurations) + // Add needed configs for Clustered table. + if (maybeClusterBySpec.nonEmpty) { + validatedConfigurations = + validatedConfigurations ++ + ClusteredTableUtils.getClusteringColumnsAsProperty(maybeClusterBySpec) ++ + ClusteredTableUtils.getTableFeatureProperties(validatedConfigurations) + } + + val db = tableDesc.identifier.database.getOrElse(catalog.getCurrentDatabase) + val tableIdentWithDB = tableDesc.identifier.copy(database = Some(db)) + tableDesc.copy( + identifier = tableIdentWithDB, + schema = schema, + properties = validatedConfigurations) + } + + /** Checks if a table already exists for the provided identifier. */ + def getExistingTableIfExists(table: TableIdentifier): Option[CatalogTable] = { + // If this is a path identifier, we cannot return an existing CatalogTable. The Create command + // will check the file system itself + if (isPathIdentifier(table)) return None + val tableExists = catalog.tableExists(table) + if (tableExists) { + val oldTable = catalog.getTableMetadata(table) + if (oldTable.tableType == CatalogTableType.VIEW) { + throw DeltaErrors.cannotWriteIntoView(table) + } + if (!DeltaSourceUtils.isDeltaTable(oldTable.provider)) { + throw DeltaErrors.notADeltaTable(table.table) + } + Some(oldTable) + } else { + None + } + } + + /** + * A staged delta table, which creates a HiveMetaStore entry and appends data if this was a + * CTAS/RTAS command. 
We have a ugly way of using this API right now, but it's the best way to + * maintain old behavior compatibility between Databricks Runtime and OSS Delta Lake. + */ + private class StagedDeltaTableV2( + ident: Identifier, + override val schema: StructType, + val partitions: Array[Transform], + override val properties: util.Map[String, String], + operation: TableCreationModes.CreationMode + ) extends StagedTable with SupportsWrite { + + private var asSelectQuery: Option[DataFrame] = None + private var writeOptions: Map[String, String] = Map.empty + + override def partitioning(): Array[Transform] = partitions + + override def commitStagedChanges(): Unit = recordFrameProfile( + "DeltaCatalog", "commitStagedChanges") { + val conf = spark.sessionState.conf + val props = new util.HashMap[String, String]() + // Options passed in through the SQL API will show up both with an "option." prefix and + // without in Spark 3.1, so we need to remove those from the properties + val optionsThroughProperties = properties.asScala.collect { + case (k, _) if k.startsWith("option.") => k.stripPrefix("option.") + }.toSet + val sqlWriteOptions = new util.HashMap[String, String]() + properties.asScala.foreach { case (k, v) => + if (!k.startsWith("option.") && !optionsThroughProperties.contains(k)) { + // Do not add to properties + props.put(k, v) + } else if (optionsThroughProperties.contains(k)) { + sqlWriteOptions.put(k, v) + } + } + if (writeOptions.isEmpty && !sqlWriteOptions.isEmpty) { + writeOptions = sqlWriteOptions.asScala.toMap + } + if (conf.getConf(DeltaSQLConf.DELTA_LEGACY_STORE_WRITER_OPTIONS_AS_PROPS)) { + // Legacy behavior + writeOptions.foreach { case (k, v) => props.put(k, v) } + } else { + writeOptions.foreach { case (k, v) => + // Continue putting in Delta prefixed options to avoid breaking workloads + if (k.toLowerCase(Locale.ROOT).startsWith("delta.")) { + props.put(k, v) + } + } + } + createDeltaTable( + ident, + schema, + partitions, + props, + writeOptions, + asSelectQuery, + operation + ) + } + + override def name(): String = ident.name() + + override def abortStagedChanges(): Unit = {} + + override def capabilities(): util.Set[TableCapability] = { + Set(V1_BATCH_WRITE).asJava + } + + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { + writeOptions = info.options.asCaseSensitiveMap().asScala.toMap + new DeltaV1WriteBuilder + } + + /* + * WriteBuilder for creating a Delta table. + */ + private class DeltaV1WriteBuilder extends WriteBuilder { + override def build(): V1Write = new V1Write { + override def toInsertableRelation(): InsertableRelation = { + new InsertableRelation { + override def insert(data: DataFrame, overwrite: Boolean): Unit = { + asSelectQuery = Option(data) + } + } + } + } + } + } + + override def alterTable(ident: Identifier, changes: TableChange*): Table = recordFrameProfile( + "DeltaCatalog", "alterTable") { + // We group the table changes by their type, since Delta applies each in a separate action. + // We also must define an artificial type for SetLocation, since data source V2 considers + // location just another property but it's special in catalog tables. + class SetLocation {} + val grouped = changes.groupBy { + case s: SetProperty if s.property() == "location" => classOf[SetLocation] + case c => c.getClass + } + val table = loadTable(ident) match { + case deltaTable: DeltaTableV2 => deltaTable + case _ => return super.alterTable(ident, changes: _*) + } + + // Whether this is an ALTER TABLE ALTER COLUMN SYNC IDENTITY command. 
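+    // (e.g. `ALTER TABLE t ALTER COLUMN id SYNC IDENTITY`, illustrative statement); the flag is
+    // forwarded to every AlterTableChangeColumnDeltaCommand issued at the end of this method.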
+ var syncIdentity = false + val columnUpdates = new mutable.HashMap[Seq[String], (StructField, Option[ColumnPosition])]() + val isReplaceColumnsCommand = grouped.get(classOf[DeleteColumn]) match { + case Some(deletes) if grouped.contains(classOf[AddColumn]) => + // Convert to Seq so that contains method works + val deleteSet = deletes.asInstanceOf[Seq[DeleteColumn]].map(_.fieldNames().toSeq).toSet + // Ensure that all the table top level columns are being deleted + table.schema().fieldNames.forall(f => deleteSet.contains(Seq(f))) + case _ => + false + } + + if (isReplaceColumnsCommand && + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_REPLACE_COLUMNS_SAFE)) { + // The new schema is essentially the AddColumn operators + val tableToUpdate = table + val colsToAdd = grouped(classOf[AddColumn]).asInstanceOf[Seq[AddColumn]] + val structFields = colsToAdd.map { col => + assert( + col.fieldNames().length == 1, "We don't expect replace to provide nested column adds") + var field = StructField(col.fieldNames().head, col.dataType, col.isNullable) + Option(col.comment()).foreach { comment => + field = field.withComment(comment) + } + Option(col.defaultValue()).foreach { defValue => + field = field.withCurrentDefaultValue(defValue.getSql) + } + field + } + AlterTableReplaceColumnsDeltaCommand(tableToUpdate, structFields).run(spark) + return loadTable(ident) + } + + grouped.foreach { + case (t, newColumns) if t == classOf[AddColumn] => + val tableToUpdate = table + AlterTableAddColumnsDeltaCommand( + tableToUpdate, + newColumns.asInstanceOf[Seq[AddColumn]].map { col => + // Convert V2 `AddColumn` to V1 `QualifiedColType` as `AlterTableAddColumnsDeltaCommand` + // is a V1 command. + val name = col.fieldNames() + val path = if (name.length > 1) Some(UnresolvedFieldName(name.init)) else None + QualifiedColType( + path, + name.last, + col.dataType(), + col.isNullable, + Option(col.comment()), + Option(col.position()).map(UnresolvedFieldPosition), + Option(col.defaultValue()).map(_.getSql()) + ) + }).run(spark) + + case (t, deleteColumns) if t == classOf[DeleteColumn] => + AlterTableDropColumnsDeltaCommand( + table, deleteColumns.asInstanceOf[Seq[DeleteColumn]].map(_.fieldNames().toSeq)).run(spark) + + case (t, newProperties) if t == classOf[SetProperty] => + AlterTableSetPropertiesDeltaCommand( + table, + DeltaConfigs.validateConfigurations( + newProperties.asInstanceOf[Seq[SetProperty]].map { prop => + prop.property() -> prop.value() + }.toMap) + ).run(spark) + + case (t, oldProperties) if t == classOf[RemoveProperty] => + AlterTableUnsetPropertiesDeltaCommand( + table, + oldProperties.asInstanceOf[Seq[RemoveProperty]].map(_.property()), + // Data source V2 REMOVE PROPERTY is always IF EXISTS. + ifExists = true).run(spark) + + case (t, columnChanges) if classOf[ColumnChange].isAssignableFrom(t) => + def getColumn(fieldNames: Seq[String]): (StructField, Option[ColumnPosition]) = { + columnUpdates.getOrElseUpdate(fieldNames, { + // TODO: Theoretically we should be able to fetch the snapshot from a txn. 
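+          // The resolved field and its (optional) new position are memoized in columnUpdates, so
+          // several changes to the same column (comment, type, rename, ...) compose into a single
+          // AlterTableChangeColumnDeltaCommand below.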
+ val schema = table.initialSnapshot.schema + val colName = UnresolvedAttribute(fieldNames).name + val fieldOpt = schema.findNestedField(fieldNames, includeCollections = true, + spark.sessionState.conf.resolver) + .map(_._2) + val field = fieldOpt.getOrElse { + throw DeltaErrors.nonExistentColumnInSchema(colName, schema.treeString) + } + field -> None + }) + } + + columnChanges.foreach { + case comment: UpdateColumnComment => + val field = comment.fieldNames() + val (oldField, pos) = getColumn(field) + columnUpdates(field) = oldField.withComment(comment.newComment()) -> pos + + case dataType: UpdateColumnType => + val field = dataType.fieldNames() + val (oldField, pos) = getColumn(field) + columnUpdates(field) = oldField.copy(dataType = dataType.newDataType()) -> pos + + case position: UpdateColumnPosition => + val field = position.fieldNames() + val (oldField, pos) = getColumn(field) + columnUpdates(field) = oldField -> Option(position.position()) + + case nullability: UpdateColumnNullability => + val field = nullability.fieldNames() + val (oldField, pos) = getColumn(field) + columnUpdates(field) = oldField.copy(nullable = nullability.nullable()) -> pos + + case rename: RenameColumn => + val field = rename.fieldNames() + val (oldField, pos) = getColumn(field) + columnUpdates(field) = oldField.copy(name = rename.newName()) -> pos + + + case updateDefault: UpdateColumnDefaultValue => + val field = updateDefault.fieldNames() + val (oldField, pos) = getColumn(field) + val updatedField = updateDefault.newDefaultValue() match { + case "" => oldField.clearCurrentDefaultValue() + case newDefault => oldField.withCurrentDefaultValue(newDefault) + } + columnUpdates(field) = updatedField -> pos + + case other => + throw DeltaErrors.unrecognizedColumnChange(s"${other.getClass}") + } + + case (t, locations) if t == classOf[SetLocation] => + if (locations.size != 1) { + throw DeltaErrors.cannotSetLocationMultipleTimes( + locations.asInstanceOf[Seq[SetProperty]].map(_.value())) + } + if (table.tableIdentifier.isEmpty) { + throw DeltaErrors.setLocationNotSupportedOnPathIdentifiers() + } + AlterTableSetLocationDeltaCommand( + table, + locations.head.asInstanceOf[SetProperty].value()).run(spark) + + case (t, constraints) if t == classOf[AddConstraint] => + constraints.foreach { constraint => + val c = constraint.asInstanceOf[AddConstraint] + AlterTableAddConstraintDeltaCommand(table, c.constraintName, c.expr).run(spark) + } + + case (t, constraints) if t == classOf[DropConstraint] => + constraints.foreach { constraint => + val c = constraint.asInstanceOf[DropConstraint] + AlterTableDropConstraintDeltaCommand(table, c.constraintName, c.ifExists).run(spark) + } + + case (t, dropFeature) if t == classOf[DropFeature] => + // Only single feature removal is supported. 
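+        // e.g. `ALTER TABLE t DROP FEATURE deletionVectors TRUNCATE HISTORY` (illustrative
+        // feature name); only a single DropFeature change is expected, so just the head is
+        // consulted.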
+ val dropFeatureTableChange = dropFeature.head.asInstanceOf[DropFeature] + val featureName = dropFeatureTableChange.featureName + val truncateHistory = dropFeatureTableChange.truncateHistory + AlterTableDropFeatureDeltaCommand( + table, featureName, truncateHistory = truncateHistory).run(spark) + + } + + columnUpdates.foreach { case (fieldNames, (newField, newPositionOpt)) => + AlterTableChangeColumnDeltaCommand( + table, + fieldNames.dropRight(1), + fieldNames.last, + newField, + newPositionOpt, + syncIdentity = syncIdentity).run(spark) + } + + loadTable(ident) + } + + // We want our catalog to handle Delta, therefore for other data sources that want to be + // created, we just have this wrapper StagedTable to only drop the table if the commit fails. + private case class BestEffortStagedTable( + ident: Identifier, + table: Table, + catalog: TableCatalog) extends StagedTable with SupportsWrite { + override def abortStagedChanges(): Unit = catalog.dropTable(ident) + + override def commitStagedChanges(): Unit = {} + + // Pass through + override def name(): String = table.name() + override def schema(): StructType = table.schema() + override def partitioning(): Array[Transform] = table.partitioning() + override def capabilities(): util.Set[TableCapability] = table.capabilities() + override def properties(): util.Map[String, String] = table.properties() + + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = table match { + case supportsWrite: SupportsWrite => supportsWrite.newWriteBuilder(info) + case _ => throw DeltaErrors.unsupportedWriteStagedTable(name) + } + } +} + +/** + * A trait for handling table access through delta.`/some/path`. This is a stop-gap solution + * until PathIdentifiers are implemented in Apache Spark. + */ +trait SupportsPathIdentifier extends TableCatalog { self: DeltaCatalog => + + private def supportSQLOnFile: Boolean = spark.sessionState.conf.runSQLonFile + + protected lazy val catalog: SessionCatalog = spark.sessionState.catalog + + private def hasDeltaNamespace(ident: Identifier): Boolean = { + ident.namespace().length == 1 && DeltaSourceUtils.isDeltaDataSourceName(ident.namespace().head) + } + + private def hasIcebergNamespace(ident: Identifier): Boolean = { + ident.namespace().length == 1 && ident.namespace().head.equalsIgnoreCase("iceberg") + } + + protected def isIcebergPathIdentifier(ident: Identifier): Boolean = { + hasIcebergNamespace(ident) && new Path(ident.name()).isAbsolute + } + + protected def newIcebergPathTable(ident: Identifier): IcebergTablePlaceHolder = { + IcebergTablePlaceHolder(TableIdentifier(ident.name(), Some("iceberg"))) + } + + protected def isPathIdentifier(ident: Identifier): Boolean = { + // Should be a simple check of a special PathIdentifier class in the future + try { + supportSQLOnFile && hasDeltaNamespace(ident) && new Path(ident.name()).isAbsolute + } catch { + case _: IllegalArgumentException => false + } + } + + protected def isPathIdentifier(table: CatalogTable): Boolean = { + isPathIdentifier(table.identifier) + } + + protected def isPathIdentifier(tableIdentifier: TableIdentifier) : Boolean = { + isPathIdentifier(Identifier.of(tableIdentifier.database.toArray, tableIdentifier.table)) + } + + override def tableExists(ident: Identifier): Boolean = recordFrameProfile( + "DeltaCatalog", "tableExists") { + if (isPathIdentifier(ident)) { + val path = new Path(ident.name()) + // scalastyle:off deltahadoopconfiguration + val fs = path.getFileSystem(spark.sessionState.newHadoopConf()) + // scalastyle:on 
deltahadoopconfiguration + fs.exists(path) && fs.listStatus(path).nonEmpty + } else { + super.tableExists(ident) + } + } +} + +object BucketTransform { + def unapply(transform: Transform): Option[(Int, Seq[NamedReference], Seq[NamedReference])] = { + val arguments = transform.arguments() + if (transform.name() == "sorted_bucket") { + var posOfLit: Int = -1 + var numOfBucket: Int = -1 + arguments.zipWithIndex.foreach { + case (literal: Literal[_], i) if literal.dataType() == IntegerType => + numOfBucket = literal.value().asInstanceOf[Integer] + posOfLit = i + case _ => + } + Some(numOfBucket, arguments.take(posOfLit).map(_.asInstanceOf[NamedReference]), + arguments.drop(posOfLit + 1).map(_.asInstanceOf[NamedReference])) + } else if (transform.name() == "bucket") { + val numOfBucket = arguments(0) match { + case literal: Literal[_] if literal.dataType() == IntegerType => + literal.value().asInstanceOf[Integer] + case _ => throw new IllegalStateException("invalid bucket transform") + } + Some(numOfBucket, arguments.drop(1).map(_.asInstanceOf[NamedReference]), + Seq.empty[FieldReference]) + } else { + None + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala new file mode 100644 index 00000000000..e364a65ae15 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala @@ -0,0 +1,439 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.catalog + +import java.{util => ju} + +// scalastyle:off import.ordering.noEmptyLine +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.commands.WriteIntoDelta +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.{DeltaDataSource, DeltaSourceUtils} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{DataFrame, Dataset, SaveMode, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{ResolvedTable, UnresolvedTable} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, CatalogUtils} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability, TableCatalog, V2TableWithV1Fallback} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ +import org.apache.spark.sql.connector.catalog.TableCapability._ +import org.apache.spark.sql.connector.catalog.V1Table +import org.apache.spark.sql.connector.expressions._ +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, V1Write, WriteBuilder} +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.sources.{BaseRelation, Filter, InsertableRelation} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** + * The data source V2 representation of a Delta table that exists. + * + * @param path The path to the table + * @param tableIdentifier The table identifier for this table + */ +case class DeltaTableV2( + spark: SparkSession, + path: Path, + catalogTable: Option[CatalogTable] = None, + tableIdentifier: Option[String] = None, + timeTravelOpt: Option[DeltaTimeTravelSpec] = None, + options: Map[String, String] = Map.empty) + extends Table + with SupportsWrite + with V2TableWithV1Fallback + with DeltaLogging { + + private lazy val (rootPath, partitionFilters, timeTravelByPath) = { + if (catalogTable.isDefined) { + // Fast path for reducing path munging overhead + (new Path(catalogTable.get.location), Nil, None) + } else { + DeltaDataSource.parsePathIdentifier(spark, path.toString, options) + } + } + + + def hasPartitionFilters: Boolean = partitionFilters.nonEmpty + + // This MUST be initialized before the deltaLog object is created, in order to accurately + // bound the creation time of the table. + private val creationTimeMs = { + System.currentTimeMillis() + } + + // The loading of the DeltaLog is lazy in order to reduce the amount of FileSystem calls, + // in cases where we will fallback to the V1 behavior. 
+ lazy val deltaLog: DeltaLog = { + DeltaTableV2.withEnrichedUnsupportedTableException(catalogTable, tableIdentifier) { + DeltaLog.forTable(spark, rootPath, options) + } + } + + def getTableIdentifierIfExists: Option[TableIdentifier] = tableIdentifier.map { tableName => + spark.sessionState.sqlParser.parseMultipartIdentifier(tableName).asTableIdentifier + } + + override def name(): String = catalogTable.map(_.identifier.unquotedString) + .orElse(tableIdentifier) + .getOrElse(s"delta.`${deltaLog.dataPath}`") + + private lazy val timeTravelSpec: Option[DeltaTimeTravelSpec] = { + if (timeTravelOpt.isDefined && timeTravelByPath.isDefined) { + throw DeltaErrors.multipleTimeTravelSyntaxUsed + } + timeTravelOpt.orElse(timeTravelByPath) + } + + private lazy val caseInsensitiveOptions = new CaseInsensitiveStringMap(options.asJava) + + /** + * The snapshot initially associated with this table. It is captured on first access, usually (but + * not always) shortly after the table was first created, and is immutable once captured. + * + * WARNING: This snapshot could be arbitrarily stale for long-lived [[DeltaTableV2]] instances, + * such as the ones [[DeltaTable]] uses internally. Callers who cannot tolerate this potential + * staleness should use [[getFreshSnapshot]] instead. + * + * WARNING: Because the snapshot is captured lazily, callers should explicitly access the snapshot + * if they want to be certain it has been captured. + */ + lazy val initialSnapshot: Snapshot = DeltaTableV2.withEnrichedUnsupportedTableException( + catalogTable, tableIdentifier) { + + timeTravelSpec.map { spec => + // By default, block using CDF + time-travel + if (CDCReader.isCDCRead(caseInsensitiveOptions) && + !spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_CDF_ALLOW_TIME_TRAVEL_OPTIONS)) { + throw DeltaErrors.timeTravelNotSupportedException + } + + val (version, accessType) = DeltaTableUtils.resolveTimeTravelVersion( + spark.sessionState.conf, deltaLog, spec) + val source = spec.creationSource.getOrElse("unknown") + recordDeltaEvent(deltaLog, s"delta.timeTravel.$source", data = Map( + // Log the cached version of the table on the cluster + "tableVersion" -> deltaLog.unsafeVolatileSnapshot.version, + "queriedVersion" -> version, + "accessType" -> accessType + )) + deltaLog.getSnapshotAt(version) + }.getOrElse( + deltaLog.update( + stalenessAcceptable = true, + checkIfUpdatedSinceTs = Some(creationTimeMs) + ) + ) + } + + // We get the cdcRelation ahead of time if this is a CDC read to be able to return the correct + // schema. 
The schema for CDC reads are currently convoluted due to column mapping behavior + private lazy val cdcRelation: Option[BaseRelation] = { + if (CDCReader.isCDCRead(caseInsensitiveOptions)) { + recordDeltaEvent(deltaLog, "delta.cdf.read", + data = caseInsensitiveOptions.asCaseSensitiveMap()) + Some(CDCReader.getCDCRelation( + spark, initialSnapshot, timeTravelSpec.nonEmpty, spark.sessionState.conf, + caseInsensitiveOptions)) + } else { + None + } + } + + private lazy val tableSchema: StructType = { + val baseSchema = cdcRelation.map(_.schema).getOrElse { + DeltaTableUtils.removeInternalMetadata(spark, initialSnapshot.schema) + } + DeltaColumnMapping.dropColumnMappingMetadata(baseSchema) + } + + override def schema(): StructType = tableSchema + + override def partitioning(): Array[Transform] = { + initialSnapshot.metadata.partitionColumns.map { col => + new IdentityTransform(new FieldReference(Seq(col))) + }.toArray + } + + override def properties(): ju.Map[String, String] = { + val base = initialSnapshot.getProperties + base.put(TableCatalog.PROP_PROVIDER, "delta") + base.put(TableCatalog.PROP_LOCATION, CatalogUtils.URIToString(path.toUri)) + catalogTable.foreach { table => + if (table.owner != null && table.owner.nonEmpty) { + base.put(TableCatalog.PROP_OWNER, table.owner) + } + v1Table.storage.properties.foreach { case (key, value) => + base.put(TableCatalog.OPTION_PREFIX + key, value) + } + if (v1Table.tableType == CatalogTableType.EXTERNAL) { + base.put(TableCatalog.PROP_EXTERNAL, "true") + } + } + Option(initialSnapshot.metadata.description).foreach(base.put(TableCatalog.PROP_COMMENT, _)) + base.asJava + } + + override def capabilities(): ju.Set[TableCapability] = Set( + ACCEPT_ANY_SCHEMA, BATCH_READ, + V1_BATCH_WRITE, OVERWRITE_BY_FILTER, TRUNCATE, OVERWRITE_DYNAMIC + ).asJava + + def tableExists: Boolean = deltaLog.tableExists + + + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { + new WriteIntoDeltaBuilder( + this, info.options, spark.sessionState.conf.useNullsForMissingDefaultColumnValues) + } + + /** + * Starts a transaction for this table, using the snapshot captured during table resolution. + * + * WARNING: Caller is responsible to ensure that table resolution was recent (e.g. if working with + * [[DataFrame]] or [[DeltaTable]] API, where the table could have been resolved long ago). + */ + def startTransactionWithInitialSnapshot(): OptimisticTransaction = + startTransaction(Some(initialSnapshot)) + + /** + * Starts a transaction for this table, using Some provided snapshot, or a fresh snapshot if None + * was provided. + */ + def startTransaction(snapshotOpt: Option[Snapshot] = None): OptimisticTransaction = { + deltaLog.startTransaction(catalogTable, snapshotOpt) + } + + /** + * Creates a V1 BaseRelation from this Table to allow read APIs to go through V1 DataSource code + * paths. 
+ */ + lazy val toBaseRelation: BaseRelation = { + // force update() if necessary in DataFrameReader.load code + initialSnapshot + if (!tableExists) { + // special error handling for path based tables + if (catalogTable.isEmpty + && !rootPath.getFileSystem(deltaLog.newDeltaHadoopConf()).exists(rootPath)) { + throw QueryCompilationErrors.dataPathNotExistError(rootPath.toString) + } + + val id = catalogTable.map(ct => DeltaTableIdentifier(table = Some(ct.identifier))) + .getOrElse(DeltaTableIdentifier(path = Some(path.toString))) + throw DeltaErrors.nonExistentDeltaTable(id) + } + val partitionPredicates = DeltaDataSource.verifyAndCreatePartitionFilters( + path.toString, initialSnapshot, partitionFilters) + + cdcRelation.getOrElse { + deltaLog.createRelation( + partitionPredicates, Some(initialSnapshot), catalogTable, timeTravelSpec.isDefined) + } + } + + /** Creates a [[LogicalRelation]] that represents this table */ + lazy val toLogicalRelation: LogicalRelation = { + val relation = this.toBaseRelation + LogicalRelation( + relation, toAttributes(relation.schema), ttSafeCatalogTable, isStreaming = false) + } + + /** Creates a [[DataFrame]] that uses the requested spark session to read from this table */ + def toDf(sparkSession: SparkSession): DataFrame = { + val plan = catalogTable.foldLeft[LogicalPlan](toLogicalRelation) { (child, ct) => + // Catalog based tables need a SubqueryAlias that carries their fully-qualified name + SubqueryAlias(ct.identifier.nameParts, child) + } + Dataset.ofRows(sparkSession, plan) + } + + /** Creates a [[DataFrame]] that reads from this table */ + lazy val toDf: DataFrame = toDf(spark) + + /** + * Check the passed in options and existing timeTravelOpt, set new time travel by options. + */ + def withOptions(newOptions: Map[String, String]): DeltaTableV2 = { + val ttSpec = DeltaDataSource.getTimeTravelVersion(newOptions) + if (timeTravelOpt.nonEmpty && ttSpec.nonEmpty) { + throw DeltaErrors.multipleTimeTravelSyntaxUsed + } + + val caseInsensitiveNewOptions = new CaseInsensitiveStringMap(newOptions.asJava) + + if (timeTravelOpt.isEmpty && ttSpec.nonEmpty) { + copy(timeTravelOpt = ttSpec) + } else if (CDCReader.isCDCRead(caseInsensitiveNewOptions)) { + checkCDCOptionsValidity(caseInsensitiveNewOptions) + // Do not use statistics during CDF reads + this.copy(catalogTable = catalogTable.map(_.copy(stats = None)), options = newOptions) + } else { + this + } + } + + private def checkCDCOptionsValidity(options: CaseInsensitiveStringMap): Unit = { + // check if we have both version and timestamp parameters + if (options.containsKey(DeltaDataSource.CDC_START_TIMESTAMP_KEY) + && options.containsKey(DeltaDataSource.CDC_START_VERSION_KEY)) { + throw DeltaErrors.multipleCDCBoundaryException("starting") + } + if (options.containsKey(DeltaDataSource.CDC_END_VERSION_KEY) + && options.containsKey(DeltaDataSource.CDC_END_TIMESTAMP_KEY)) { + throw DeltaErrors.multipleCDCBoundaryException("ending") + } + if (!options.containsKey(DeltaDataSource.CDC_START_VERSION_KEY) + && !options.containsKey(DeltaDataSource.CDC_START_TIMESTAMP_KEY)) { + throw DeltaErrors.noStartVersionForCDC() + } + } + + /** A "clean" version of the catalog table, safe for use with or without time travel. 
 */
+  lazy val ttSafeCatalogTable: Option[CatalogTable] = catalogTable match {
+    case Some(ct) if timeTravelSpec.isDefined => Some(ct.copy(stats = None))
+    case other => other
+  }
+
+  override def v1Table: CatalogTable = ttSafeCatalogTable.getOrElse {
+    throw DeltaErrors.invalidV1TableCall("v1Table", "DeltaTableV2")
+  }
+}
+
+object DeltaTableV2 {
+  /** Resolves a path into a DeltaTableV2, leveraging standard v2 table resolution. */
+  def apply(spark: SparkSession, tablePath: Path, options: Map[String, String], cmd: String)
+    : DeltaTableV2 =
+    resolve(spark, UnresolvedPathBasedDeltaTable(tablePath.toString, options, cmd), cmd)
+
+  /** Resolves a table identifier into a DeltaTableV2, leveraging standard v2 table resolution. */
+  def apply(spark: SparkSession, tableId: TableIdentifier, cmd: String): DeltaTableV2 = {
+    resolve(spark, UnresolvedTable(tableId.nameParts, cmd, None), cmd)
+  }
+
+  /** Applies standard v2 table resolution to an unresolved Delta table plan node */
+  def resolve(spark: SparkSession, unresolved: LogicalPlan, cmd: String): DeltaTableV2 =
+    extractFrom(spark.sessionState.analyzer.ResolveRelations(unresolved), cmd)
+
+  /**
+   * Extracts the DeltaTableV2 from a resolved Delta table plan node, throwing "table not found" if
+   * the node does not actually represent a resolved Delta table.
+   */
+  def extractFrom(plan: LogicalPlan, cmd: String): DeltaTableV2 = plan match {
+    case ResolvedTable(_, _, d: DeltaTableV2, _) => d
+    case ResolvedTable(_, _, t: V1Table, _) if DeltaTableUtils.isDeltaTable(t.catalogTable) =>
+      DeltaTableV2(SparkSession.active, new Path(t.v1Table.location), Some(t.v1Table))
+    case _ => throw DeltaErrors.notADeltaTableException(cmd)
+  }
+
+  /**
+   * When DeltaLog throws an InvalidProtocolVersionException it does not know the table name and
+   * uses the data path in the message. This wrapper throws a new InvalidProtocolVersionException
+   * with the table name and sets its cause to the original InvalidProtocolVersionException.
+ */ + def withEnrichedUnsupportedTableException[T]( + catalogTable: Option[CatalogTable], + tableName: Option[String] = None)(thunk: => T): T = { + + lazy val tableNameToUse = catalogTable match { + case Some(ct) => Some(ct.identifier.copy(catalog = None).unquotedString) + case None => tableName + } + + try thunk catch { + case e: InvalidProtocolVersionException if tableNameToUse.exists(_ != e.tableNameOrPath) => + throw e.copy(tableNameOrPath = tableNameToUse.get).initCause(e) + case e: DeltaUnsupportedTableFeatureException if + tableNameToUse.exists(_ != e.tableNameOrPath) => + throw e.copy(tableNameOrPath = tableNameToUse.get).initCause(e) + } + } +} + +private class WriteIntoDeltaBuilder( + table: DeltaTableV2, + writeOptions: CaseInsensitiveStringMap, + nullAsDefault: Boolean) + extends WriteBuilder with SupportsOverwrite with SupportsTruncate with SupportsDynamicOverwrite { + + private var forceOverwrite = false + + private val options = + mutable.HashMap[String, String](writeOptions.asCaseSensitiveMap().asScala.toSeq: _*) + + override def truncate(): WriteIntoDeltaBuilder = { + forceOverwrite = true + this + } + + override def overwrite(filters: Array[Filter]): WriteBuilder = { + if (writeOptions.containsKey("replaceWhere")) { + throw DeltaErrors.replaceWhereUsedInOverwrite() + } + options.put("replaceWhere", DeltaSourceUtils.translateFilters(filters).sql) + forceOverwrite = true + this + } + + override def overwriteDynamicPartitions(): WriteBuilder = { + options.put( + DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, + DeltaOptions.PARTITION_OVERWRITE_MODE_DYNAMIC) + forceOverwrite = true + this + } + + override def build(): V1Write = new V1Write { + override def toInsertableRelation(): InsertableRelation = { + new InsertableRelation { + override def insert(data: DataFrame, overwrite: Boolean): Unit = { + val session = data.sparkSession + // Normal table insertion should be the only place that can use null as the default + // column value. We put a special option here so that `TransactionalWrite#writeFiles` + // will recognize it and apply null-as-default. + if (nullAsDefault) { + options.put( + ColumnWithDefaultExprUtils.USE_NULL_AS_DEFAULT_DELTA_OPTION, + "true" + ) + } + // TODO: Get the config from WriteIntoDelta's txn. + WriteIntoDelta( + table.deltaLog, + if (forceOverwrite) SaveMode.Overwrite else SaveMode.Append, + new DeltaOptions(options.toMap, session.sessionState.conf), + Nil, + table.deltaLog.unsafeVolatileSnapshot.metadata.configuration, + data, + table.catalogTable).run(session) + + // TODO: Push this to Apache Spark + // Re-cache all cached plans(including this relation itself, if it's cached) that refer + // to this data source relation. This is the behavior for InsertInto + session.sharedState.cacheManager.recacheByPlan(session, table.toLogicalRelation) + } + } + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/IcebergTablePlaceHolder.scala b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/IcebergTablePlaceHolder.scala new file mode 100644 index 00000000000..2664b878459 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/IcebergTablePlaceHolder.scala @@ -0,0 +1,33 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
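Before the next file, a short hedged sketch of the user-facing writes that exercise `WriteIntoDeltaBuilder` above: `overwrite(filters)` corresponds to the documented `replaceWhere` option and `overwriteDynamicPartitions` to dynamic partition overwrite mode. It assumes an existing `DataFrame` named `df` whose schema matches the table, plus the same Delta-enabled session and hypothetical path.

```scala
// Overwrite only the rows matched by a predicate (SupportsOverwrite path).
df.write.format("delta")
  .mode("overwrite")
  .option("replaceWhere", "date >= '2024-01-01' AND date < '2024-02-01'")
  .save("/tmp/delta/events")

// Overwrite only the partitions present in `df` (SupportsDynamicOverwrite path).
df.write.format("delta")
  .mode("overwrite")
  .option("partitionOverwriteMode", "dynamic")
  .save("/tmp/delta/events")
```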
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.catalog + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.connector.catalog.{Table, TableCapability} +import org.apache.spark.sql.types.StructType + +/** A place holder used to resolve Iceberg table as a relation during analysis */ +case class IcebergTablePlaceHolder(tableIdentifier: TableIdentifier) extends Table { + + override def name(): String = tableIdentifier.unquotedString + + override def schema(): StructType = new StructType() + + override def capabilities(): java.util.Set[TableCapability] = Set.empty[TableCapability].asJava +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/clustering/ClusteringMetadataDomain.scala b/spark/src/main/scala/org/apache/spark/sql/delta/clustering/ClusteringMetadataDomain.scala new file mode 100644 index 00000000000..01102ff5190 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/clustering/ClusteringMetadataDomain.scala @@ -0,0 +1,36 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.clustering + +import org.apache.spark.sql.delta.skipping.clustering.ClusteringColumn +import org.apache.spark.sql.delta.{JsonMetadataDomain, JsonMetadataDomainUtils} + +/** + * Metadata domain for Clustered table which tracks clustering columns. + */ +case class ClusteringMetadataDomain(clusteringColumns: Seq[Seq[String]]) + extends JsonMetadataDomain[ClusteringMetadataDomain] { + override val domainName: String = ClusteringMetadataDomain.domainName +} + +object ClusteringMetadataDomain extends JsonMetadataDomainUtils[ClusteringMetadataDomain] { + override val domainName = "delta.clustering" + + def fromClusteringColumns(clusteringColumns: Seq[ClusteringColumn]): ClusteringMetadataDomain = { + ClusteringMetadataDomain(clusteringColumns.map(_.physicalName)) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/CloneTableBase.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CloneTableBase.scala new file mode 100644 index 00000000000..1dcfea56d6a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CloneTableBase.scala @@ -0,0 +1,414 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import java.io.Closeable +import java.util.UUID + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util._ +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{Dataset, Row, SparkSession} +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.plans.logical.LeafCommand +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.{Clock, SerializableConfiguration} +// scalastyle:on import.ordering.noEmptyLine + +/** + * An interface of the source table to be cloned from. + */ +trait CloneSource extends Closeable { + /** The format of the source table */ + def format: String + + /** The source table's protocol */ + def protocol: Protocol + + /** A system clock */ + def clock: Clock + + /** The source table name */ + def name: String + + /** The path of the source table */ + def dataPath: Path + + /** The source table schema */ + def schema: StructType + + /** The catalog table of the source table, if exists */ + def catalogTable: Option[CatalogTable] + + /** The time travel spec of the source table, if exists */ + def timeTravelOpt: Option[DeltaTimeTravelSpec] + + /** A snapshot of the source table, if exists */ + def snapshot: Option[Snapshot] + + /** The metadata of the source table */ + def metadata: Metadata + + /** All of the files present in the source table */ + def allFiles: Dataset[AddFile] + + /** Total size of data files in bytes */ + def sizeInBytes: Long + + /** Total number of data files */ + def numOfFiles: Long + + /** Describe this clone source */ + def description: String +} + +// Clone source table formats +object CloneSourceFormat { + val DELTA = "Delta" + val ICEBERG = "Iceberg" + val PARQUET = "Parquet" + val UNKNOWN = "Unknown" +} + +trait CloneTableBaseUtils extends DeltaLogging +{ + + import CloneTableCommand._ + + /** Make a map of operation metrics for the executed command for DeltaLog commits */ + protected def getOperationMetricsForDeltaLog( + opMetrics: SnapshotOverwriteOperationMetrics): Map[String, Long] = { + Map( + SOURCE_TABLE_SIZE -> opMetrics.sourceSnapshotSizeInBytes, + SOURCE_NUM_OF_FILES -> opMetrics.sourceSnapshotFileCount, + NUM_REMOVED_FILES -> 0L, + NUM_COPIED_FILES -> 0L, + REMOVED_FILES_SIZE -> 0L, + COPIED_FILES_SIZE -> 0L + ) + } + + /** + * Make a map of operation metrics for the executed command for recording events. 
+ * Any command can extend to overwrite or add new metrics + */ + protected def getOperationMetricsForEventRecord( + opMetrics: SnapshotOverwriteOperationMetrics): Map[String, Long] = + getOperationMetricsForDeltaLog(opMetrics) + + /** Make a output Seq[Row] of metrics for the executed command */ + protected def getOutputSeq(operationMetrics: Map[String, Long]): Seq[Row] + + protected def checkColumnMappingMode(beforeMetadata: Metadata, afterMetadata: Metadata): Unit = { + val beforeColumnMappingMode = beforeMetadata.columnMappingMode + val afterColumnMappingMode = afterMetadata.columnMappingMode + // can't switch column mapping mode + if (beforeColumnMappingMode != afterColumnMappingMode) { + throw DeltaErrors.changeColumnMappingModeNotSupported( + beforeColumnMappingMode.name, afterColumnMappingMode.name) + } + } + + // Return a copy of the AddFiles with path being absolutized, indicating a SHALLOW CLONE + protected def handleNewDataFiles( + opName: String, + datasetOfNewFilesToAdd: Dataset[AddFile], + qualifiedSourceTableBasePath: String, + destTable: DeltaLog + ): Dataset[AddFile] = { + recordDeltaOperation(destTable, s"delta.${opName.toLowerCase()}.makeAbsolute") { + val absolutePaths = DeltaFileOperations.makePathsAbsolute( + qualifiedSourceTableBasePath, + datasetOfNewFilesToAdd) + absolutePaths + } + } +} + +abstract class CloneTableBase( + sourceTable: CloneSource, + tablePropertyOverrides: Map[String, String], + targetPath: Path) + extends LeafCommand + with CloneTableBaseUtils + with SQLConfHelper +{ + + import CloneTableBase._ + def dataChangeInFileAction: Boolean = true + + /** Returns whether the table exists at the given snapshot version. */ + def tableExists(snapshot: SnapshotDescriptor): Boolean = snapshot.version >= 0 + + /** + * Handles the transaction logic for the CLONE command. + * + * @param spark [[SparkSession]] to use + * @param txn [[OptimisticTransaction]] to use for the commit to the target table. + * @param destinationTable [[DeltaLog]] of the destination table. + * @param deltaOperation [[DeltaOperations.Operation]] to use when commit changes to DeltaLog + * @return + */ + protected def handleClone( + spark: SparkSession, + txn: OptimisticTransaction, + destinationTable: DeltaLog, + hdpConf: Configuration, + deltaOperation: DeltaOperations.Operation): Seq[Row] = { + val targetFs = targetPath.getFileSystem(hdpConf) + val qualifiedTarget = targetFs.makeQualified(targetPath).toString + val qualifiedSource = { + val sourcePath = sourceTable.dataPath + val sourceFs = sourcePath.getFileSystem(hdpConf) + sourceFs.makeQualified(sourcePath).toString + } + + if (txn.readVersion < 0) { + destinationTable.createLogDirectory() + } + + val ( + datasetOfNewFilesToAdd + ) = { + // Make sure target table is empty before running clone + if (txn.snapshot.allFiles.count() > 0) { + throw DeltaErrors.cloneReplaceNonEmptyTable + } + sourceTable.allFiles + } + + val metadataToUpdate = determineTargetMetadata(txn.snapshot, deltaOperation.name) + // Don't merge in the default properties when cloning, or we'll end up with different sets of + // properties between source and target. 
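For context around `handleClone` above, a hedged SQL sketch of the statements that drive this code path; the table names are illustrative. A shallow clone copies metadata and file references rather than data files, and re-cloning over a non-empty target requires replace semantics.

```scala
// Shallow clone into a new (or absent) target table.
spark.sql("CREATE TABLE IF NOT EXISTS sales_dev SHALLOW CLONE sales")

// Re-cloning over an existing, non-empty target needs REPLACE semantics.
spark.sql("CREATE OR REPLACE TABLE sales_dev SHALLOW CLONE sales")
```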
+ txn.updateMetadata(metadataToUpdate, ignoreDefaultProperties = true) + + val datasetOfAddedFileList = handleNewDataFiles( + deltaOperation.name, + datasetOfNewFilesToAdd, + qualifiedSource, + destinationTable) + + val addedFileList = datasetOfAddedFileList.collectAsList() + + val (addedFileCount, addedFilesSize) = + (addedFileList.size.toLong, totalDataSize(addedFileList.iterator)) + + val operationTimestamp = sourceTable.clock.getTimeMillis() + + + val newProtocol = determineTargetProtocol(spark, txn, deltaOperation.name) + + try { + var actions = Iterator.single(newProtocol) ++ + addedFileList.iterator.asScala.map { fileToCopy => + val copiedFile = fileToCopy.copy(dataChange = dataChangeInFileAction) + // CLONE does not preserve Row IDs and Commit Versions + copiedFile.copy(baseRowId = None, defaultRowCommitVersion = None) + } + val sourceName = sourceTable.name + // Override source table metadata with user-defined table properties + val context = Map[String, String]() + val isReplaceDelta = txn.readVersion >= 0 + + val opMetrics = SnapshotOverwriteOperationMetrics( + sourceTable.sizeInBytes, + sourceTable.numOfFiles, + addedFileCount, + addedFilesSize) + val commitOpMetrics = getOperationMetricsForDeltaLog(opMetrics) + + recordDeltaOperation( + destinationTable, s"delta.${deltaOperation.name.toLowerCase()}.commit") { + txn.commitLarge( + spark, + actions, + deltaOperation, + context, + commitOpMetrics.mapValues(_.toString()).toMap) + } + + val cloneLogData = getOperationMetricsForEventRecord(opMetrics) ++ Map( + SOURCE -> sourceName, + SOURCE_FORMAT -> sourceTable.format, + SOURCE_PATH -> qualifiedSource, + TARGET -> qualifiedTarget, + PARTITION_BY -> sourceTable.metadata.partitionColumns, + IS_REPLACE_DELTA -> isReplaceDelta) ++ + sourceTable.snapshot.map(s => SOURCE_VERSION -> s.version) + recordDeltaEvent( + destinationTable, s"delta.${deltaOperation.name.toLowerCase()}", data = cloneLogData) + + getOutputSeq(commitOpMetrics) + } finally { + sourceTable.close() + } + } + + /** + * Prepares the source metadata by making it compatible with the existing target metadata. + */ + private def prepareSourceMetadata( + targetSnapshot: SnapshotDescriptor, + opName: String): Metadata = { + var clonedMetadata = + sourceTable.metadata.copy( + id = UUID.randomUUID().toString, + name = targetSnapshot.metadata.name, + description = targetSnapshot.metadata.description) + // If it's a new table, we remove the row tracking table property to create a 1:1 CLONE of + // the source, just without row tracking. If it's an existing table, we take whatever + // setting is currently on the target, as the setting should be independent between + // target and source. + if (!tableExists(targetSnapshot)) { + clonedMetadata = RowTracking.removeRowTrackingProperty(clonedMetadata) + } else { + clonedMetadata = RowTracking.takeRowTrackingPropertyFromTarget( + targetMetadata = targetSnapshot.metadata, + sourceMetadata = clonedMetadata) + } + clonedMetadata + } + + /** + * Verifies metadata invariants. + */ + private def verifyMetadataInvariants( + targetSnapshot: SnapshotDescriptor, + updatedMetadataWithOverrides: Metadata): Unit = { + // TODO: we have not decided on how to implement switching column mapping modes + // so we block this feature for now + // 1. Validate configuration overrides + // this checks if columnMapping.maxId is unexpected set in the properties + DeltaConfigs.validateConfigurations(tablePropertyOverrides) + // 2. 
Check for column mapping mode conflict with the source metadata w/ tablePropertyOverrides + checkColumnMappingMode(sourceTable.metadata, updatedMetadataWithOverrides) + // 3. Checks for column mapping mode conflicts with existing metadata if there's any + if (tableExists(targetSnapshot)) { + checkColumnMappingMode(targetSnapshot.metadata, updatedMetadataWithOverrides) + } + } + + /** + * Determines the expected metadata of the target. + */ + private def determineTargetMetadata( + targetSnapshot: SnapshotDescriptor, + opName: String) : Metadata = { + var metadata = prepareSourceMetadata(targetSnapshot, opName) + val validatedConfigurations = DeltaConfigs.validateConfigurations(tablePropertyOverrides) + // Merge source configuration and table property overrides + metadata = metadata.copy( + configuration = metadata.configuration ++ validatedConfigurations) + verifyMetadataInvariants(targetSnapshot, metadata) + metadata + } + + /** + * Determines the final protocol of the target. The metadata of the `txn` must be updated before + * determining the protocol. + */ + private def determineTargetProtocol( + spark: SparkSession, + txn: OptimisticTransaction, + opName: String): Protocol = { + val sourceProtocol = sourceTable.protocol + // Pre-transaction version of the target table. + val targetProtocol = txn.snapshot.protocol + // Overriding properties during the CLONE can change the minimum required protocol for target. + // We need to look at the metadata of the transaction to see the entire set of table properties + // for the post-transaction state and decide a version based on that. We also need to re-add + // the table property overrides as table features set by it won't be in the transaction + // metadata anymore. + val validatedConfigurations = DeltaConfigs.validateConfigurations(tablePropertyOverrides) + val configWithOverrides = txn.metadata.configuration ++ validatedConfigurations + val metadataWithOverrides = txn.metadata.copy(configuration = configWithOverrides) + var (minReaderVersion, minWriterVersion, enabledFeatures) = + Protocol.minProtocolComponentsFromMetadata(spark, metadataWithOverrides) + + // Only upgrade the protocol, never downgrade (unless allowed by flag), since that may break + // time travel. + val protocolDowngradeAllowed = + conf.getConf(DeltaSQLConf.RESTORE_TABLE_PROTOCOL_DOWNGRADE_ALLOWED) || + // It's not a real downgrade if the table doesn't exist before the CLONE. + !tableExists(txn.snapshot) + val sourceProtocolWithoutRowTracking = RowTracking.removeRowTrackingTableFeature(sourceProtocol) + + if (protocolDowngradeAllowed) { + minReaderVersion = minReaderVersion.max(sourceProtocol.minReaderVersion) + minWriterVersion = minWriterVersion.max(sourceProtocol.minWriterVersion) + val minProtocol = Protocol(minReaderVersion, minWriterVersion).withFeatures(enabledFeatures) + // Row tracking settings should be independent between target and source. 
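To make the protocol handling above concrete: a property override supplied with the CLONE can require a table feature that the source protocol lacks, which is why the target protocol is recomputed from the post-transaction metadata instead of simply copied from the source. A hedged SQL sketch, where the table names and the chosen property are illustrative:

```scala
// The override below can force a protocol upgrade on the clone target even if
// the source table never supported the corresponding table feature.
spark.sql("""
  CREATE OR REPLACE TABLE sales_dv
  SHALLOW CLONE sales
  TBLPROPERTIES ('delta.enableDeletionVectors' = 'true')
""")
```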
+ sourceProtocolWithoutRowTracking.merge(minProtocol) + } else { + // Take the maximum of all protocol versions being merged to ensure that table features + // from table property overrides are correctly added to the table feature list or are only + // implicitly enabled + minReaderVersion = + Seq(targetProtocol.minReaderVersion, sourceProtocol.minReaderVersion, minReaderVersion).max + minWriterVersion = Seq( + targetProtocol.minWriterVersion, sourceProtocol.minWriterVersion, minWriterVersion).max + val minProtocol = Protocol(minReaderVersion, minWriterVersion).withFeatures(enabledFeatures) + // Row tracking settings should be independent between target and source. + targetProtocol.merge(sourceProtocolWithoutRowTracking, minProtocol) + } + } +} + +object CloneTableBase extends Logging { + + val SOURCE = "source" + val SOURCE_FORMAT = "sourceFormat" + val SOURCE_PATH = "sourcePath" + val SOURCE_VERSION = "sourceVersion" + val TARGET = "target" + val IS_REPLACE_DELTA = "isReplaceDelta" + val PARTITION_BY = "partitionBy" + + /** Utility method returns the total size of all files in the given iterator */ + private def totalDataSize(fileList: java.util.Iterator[AddFile]): Long = { + var totalSize = 0L + fileList.asScala.foreach { f => + totalSize += f.size + } + totalSize + } +} + +/** + * Metrics of snapshot overwrite operation. + * @param sourceSnapshotSizeInBytes Total size of the data in the source snapshot. + * @param sourceSnapshotFileCount Number of data files in the source snapshot. + * @param destSnapshotAddedFileCount Number of new data files added to the destination + * snapshot as part of the execution. + * @param destSnapshotAddedFilesSizeInBytes Total size (in bytes) of the data files that were + * added to the destination snapshot. + */ +case class SnapshotOverwriteOperationMetrics( + sourceSnapshotSizeInBytes: Long, + sourceSnapshotFileCount: Long, + destSnapshotAddedFileCount: Long, + destSnapshotAddedFilesSizeInBytes: Long) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/CloneTableCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CloneTableCommand.scala new file mode 100644 index 00000000000..5da35112ddd --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CloneTableCommand.scala @@ -0,0 +1,294 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import java.io.FileNotFoundException + +import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTimeTravelSpec, OptimisticTransaction, Snapshot} +import org.apache.spark.sql.delta.DeltaOperations.Clone +import org.apache.spark.sql.delta.actions.{AddFile, Metadata, Protocol} +import org.apache.spark.sql.delta.actions.Protocol.extractAutomaticallyEnabledFeatures +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands.convert.{ConvertTargetTable, ConvertUtils} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{Column, Dataset, Row, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.connector.catalog.Table +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{LongType, StructType} +import org.apache.spark.util.{Clock, SerializableConfiguration, SystemClock} +// scalastyle:on import.ordering.noEmptyLine + +/** + * Clones a Delta table to a new location with a new table id. + * The clone can be performed as a shallow clone (i.e. shallow = true), + * where we do not copy the files, but just point to them. + * If a table exists at the given targetPath, that table will be replaced. + * + * @param sourceTable is the table to be cloned + * @param targetIdent destination table identifier to clone to + * @param tablePropertyOverrides user-defined table properties that should override any properties + * with the same key from the source table + * @param targetPath the actual destination + */ +case class CloneTableCommand( + sourceTable: CloneSource, + targetIdent: TableIdentifier, + tablePropertyOverrides: Map[String, String], + targetPath: Path) + extends CloneTableBase(sourceTable, tablePropertyOverrides, targetPath) { + + import CloneTableCommand._ + + + /** Return the CLONE command output from the execution metrics */ + override protected def getOutputSeq(operationMetrics: Map[String, Long]): Seq[Row] = { + Seq(Row( + operationMetrics.get(SOURCE_TABLE_SIZE), + operationMetrics.get(SOURCE_NUM_OF_FILES), + operationMetrics.get(NUM_REMOVED_FILES), + operationMetrics.get(NUM_COPIED_FILES), + operationMetrics.get(REMOVED_FILES_SIZE), + operationMetrics.get(COPIED_FILES_SIZE) + )) + } + + /** + * Handles the transaction logic for the CLONE command. + * @param txn [[OptimisticTransaction]] to use for the commit to the target table. + * @param targetDeltaLog [[DeltaLog]] of the target table. 
+ * @return + */ + def handleClone( + sparkSession: SparkSession, + txn: OptimisticTransaction, + targetDeltaLog: DeltaLog): Seq[Row] = { + if (!targetPath.isAbsolute) { + throw DeltaErrors.cloneOnRelativePath(targetIdent.toString) + } + + /** Log clone command information */ + logInfo("Cloning " + sourceTable.description + s" to $targetPath") + + // scalastyle:off deltahadoopconfiguration + val hdpConf = sparkSession.sessionState.newHadoopConf() + // scalastyle:on deltahadoopconfiguration + if (!sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_CLONE_REPLACE_ENABLED)) { + val targetFs = targetPath.getFileSystem(hdpConf) + try { + val subFiles = targetFs.listStatus(targetPath) + if (subFiles.nonEmpty) { + throw DeltaErrors.cloneReplaceUnsupported(targetIdent) + } + } catch { + case _: FileNotFoundException => // we want the path to not exist + targetFs.mkdirs(targetPath) + } + } + + handleClone( + sparkSession, + txn, + targetDeltaLog, + hdpConf = hdpConf, + deltaOperation = Clone( + sourceTable.name, sourceTable.snapshot.map(_.version).getOrElse(-1) + )) + } +} + +object CloneTableCommand { + // Names of the metrics - added to the Delta commit log as part of Clone transaction + val SOURCE_TABLE_SIZE = "sourceTableSize" + val SOURCE_NUM_OF_FILES = "sourceNumOfFiles" + val NUM_REMOVED_FILES = "numRemovedFiles" + val NUM_COPIED_FILES = "numCopiedFiles" + val REMOVED_FILES_SIZE = "removedFilesSize" + val COPIED_FILES_SIZE = "copiedFilesSize" + + // SQL way column names for metrics in command execution output + private val COLUMN_SOURCE_TABLE_SIZE = "source_table_size" + private val COLUMN_SOURCE_NUM_OF_FILES = "source_num_of_files" + private val COLUMN_NUM_REMOVED_FILES = "num_removed_files" + private val COLUMN_NUM_COPIED_FILES = "num_copied_files" + private val COLUMN_REMOVED_FILES_SIZE = "removed_files_size" + private val COLUMN_COPIED_FILES_SIZE = "copied_files_size" + + val output: Seq[Attribute] = Seq( + AttributeReference(COLUMN_SOURCE_TABLE_SIZE, LongType)(), + AttributeReference(COLUMN_SOURCE_NUM_OF_FILES, LongType)(), + AttributeReference(COLUMN_NUM_REMOVED_FILES, LongType)(), + AttributeReference(COLUMN_NUM_COPIED_FILES, LongType)(), + AttributeReference(COLUMN_REMOVED_FILES_SIZE, LongType)(), + AttributeReference(COLUMN_COPIED_FILES_SIZE, LongType)() + ) +} + +/** A delta table source to be cloned from */ +class CloneDeltaSource( + sourceTable: DeltaTableV2) extends CloneSource { + + private val deltaLog = sourceTable.deltaLog + private val sourceSnapshot = sourceTable.initialSnapshot + + def format: String = CloneSourceFormat.DELTA + + def protocol: Protocol = sourceSnapshot.protocol + + def clock: Clock = deltaLog.clock + + def name: String = sourceTable.name() + + def dataPath: Path = deltaLog.dataPath + + def schema: StructType = sourceTable.schema() + + def catalogTable: Option[CatalogTable] = sourceTable.catalogTable + + def timeTravelOpt: Option[DeltaTimeTravelSpec] = sourceTable.timeTravelOpt + + def snapshot: Option[Snapshot] = Some(sourceSnapshot) + + def metadata: Metadata = sourceSnapshot.metadata + + def allFiles: Dataset[AddFile] = sourceSnapshot.allFiles + + def sizeInBytes: Long = sourceSnapshot.sizeInBytes + + def numOfFiles: Long = sourceSnapshot.numOfFiles + + def description: String = s"${format} table ${name} at version ${sourceSnapshot.version}" + + override def close(): Unit = {} +} + +/** A convertible non-delta table source to be cloned from */ +abstract class CloneConvertedSource(spark: SparkSession) extends CloneSource { + + // The converter 
which produces delta metadata from non-delta table, child class must implement + // this converter. + protected def convertTargetTable: ConvertTargetTable + + def format: String = CloneSourceFormat.UNKNOWN + + def protocol: Protocol = { + // This is quirky but necessary to add table features such as column mapping if the default + // protocol version supports table features. + Protocol().withFeatures(extractAutomaticallyEnabledFeatures(spark, metadata)) + } + + override val clock: Clock = new SystemClock() + + def dataPath: Path = new Path(convertTargetTable.fileManifest.basePath) + + def schema: StructType = convertTargetTable.tableSchema + + def timeTravelOpt: Option[DeltaTimeTravelSpec] = None + + def snapshot: Option[Snapshot] = None + + override lazy val metadata: Metadata = { + val conf = catalogTable + // Hive adds some transient table properties which should be ignored + .map(_.properties.filterKeys(_ != "transient_lastDdlTime").toMap) + .foldRight(convertTargetTable.properties.toMap)(_ ++ _) + + { + Metadata( + schemaString = convertTargetTable.tableSchema.json, + partitionColumns = convertTargetTable.partitionSchema.fieldNames, + configuration = conf, + createdTime = Some(System.currentTimeMillis())) + } + } + + override lazy val allFiles: Dataset[AddFile] = { + import org.apache.spark.sql.delta.implicits._ + + // scalastyle:off deltahadoopconfiguration + val serializableConf = new SerializableConfiguration(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + val baseDir = dataPath.toString + val conf = spark.sparkContext.broadcast(serializableConf) + val partitionSchema = convertTargetTable.partitionSchema + + { + convertTargetTable.fileManifest.allFiles.mapPartitions { targetFile => + val basePath = new Path(baseDir) + val fs = basePath.getFileSystem(conf.value.value) + targetFile.map(ConvertUtils.createAddFile( + _, basePath, fs, SQLConf.get, Some(partitionSchema))) + } + } + } + + private lazy val fileStats = allFiles.select( + coalesce(sum("size"), lit(0L)), count(new Column("*"))).first() + + def sizeInBytes: Long = fileStats.getLong(0) + + def numOfFiles: Long = fileStats.getLong(1) + + def description: String = s"${format} table ${name}" + + override def close(): Unit = convertTargetTable.fileManifest.close() +} + +/** + * A parquet table source to be cloned from + */ +case class CloneParquetSource( + tableIdentifier: TableIdentifier, + override val catalogTable: Option[CatalogTable], + spark: SparkSession) extends CloneConvertedSource(spark) { + + override lazy val convertTargetTable: ConvertTargetTable = { + val baseDir = catalogTable.map(_.location.toString).getOrElse(tableIdentifier.table) + ConvertUtils.getParquetTable(spark, baseDir, catalogTable, None) + } + + override def format: String = CloneSourceFormat.PARQUET + + override def name: String = catalogTable.map(_.identifier.unquotedString) + .getOrElse(s"parquet.`${tableIdentifier.table}`") +} + +/** + * A iceberg table source to be cloned from + */ +case class CloneIcebergSource( + tableIdentifier: TableIdentifier, + sparkTable: Option[Table], + tableSchema: Option[StructType], + spark: SparkSession) extends CloneConvertedSource(spark) { + + override lazy val convertTargetTable: ConvertTargetTable = + ConvertUtils.getIcebergTable(spark, tableIdentifier.table, sparkTable, tableSchema) + + override def format: String = CloneSourceFormat.ICEBERG + + override def name: String = + sparkTable.map(_.name()).getOrElse(s"iceberg.`${tableIdentifier.table}`") + + override def catalogTable: 
Option[CatalogTable] = None +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/ConvertToDeltaCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/ConvertToDeltaCommand.scala new file mode 100644 index 00000000000..443e665e55d --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/ConvertToDeltaCommand.scala @@ -0,0 +1,465 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import java.io.Closeable +import java.lang.reflect.InvocationTargetException +import java.util.Locale + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.{AddFile, Metadata} +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands.VacuumCommand.{generateCandidateFileMap, getTouchedFile} +import org.apache.spark.sql.delta.commands.convert.{ConvertTargetFileManifest, ConvertTargetTable, ConvertUtils} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.{DeltaSourceUtils, DeltaSQLConf} +import org.apache.spark.sql.delta.util._ +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.sql.{AnalysisException, Row, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{Analyzer, NoSuchTableException} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, SessionCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, V1Table} +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.types.StructType + +/** + * Convert an existing parquet table to a delta table by creating delta logs based on + * existing files. Here are the main components: + * + * - File Listing: Launch a spark job to list files from a given directory in parallel. + * + * - Schema Inference: Given an iterator on the file list result, we group the iterator into + * sequential batches and launch a spark job to infer schema for each batch, + * and finally merge schemas from all batches. + * + * - Stats collection: Again, we group the iterator on file list results into sequential batches + * and launch a spark job to collect stats for each batch. + * + * - Commit the files: We take the iterator of files with stats and write out a delta + * log file as the first commit. This bypasses the transaction protocol, but + * it's ok as this would be the very first commit. + * + * @param tableIdentifier the target parquet table. + * @param partitionSchema the partition schema of the table, required when table is partitioned. + * @param collectStats Should collect column stats per file on convert. + * @param deltaPath if provided, the delta log will be written to this location. 
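As a usage reference for the conversion flow documented above, a hedged sketch of the SQL forms that reach this command; the path, database, table name, and partition column are hypothetical.

```scala
// Path-based parquet directory, with an explicit partition schema.
spark.sql("CONVERT TO DELTA parquet.`/data/events` PARTITIONED BY (date DATE)")

// Catalog table; per-file statistics collection can be skipped explicitly.
spark.sql("CONVERT TO DELTA my_db.events NO STATISTICS")
```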
+ */ +abstract class ConvertToDeltaCommandBase( + tableIdentifier: TableIdentifier, + partitionSchema: Option[StructType], + collectStats: Boolean, + deltaPath: Option[String]) extends LeafRunnableCommand with DeltaCommand { + + protected lazy val statsEnabled: Boolean = conf.getConf(DeltaSQLConf.DELTA_COLLECT_STATS) + + protected lazy val icebergEnabled: Boolean = + conf.getConf(DeltaSQLConf.DELTA_CONVERT_ICEBERG_ENABLED) + + protected def isParquetPathProvider(provider: String): Boolean = + provider.equalsIgnoreCase("parquet") + + protected def isIcebergPathProvider(provider: String): Boolean = + icebergEnabled && provider.equalsIgnoreCase("iceberg") + + protected def isSupportedPathTableProvider(provider: String): Boolean = { + isParquetPathProvider(provider) || isIcebergPathProvider(provider) + } + + override def run(spark: SparkSession): Seq[Row] = { + val convertProperties = resolveConvertTarget(spark, tableIdentifier) match { + case Some(props) if !DeltaSourceUtils.isDeltaTable(props.provider) => props + case _ => + // Make convert to delta idempotent + logConsole("The table you are trying to convert is already a delta table") + return Seq.empty[Row] + } + + val targetTable = getTargetTable(spark, convertProperties) + val deltaPathToUse = new Path(deltaPath.getOrElse(convertProperties.targetDir)) + val deltaLog = DeltaLog.forTable(spark, deltaPathToUse) + val txn = deltaLog.startTransaction(convertProperties.catalogTable) + if (txn.readVersion > -1) { + handleExistingTransactionLog(spark, txn, convertProperties, targetTable.format) + return Seq.empty[Row] + } + + performConvert(spark, txn, convertProperties, targetTable) + } + + /** Given the table identifier, figure out what our conversion target is. */ + private def resolveConvertTarget( + spark: SparkSession, + tableIdentifier: TableIdentifier): Option[ConvertTarget] = { + val v2SessionCatalog = + spark.sessionState.catalogManager.v2SessionCatalog.asInstanceOf[TableCatalog] + + // TODO: Leverage the analyzer for all this work + if (isCatalogTable(spark.sessionState.analyzer, tableIdentifier)) { + val namespace = + tableIdentifier.database.map(Array(_)) + .getOrElse(spark.sessionState.catalogManager.currentNamespace) + val ident = Identifier.of(namespace, tableIdentifier.table) + v2SessionCatalog.loadTable(ident) match { + case v1: V1Table if v1.catalogTable.tableType == CatalogTableType.VIEW => + throw DeltaErrors.operationNotSupportedException( + "Converting a view to a Delta table", + tableIdentifier) + case v1: V1Table => + val table = v1.catalogTable + // Hive adds some transient table properties which should be ignored + val props = table.properties.filterKeys(_ != "transient_lastDdlTime").toMap + Some(ConvertTarget(Some(table), table.provider, new Path(table.location).toString, props)) + case _: DeltaTableV2 => + // Already a Delta table + None + } + } else { + Some(ConvertTarget( + None, + tableIdentifier.database, + tableIdentifier.table, + Map.empty[String, String])) + } + } + + /** + * When converting a table to delta using table name, we should also change the metadata in the + * catalog table because the delta log should be the source of truth for the metadata rather than + * the metastore. 
+ * + * @param catalogTable metadata of the table to be converted + * @param sessionCatalog session catalog of the metastore used to update the metadata + */ + private def convertMetadata( + catalogTable: CatalogTable, + sessionCatalog: SessionCatalog): Unit = { + var newCatalog = catalogTable.copy( + provider = Some("delta"), + // TODO: Schema changes unfortunately doesn't get reflected in the HiveMetaStore. Should be + // fixed in Apache Spark + schema = new StructType(), + partitionColumnNames = Seq.empty, + properties = Map.empty, + // TODO: Serde information also doesn't get removed + storage = catalogTable.storage.copy( + inputFormat = None, + outputFormat = None, + serde = None) + ) + sessionCatalog.alterTable(newCatalog) + logInfo("Convert to Delta converted metadata") + } + + /** + * Calls DeltaCommand.isCatalogTable. With Convert, we may get a format check error in cases where + * the metastore and the underlying table don't align, e.g. external table where the underlying + * files are converted to delta but the metadata has not been converted yet. In these cases, + * catch the error and return based on whether the provided Table Identifier could reasonably be + * a path + * + * @param analyzer The session state analyzer to call + * @param tableIdent Table Identifier to determine whether is path based or not + * @return Boolean where true means that the table is a table in a metastore and false means the + * table is a path based table + */ + override def isCatalogTable(analyzer: Analyzer, tableIdent: TableIdentifier): Boolean = { + try { + super.isCatalogTable(analyzer, tableIdentifier) + } catch { + case e: AnalysisException if e.getMessage.contains("Incompatible format detected") => + !isPathIdentifier(tableIdentifier) + case e: AssertionError if e.getMessage.contains("Conflicting directory structures") => + !isPathIdentifier(tableIdentifier) + case _: NoSuchTableException + if tableIdent.database.isEmpty && new Path(tableIdent.table).isAbsolute => + throw DeltaErrors.missingProviderForConvertException(tableIdent.table) + } + } + + /** + * Override this method since parquet paths are valid for Convert + * + * @param tableIdent the provided table or path + * @return Whether or not the ident provided can refer to a table by path + */ + override def isPathIdentifier(tableIdent: TableIdentifier): Boolean = { + val provider = tableIdent.database.getOrElse("") + // If db doesnt exist or db is called delta/tahoe then check if path exists + (DeltaSourceUtils.isDeltaDataSourceName(provider) || + isSupportedPathTableProvider(provider)) && + new Path(tableIdent.table).isAbsolute + } + + /** + * If there is already a transaction log we should handle what happens when convert to delta is + * run once again. It may be the case that the table is entirely converted i.e. the underlying + * files AND the catalog (if one exists) are updated. Or it may be the case that the table is + * partially converted i.e. underlying files are converted but catalog (if one exists) + * has not been updated. 
+ * + * @param spark spark session to get session catalog + * @param txn existing transaction log + * @param target properties that contains: the provider and the catalogTable when + * converting using table name + */ + private def handleExistingTransactionLog( + spark: SparkSession, + txn: OptimisticTransaction, + target: ConvertTarget, + sourceFormat: String): Unit = { + // In the case that the table is a delta table but the provider has not been updated we should + // update table metadata to reflect that the table is a delta table and table properties should + // also be updated + if (isParquetCatalogTable(target)) { + val catalogTable = target.catalogTable + val tableProps = target.properties + val deltaLogConfig = txn.metadata.configuration + val mergedConfig = deltaLogConfig ++ tableProps + + if (mergedConfig != deltaLogConfig) { + if (deltaLogConfig.nonEmpty && + conf.getConf(DeltaSQLConf.DELTA_CONVERT_METADATA_CHECK_ENABLED)) { + throw DeltaErrors.convertMetastoreMetadataMismatchException(tableProps, deltaLogConfig) + } + val newMetadata = txn.metadata.copy( + configuration = mergedConfig + ) + txn.commit( + newMetadata :: Nil, + DeltaOperations.Convert( + numFiles = 0L, + partitionSchema.map(_.fieldNames.toSeq).getOrElse(Nil), + collectStats = false, + catalogTable = catalogTable.map(t => t.identifier.toString), + sourceFormat = Some(sourceFormat) + )) + } + convertMetadata( + catalogTable.get, + spark.sessionState.catalog + ) + } else { + logConsole("The table you are trying to convert is already a delta table") + } + } + + /** Is the target table a parquet table defined in an external catalog. */ + private def isParquetCatalogTable(target: ConvertTarget): Boolean = { + target.catalogTable match { + case Some(ct) => + ConvertToDeltaCommand.isHiveStyleParquetTable(ct) || + target.provider.get.toLowerCase(Locale.ROOT) == "parquet" + case None => false + } + } + + protected def performStatsCollection( + spark: SparkSession, + txn: OptimisticTransaction, + addFiles: Seq[AddFile]): Iterator[AddFile] = { + val initialSnapshot = new InitialSnapshot(txn.deltaLog.logPath, txn.deltaLog, txn.metadata) + ConvertToDeltaCommand.computeStats(txn.deltaLog, initialSnapshot, addFiles) + } + + /** + * Given the file manifest, create corresponding AddFile actions for the entire list of files. + */ + protected def createDeltaActions( + spark: SparkSession, + manifest: ConvertTargetFileManifest, + partitionSchema: StructType, + txn: OptimisticTransaction, + fs: FileSystem): Iterator[AddFile] = { + val shouldCollectStats = collectStats && statsEnabled + val statsBatchSize = conf.getConf(DeltaSQLConf.DELTA_IMPORT_BATCH_SIZE_STATS_COLLECTION) + var numFiles = 0L + manifest.getFiles.grouped(statsBatchSize).flatMap { batch => + val adds = batch.map( + ConvertUtils.createAddFile( + _, txn.deltaLog.dataPath, fs, conf, Some(partitionSchema), deltaPath.isDefined)) + if (shouldCollectStats) { + logInfo(s"Collecting stats for a batch of ${batch.size} files; " + + s"finished $numFiles so far") + numFiles += statsBatchSize + performStatsCollection(spark, txn, adds) + } else if (collectStats) { + logWarning(s"collectStats is set to true but ${DeltaSQLConf.DELTA_COLLECT_STATS.key}" + + s" is false. 
Skip statistics collection") + adds.toIterator + } else { + adds.toIterator + } + } + } + + /** Get the instance of the convert target table, which provides file manifest and schema */ + protected def getTargetTable(spark: SparkSession, target: ConvertTarget): ConvertTargetTable = { + target.provider match { + case Some(providerName) => providerName.toLowerCase(Locale.ROOT) match { + case checkProvider + if target.catalogTable.exists(ConvertToDeltaCommand.isHiveStyleParquetTable) || + isParquetPathProvider(checkProvider) => + ConvertUtils.getParquetTable( + spark, target.targetDir, target.catalogTable, partitionSchema) + case checkProvider if isIcebergPathProvider(checkProvider) => + if (partitionSchema.isDefined) { + throw DeltaErrors.partitionSchemaInIcebergTables + } + ConvertUtils.getIcebergTable(spark, target.targetDir, None, None) + case other => + throw DeltaErrors.convertNonParquetTablesException(tableIdentifier, other) + } + case None => + throw DeltaErrors.missingProviderForConvertException(target.targetDir) + } + } + + /** + * Converts the given table to a Delta table. First gets the file manifest for the table. Then + * in the first pass, it infers the schema of the table. Then in the second pass, it generates + * the relevant Actions for Delta's transaction log, namely the AddFile actions for each file + * in the manifest. Once a commit is made, updates an external catalog, e.g. Hive MetaStore, + * if this table was referenced through a table in a catalog. + */ + private def performConvert( + spark: SparkSession, + txn: OptimisticTransaction, + convertProperties: ConvertTarget, + targetTable: ConvertTargetTable): Seq[Row] = + recordDeltaOperation(txn.deltaLog, "delta.convert") { + txn.deltaLog.ensureLogDirectoryExist() + val targetPath = new Path(convertProperties.targetDir) + // scalastyle:off deltahadoopconfiguration + val sessionHadoopConf = spark.sessionState.newHadoopConf() + // scalastyle:on deltahadoopconfiguration + val fs = targetPath.getFileSystem(sessionHadoopConf) + val manifest = targetTable.fileManifest + try { + if (!manifest.getFiles.hasNext) { + throw DeltaErrors.emptyDirectoryException(convertProperties.targetDir) + } + + val partitionFields = targetTable.partitionSchema + val schema = targetTable.tableSchema + val metadata = Metadata( + schemaString = schema.json, + partitionColumns = partitionFields.fieldNames, + configuration = convertProperties.properties ++ targetTable.properties, + createdTime = Some(System.currentTimeMillis())) + txn.updateMetadataForNewTable(metadata) + + // TODO: we have not decided on how to implement CONVERT TO DELTA under column mapping modes + // for some convert targets so we block this feature for them here + checkColumnMapping(txn.metadata, targetTable) + RowTracking.checkStatsCollectedIfRowTrackingSupported( + txn.protocol, + collectStats, + statsEnabled) + + val numFiles = targetTable.numFiles + val addFilesIter = createDeltaActions(spark, manifest, partitionFields, txn, fs) + val metrics = Map[String, String]( + "numConvertedFiles" -> numFiles.toString + ) + val (committedVersion, postCommitSnapshot) = txn.commitLarge( + spark, + Iterator.single(txn.protocol) ++ addFilesIter, + getOperation(numFiles, convertProperties, targetTable.format), + getContext, + metrics) + } finally { + manifest.close() + } + + // If there is a catalog table, convert metadata + if (convertProperties.catalogTable.isDefined) { + convertMetadata( + convertProperties.catalogTable.get, + spark.sessionState.catalog + ) + } + + Seq.empty[Row] + } + + 
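The same conversion can also be driven from the documented Scala API instead of SQL. A minimal sketch, assuming an existing Delta-enabled `spark` session and the hypothetical path used earlier:

```scala
import io.delta.tables.DeltaTable

// Programmatic counterpart of CONVERT TO DELTA: the second argument is the
// table identifier and the third is the partition schema as a DDL string.
DeltaTable.convertToDelta(spark, "parquet.`/data/events`", "date DATE")
```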
protected def getContext: Map[String, String] = { + Map.empty + } + + /** Get the operation to store in the commit message. */ + protected def getOperation( + numFilesConverted: Long, + convertProperties: ConvertTarget, + sourceFormat: String): DeltaOperations.Operation = { + DeltaOperations.Convert( + numFilesConverted, + partitionSchema.map(_.fieldNames.toSeq).getOrElse(Nil), + collectStats = collectStats && statsEnabled, + convertProperties.catalogTable.map(t => t.identifier.toString), + sourceFormat = Some(sourceFormat)) + } + + protected case class ConvertTarget( + catalogTable: Option[CatalogTable], + provider: Option[String], + targetDir: String, + properties: Map[String, String]) + + private def checkColumnMapping( + txnMetadata: Metadata, + convertTargetTable: ConvertTargetTable): Unit = { + if (convertTargetTable.requiredColumnMappingMode != txnMetadata.columnMappingMode) { + throw DeltaErrors.convertToDeltaWithColumnMappingNotSupported(txnMetadata.columnMappingMode) + } + } + +} + +case class ConvertToDeltaCommand( + tableIdentifier: TableIdentifier, + partitionSchema: Option[StructType], + collectStats: Boolean, + deltaPath: Option[String]) + extends ConvertToDeltaCommandBase(tableIdentifier, partitionSchema, collectStats, deltaPath) + +object ConvertToDeltaCommand extends DeltaLogging { + + def isHiveStyleParquetTable(catalogTable: CatalogTable): Boolean = { + catalogTable.provider.contains("hive") && catalogTable.storage.serde.contains( + "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe") + } + + def computeStats( + deltaLog: DeltaLog, + snapshot: Snapshot, + addFiles: Seq[AddFile]): Iterator[AddFile] = { + import org.apache.spark.sql.functions._ + val filesWithStats = deltaLog.createDataFrame(snapshot, addFiles) + .groupBy(input_file_name()).agg(to_json(snapshot.statsCollector)) + + val pathToAddFileMap = generateCandidateFileMap(deltaLog.dataPath, addFiles) + filesWithStats.collect().iterator.map { row => + val addFile = getTouchedFile(deltaLog.dataPath, row.getString(0), pathToAddFileMap) + addFile.copy(stats = row.getString(1)) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableCommand.scala new file mode 100644 index 00000000000..e03dfd109cd --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableCommand.scala @@ -0,0 +1,717 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import java.util.concurrent.TimeUnit + +import org.apache.spark.sql.delta.skipping.clustering.ClusteredTableUtils +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.DeltaColumnMapping.{dropColumnMappingMetadata, filterColumnMappingProperties} +import org.apache.spark.sql.delta.actions.{Action, Metadata, Protocol} +import org.apache.spark.sql.delta.actions.DomainMetadata +import org.apache.spark.sql.delta.hooks.{UpdateCatalog, UpdateCatalogFactory} +import org.apache.spark.sql.delta.hooks.IcebergConverterHook +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.Identifier +import org.apache.spark.sql.execution.command.{LeafRunnableCommand, RunnableCommand} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.StructType + +/** + * Single entry point for all write or declaration operations for Delta tables accessed through + * the table name. + * + * @param table The table identifier for the Delta table + * @param existingTableOpt The existing table for the same identifier if exists + * @param mode The save mode when writing data. Relevant when the query is empty or set to Ignore + * with `CREATE TABLE IF NOT EXISTS`. + * @param query The query to commit into the Delta table if it exist. This can come from + * - CTAS + * - saveAsTable + * @param protocol This is used to create a table with specific protocol version + */ +case class CreateDeltaTableCommand( + table: CatalogTable, + existingTableOpt: Option[CatalogTable], + mode: SaveMode, + query: Option[LogicalPlan], + operation: TableCreationModes.CreationMode = TableCreationModes.Create, + tableByPath: Boolean = false, + override val output: Seq[Attribute] = Nil, + protocol: Option[Protocol] = None) + extends LeafRunnableCommand + with DeltaCommand + with DeltaLogging { + + override def run(sparkSession: SparkSession): Seq[Row] = { + + assert(table.tableType != CatalogTableType.VIEW) + assert(table.identifier.database.isDefined, "Database should've been fixed at analysis") + // There is a subtle race condition here, where the table can be created by someone else + // while this command is running. 
Nothing we can do about that though :( + val tableExistsInCatalog = existingTableOpt.isDefined + if (mode == SaveMode.Ignore && tableExistsInCatalog) { + // Early exit on ignore + return Nil + } else if (mode == SaveMode.ErrorIfExists && tableExistsInCatalog) { + throw DeltaErrors.tableAlreadyExists(table) + } + + var tableWithLocation = if (tableExistsInCatalog) { + val existingTable = existingTableOpt.get + table.storage.locationUri match { + case Some(location) if location.getPath != existingTable.location.getPath => + throw DeltaErrors.tableLocationMismatch(table, existingTable) + case _ => + } + table.copy( + storage = existingTable.storage, + tableType = existingTable.tableType) + } else if (table.storage.locationUri.isEmpty) { + // We are defining a new managed table + assert(table.tableType == CatalogTableType.MANAGED) + val loc = sparkSession.sessionState.catalog.defaultTablePath(table.identifier) + table.copy(storage = table.storage.copy(locationUri = Some(loc))) + } else { + // 1. We are defining a new external table + // 2. It's a managed table which already has the location populated. This can happen in DSV2 + // CTAS flow. + table + } + + val tableLocation = new Path(tableWithLocation.location) + val deltaLog = DeltaLog.forTable(sparkSession, tableLocation) + + recordDeltaOperation(deltaLog, "delta.ddl.createTable") { + handleCommit(sparkSession, deltaLog, tableWithLocation) + } + } + + /** + * Handles the transaction logic for the command. Returns the operation metrics in case of CLONE. + */ + private def handleCommit( + sparkSession: SparkSession, + deltaLog: DeltaLog, + tableWithLocation: CatalogTable): Seq[Row] = { + val tableExistsInCatalog = existingTableOpt.isDefined + val hadoopConf = deltaLog.newDeltaHadoopConf() + val tableLocation = new Path(tableWithLocation.location) + val fs = tableLocation.getFileSystem(hadoopConf) + + def checkPathEmpty(txn: OptimisticTransaction): Unit = { + // Verify the table does not exist. + if (mode == SaveMode.Ignore || mode == SaveMode.ErrorIfExists) { + // We should have returned earlier in Ignore and ErrorIfExists mode if the table + // is already registered in the catalog. + assert(!tableExistsInCatalog) + // Verify that the data path does not contain any data. + // We may have failed a previous write. 
The retry should still succeed even if we have + // garbage data + if (txn.readVersion > -1 || !fs.exists(deltaLog.logPath)) { + assertPathEmpty(hadoopConf, tableWithLocation) + } + } + } + + val txn = startTxnForTableCreation(sparkSession, deltaLog, tableWithLocation) + + val result = query match { + // CLONE handled separately from other CREATE TABLE syntax + case Some(cmd: CloneTableCommand) => + checkPathEmpty(txn) + cmd.handleClone(sparkSession, txn, targetDeltaLog = deltaLog) + case Some(deltaWriter: WriteIntoDeltaLike) => + checkPathEmpty(txn) + handleCreateTableAsSelect(sparkSession, txn, deltaLog, deltaWriter, tableWithLocation) + Nil + case Some(query) => + checkPathEmpty(txn) + require(!query.isInstanceOf[RunnableCommand]) + // When using V1 APIs, the `query` plan is not yet optimized, therefore, it is safe + // to once again go through analysis + val data = Dataset.ofRows(sparkSession, query) + val options = new DeltaOptions(table.storage.properties, sparkSession.sessionState.conf) + val deltaWriter = WriteIntoDelta( + deltaLog = deltaLog, + mode = mode, + options, + partitionColumns = table.partitionColumnNames, + configuration = tableWithLocation.properties + ("comment" -> table.comment.orNull), + data = data, + Some(tableWithLocation)) + handleCreateTableAsSelect(sparkSession, txn, deltaLog, deltaWriter, tableWithLocation) + Nil + case _ => + handleCreateTable(sparkSession, txn, tableWithLocation, fs, hadoopConf) + Nil + } + + runPostCommitUpdates(sparkSession, txn, deltaLog, tableWithLocation) + + result + } + + /** + * Runs updates post table creation commit, such as updating the catalog + * with relevant information. + */ + private def runPostCommitUpdates( + sparkSession: SparkSession, + txnUsedForCommit: OptimisticTransaction, + deltaLog: DeltaLog, + tableWithLocation: CatalogTable): Unit = { + // Note that someone may have dropped and recreated the table in a separate location in the + // meantime... Unfortunately we can't do anything there at the moment, because Hive sucks. + logInfo(s"Table is path-based table: $tableByPath. 
Update catalog with mode: $operation") + val opStartTs = TimeUnit.NANOSECONDS.toMillis(txnUsedForCommit.txnStartTimeNs) + val postCommitSnapshot = deltaLog.update(checkIfUpdatedSinceTs = Some(opStartTs)) + val didNotChangeMetadata = txnUsedForCommit.metadata == txnUsedForCommit.snapshot.metadata + updateCatalog(sparkSession, tableWithLocation, postCommitSnapshot, didNotChangeMetadata) + + + if (UniversalFormat.icebergEnabled(postCommitSnapshot.metadata)) { + deltaLog.icebergConverter.convertSnapshot(postCommitSnapshot, tableWithLocation) + } + } + + /** + * Handles the transaction logic for CTAS-like statements, i.e.: + * CREATE TABLE AS SELECT + * CREATE OR REPLACE TABLE AS SELECT + * .saveAsTable in DataframeWriter API + */ + private def handleCreateTableAsSelect( + sparkSession: SparkSession, + txn: OptimisticTransaction, + deltaLog: DeltaLog, + deltaWriter: WriteIntoDeltaLike, + tableWithLocation: CatalogTable): Unit = { + val isManagedTable = tableWithLocation.tableType == CatalogTableType.MANAGED + val options = new DeltaOptions(table.storage.properties, sparkSession.sessionState.conf) + + // Execute write command for `deltaWriter` by + // - replacing the metadata new target table for DataFrameWriterV2 writer if it is a + // REPLACE or CREATE_OR_REPLACE command, + // - running the write procedure of DataFrameWriter command and returning the + // new created actions, + // - returning the Delta Operation type of this DataFrameWriter + def doDeltaWrite( + deltaWriter: WriteIntoDeltaLike, + schema: StructType): (Seq[Action], DeltaOperations.Operation) = { + // In the V2 Writer, methods like "replace" and "createOrReplace" implicitly mean that + // the metadata should be changed. This wasn't the behavior for DataFrameWriterV1. + if (!isV1Writer) { + replaceMetadataIfNecessary( + txn, + tableWithLocation, + options, + schema) + } + var actions = deltaWriter.write( + txn, + sparkSession, + ClusteredTableUtils.getClusterBySpecOptional(table), + // Pass this option to the writer so that it can differentiate between an INSERT and a + // REPLACE command. This is needed because the writer is shared between the two commands. + // But some options, such as dynamic partition overwrite, are only valid for INSERT. + // Only allow createOrReplace command which is not a V1 writer. + // saveAsTable() command uses this same code path and is marked as a V1 writer. + // We do not want saveAsTable() to be treated as a REPLACE command wrt dynamic partition + // overwrite. + isTableReplace = isReplace && !isV1Writer + ) + // Metadata updates for creating table (with any writer) and replacing table + // (only with V1 writer) will be handled inside WriteIntoDelta. + // For createOrReplace operation, metadata updates are handled here if the table already + // exists (replacing table), otherwise it is handled inside WriteIntoDelta (creating table). + if (!isV1Writer && isReplace && txn.readVersion > -1L) { + val newDomainMetadata = Seq.empty[DomainMetadata] ++ + ClusteredTableUtils.getDomainMetadataOptional(table, txn) + // Ensure to remove any domain metadata for REPLACE TABLE. 
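+        // Domain metadata recorded for the table being replaced (e.g. clustering column
+        // domains) must not silently carry over to the new definition, so the snapshot's
+        // existing domains are reconciled with the ones computed for the new table here.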
+ actions = actions ++ DomainMetadataUtils.handleDomainMetadataForReplaceTable( + txn.snapshot.domainMetadata, newDomainMetadata) + } + val op = getOperation(txn.metadata, isManagedTable, Some(options) + ) + (actions, op) + } + val updatedConfiguration = UniversalFormat + .enforceDependenciesInConfiguration(deltaWriter.configuration, txn.snapshot) + val updatedWriter = deltaWriter.withNewWriterConfiguration(updatedConfiguration) + // We are either appending/overwriting with saveAsTable or creating a new table with CTAS + if (!hasBeenExecuted(txn, sparkSession, Some(options))) { + val (actions, op) = doDeltaWrite(updatedWriter, updatedWriter.data.schema.asNullable) + txn.commit(actions, op) + } + } + + /** + * Handles the transaction logic for CREATE OR REPLACE TABLE statement + * without the AS [CLONE, SELECT] clause. + */ + private def handleCreateTable( + sparkSession: SparkSession, + txn: OptimisticTransaction, + tableWithLocation: CatalogTable, + fs: FileSystem, + hadoopConf: Configuration): Unit = { + + val isManagedTable = tableWithLocation.tableType == CatalogTableType.MANAGED + val tableLocation = new Path(tableWithLocation.location) + val tableExistsInCatalog = existingTableOpt.isDefined + val options = new DeltaOptions(table.storage.properties, sparkSession.sessionState.conf) + + def createActionsForNewTableOrVerify(): Seq[Action] = { + if (isManagedTable) { + // When creating a managed table, the table path should not exist or is empty, or + // users would be surprised to see the data, or see the data directory being dropped + // after the table is dropped. + assertPathEmpty(hadoopConf, tableWithLocation) + } + + // However, if we allow creating an empty schema table and indeed the table is new, we + // would need to make sure txn.readVersion <= 0 so we are either: + // 1) Creating a new empty schema table (version = -1) or + // 2) Restoring an existing empty schema table at version 0. An empty schema table should + // not have versions > 0 because it must be written with schema changes after initial + // creation. + val emptySchemaTableFlag = sparkSession.sessionState.conf + .getConf(DeltaSQLConf.DELTA_ALLOW_CREATE_EMPTY_SCHEMA_TABLE) + val allowRestoringExistingEmptySchemaTable = + emptySchemaTableFlag && txn.metadata.schema.isEmpty && txn.readVersion == 0 + val allowCreatingNewEmptySchemaTable = + emptySchemaTableFlag && tableWithLocation.schema.isEmpty && txn.readVersion == -1 + + // This is either a new table, or, we never defined the schema of the table. While it is + // unexpected that `txn.metadata.schema` to be empty when txn.readVersion >= 0, we still + // guard against it, in case of checkpoint corruption bugs. + val noExistingMetadata = txn.readVersion == -1 || txn.metadata.schema.isEmpty + if (noExistingMetadata && !allowRestoringExistingEmptySchemaTable) { + assertTableSchemaDefined( + fs, tableLocation, tableWithLocation, sparkSession, + allowCreatingNewEmptySchemaTable + ) + assertPathEmpty(hadoopConf, tableWithLocation) + // This is a user provided schema. + // Doesn't come from a query, Follow nullability invariants. 
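+        // Build the initial Metadata action from the user-provided table definition, then
+        // layer in any configuration that Universal Format support requires before it is
+        // committed as the metadata of the new table.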
+ var newMetadata = + getProvidedMetadata(tableWithLocation, table.schema.json) + newMetadata = newMetadata.copy(configuration = + UniversalFormat.enforceDependenciesInConfiguration( + newMetadata.configuration, txn.snapshot)) + + txn.updateMetadataForNewTable(newMetadata) + protocol.foreach { protocol => + txn.updateProtocol(protocol) + } + ClusteredTableUtils.getDomainMetadataOptional(table, txn).toSeq + } else { + verifyTableMetadata(txn, tableWithLocation) + Nil + } + } + + // We are defining a table using the Create or Replace Table statements. + val actionsToCommit = operation match { + case TableCreationModes.Create => + require(!tableExistsInCatalog, "Can't recreate a table when it exists") + createActionsForNewTableOrVerify() + + case TableCreationModes.CreateOrReplace if !tableExistsInCatalog => + // If the table doesn't exist, CREATE OR REPLACE must provide a schema + if (tableWithLocation.schema.isEmpty) { + throw DeltaErrors.schemaNotProvidedException + } + createActionsForNewTableOrVerify() + case _ => + // When the operation is a REPLACE or CREATE OR REPLACE, then the schema shouldn't be + // empty, since we'll use the entry to replace the schema + if (tableWithLocation.schema.isEmpty) { + throw DeltaErrors.schemaNotProvidedException + } + // This can happen if someone deleted files from the filesystem but + // the table still exists in the catalog. + if (txn.readVersion == -1 && tableExistsInCatalog) { + throw DeltaErrors.metadataAbsentForExistingCatalogTable( + tableWithLocation.identifier.toString, txn.deltaLog.logPath.toString) + } + // We need to replace + replaceMetadataIfNecessary( + txn, + tableWithLocation, + options, + tableWithLocation.schema) + // Truncate the table + val operationTimestamp = System.currentTimeMillis() + var actionsToCommit = Seq.empty[Action] + val removes = txn.filterFiles().map(_.removeWithTimestamp(operationTimestamp)) + actionsToCommit = removes ++ + DomainMetadataUtils.handleDomainMetadataForReplaceTable( + txn.snapshot.domainMetadata, + ClusteredTableUtils.getDomainMetadataOptional(table, txn).toSeq) + actionsToCommit + } + + val changedMetadata = txn.metadata != txn.snapshot.metadata + val changedProtocol = txn.protocol != txn.snapshot.protocol + if (actionsToCommit.nonEmpty || changedMetadata || changedProtocol) { + val op = getOperation(txn.metadata, isManagedTable, None + ) + txn.commit(actionsToCommit, op) + } + } + + private def getProvidedMetadata(table: CatalogTable, schemaString: String): Metadata = { + Metadata( + description = table.comment.orNull, + schemaString = schemaString, + partitionColumns = table.partitionColumnNames, + // Filter out ephemeral clustering columns config because we don't want to persist + // it in delta log. This will be persisted in CatalogTable's table properties instead. + configuration = ClusteredTableUtils.removeClusteringColumnsProperty(table.properties), + createdTime = Some(System.currentTimeMillis())) + } + + private def assertPathEmpty( + hadoopConf: Configuration, + tableWithLocation: CatalogTable): Unit = { + val path = new Path(tableWithLocation.location) + val fs = path.getFileSystem(hadoopConf) + // Verify that the table location associated with CREATE TABLE doesn't have any data. 
Note that + // we intentionally diverge from this behavior w.r.t regular datasource tables (that silently + // overwrite any previous data) + if (fs.exists(path) && fs.listStatus(path).nonEmpty) { + throw DeltaErrors.createTableWithNonEmptyLocation( + tableWithLocation.identifier.toString, + tableWithLocation.location.toString) + } + } + + private def assertTableSchemaDefined( + fs: FileSystem, + path: Path, + table: CatalogTable, + sparkSession: SparkSession, + allowEmptyTableSchema: Boolean): Unit = { + // Users did not specify the schema. We expect the schema exists in Delta. + if (table.schema.isEmpty) { + if (table.tableType == CatalogTableType.EXTERNAL) { + if (fs.exists(path) && fs.listStatus(path).nonEmpty) { + throw DeltaErrors.createExternalTableWithoutLogException( + path, table.identifier.quotedString, sparkSession) + } else { + if (allowEmptyTableSchema) return + throw DeltaErrors.createExternalTableWithoutSchemaException( + path, table.identifier.quotedString, sparkSession) + } + } else { + if (allowEmptyTableSchema) return + throw DeltaErrors.createManagedTableWithoutSchemaException( + table.identifier.quotedString, sparkSession) + } + } + } + + /** + * Verify against our transaction metadata that the user specified the right metadata for the + * table. + */ + private def verifyTableMetadata( + txn: OptimisticTransaction, + tableDesc: CatalogTable): Unit = { + val existingMetadata = txn.metadata + val path = new Path(tableDesc.location) + + // The delta log already exists. If they give any configuration, we'll make sure it all matches. + // Otherwise we'll just go with the metadata already present in the log. + // The schema compatibility checks will be made in `WriteIntoDelta` for CreateTable + // with a query + if (txn.readVersion > -1) { + if (tableDesc.schema.nonEmpty) { + // We check exact alignment on create table if everything is provided + // However, if in column mapping mode, we can safely ignore the related metadata fields in + // existing metadata because new table desc will not have related metadata assigned yet + val differences = SchemaUtils.reportDifferences( + dropColumnMappingMetadata(existingMetadata.schema), + tableDesc.schema) + if (differences.nonEmpty) { + throw DeltaErrors.createTableWithDifferentSchemaException( + path, tableDesc.schema, existingMetadata.schema, differences) + } + + // If schema is specified, we must make sure the partitioning matches, even the partitioning + // is not specified. + if (tableDesc.partitionColumnNames != existingMetadata.partitionColumns) { + throw DeltaErrors.createTableWithDifferentPartitioningException( + path, tableDesc.partitionColumnNames, existingMetadata.partitionColumns) + } + } + + if (tableDesc.properties.nonEmpty) { + // When comparing properties of the existing table and the new table, remove some + // internal column mapping properties for the sake of comparison. + val filteredTableProperties = filterColumnMappingProperties( + tableDesc.properties) + val filteredExistingProperties = filterColumnMappingProperties( + existingMetadata.configuration) + if (filteredTableProperties != filteredExistingProperties) { + throw DeltaErrors.createTableWithDifferentPropertiesException( + path, filteredTableProperties, filteredExistingProperties) + } + // If column mapping properties are present in both configs, verify they're the same value. 
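+        // filterColumnMappingProperties drops the internal column mapping keys from the
+        // comparison above, so conflicting values for those keys (e.g. the max column id)
+        // would otherwise go unnoticed; this check closes that gap.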
+ if (!DeltaColumnMapping.verifyInternalProperties( + tableDesc.properties, existingMetadata.configuration)) { + throw DeltaErrors.createTableWithDifferentPropertiesException( + path, tableDesc.properties, existingMetadata.configuration) + } + } + } + } + + /** + * Based on the table creation operation, and parameters, we can resolve to different operations. + * A lot of this is needed for legacy reasons in Databricks Runtime. + * @param metadata The table metadata, which we are creating or replacing + * @param isManagedTable Whether we are creating or replacing a managed table + * @param options Write options, if this was a CTAS/RTAS + */ + private def getOperation( + metadata: Metadata, + isManagedTable: Boolean, + options: Option[DeltaOptions] + ): DeltaOperations.Operation = operation match { + // This is legacy saveAsTable behavior in Databricks Runtime + case TableCreationModes.Create if existingTableOpt.isDefined && query.isDefined => + DeltaOperations.Write(mode, Option(table.partitionColumnNames), options.get.replaceWhere, + options.flatMap(_.userMetadata) + ) + + // DataSourceV2 table creation + // CREATE TABLE (non-DataFrameWriter API) doesn't have options syntax + // (userMetadata uses SQLConf in this case) + case TableCreationModes.Create => + DeltaOperations.CreateTable( + metadata, isManagedTable, query.isDefined + ) + + // DataSourceV2 table replace + // REPLACE TABLE (non-DataFrameWriter API) doesn't have options syntax + // (userMetadata uses SQLConf in this case) + case TableCreationModes.Replace => + DeltaOperations.ReplaceTable( + metadata, isManagedTable, orCreate = false, query.isDefined + ) + + // Legacy saveAsTable with Overwrite mode + case TableCreationModes.CreateOrReplace if options.exists(_.replaceWhere.isDefined) => + DeltaOperations.Write(mode, Option(table.partitionColumnNames), options.get.replaceWhere, + options.flatMap(_.userMetadata) + ) + + // New DataSourceV2 saveAsTable with overwrite mode behavior + case TableCreationModes.CreateOrReplace => + DeltaOperations.ReplaceTable(metadata, isManagedTable, orCreate = true, query.isDefined, + options.flatMap(_.userMetadata) + ) + } + + /** + * Similar to getOperation, here we disambiguate the catalog alterations we need to do based + * on the table operation, and whether we have reached here through legacy code or DataSourceV2 + * code paths. + */ + private def updateCatalog( + spark: SparkSession, + table: CatalogTable, + snapshot: Snapshot, + didNotChangeMetadata: Boolean + ): Unit = { + val cleaned = cleanupTableDefinition(spark, table, snapshot) + operation match { + case _ if tableByPath => // do nothing with the metastore if this is by path + case TableCreationModes.Create => + spark.sessionState.catalog.createTable( + cleaned, + ignoreIfExists = existingTableOpt.isDefined, + validateLocation = false) + case TableCreationModes.Replace | TableCreationModes.CreateOrReplace + if existingTableOpt.isDefined => + UpdateCatalogFactory.getUpdateCatalogHook(table, spark).updateSchema(spark, snapshot) + case TableCreationModes.Replace => + val ident = Identifier.of(table.identifier.database.toArray, table.identifier.table) + throw DeltaErrors.cannotReplaceMissingTableException(ident) + case TableCreationModes.CreateOrReplace => + spark.sessionState.catalog.createTable( + cleaned, + ignoreIfExists = false, + validateLocation = false) + } + } + + /** Clean up the information we pass on to store in the catalog. 
*/ + private def cleanupTableDefinition(spark: SparkSession, table: CatalogTable, snapshot: Snapshot) + : CatalogTable = { + // These actually have no effect on the usability of Delta, but feature flagging legacy + // behavior for now + val storageProps = if (conf.getConf(DeltaSQLConf.DELTA_LEGACY_STORE_WRITER_OPTIONS_AS_PROPS)) { + // Legacy behavior + table.storage + } else { + table.storage.copy(properties = Map.empty) + } + + // If we have to update the catalog, use the correct schema and table properties, otherwise + // empty out the schema and property information + if (conf.getConf(DeltaSQLConf.DELTA_UPDATE_CATALOG_ENABLED)) { + // In the case we're creating a Delta table on an existing path and adopting the schema + val schema = if (table.schema.isEmpty) snapshot.schema else table.schema + val truncatedSchema = UpdateCatalog.truncateSchemaIfNecessary(schema) + val additionalProperties = if (truncatedSchema.isEmpty) { + Map(UpdateCatalog.ERROR_KEY -> UpdateCatalog.LONG_SCHEMA_ERROR) + } else { + Map.empty + } + + table.copy( + schema = truncatedSchema, + // Hive does not allow for the removal of partition columns once stored. + // To avoid returning the incorrect schema when the partition columns change, + // we store the partition columns as regular data columns. + partitionColumnNames = Nil, + properties = UpdateCatalog.updatedProperties(snapshot) + ++ additionalProperties, + storage = storageProps, + tracksPartitionsInCatalog = true) + } else { + table.copy( + schema = new StructType(), + properties = Map.empty, + partitionColumnNames = Nil, + // Remove write specific options when updating the catalog + storage = storageProps, + tracksPartitionsInCatalog = true) + } + } + + /** + * With DataFrameWriterV2, methods like `replace()` or `createOrReplace()` mean that the + * metadata of the table should be replaced. If overwriteSchema=false is provided with these + * methods, then we will verify that the metadata match exactly. + */ + private def replaceMetadataIfNecessary( + txn: OptimisticTransaction, + tableDesc: CatalogTable, + options: DeltaOptions, + schema: StructType): Unit = { + // If a user explicitly specifies not to overwrite the schema, during a replace, we should + // tell them that it's not supported + val dontOverwriteSchema = options.options.contains(DeltaOptions.OVERWRITE_SCHEMA_OPTION) && + !options.canOverwriteSchema + if (isReplace && dontOverwriteSchema) { + throw DeltaErrors.illegalUsageException(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "replacing") + } + if (txn.readVersion > -1L && isReplace && !dontOverwriteSchema) { + // When a table already exists, and we're using the DataFrameWriterV2 API to replace + // or createOrReplace a table, we blindly overwrite the metadata. + val newMetadata = getProvidedMetadata(table, schema.json) + txn.updateMetadataForNewTable(newMetadata) + } + } + + /** + * Horrible hack to differentiate between DataFrameWriterV1 and V2 so that we can decide + * what to do with table metadata. In DataFrameWriterV1, mode("overwrite").saveAsTable, + * behaves as a CreateOrReplace table, but we have asked for "overwriteSchema" as an + * explicit option to overwrite partitioning or schema information. With DataFrameWriterV2, + * the behavior asked for by the user is clearer: .createOrReplace(), which means that we + * should overwrite schema and/or partitioning. Therefore we have this hack. 
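+   *
+   * As a rough illustration of the two API shapes (not tied to this code path):
+   *   - V1: `df.write.format("delta").mode("overwrite").saveAsTable("t")` keeps the existing
+   *     schema and partitioning unless `overwriteSchema` is explicitly set.
+   *   - V2: `df.writeTo("t").using("delta").createOrReplace()` replaces schema and partitioning.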
+ */ + private def isV1Writer: Boolean = { + Thread.currentThread().getStackTrace.exists(_.toString.contains( + classOf[DataFrameWriter[_]].getCanonicalName + ".")) + } + + /** Returns true if the current operation could be replacing a table. */ + private def isReplace: Boolean = { + operation == TableCreationModes.CreateOrReplace || + operation == TableCreationModes.Replace + } + + /** Returns the transaction that should be used for the CREATE/REPLACE commit. */ + private def startTxnForTableCreation( + sparkSession: SparkSession, + deltaLog: DeltaLog, + tableWithLocation: CatalogTable, + snapshotOpt: Option[Snapshot] = None): OptimisticTransaction = { + val txn = deltaLog.startTransaction(None, snapshotOpt) + validatePrerequisitesForClusteredTable(txn.snapshot.protocol, txn.deltaLog) + + // During CREATE/REPLACE, we synchronously run conversion (if Uniform is enabled) so + // we always remove the post commit hook here. + txn.unregisterPostCommitHooksWhere(hook => hook.name == IcebergConverterHook.name) + + txn + } + + /** + * Validate pre-requisites for clustered tables for CREATE/REPLACE operations. + * @param protocol Protocol used for validations. This protocol should + * be used during the CREATE/REPLACE commit. + * @param deltaLog Delta log used for logging purposes. + */ + private def validatePrerequisitesForClusteredTable( + protocol: Protocol, + deltaLog: DeltaLog): Unit = { + // Validate a clustered table is not replaced by a partitioned table. + if (table.partitionColumnNames.nonEmpty && + ClusteredTableUtils.isSupported(protocol)) { + throw DeltaErrors.replacingClusteredTableWithPartitionedTableNotAllowed() + } + } +} + +// isCreate is true for Create and CreateOrReplace modes. It is false for Replace mode. +object TableCreationModes { + sealed trait CreationMode { + def mode: SaveMode + def isCreate: Boolean = true + } + + case object Create extends CreationMode { + override def mode: SaveMode = SaveMode.ErrorIfExists + } + + case object CreateOrReplace extends CreationMode { + override def mode: SaveMode = SaveMode.Overwrite + } + + case object Replace extends CreationMode { + override def mode: SaveMode = SaveMode.Overwrite + override def isCreate: Boolean = false + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/DMLWithDeletionVectorsHelper.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DMLWithDeletionVectorsHelper.scala new file mode 100644 index 00000000000..ab17e4c8069 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DMLWithDeletionVectorsHelper.scala @@ -0,0 +1,662 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands + +import java.util.UUID + +import scala.collection.generic.Sizing + +import org.apache.spark.sql.catalyst.expressions.aggregation.BitmapAggregator +import org.apache.spark.sql.delta.{DeltaLog, DeltaParquetFileFormat, OptimisticTransaction, Snapshot} +import org.apache.spark.sql.delta.DeltaParquetFileFormat._ +import org.apache.spark.sql.delta.actions.{AddFile, DeletionVectorDescriptor, FileAction} +import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArray, RoaringBitmapArrayFormat, StoredBitmap} +import org.apache.spark.sql.delta.files.{TahoeBatchFileIndex, TahoeFileIndex} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.StatsCollectionUtils +import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore +import org.apache.spark.sql.delta.util.{BinPackingIterator, DeltaEncoder, JsonUtils, PathWithFileSystem, Utils => DeltaUtils} +import org.apache.spark.sql.delta.util.DeltaFileOperations.absolutePath +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.paths.SparkPath +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.execution.datasources.FileFormat.{FILE_PATH, METADATA_NAME} +import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.{SerializableConfiguration, Utils => SparkUtils} + + +/** + * Contains utility classes and method for performing DML operations with Deletion Vectors. + */ +object DMLWithDeletionVectorsHelper extends DeltaCommand { + val SUPPORTED_DML_COMMANDS: Seq[String] = Seq("DELETE", "UPDATE") + + /** + * Creates a DataFrame that can be used to scan for rows matching the condition in the given + * files. Generally the given file list is a pruned file list using the stats based pruning. + */ + def createTargetDfForScanningForMatches( + spark: SparkSession, + target: LogicalPlan, + fileIndex: TahoeFileIndex): DataFrame = { + Dataset.ofRows(spark, replaceFileIndex(target, fileIndex)) + } + + /** + * Replace the file index in a logical plan and return the updated plan. + * It's a common pattern that, in Delta commands, we use data skipping to determine a subset of + * files that can be affected by the command, so we replace the whole-table file index in the + * original logical plan with a new index of potentially affected files, while everything else in + * the original plan, e.g., resolved references, remain unchanged. + * + * In addition we also request a metadata column and a row index column from the Scan to help + * generate the Deletion Vectors. 
+ * + * @param target the logical plan in which we replace the file index + * @param fileIndex the new file index + */ + private def replaceFileIndex(target: LogicalPlan, fileIndex: TahoeFileIndex): LogicalPlan = { + val rowIndexCol = AttributeReference(ROW_INDEX_COLUMN_NAME, ROW_INDEX_STRUCT_FILED.dataType)(); + var fileMetadataCol: AttributeReference = null + + val newTarget = target.transformUp { + case l @ LogicalRelation( + hfsr @ HadoopFsRelation(_, _, _, _, format: DeltaParquetFileFormat, _), _, _, _) => + fileMetadataCol = format.createFileMetadataCol() + // Take the existing schema and add additional metadata columns + val newDataSchema = StructType(hfsr.dataSchema).add(ROW_INDEX_STRUCT_FILED) + val finalOutput = l.output ++ Seq(rowIndexCol, fileMetadataCol) + // Disable splitting and filter pushdown in order to generate the row-indexes + val newFormat = format.copy(isSplittable = false, disablePushDowns = true) + + val newBaseRelation = hfsr.copy( + location = fileIndex, + dataSchema = newDataSchema, + fileFormat = newFormat)(hfsr.sparkSession) + + l.copy(relation = newBaseRelation, output = finalOutput) + case p @ Project(projectList, _) => + if (fileMetadataCol == null) { + throw new IllegalStateException("File metadata column is not yet created.") + } + val newProjectList = projectList ++ Seq(rowIndexCol, fileMetadataCol) + p.copy(projectList = newProjectList) + } + newTarget + } + + /** + * Find the target table files that contain rows that satisfy the condition and a DV attached + * to each file that indicates a the rows marked as deleted from the file + */ + def findTouchedFiles( + sparkSession: SparkSession, + txn: OptimisticTransaction, + hasDVsEnabled: Boolean, + deltaLog: DeltaLog, + targetDf: DataFrame, + fileIndex: TahoeFileIndex, + condition: Expression, + opName: String): Seq[TouchedFileWithDV] = { + require( + SUPPORTED_DML_COMMANDS.contains(opName), + s"Expecting opName to be one of ${SUPPORTED_DML_COMMANDS.mkString(", ")}, " + + s"but got '$opName'.") + + recordDeltaOperation(deltaLog, opType = s"$opName.findTouchedFiles") { + val candidateFiles = fileIndex match { + case f: TahoeBatchFileIndex => f.addFiles + case _ => throw new IllegalArgumentException("Unexpected file index found!") + } + + val matchedRowIndexSets = + DeletionVectorBitmapGenerator.buildRowIndexSetsForFilesMatchingCondition( + sparkSession, + txn, + hasDVsEnabled, + targetDf, + candidateFiles, + condition) + + val nameToAddFileMap = generateCandidateFileMap(txn.deltaLog.dataPath, candidateFiles) + findFilesWithMatchingRows(txn, nameToAddFileMap, matchedRowIndexSets) + } + } + + /** + * Finds the files in nameToAddFileMap in which rows were deleted by checking the row index set. + */ + def findFilesWithMatchingRows( + txn: OptimisticTransaction, + nameToAddFileMap: Map[String, AddFile], + matchedFileRowIndexSets: Seq[DeletionVectorResult]): Seq[TouchedFileWithDV] = { + // Get the AddFiles using the touched file names and group them together with other + // information we need for later phases. 
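+    // Each entry pairs the original AddFile with the freshly built deletion vector and the
+    // number of rows it marks as deleted, so later phases can choose between dropping the
+    // file entirely and attaching the DV (see TouchedFileWithDV.isFullyReplaced/isUnchanged).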
+ val dataPath = txn.deltaLog.dataPath + val touchedFilesWithMatchedRowIndices = matchedFileRowIndexSets.map { fileRowIndex => + val filePath = fileRowIndex.filePath + val addFile = getTouchedFile(dataPath, filePath, nameToAddFileMap) + TouchedFileWithDV( + filePath, + addFile, + fileRowIndex.deletionVector, + fileRowIndex.matchedRowCount) + } + + logTrace("findTouchedFiles: matched files:\n\t" + + s"${touchedFilesWithMatchedRowIndices.map(_.inputFilePath).mkString("\n\t")}") + + touchedFilesWithMatchedRowIndices.filterNot(_.isUnchanged) + } + + def processUnmodifiedData( + spark: SparkSession, + touchedFiles: Seq[TouchedFileWithDV], + snapshot: Snapshot): (Seq[FileAction], Map[String, Long]) = { + val numModifiedRows: Long = touchedFiles.map(_.numberOfModifiedRows).sum + val numRemovedFiles: Long = touchedFiles.count(_.isFullyReplaced()) + + val (fullyRemovedFiles, notFullyRemovedFiles) = touchedFiles.partition(_.isFullyReplaced()) + + val timestamp = System.currentTimeMillis() + val fullyRemoved = fullyRemovedFiles.map(_.fileLogEntry.removeWithTimestamp(timestamp)) + + val dvUpdates = notFullyRemovedFiles.map { fileWithDVInfo => + fileWithDVInfo.fileLogEntry.removeRows( + deletionVector = fileWithDVInfo.newDeletionVector, + updateStats = false + )} + val (dvAddFiles, dvRemoveFiles) = dvUpdates.unzip + val dvAddFilesWithStats = getActionsWithStats(spark, dvAddFiles, snapshot) + + var (numDeletionVectorsAdded, numDeletionVectorsRemoved, numDeletionVectorsUpdated) = + dvUpdates.foldLeft((0L, 0L, 0L)) { case ((added, removed, updated), (addFile, removeFile)) => + (Option(addFile.deletionVector), Option(removeFile.deletionVector)) match { + case (Some(_), Some(_)) => (added, removed, updated + 1) + case (None, Some(_)) => (added, removed + 1, updated) + case (Some(_), None) => (added + 1, removed, updated) + case _ => (added, removed, updated) + } + } + numDeletionVectorsRemoved += fullyRemoved.count(_.deletionVector != null) + val metricMap = Map( + "numModifiedRows" -> numModifiedRows, + "numRemovedFiles" -> numRemovedFiles, + "numDeletionVectorsAdded" -> numDeletionVectorsAdded, + "numDeletionVectorsRemoved" -> numDeletionVectorsRemoved, + "numDeletionVectorsUpdated" -> numDeletionVectorsUpdated) + (fullyRemoved ++ dvAddFilesWithStats ++ dvRemoveFiles, metricMap) + } + + /** Fetch stats for `addFiles`. */ + private def getActionsWithStats( + spark: SparkSession, + addFilesWithNewDvs: Seq[AddFile], + snapshot: Snapshot): Seq[AddFile] = { + import org.apache.spark.sql.delta.implicits._ + + if (addFilesWithNewDvs.isEmpty) return Seq.empty + + val selectionPathAndStatsCols = Seq(col("path"), col("stats")) + val addFilesWithNewDvsDf = addFilesWithNewDvs.toDF(spark) + + // These files originate from snapshot.filesForScan which resets column statistics. + // Since these object don't carry stats and tags, if we were to use them as result actions of + // the operation directly, we'd effectively be removing all stats and tags. To resolve this + // we join the list of files with DVs with the log (allFiles) to retrieve statistics. This is + // expected to have better performance than supporting full stats retrieval + // in snapshot.filesForScan because it only affects a subset of the scanned files. + + // Find the current metadata with stats for all files with new DV + val addFileWithStatsDf = snapshot.withStats + .join(addFilesWithNewDvsDf.select("path"), "path") + + // Update the existing stats to set the tightBounds to false and also set the appropriate + // null count. 
We want to set the bounds before the AddFile has DV descriptor attached. + // Attaching the DV descriptor here, causes wrong logical records computation in + // `updateStatsToWideBounds`. + val statsColName = snapshot.getBaseStatsColumnName + val addFilesWithWideBoundsDf = snapshot + .updateStatsToWideBounds(addFileWithStatsDf, statsColName) + + val (filesWithNoStats, filesWithExistingStats) = { + // numRecords is the only stat we really have to guarantee. + // If the others are missing, we do not need to fetch them. + addFilesWithWideBoundsDf.as[AddFile].collect().toSeq + .partition(_.numPhysicalRecords.isEmpty) + } + + // If we encounter files with no stats we fetch the stats from the parquet footer. + // Files with persistent DVs *must* have (at least numRecords) stats according to the + // Delta spec. + val filesWithFetchedStats = + if (filesWithNoStats.nonEmpty) { + StatsCollectionUtils.computeStats(spark, + conf = snapshot.deltaLog.newDeltaHadoopConf(), + dataPath = snapshot.deltaLog.dataPath, + addFiles = filesWithNoStats.toDS(spark), + numFilesOpt = Some(filesWithNoStats.size), + columnMappingMode = snapshot.metadata.columnMappingMode, + dataSchema = snapshot.dataSchema, + statsSchema = snapshot.statsSchema, + setBoundsToWide = true) + .collect() + .toSeq + } else { + Seq.empty + } + + val allAddFilesWithUpdatedStats = + (filesWithExistingStats ++ filesWithFetchedStats).toSeq.toDF(spark) + + // Now join the allAddFilesWithUpdatedStats with addFilesWithNewDvs + // so that the updated stats are joined with the new DV info + addFilesWithNewDvsDf.drop("stats") + .join( + allAddFilesWithUpdatedStats.select(selectionPathAndStatsCols: _*), "path") + .as[AddFile] + .collect() + .toSeq + } +} + +object DeletionVectorBitmapGenerator { + final val FILE_NAME_COL = "filePath" + final val FILE_DV_ID_COL = "deletionVectorId" + final val ROW_INDEX_COL = "rowIndexCol" + final val DELETED_ROW_INDEX_BITMAP = "deletedRowIndexSet" + final val DELETED_ROW_INDEX_COUNT = "deletedRowIndexCount" + final val MAX_ROW_INDEX_COL = "maxRowIndexCol" + + private class DeletionVectorSet( + spark: SparkSession, + target: DataFrame, + targetDeltaLog: DeltaLog, + deltaTxn: OptimisticTransaction) { + + case object CardinalityAndBitmapStruct { + val name: String = "CardinalityAndBitmapStruct" + def cardinality: String = s"$name.cardinality" + def bitmap: String = s"$name.bitmap" + } + + def computeResult(): Seq[DeletionVectorResult] = { + val aggregated = target + .groupBy(col(FILE_NAME_COL), col(FILE_DV_ID_COL)) + .agg(aggColumns.head, aggColumns.tail: _*) + .select(outputColumns: _*) + + import DeletionVectorResult.encoder + val rowIndexData = aggregated.as[DeletionVectorData] + val storedResults = rowIndexData.mapPartitions(bitmapStorageMapper()) + storedResults.as[DeletionVectorResult].collect() + } + + protected def aggColumns: Seq[Column] = { + Seq(createBitmapSetAggregator(col(ROW_INDEX_COL)).as(CardinalityAndBitmapStruct.name)) + } + + /** Create a bitmap set aggregator over the given column */ + private def createBitmapSetAggregator(indexColumn: Column): Column = { + val func = new BitmapAggregator(indexColumn.expr, RoaringBitmapArrayFormat.Portable) + new Column(func.toAggregateExpression(isDistinct = false)) + } + + protected def outputColumns: Seq[Column] = + Seq( + col(FILE_NAME_COL), + col(FILE_DV_ID_COL), + col(CardinalityAndBitmapStruct.bitmap).as(DELETED_ROW_INDEX_BITMAP), + col(CardinalityAndBitmapStruct.cardinality).as(DELETED_ROW_INDEX_COUNT) + ) + + protected def bitmapStorageMapper() + : 
Iterator[DeletionVectorData] => Iterator[DeletionVectorResult] = { + val prefixLen = DeltaUtils.getRandomPrefixLength(deltaTxn.metadata) + DeletionVectorWriter.createMapperToStoreDeletionVectors( + spark, + targetDeltaLog.newDeltaHadoopConf(), + targetDeltaLog.dataPath, + prefixLen) + } + } + + /** + * Build bitmap compressed sets of row indices for each file in [[target]] using + * [[ROW_INDEX_COL]]. + * Write those sets out to temporary files and collect the file names, + * together with some encoded metadata about the contents. + * + * @param target DataFrame with expected schema [[FILE_NAME_COL]], [[ROW_INDEX_COL]], + */ + def buildDeletionVectors( + spark: SparkSession, + target: DataFrame, + targetDeltaLog: DeltaLog, + deltaTxn: OptimisticTransaction): Seq[DeletionVectorResult] = { + val rowIndexSet = new DeletionVectorSet(spark, target, targetDeltaLog, deltaTxn) + rowIndexSet.computeResult() + } + + def buildRowIndexSetsForFilesMatchingCondition( + sparkSession: SparkSession, + txn: OptimisticTransaction, + tableHasDVs: Boolean, + targetDf: DataFrame, + candidateFiles: Seq[AddFile], + condition: Expression, + fileNameColumnOpt: Option[Column] = None, + rowIndexColumnOpt: Option[Column] = None): Seq[DeletionVectorResult] = { + val fileNameColumn = fileNameColumnOpt.getOrElse(col(s"${METADATA_NAME}.${FILE_PATH}")) + val rowIndexColumn = rowIndexColumnOpt.getOrElse(col(ROW_INDEX_COLUMN_NAME)) + val matchedRowsDf = targetDf + .withColumn(FILE_NAME_COL, fileNameColumn) + // Filter after getting input file name as the filter might introduce a join and we + // cannot get input file name on join's output. + .filter(new Column(condition)) + .withColumn(ROW_INDEX_COL, rowIndexColumn) + + val df = if (tableHasDVs) { + // When the table already has DVs, join the `matchedRowDf` above to attach for each matched + // file its existing DeletionVectorDescriptor + val basePath = txn.deltaLog.dataPath.toString + val filePathToDV = candidateFiles.map { add => + val serializedDV = Option(add.deletionVector).map(dvd => JsonUtils.toJson(dvd)) + // Paths in the metadata column are canonicalized. Thus we must canonicalize the DV path. + FileToDvDescriptor( + SparkPath.fromPath(absolutePath(basePath, add.path)).urlEncoded, + serializedDV) + } + val filePathToDVDf = sparkSession.createDataset(filePathToDV) + + val joinExpr = filePathToDVDf("path") === matchedRowsDf(FILE_NAME_COL) + // Perform leftOuter join to make sure we do not eliminate any rows because of path + // encoding issues. If there is such an issue we will detect it during the aggregation + // of the bitmaps. + val joinedDf = matchedRowsDf.join(filePathToDVDf, joinExpr, "leftOuter") + .drop(FILE_NAME_COL) + .withColumnRenamed("path", FILE_NAME_COL) + joinedDf + } else { + // When the table has no DVs, just add a column to indicate that the existing dv is null + matchedRowsDf.withColumn(FILE_DV_ID_COL, lit(null)) + } + + DeletionVectorBitmapGenerator.buildDeletionVectors(sparkSession, df, txn.deltaLog, txn) + } +} + +/** + * Holds a mapping from a file path (url-encoded) to an (optional) serialized Deletion Vector + * descriptor. + */ +case class FileToDvDescriptor(path: String, deletionVectorId: Option[String]) + +object FileToDvDescriptor { + private lazy val _encoder = new DeltaEncoder[FileToDvDescriptor] + implicit def encoder: Encoder[FileToDvDescriptor] = _encoder.get +} + +/** + * Row containing the file path and its new deletion vector bitmap in memory + * + * @param filePath Absolute path of the data file this DV result is generated for. 
+ * @param deletionVectorId Existing [[DeletionVectorDescriptor]] serialized in JSON format. + * This info is used to load the existing DV with the new DV. + * @param deletedRowIndexSet In-memory Deletion vector bitmap generated containing the newly + * deleted row indexes from data file. + * @param deletedRowIndexCount Count of rows marked as deleted using the [[deletedRowIndexSet]]. + */ +case class DeletionVectorData( + filePath: String, + deletionVectorId: Option[String], + deletedRowIndexSet: Array[Byte], + deletedRowIndexCount: Long) extends Sizing { + + /** The size of the bitmaps to use in [[BinPackingIterator]]. */ + override def size: Int = deletedRowIndexSet.length +} + +object DeletionVectorData { + private lazy val _encoder = new DeltaEncoder[DeletionVectorData] + implicit def encoder: Encoder[DeletionVectorData] = _encoder.get + + def apply(filePath: String, rowIndexSet: Array[Byte], rowIndexCount: Long): DeletionVectorData = { + DeletionVectorData( + filePath = filePath, + deletionVectorId = None, + deletedRowIndexSet = rowIndexSet, + deletedRowIndexCount = rowIndexCount) + } +} + +/** Final output for each file containing the file path, DeletionVectorDescriptor and how many + * rows are marked as deleted in this file as part of the this operation (doesn't include rows that + * are already marked as deleted). + * + * @param filePath Absolute path of the data file this DV result is generated for. + * @param deletionVector Deletion vector generated containing the newly deleted row indices from + * data file. + * @param matchedRowCount Number of rows marked as deleted using the [[deletionVector]]. + */ +case class DeletionVectorResult( + filePath: String, + deletionVector: DeletionVectorDescriptor, + matchedRowCount: Long) { +} + +object DeletionVectorResult { + private lazy val _encoder = new DeltaEncoder[DeletionVectorResult] + implicit def encoder: Encoder[DeletionVectorResult] = _encoder.get + + def fromDeletionVectorData( + data: DeletionVectorData, + deletionVector: DeletionVectorDescriptor): DeletionVectorResult = { + DeletionVectorResult( + filePath = data.filePath, + deletionVector = deletionVector, + matchedRowCount = data.deletedRowIndexCount) + } +} + +case class TouchedFileWithDV( + inputFilePath: String, + fileLogEntry: AddFile, + newDeletionVector: DeletionVectorDescriptor, + deletedRows: Long) { + /** + * Checks the *sufficient* condition for a file being fully replaced by the current operation. + * (That is, all rows are either being updated or deleted.) + */ + def isFullyReplaced(): Boolean = { + fileLogEntry.numLogicalRecords match { + case Some(numRecords) => numRecords == numberOfModifiedRows + case None => false // must make defensive assumption if no statistics are available + } + } + + /** + * Checks if the file is unchanged by the current operation. + * (That is no row has been updated or deleted.) + */ + def isUnchanged: Boolean = { + // If the bitmap is empty then no row would be removed during the rewrite, + // thus the file is unchanged. + numberOfModifiedRows == 0 + } + + /** + * The number of rows that are modified in this file. + */ + def numberOfModifiedRows: Long = newDeletionVector.cardinality - fileLogEntry.numDeletedRecords +} + +/** + * Utility methods to write the deletion vector to storage. If a particular file already + * has an existing DV, it will be merged with the new deletion vector and written to storage. 
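+ *
+ * At a high level the write path is: serialize each per-file bitmap, bin-pack several bitmaps
+ * into a shared DV file written under a random prefix, and return a
+ * [[DeletionVectorDescriptor]] recording the file id, offset, size and cardinality of each
+ * stored bitmap.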
+ */ +object DeletionVectorWriter extends DeltaLogging { + /** + * The context for [[createDeletionVectorMapper]] callback functions. Contains the DV writer that + * is used by callback functions to write the new DVs. + */ + case class DeletionVectorMapperContext( + dvStore: DeletionVectorStore, + writer: DeletionVectorStore.Writer, + tablePath: Path, + fileId: UUID, + prefix: String) + + /** + * Prepare a mapper function for storing deletion vectors. + * + * For each DeletionVector the writer will create a [[DeletionVectorMapperContext]] that contains + * a DV writer that is used by to write the DV into a file. + * + * The result can be used with [[org.apache.spark.sql.Dataset.mapPartitions()]] and must thus be + * serialized. + */ + def createDeletionVectorMapper[InputT <: Sizing, OutputT]( + sparkSession: SparkSession, + hadoopConf: Configuration, + table: Path, + prefixLength: Int) + (callbackFn: (DeletionVectorMapperContext, InputT) => OutputT) + : Iterator[InputT] => Iterator[OutputT] = { + val broadcastHadoopConf = sparkSession.sparkContext.broadcast( + new SerializableConfiguration(hadoopConf)) + // hadoop.fs.Path is not Serializable, so close over the String representation instead + val tablePathString = DeletionVectorStore.pathToEscapedString(table) + val packingTargetSize = + sparkSession.conf.get(DeltaSQLConf.DELETION_VECTOR_PACKING_TARGET_SIZE) + + // This is the (partition) mapper function we are returning + (rowIterator: Iterator[InputT]) => { + val dvStore = DeletionVectorStore.createInstance(broadcastHadoopConf.value.value) + val tablePath = DeletionVectorStore.escapedStringToPath(tablePathString) + val tablePathWithFS = dvStore.pathWithFileSystem(tablePath) + + val perBinFunction: Seq[InputT] => Seq[OutputT] = (rows: Seq[InputT]) => { + val prefix = DeltaUtils.getRandomPrefix(prefixLength) + val (writer, fileId) = createWriter(dvStore, tablePathWithFS, prefix) + val ctx = DeletionVectorMapperContext( + dvStore, + writer, + tablePath, + fileId, + prefix) + val result = SparkUtils.tryWithResource(writer) { writer => + rows.map(r => callbackFn(ctx, r)) + } + result + } + + val binPackedRowIterator = new BinPackingIterator(rowIterator, packingTargetSize) + binPackedRowIterator.flatMap(perBinFunction) + } + } + + /** + * Creates a writer for writing multiple DVs in the same file. + * + * Returns the writer and the UUID of the new file. + */ + def createWriter( + dvStore: DeletionVectorStore, + tablePath: PathWithFileSystem, + prefix: String = ""): (DeletionVectorStore.Writer, UUID) = { + val fileId = UUID.randomUUID() + val writer = dvStore.createWriter(dvStore.generateFileNameInTable(tablePath, fileId, prefix)) + (writer, fileId) + } + + /** Store the `bitmapData` on cloud storage. */ + def storeSerializedBitmap( + ctx: DeletionVectorMapperContext, + bitmapData: Array[Byte], + cardinality: Long): DeletionVectorDescriptor = { + if (cardinality == 0L) { + DeletionVectorDescriptor.EMPTY + } else { + val dvRange = ctx.writer.write(bitmapData) + DeletionVectorDescriptor.onDiskWithRelativePath( + id = ctx.fileId, + randomPrefix = ctx.prefix, + sizeInBytes = bitmapData.length, + cardinality = cardinality, + offset = Some(dvRange.offset)) + } + } + + /** + * Prepares a mapper function that can be used by DML commands to store the Deletion Vectors + * that are in described in [[DeletionVectorData]] and return their descriptors + * [[DeletionVectorResult]]. 
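+   *
+   * A minimal usage sketch (assuming a SparkSession `spark`, a `deltaLog` for the target table
+   * and a `dvData: Dataset[DeletionVectorData]`; the prefix length is an arbitrary example):
+   * {{{
+   *   val mapper = DeletionVectorWriter.createMapperToStoreDeletionVectors(
+   *     spark, deltaLog.newDeltaHadoopConf(), deltaLog.dataPath, prefixLength = 2)
+   *   val results = dvData.mapPartitions(mapper)(DeletionVectorResult.encoder).collect()
+   * }}}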
+ */ + def createMapperToStoreDeletionVectors( + sparkSession: SparkSession, + hadoopConf: Configuration, + table: Path, + prefixLength: Int): Iterator[DeletionVectorData] => Iterator[DeletionVectorResult] = + createDeletionVectorMapper(sparkSession, hadoopConf, table, prefixLength) { + (ctx, row) => storeBitmapAndGenerateResult(ctx, row) + } + + /** + * Helper to generate and store the deletion vector bitmap. The deletion vector is merged with + * the file's already existing deletion vector before being stored. + */ + def storeBitmapAndGenerateResult(ctx: DeletionVectorMapperContext, row: DeletionVectorData) + : DeletionVectorResult = { + // If a group with null path exists it means there was an issue while joining with the log to + // fetch the DeletionVectorDescriptors. + assert(row.filePath != null, + s""" + |Encountered a non matched file path. + |It is likely that _metadata.file_path is not encoded by Spark as expected. + |""".stripMargin) + + val fileDvDescriptor = row.deletionVectorId.map(DeletionVectorDescriptor.fromJson(_)) + val finalDvDescriptor = fileDvDescriptor match { + case Some(existingDvDescriptor) if row.deletedRowIndexCount > 0 => + // Load the existing bit map + val existingBitmap = + StoredBitmap.create(existingDvDescriptor, ctx.tablePath).load(ctx.dvStore) + val newBitmap = RoaringBitmapArray.readFrom(row.deletedRowIndexSet) + + // Merge both the existing and new bitmaps into one, and finally persist on disk + existingBitmap.merge(newBitmap) + storeSerializedBitmap( + ctx, + existingBitmap.serializeAsByteArray(RoaringBitmapArrayFormat.Portable), + existingBitmap.cardinality) + case Some(existingDvDescriptor) => + existingDvDescriptor // This is already stored. + case None => + // Persist the new bitmap + storeSerializedBitmap(ctx, row.deletedRowIndexSet, row.deletedRowIndexCount) + } + DeletionVectorResult.fromDeletionVectorData(row, deletionVector = finalDvDescriptor) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeleteCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeleteCommand.scala new file mode 100644 index 00000000000..98c11f5a5c2 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeleteCommand.scala @@ -0,0 +1,541 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands + +import java.util.concurrent.TimeUnit + +import org.apache.spark.sql.delta.metric.IncrementMetric +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.{Action, AddCDCFile, AddFile, FileAction} +import org.apache.spark.sql.delta.commands.DeleteCommand.{rewritingFilesMsg, FINDING_TOUCHED_FILES_MSG} +import org.apache.spark.sql.delta.commands.MergeIntoCommandBase.totalBytesAndDistinctPartitionValues +import org.apache.spark.sql.delta.files.TahoeBatchFileIndex +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import com.fasterxml.jackson.databind.annotation.JsonDeserialize + +import org.apache.spark.SparkContext +import org.apache.spark.sql.{Column, DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, EqualNullSafe, Expression, If, Literal, Not} +import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral +import org.apache.spark.sql.catalyst.plans.QueryPlan +import org.apache.spark.sql.catalyst.plans.logical.{DeltaDelete, LogicalPlan} +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.execution.metric.SQLMetrics.{createMetric, createTimingMetric} +import org.apache.spark.sql.functions.input_file_name +import org.apache.spark.sql.types.LongType + +trait DeleteCommandMetrics { self: LeafRunnableCommand => + @transient private lazy val sc: SparkContext = SparkContext.getOrCreate() + + def createMetrics: Map[String, SQLMetric] = Map[String, SQLMetric]( + "numRemovedFiles" -> createMetric(sc, "number of files removed."), + "numAddedFiles" -> createMetric(sc, "number of files added."), + "numDeletedRows" -> createMetric(sc, "number of rows deleted."), + "numFilesBeforeSkipping" -> createMetric(sc, "number of files before skipping"), + "numBytesBeforeSkipping" -> createMetric(sc, "number of bytes before skipping"), + "numFilesAfterSkipping" -> createMetric(sc, "number of files after skipping"), + "numBytesAfterSkipping" -> createMetric(sc, "number of bytes after skipping"), + "numPartitionsAfterSkipping" -> createMetric(sc, "number of partitions after skipping"), + "numPartitionsAddedTo" -> createMetric(sc, "number of partitions added"), + "numPartitionsRemovedFrom" -> createMetric(sc, "number of partitions removed"), + "numCopiedRows" -> createMetric(sc, "number of rows copied"), + "numAddedBytes" -> createMetric(sc, "number of bytes added"), + "numRemovedBytes" -> createMetric(sc, "number of bytes removed"), + "executionTimeMs" -> + createTimingMetric(sc, "time taken to execute the entire operation"), + "scanTimeMs" -> + createTimingMetric(sc, "time taken to scan the files for matches"), + "rewriteTimeMs" -> + createTimingMetric(sc, "time taken to rewrite the matched files"), + "numAddedChangeFiles" -> createMetric(sc, "number of change data capture files generated"), + "changeFileBytes" -> createMetric(sc, "total size of change data capture files generated"), + "numTouchedRows" -> createMetric(sc, "number of rows touched"), + "numDeletionVectorsAdded" -> createMetric(sc, "number of deletion vectors added"), + "numDeletionVectorsRemoved" -> createMetric(sc, "number of deletion vectors removed"), + "numDeletionVectorsUpdated" -> createMetric(sc, "number of deletion vectors updated") + ) + + def 
getDeletedRowsFromAddFilesAndUpdateMetrics(files: Seq[AddFile]) : Option[Long] = {
+    if (!conf.getConf(DeltaSQLConf.DELTA_DML_METRICS_FROM_METADATA)) {
+      return None
+    }
+    // No file to get metadata from, return None to be consistent with metadata stats disabled
+    if (files.isEmpty) {
+      return None
+    }
+    // Return None if any file does not contain numLogicalRecords statistics
+    var count: Long = 0
+    for (file <- files) {
+      if (file.numLogicalRecords.isEmpty) {
+        return None
+      }
+      count += file.numLogicalRecords.get
+    }
+    metrics("numDeletedRows").set(count)
+    return Some(count)
+  }
+}
+
+/**
+ * Performs a Delete based on the search condition.
+ *
+ * Algorithm:
+ *   1) Scan all the files and determine which files have
+ *      the rows that need to be deleted.
+ *   2) Traverse the affected files and rebuild the touched files.
+ *   3) Use the Delta protocol to atomically write the remaining rows to new files and remove
+ *      the affected files that are identified in step 1.
+ */
+case class DeleteCommand(
+    deltaLog: DeltaLog,
+    catalogTable: Option[CatalogTable],
+    target: LogicalPlan,
+    condition: Option[Expression])
+  extends LeafRunnableCommand with DeltaCommand with DeleteCommandMetrics {
+
+  override def innerChildren: Seq[QueryPlan[_]] = Seq(target)
+
+  override val output: Seq[Attribute] = Seq(AttributeReference("num_affected_rows", LongType)())
+
+  override lazy val metrics = createMetrics
+
+  final override def run(sparkSession: SparkSession): Seq[Row] = {
+    recordDeltaOperation(deltaLog, "delta.dml.delete") {
+      deltaLog.withNewTransaction(catalogTable) { txn =>
+        DeltaLog.assertRemovable(txn.snapshot)
+        if (hasBeenExecuted(txn, sparkSession)) {
+          sendDriverMetrics(sparkSession, metrics)
+          return Seq.empty
+        }
+
+        val deleteActions = performDelete(sparkSession, deltaLog, txn)
+        txn.commitIfNeeded(deleteActions, DeltaOperations.Delete(condition.toSeq))
+      }
+      // Re-cache all cached plans (including this relation itself, if it's cached) that refer to
+      // this data source relation.
+      sparkSession.sharedState.cacheManager.recacheByPlan(sparkSession, target)
+    }
+
+    // Adjust for deletes at partition boundaries. Deletes at partition boundaries are metadata-only
+    // operations, so we don't actually have any information about how many rows were deleted.
+    // While this info may exist in the file statistics, it's not guaranteed that we have these
+    // statistics.
To avoid any performance regressions, we currently just return a -1 in such cases + if (metrics("numRemovedFiles").value > 0 && metrics("numDeletedRows").value == 0) { + Seq(Row(-1L)) + } else { + Seq(Row(metrics("numDeletedRows").value)) + } + } + + def performDelete( + sparkSession: SparkSession, + deltaLog: DeltaLog, + txn: OptimisticTransaction): Seq[Action] = { + import org.apache.spark.sql.delta.implicits._ + + var numRemovedFiles: Long = 0 + var numAddedFiles: Long = 0 + var numAddedChangeFiles: Long = 0 + var scanTimeMs: Long = 0 + var rewriteTimeMs: Long = 0 + var numAddedBytes: Long = 0 + var changeFileBytes: Long = 0 + var numRemovedBytes: Long = 0 + var numFilesBeforeSkipping: Long = 0 + var numBytesBeforeSkipping: Long = 0 + var numFilesAfterSkipping: Long = 0 + var numBytesAfterSkipping: Long = 0 + var numPartitionsAfterSkipping: Option[Long] = None + var numPartitionsRemovedFrom: Option[Long] = None + var numPartitionsAddedTo: Option[Long] = None + var numDeletedRows: Option[Long] = None + var numCopiedRows: Option[Long] = None + var numDeletionVectorsAdded: Long = 0 + var numDeletionVectorsRemoved: Long = 0 + var numDeletionVectorsUpdated: Long = 0 + + val startTime = System.nanoTime() + val numFilesTotal = txn.snapshot.numOfFiles + + val deleteActions: Seq[Action] = condition match { + case None => + // Case 1: Delete the whole table if the condition is true + val reportRowLevelMetrics = conf.getConf(DeltaSQLConf.DELTA_DML_METRICS_FROM_METADATA) + val allFiles = txn.filterFiles(Nil, keepNumRecords = reportRowLevelMetrics) + + numRemovedFiles = allFiles.size + numDeletionVectorsRemoved = allFiles.count(_.deletionVector != null) + scanTimeMs = (System.nanoTime() - startTime) / 1000 / 1000 + val (numBytes, numPartitions) = totalBytesAndDistinctPartitionValues(allFiles) + numRemovedBytes = numBytes + numFilesBeforeSkipping = numRemovedFiles + numBytesBeforeSkipping = numBytes + numFilesAfterSkipping = numRemovedFiles + numBytesAfterSkipping = numBytes + numDeletedRows = getDeletedRowsFromAddFilesAndUpdateMetrics(allFiles) + + if (txn.metadata.partitionColumns.nonEmpty) { + numPartitionsAfterSkipping = Some(numPartitions) + numPartitionsRemovedFrom = Some(numPartitions) + numPartitionsAddedTo = Some(0) + } + val operationTimestamp = System.currentTimeMillis() + allFiles.map(_.removeWithTimestamp(operationTimestamp)) + case Some(cond) => + val (metadataPredicates, otherPredicates) = + DeltaTableUtils.splitMetadataAndDataPredicates( + cond, txn.metadata.partitionColumns, sparkSession) + + numFilesBeforeSkipping = txn.snapshot.numOfFiles + numBytesBeforeSkipping = txn.snapshot.sizeInBytes + + if (otherPredicates.isEmpty) { + // Case 2: The condition can be evaluated using metadata only. + // Delete a set of files without the need of scanning any data files. 
+          val operationTimestamp = System.currentTimeMillis()
+          val reportRowLevelMetrics = conf.getConf(DeltaSQLConf.DELTA_DML_METRICS_FROM_METADATA)
+          val candidateFiles =
+            txn.filterFiles(metadataPredicates, keepNumRecords = reportRowLevelMetrics)
+
+          scanTimeMs = (System.nanoTime() - startTime) / 1000 / 1000
+          numRemovedFiles = candidateFiles.size
+          numRemovedBytes = candidateFiles.map(_.size).sum
+          numFilesAfterSkipping = candidateFiles.size
+          numDeletionVectorsRemoved = candidateFiles.count(_.deletionVector != null)
+          val (numCandidateBytes, numCandidatePartitions) =
+            totalBytesAndDistinctPartitionValues(candidateFiles)
+          numBytesAfterSkipping = numCandidateBytes
+          numDeletedRows = getDeletedRowsFromAddFilesAndUpdateMetrics(candidateFiles)
+
+          if (txn.metadata.partitionColumns.nonEmpty) {
+            numPartitionsAfterSkipping = Some(numCandidatePartitions)
+            numPartitionsRemovedFrom = Some(numCandidatePartitions)
+            numPartitionsAddedTo = Some(0)
+          }
+          candidateFiles.map(_.removeWithTimestamp(operationTimestamp))
+        } else {
+          // Case 3: Delete the rows based on the condition.
+
+          // Should we write the DVs to represent the deleted rows?
+          val shouldWriteDVs = shouldWritePersistentDeletionVectors(sparkSession, txn)
+
+          val candidateFiles = txn.filterFiles(
+            metadataPredicates ++ otherPredicates,
+            keepNumRecords = shouldWriteDVs)
+          // `candidateFiles` contains the files filtered using statistics and the delete condition.
+          // They may or may not contain any rows that need to be deleted.
+
+          numFilesAfterSkipping = candidateFiles.size
+          val (numCandidateBytes, numCandidatePartitions) =
+            totalBytesAndDistinctPartitionValues(candidateFiles)
+          numBytesAfterSkipping = numCandidateBytes
+          if (txn.metadata.partitionColumns.nonEmpty) {
+            numPartitionsAfterSkipping = Some(numCandidatePartitions)
+          }
+
+          val nameToAddFileMap = generateCandidateFileMap(deltaLog.dataPath, candidateFiles)
+
+          val fileIndex = new TahoeBatchFileIndex(
+            sparkSession, "delete", candidateFiles, deltaLog, deltaLog.dataPath, txn.snapshot)
+          if (shouldWriteDVs) {
+            val targetDf = DMLWithDeletionVectorsHelper.createTargetDfForScanningForMatches(
+              sparkSession,
+              target,
+              fileIndex)
+
+            // Does the target table already have DVs enabled? If so, we need to read the table
+            // with deletion vectors.
+            val mustReadDeletionVectors = DeletionVectorUtils.deletionVectorsReadable(txn.snapshot)
+
+            val touchedFiles = DMLWithDeletionVectorsHelper.findTouchedFiles(
+              sparkSession,
+              txn,
+              mustReadDeletionVectors,
+              deltaLog,
+              targetDf,
+              fileIndex,
+              cond,
+              opName = "DELETE")
+
+            if (touchedFiles.nonEmpty) {
+              val (actions, metricMap) = DMLWithDeletionVectorsHelper.processUnmodifiedData(
+                sparkSession,
+                touchedFiles,
+                txn.snapshot)
+              metrics("numDeletedRows").set(metricMap("numModifiedRows"))
+              numDeletionVectorsAdded = metricMap("numDeletionVectorsAdded")
+              numDeletionVectorsRemoved = metricMap("numDeletionVectorsRemoved")
+              numDeletionVectorsUpdated = metricMap("numDeletionVectorsUpdated")
+              numRemovedFiles = metricMap("numRemovedFiles")
+              actions
+            } else {
+              Nil // Nothing to update
+            }
+          } else {
+            // Keep everything from the resolved target except a new TahoeFileIndex
+            // that only involves the affected files instead of all files.
+ val newTarget = DeltaTableUtils.replaceFileIndex(target, fileIndex) + val data = Dataset.ofRows(sparkSession, newTarget) + val incrDeletedCountExpr = IncrementMetric(TrueLiteral, metrics("numDeletedRows")) + val filesToRewrite = + withStatusCode("DELTA", FINDING_TOUCHED_FILES_MSG) { + if (candidateFiles.isEmpty) { + Array.empty[String] + } else { + data.filter(new Column(cond)) + .select(input_file_name()) + .filter(new Column(incrDeletedCountExpr)) + .distinct() + .as[String] + .collect() + } + } + + numRemovedFiles = filesToRewrite.length + scanTimeMs = (System.nanoTime() - startTime) / 1000 / 1000 + if (filesToRewrite.isEmpty) { + // Case 3.1: no row matches and no delete will be triggered + if (txn.metadata.partitionColumns.nonEmpty) { + numPartitionsRemovedFrom = Some(0) + numPartitionsAddedTo = Some(0) + } + Nil + } else { + // Case 3.2: some files need an update to remove the deleted files + // Do the second pass and just read the affected files + val baseRelation = buildBaseRelation( + sparkSession, txn, "delete", deltaLog.dataPath, filesToRewrite, nameToAddFileMap) + // Keep everything from the resolved target except a new TahoeFileIndex + // that only involves the affected files instead of all files. + val newTarget = DeltaTableUtils.replaceFileIndex(target, baseRelation.location) + val targetDF = Dataset.ofRows(sparkSession, newTarget) + val filterCond = Not(EqualNullSafe(cond, Literal.TrueLiteral)) + val rewrittenActions = rewriteFiles(txn, targetDF, filterCond, filesToRewrite.length) + val (changeFiles, rewrittenFiles) = rewrittenActions + .partition(_.isInstanceOf[AddCDCFile]) + numAddedFiles = rewrittenFiles.size + val removedFiles = filesToRewrite.map(f => + getTouchedFile(deltaLog.dataPath, f, nameToAddFileMap)) + val (removedBytes, removedPartitions) = + totalBytesAndDistinctPartitionValues(removedFiles) + numRemovedBytes = removedBytes + val (rewrittenBytes, rewrittenPartitions) = + totalBytesAndDistinctPartitionValues(rewrittenFiles) + numAddedBytes = rewrittenBytes + if (txn.metadata.partitionColumns.nonEmpty) { + numPartitionsRemovedFrom = Some(removedPartitions) + numPartitionsAddedTo = Some(rewrittenPartitions) + } + numAddedChangeFiles = changeFiles.size + changeFileBytes = changeFiles.collect { case f: AddCDCFile => f.size }.sum + rewriteTimeMs = + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime) - scanTimeMs + numDeletedRows = Some(metrics("numDeletedRows").value) + numCopiedRows = + Some(metrics("numTouchedRows").value - metrics("numDeletedRows").value) + numDeletionVectorsRemoved = removedFiles.count(_.deletionVector != null) + val operationTimestamp = System.currentTimeMillis() + removeFilesFromPaths( + deltaLog, nameToAddFileMap, filesToRewrite, operationTimestamp) ++ rewrittenActions + } + } + } + } + metrics("numRemovedFiles").set(numRemovedFiles) + metrics("numAddedFiles").set(numAddedFiles) + val executionTimeMs = (System.nanoTime() - startTime) / 1000 / 1000 + metrics("executionTimeMs").set(executionTimeMs) + metrics("scanTimeMs").set(scanTimeMs) + metrics("rewriteTimeMs").set(rewriteTimeMs) + metrics("numAddedChangeFiles").set(numAddedChangeFiles) + metrics("changeFileBytes").set(changeFileBytes) + metrics("numAddedBytes").set(numAddedBytes) + metrics("numRemovedBytes").set(numRemovedBytes) + metrics("numFilesBeforeSkipping").set(numFilesBeforeSkipping) + metrics("numBytesBeforeSkipping").set(numBytesBeforeSkipping) + metrics("numFilesAfterSkipping").set(numFilesAfterSkipping) + metrics("numBytesAfterSkipping").set(numBytesAfterSkipping) + 
metrics("numDeletionVectorsAdded").set(numDeletionVectorsAdded) + metrics("numDeletionVectorsRemoved").set(numDeletionVectorsRemoved) + metrics("numDeletionVectorsUpdated").set(numDeletionVectorsUpdated) + numPartitionsAfterSkipping.foreach(metrics("numPartitionsAfterSkipping").set) + numPartitionsAddedTo.foreach(metrics("numPartitionsAddedTo").set) + numPartitionsRemovedFrom.foreach(metrics("numPartitionsRemovedFrom").set) + numCopiedRows.foreach(metrics("numCopiedRows").set) + txn.registerSQLMetrics(sparkSession, metrics) + sendDriverMetrics(sparkSession, metrics) + + recordDeltaEvent( + deltaLog, + "delta.dml.delete.stats", + data = DeleteMetric( + condition = condition.map(_.sql).getOrElse("true"), + numFilesTotal, + numFilesAfterSkipping, + numAddedFiles, + numRemovedFiles, + numAddedFiles, + numAddedChangeFiles = numAddedChangeFiles, + numFilesBeforeSkipping, + numBytesBeforeSkipping, + numFilesAfterSkipping, + numBytesAfterSkipping, + numPartitionsAfterSkipping, + numPartitionsAddedTo, + numPartitionsRemovedFrom, + numCopiedRows, + numDeletedRows, + numAddedBytes, + numRemovedBytes, + changeFileBytes = changeFileBytes, + scanTimeMs, + rewriteTimeMs, + numDeletionVectorsAdded, + numDeletionVectorsRemoved, + numDeletionVectorsUpdated) + ) + + if (deleteActions.nonEmpty) { + createSetTransaction(sparkSession, deltaLog).toSeq ++ deleteActions + } else { + Seq.empty + } + } + + /** + * Returns the list of [[AddFile]]s and [[AddCDCFile]]s that have been re-written. + */ + private def rewriteFiles( + txn: OptimisticTransaction, + baseData: DataFrame, + filterCondition: Expression, + numFilesToRewrite: Long): Seq[FileAction] = { + val shouldWriteCdc = DeltaConfigs.CHANGE_DATA_FEED.fromMetaData(txn.metadata) + + // number of total rows that we have seen / are either copying or deleting (sum of both). + val incrTouchedCountExpr = IncrementMetric(TrueLiteral, metrics("numTouchedRows")) + + withStatusCode( + "DELTA", rewritingFilesMsg(numFilesToRewrite)) { + val dfToWrite = if (shouldWriteCdc) { + import org.apache.spark.sql.delta.commands.cdc.CDCReader._ + // The logic here ends up being surprisingly elegant, with all source rows ending up in + // the output. Recall that we flipped the user-provided delete condition earlier, before the + // call to `rewriteFiles`. All rows which match this latest `filterCondition` are retained + // as table data, while all rows which don't match are removed from the rewritten table data + // but do get included in the output as CDC events. 
+ baseData + .filter(new Column(incrTouchedCountExpr)) + .withColumn( + CDC_TYPE_COLUMN_NAME, + new Column(If(filterCondition, CDC_TYPE_NOT_CDC, CDC_TYPE_DELETE)) + ) + } else { + baseData + .filter(new Column(incrTouchedCountExpr)) + .filter(new Column(filterCondition)) + } + + txn.writeFiles(dfToWrite) + } + } + + def shouldWritePersistentDeletionVectors( + spark: SparkSession, txn: OptimisticTransaction): Boolean = { + spark.conf.get(DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS) && + DeletionVectorUtils.deletionVectorsWritable(txn.snapshot) + } +} + +object DeleteCommand { + def apply(delete: DeltaDelete): DeleteCommand = { + EliminateSubqueryAliases(delete.child) match { + case DeltaFullTable(relation, fileIndex) => + DeleteCommand(fileIndex.deltaLog, relation.catalogTable, delete.child, delete.condition) + case o => + throw DeltaErrors.notADeltaSourceException("DELETE", Some(o)) + } + } + + val FILE_NAME_COLUMN: String = "_input_file_name_" + val FINDING_TOUCHED_FILES_MSG: String = "Finding files to rewrite for DELETE operation" + + def rewritingFilesMsg(numFilesToRewrite: Long): String = + s"Rewriting $numFilesToRewrite files for DELETE operation" +} + +/** + * Used to report details about delete. + * + * @param condition: what was the delete condition + * @param numFilesTotal: how big is the table + * @param numTouchedFiles: how many files did we touch. Alias for `numFilesAfterSkipping` + * @param numRewrittenFiles: how many files had to be rewritten. Alias for `numAddedFiles` + * @param numRemovedFiles: how many files we removed. Alias for `numTouchedFiles` + * @param numAddedFiles: how many files we added. Alias for `numRewrittenFiles` + * @param numAddedChangeFiles: how many change files were generated + * @param numFilesBeforeSkipping: how many candidate files before skipping + * @param numBytesBeforeSkipping: how many candidate bytes before skipping + * @param numFilesAfterSkipping: how many candidate files after skipping + * @param numBytesAfterSkipping: how many candidate bytes after skipping + * @param numPartitionsAfterSkipping: how many candidate partitions after skipping + * @param numPartitionsAddedTo: how many new partitions were added + * @param numPartitionsRemovedFrom: how many partitions were removed + * @param numCopiedRows: how many rows were copied + * @param numDeletedRows: how many rows were deleted + * @param numBytesAdded: how many bytes were added + * @param numBytesRemoved: how many bytes were removed + * @param changeFileBytes: total size of change files generated + * @param scanTimeMs: how long did finding take + * @param rewriteTimeMs: how long did rewriting take + * @param numDeletionVectorsAdded: how many deletion vectors were added + * @param numDeletionVectorsRemoved: how many deletion vectors were removed + * @param numDeletionVectorsUpdated: how many deletion vectors were updated + * + * @note All the time units are milliseconds. 
+ */ +case class DeleteMetric( + condition: String, + numFilesTotal: Long, + numTouchedFiles: Long, + numRewrittenFiles: Long, + numRemovedFiles: Long, + numAddedFiles: Long, + numAddedChangeFiles: Long, + numFilesBeforeSkipping: Long, + numBytesBeforeSkipping: Long, + numFilesAfterSkipping: Long, + numBytesAfterSkipping: Long, + numPartitionsAfterSkipping: Option[Long], + numPartitionsAddedTo: Option[Long], + numPartitionsRemovedFrom: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + numCopiedRows: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + numDeletedRows: Option[Long], + numBytesAdded: Long, + numBytesRemoved: Long, + changeFileBytes: Long, + scanTimeMs: Long, + rewriteTimeMs: Long, + numDeletionVectorsAdded: Long, + numDeletionVectorsRemoved: Long, + numDeletionVectorsUpdated: Long +) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeletionVectorUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeletionVectorUtils.scala new file mode 100644 index 00000000000..c03adfcd5cf --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeletionVectorUtils.scala @@ -0,0 +1,91 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +import org.apache.spark.sql.delta.{DeletionVectorsTableFeature, DeltaConfigs, Snapshot, SnapshotDescriptor} +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} +import org.apache.spark.sql.delta.files.SupportsRowIndexFilters +import org.apache.spark.sql.delta.files.TahoeFileIndex +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.datasources.FileIndex +import org.apache.spark.sql.functions.col +import org.apache.spark.util.Utils + +trait DeletionVectorUtils { + + /** + * Run a query on the delta log to determine if the given snapshot contains no deletion vectors. + * Return `false` if it does contain deletion vectors. + */ + def isTableDVFree(snapshot: Snapshot): Boolean = { + val dvsReadable = deletionVectorsReadable(snapshot) + + if (dvsReadable) { + val dvCount = snapshot.allFiles + .filter(col("deletionVector").isNotNull) + .limit(1) + .count() + + dvCount == 0L + } else { + true + } + } + + /** + * Returns true if persistent deletion vectors are enabled and + * readable with the current reader version. 
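+   *
+   * Illustrative sketch only; the `fileIndex` value is assumed and not part of this trait:
+   * {{{
+   *   if (DeletionVectorUtils.fileIndexSupportsReadingDVs(fileIndex)) {
+   *     // rows masked by deletion vectors can be filtered out while scanning these files
+   *   }
+   * }}}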
+ */ + def fileIndexSupportsReadingDVs(fileIndex: FileIndex): Boolean = fileIndex match { + case index: TahoeFileIndex => deletionVectorsReadable(index) + case _: SupportsRowIndexFilters => true + case _ => false + } + + def deletionVectorsWritable( + snapshot: SnapshotDescriptor, + newProtocol: Option[Protocol] = None, + newMetadata: Option[Metadata] = None): Boolean = + deletionVectorsWritable( + protocol = newProtocol.getOrElse(snapshot.protocol), + metadata = newMetadata.getOrElse(snapshot.metadata)) + + def deletionVectorsWritable(protocol: Protocol, metadata: Metadata): Boolean = + protocol.isFeatureSupported(DeletionVectorsTableFeature) && + DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.fromMetaData(metadata) + + def deletionVectorsReadable( + snapshot: SnapshotDescriptor, + newProtocol: Option[Protocol] = None, + newMetadata: Option[Metadata] = None): Boolean = { + deletionVectorsReadable( + newProtocol.getOrElse(snapshot.protocol), + newMetadata.getOrElse(snapshot.metadata)) + } + + def deletionVectorsReadable( + protocol: Protocol, + metadata: Metadata): Boolean = { + protocol.isFeatureSupported(DeletionVectorsTableFeature) && + metadata.format.provider == "parquet" // DVs are only supported on parquet tables. + } +} + +// To access utilities from places where mixing in a trait is inconvenient. +object DeletionVectorUtils extends DeletionVectorUtils diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeltaCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeltaCommand.scala new file mode 100644 index 00000000000..315da17665d --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeltaCommand.scala @@ -0,0 +1,434 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import java.util.concurrent.TimeUnit.NANOSECONDS + +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.{DeltaAnalysisException, DeltaErrors, DeltaLog, DeltaOptions, DeltaTableIdentifier, DeltaTableUtils, OptimisticTransaction, ResolvedPathBasedNonDeltaTable} +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.catalog.{DeltaTableV2, IcebergTablePlaceHolder} +import org.apache.spark.sql.delta.files.TahoeBatchFileIndex +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.{DeltaSourceUtils, DeltaSQLConf} +import org.apache.spark.sql.delta.util.DeltaFileOperations +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{Analyzer, EliminateSubqueryAliases, NoSuchTableException, ResolvedTable, UnresolvedAttribute, UnresolvedRelation} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.expressions.{Expression, SubqueryExpression} +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.V1Table +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} + +/** + * Helper trait for all delta commands. + */ +trait DeltaCommand extends DeltaLogging { + /** + * Converts string predicates into [[Expression]]s relative to a transaction. + * + * @throws AnalysisException if a non-partition column is referenced. + */ + protected def parsePredicates( + spark: SparkSession, + predicate: String): Seq[Expression] = { + try { + spark.sessionState.sqlParser.parseExpression(predicate) :: Nil + } catch { + case e: ParseException => + throw DeltaErrors.failedRecognizePredicate(predicate, e) + } + } + + def verifyPartitionPredicates( + spark: SparkSession, + partitionColumns: Seq[String], + predicates: Seq[Expression]): Unit = { + + predicates.foreach { pred => + if (SubqueryExpression.hasSubquery(pred)) { + throw DeltaErrors.unsupportSubqueryInPartitionPredicates() + } + + pred.references.foreach { col => + val colName = col match { + case u: UnresolvedAttribute => + // Note: `UnresolvedAttribute(Seq("a.b"))` and `UnresolvedAttribute(Seq("a", "b"))` will + // return the same name. We accidentally treated the latter as the same as the former. + // Because some users may already rely on it, we keep supporting both. + u.nameParts.mkString(".") + case _ => col.name + } + val nameEquality = spark.sessionState.conf.resolver + partitionColumns.find(f => nameEquality(f, colName)).getOrElse { + throw DeltaErrors.nonPartitionColumnReference(colName, partitionColumns) + } + } + } + } + + /** + * Generates a map of file names to add file entries for operations where we will need to + * rewrite files such as delete, merge, update. We expect file names to be unique, because + * each file contains a UUID. 
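+   *
+   * A hedged sketch of typical use together with `getTouchedFile` (the `txn`, `deltaLog`,
+   * `predicates` and `absolutePath` values are assumed, not part of this trait):
+   * {{{
+   *   val candidateFiles = txn.filterFiles(predicates)
+   *   val nameToAddFileMap = generateCandidateFileMap(deltaLog.dataPath, candidateFiles)
+   *   val touched = getTouchedFile(deltaLog.dataPath, absolutePath, nameToAddFileMap)
+   * }}}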
+ */ + def generateCandidateFileMap( + basePath: Path, + candidateFiles: Seq[AddFile]): Map[String, AddFile] = { + val nameToAddFileMap = candidateFiles.map(add => + DeltaFileOperations.absolutePath(basePath.toString, add.path).toString -> add).toMap + assert(nameToAddFileMap.size == candidateFiles.length, + s"File name collisions found among:\n${candidateFiles.map(_.path).mkString("\n")}") + nameToAddFileMap + } + + /** + * This method provides the RemoveFile actions that are necessary for files that are touched and + * need to be rewritten in methods like Delete, Update, and Merge. + * + * @param deltaLog The DeltaLog of the table that is being operated on + * @param nameToAddFileMap A map generated using `generateCandidateFileMap`. + * @param filesToRewrite Absolute paths of the files that were touched. We will search for these + * in `candidateFiles`. Obtained as the output of the `input_file_name` + * function. + * @param operationTimestamp The timestamp of the operation + */ + protected def removeFilesFromPaths( + deltaLog: DeltaLog, + nameToAddFileMap: Map[String, AddFile], + filesToRewrite: Seq[String], + operationTimestamp: Long): Seq[RemoveFile] = { + filesToRewrite.map { absolutePath => + val addFile = getTouchedFile(deltaLog.dataPath, absolutePath, nameToAddFileMap) + addFile.removeWithTimestamp(operationTimestamp) + } + } + + /** + * Build a base relation of files that need to be rewritten as part of an update/delete/merge + * operation. + */ + protected def buildBaseRelation( + spark: SparkSession, + txn: OptimisticTransaction, + actionType: String, + rootPath: Path, + inputLeafFiles: Seq[String], + nameToAddFileMap: Map[String, AddFile]): HadoopFsRelation = { + val deltaLog = txn.deltaLog + val scannedFiles = inputLeafFiles.map(f => getTouchedFile(rootPath, f, nameToAddFileMap)) + val fileIndex = new TahoeBatchFileIndex( + spark, actionType, scannedFiles, deltaLog, rootPath, txn.snapshot) + HadoopFsRelation( + fileIndex, + partitionSchema = txn.metadata.partitionSchema, + dataSchema = txn.metadata.schema, + bucketSpec = None, + deltaLog.fileFormat(txn.protocol, txn.metadata), + txn.metadata.format.options)(spark) + } + + /** + * Find the AddFile record corresponding to the file that was read as part of a + * delete/update/merge operation. + * + * @param filePath The path to a file. 
Can be either absolute or relative.
+   * @param nameToAddFileMap Map generated through `generateCandidateFileMap()`
+   */
+  def getTouchedFile(
+      basePath: Path,
+      filePath: String,
+      nameToAddFileMap: Map[String, AddFile]): AddFile = {
+    val absolutePath = DeltaFileOperations.absolutePath(basePath.toString, filePath).toString
+    nameToAddFileMap.getOrElse(absolutePath, {
+      throw DeltaErrors.notFoundFileToBeRewritten(absolutePath, nameToAddFileMap.keys)
+    })
+  }
+
+  /**
+   * Use the analyzer to resolve the provided identifier.
+   * @param analyzer The session state analyzer to call
+   * @param identifier Table identifier to determine whether it is path-based or not
+   * @return The resolved logical plan
+   */
+  protected def resolveIdentifier(analyzer: Analyzer, identifier: TableIdentifier): LogicalPlan = {
+    EliminateSubqueryAliases(analyzer.execute(UnresolvedRelation(identifier)))
+  }
+
+  /**
+   * Use the analyzer to see whether the provided TableIdentifier is for a path-based table or not
+   * @param analyzer The session state analyzer to call
+   * @param tableIdent Table identifier to determine whether it is path-based or not
+   * @return Boolean where true means that the table is a table in a metastore and false means the
+   *         table is a path-based table
+   */
+  def isCatalogTable(analyzer: Analyzer, tableIdent: TableIdentifier): Boolean = {
+    try {
+      resolveIdentifier(analyzer, tableIdent) match {
+        // is path
+        case LogicalRelation(HadoopFsRelation(_, _, _, _, _, _), _, None, _) => false
+        // is table
+        case LogicalRelation(HadoopFsRelation(_, _, _, _, _, _), _, Some(_), _) => true
+        // is iceberg table
+        case DataSourceV2Relation(_: IcebergTablePlaceHolder, _, _, _, _) => false
+        // could not resolve table/db
+        case _: UnresolvedRelation =>
+          throw new NoSuchTableException(tableIdent.database.getOrElse(""), tableIdent.table)
+        // other e.g. view
+        case _ => true
+      }
+    } catch {
+      // Checking whether the table/database exists may throw an error in some cases; in that case,
+      // see if the table is a path-based table, otherwise rethrow the original error
+      case _: AnalysisException if isPathIdentifier(tableIdent) => false
+    }
+  }
+
+  /**
+   * Checks if the given identifier can be for a delta table's path
+   * @param tableIdent Table identifier for which to check
+   */
+  protected def isPathIdentifier(tableIdent: TableIdentifier): Boolean = {
+    val provider = tableIdent.database.getOrElse("")
+    // If the db doesn't exist or the db is called delta/tahoe, then check if the path exists
+    DeltaSourceUtils.isDeltaDataSourceName(provider) && new Path(tableIdent.table).isAbsolute
+  }
+
+  /**
+   * Utility method to return the [[DeltaLog]] of an existing Delta table referred to
+   * by either the given [[path]] or [[tableIdentifier]].
+   *
+   * @param spark [[SparkSession]] reference to use.
+   * @param path Table location. Expects a non-empty [[tableIdentifier]] or [[path]].
+   * @param tableIdentifier Table identifier. Expects a non-empty [[tableIdentifier]] or [[path]].
+   * @param operationName Operation that is getting the DeltaLog, used in error messages.
+   * @param hadoopConf Hadoop file system options used to build DeltaLog.
+   * @return DeltaLog of the table
+   * @throws AnalysisException If either no Delta table exists at the given path/identifier or
+   *                           neither [[path]] nor [[tableIdentifier]] is provided.
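+   *
+   * Illustrative sketch only; the path and operation name below are hypothetical, and `spark`
+   * is an assumed SparkSession:
+   * {{{
+   *   // Resolves the log of a path-based Delta table, or throws if it is not a Delta table.
+   *   val deltaLog = getDeltaLog(spark, Some("/data/events"), None, "VACUUM")
+   * }}}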
+ */ + protected def getDeltaLog( + spark: SparkSession, + path: Option[String], + tableIdentifier: Option[TableIdentifier], + operationName: String, + hadoopConf: Map[String, String] = Map.empty): DeltaLog = { + val tablePath = + if (path.nonEmpty) { + new Path(path.get) + } else if (tableIdentifier.nonEmpty) { + val sessionCatalog = spark.sessionState.catalog + lazy val metadata = sessionCatalog.getTableMetadata(tableIdentifier.get) + + DeltaTableIdentifier(spark, tableIdentifier.get) match { + case Some(id) if id.path.nonEmpty => + new Path(id.path.get) + case Some(id) if id.table.nonEmpty => + new Path(metadata.location) + case _ => + if (metadata.tableType == CatalogTableType.VIEW) { + throw DeltaErrors.viewNotSupported(operationName) + } + throw DeltaErrors.notADeltaTableException(operationName) + } + } else { + throw DeltaErrors.missingTableIdentifierException(operationName) + } + + val startTime = Some(System.currentTimeMillis) + val deltaLog = DeltaLog.forTable(spark, tablePath, hadoopConf) + if (deltaLog.update(checkIfUpdatedSinceTs = startTime).version < 0) { + throw DeltaErrors.notADeltaTableException( + operationName, + DeltaTableIdentifier(path, tableIdentifier)) + } + deltaLog + } + + /** + * Send the driver-side metrics. + * + * This is needed to make the SQL metrics visible in the Spark UI. + * All metrics are default initialized with 0 so that's what we're + * reporting in case we skip an already executed action. + */ + protected def sendDriverMetrics(spark: SparkSession, metrics: Map[String, SQLMetric]): Unit = { + val executionId = spark.sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) + SQLMetrics.postDriverMetricUpdates(spark.sparkContext, executionId, metrics.values.toSeq) + } + + /** + * Extracts the [[DeltaTableV2]] from a LogicalPlan iff the LogicalPlan is a [[ResolvedTable]] + * with either a [[DeltaTableV2]] or a [[V1Table]] that is referencing a Delta table. In all + * other cases this method will throw a "Table not found" exception. + */ + def getDeltaTable(target: LogicalPlan, cmd: String): DeltaTableV2 = { + // TODO: Remove this wrapper and let former callers invoke DeltaTableV2.extractFrom directly. + DeltaTableV2.extractFrom(target, cmd) + } + + /** + * Extracts [[CatalogTable]] metadata from a LogicalPlan if the plan is a [[ResolvedTable]]. The + * table can be a non delta table. + */ + def getTableCatalogTable(target: LogicalPlan, cmd: String): Option[CatalogTable] = { + target match { + case ResolvedTable(_, _, d: DeltaTableV2, _) => d.catalogTable + case ResolvedTable(_, _, t: V1Table, _) => Some(t.catalogTable) + case _ => None + } + } + + /** + * Helper method to extract the table id or path from a LogicalPlan representing + * a Delta table. This uses [[DeltaCommand.getDeltaTable]] to convert the LogicalPlan + * to a [[DeltaTableV2]] and then extracts either the path or identifier from it. If + * the [[DeltaTableV2]] has a [[CatalogTable]], the table identifier will be returned. + * Otherwise, the table's path will be returned. Throws an exception if the LogicalPlan + * does not represent a Delta table. + */ + def getDeltaTablePathOrIdentifier( + target: LogicalPlan, + cmd: String): (Option[TableIdentifier], Option[String]) = { + val table = getDeltaTable(target, cmd) + table.catalogTable match { + case Some(catalogTable) + => (Some(catalogTable.identifier), None) + case _ => (None, Some(table.path.toString)) + } + } + + /** + * Helper method to extract the table id or path from a LogicalPlan representing a resolved table + * or path. 
This calls getDeltaTablePathOrIdentifier if the resolved table is a delta table. For + * non delta table with identifier, we extract its identifier. For non delta table with path, it + * expects the path to be wrapped in an ResolvedPathBasedNonDeltaTable and extracts it from there. + */ + def getTablePathOrIdentifier( + target: LogicalPlan, + cmd: String): (Option[TableIdentifier], Option[String]) = { + target match { + case ResolvedTable(_, _, t: DeltaTableV2, _) => getDeltaTablePathOrIdentifier(target, cmd) + case ResolvedTable(_, _, t: V1Table, _) if DeltaTableUtils.isDeltaTable(t.catalogTable) => + getDeltaTablePathOrIdentifier(target, cmd) + case ResolvedTable(_, _, t: V1Table, _) => (Some(t.catalogTable.identifier), None) + case p: ResolvedPathBasedNonDeltaTable => (None, Some(p.path)) + case _ => (None, None) + } + } + + /** + * Returns true if there is information in the spark session that indicates that this write + * has already been successfully written. + */ + protected def hasBeenExecuted(txn: OptimisticTransaction, sparkSession: SparkSession, + options: Option[DeltaOptions] = None): Boolean = { + val (txnVersionOpt, txnAppIdOpt, isFromSessionConf) = getTxnVersionAndAppId( + sparkSession, options) + // only enter if both txnVersion and txnAppId are set + for (version <- txnVersionOpt; appId <- txnAppIdOpt) { + val currentVersion = txn.txnVersion(appId) + if (currentVersion >= version) { + logInfo(s"Already completed batch $version in application $appId. This will be skipped.") + if (isFromSessionConf && sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_IDEMPOTENT_DML_AUTO_RESET_ENABLED)) { + // if we got txnAppId and txnVersion from the session config, we reset the + // version here, after skipping the current transaction, as a safety measure to + // prevent data loss if the user forgets to manually reset txnVersion + sparkSession.sessionState.conf.unsetConf(DeltaSQLConf.DELTA_IDEMPOTENT_DML_TXN_VERSION) + } + return true + } + } + false + } + + /** + * Returns SetTransaction if a valid app ID and version are present. Otherwise returns + * an empty list. + */ + protected def createSetTransaction( + sparkSession: SparkSession, + deltaLog: DeltaLog, + options: Option[DeltaOptions] = None): Option[SetTransaction] = { + val (txnVersionOpt, txnAppIdOpt, isFromSessionConf) = getTxnVersionAndAppId( + sparkSession, options) + // only enter if both txnVersion and txnAppId are set + for (version <- txnVersionOpt; appId <- txnAppIdOpt) { + if (isFromSessionConf && sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_IDEMPOTENT_DML_AUTO_RESET_ENABLED)) { + // if we got txnAppID and txnVersion from the session config, we reset the + // version here as a safety measure to prevent data loss if the user forgets + // to manually reset txnVersion + sparkSession.sessionState.conf.unsetConf(DeltaSQLConf.DELTA_IDEMPOTENT_DML_TXN_VERSION) + } + return Some(SetTransaction(appId, version, Some(deltaLog.clock.getTimeMillis()))) + } + None + } + + /** + * Helper method to retrieve the current txn version and app ID. These are either + * retrieved from user-provided write options or from session configurations. 
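+   *
+   * For illustration, a sketch of where these values typically come from: an idempotent
+   * DataFrame write that sets the `txnAppId` and `txnVersion` writer options (the application
+   * id, version and path below are hypothetical):
+   * {{{
+   *   df.write.format("delta")
+   *     .option("txnAppId", "nightly-etl")
+   *     .option("txnVersion", 42L)
+   *     .mode("append")
+   *     .save("/data/events")
+   * }}}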
+ */ + private def getTxnVersionAndAppId( + sparkSession: SparkSession, + options: Option[DeltaOptions]): (Option[Long], Option[String], Boolean) = { + var txnVersion: Option[Long] = None + var txnAppId: Option[String] = None + for (o <- options) { + txnVersion = o.txnVersion + txnAppId = o.txnAppId + } + + var numOptions = txnVersion.size + txnAppId.size + // numOptions can only be 0 or 2, as enforced by + // DeltaWriteOptionsImpl.validateIdempotentWriteOptions so this + // assert should never be triggered + assert(numOptions == 0 || numOptions == 2, s"Only one of txnVersion and txnAppId " + + s"has been set via dataframe writer options: txnVersion = $txnVersion txnAppId = $txnAppId") + var fromSessionConf = false + if (numOptions == 0) { + txnVersion = sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_IDEMPOTENT_DML_TXN_VERSION) + // don't need to check for valid conversion to Long here as that + // is already enforced at set time + txnAppId = sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_IDEMPOTENT_DML_TXN_APP_ID) + // check that both session configs are set + numOptions = txnVersion.size + txnAppId.size + if (numOptions != 0 && numOptions != 2) { + throw DeltaErrors.invalidIdempotentWritesOptionsException( + "Both spark.databricks.delta.write.txnAppId and " + + "spark.databricks.delta.write.txnVersion must be specified for " + + "idempotent Delta writes") + } + fromSessionConf = true + } + (txnVersion, txnAppId, fromSessionConf) + } + +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeltaGenerateCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeltaGenerateCommand.scala new file mode 100644 index 00000000000..42b3914ca25 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeltaGenerateCommand.scala @@ -0,0 +1,64 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier} +import org.apache.spark.sql.delta.hooks.GenerateSymlinkManifest +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.execution.command.LeafRunnableCommand + +case class DeltaGenerateCommand( + modeName: String, + tableId: TableIdentifier, + options: Map[String, String]) + extends LeafRunnableCommand { + + import DeltaGenerateCommand._ + + override def run(sparkSession: SparkSession): Seq[Row] = { + if (!modeNameToGenerationFunc.contains(modeName)) { + throw DeltaErrors.unsupportedGenerateModeException(modeName) + } + + val tablePath = DeltaTableIdentifier(sparkSession, tableId) match { + case Some(id) if id.path.isDefined => + new Path(id.path.get) + case _ => + new Path(sparkSession.sessionState.catalog.getTableMetadata(tableId).location) + } + + val deltaLog = DeltaLog.forTable(sparkSession, tablePath, options) + if (!deltaLog.tableExists) { + throw DeltaErrors.notADeltaTableException("GENERATE") + } + val generationFunc = modeNameToGenerationFunc(modeName) + generationFunc(sparkSession, deltaLog) + Seq.empty + } +} + +object DeltaGenerateCommand { + val modeNameToGenerationFunc = CaseInsensitiveMap( + Map[String, (SparkSession, DeltaLog) => Unit]( + "symlink_format_manifest" -> GenerateSymlinkManifest.generateFullManifest + )) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeltaReorgTableCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeltaReorgTableCommand.scala new file mode 100644 index 00000000000..59016b3ff77 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DeltaReorgTableCommand.scala @@ -0,0 +1,89 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands + +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.sources.DeltaSourceUtils + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.analysis.ResolvedTable +import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LeafCommand, LogicalPlan, UnaryCommand} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} + +object DeltaReorgTableMode extends Enumeration { + val PURGE, UNIFORM_ICEBERG = Value +} + +case class DeltaReorgTableSpec( + reorgTableMode: DeltaReorgTableMode.Value, + icebergCompatVersionOpt: Option[Int] +) + +case class DeltaReorgTable( + target: LogicalPlan, + reorgTableSpec: DeltaReorgTableSpec = DeltaReorgTableSpec(DeltaReorgTableMode.PURGE, None))( + val predicates: Seq[String]) extends UnaryCommand { + + def child: LogicalPlan = target + + protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = + copy(target = newChild)(predicates) + + override val otherCopyArgs: Seq[AnyRef] = predicates :: Nil +} + +/** + * The PURGE command. + */ +case class DeltaReorgTableCommand( + target: LogicalPlan, + reorgTableSpec: DeltaReorgTableSpec = DeltaReorgTableSpec(DeltaReorgTableMode.PURGE, None))( + val predicates: Seq[String]) + extends OptimizeTableCommandBase + with ReorgTableForUpgradeUniformHelper + with LeafCommand + with IgnoreCachedData { + + override val otherCopyArgs: Seq[AnyRef] = predicates :: Nil + + override def optimizeByReorg( + sparkSession: SparkSession, + isPurge: Boolean, + icebergCompatVersion: Option[Int]): Seq[Row] = { + val command = OptimizeTableCommand( + target, + predicates, + optimizeContext = DeltaOptimizeContext( + isPurge = isPurge, + minFileSize = Some(0L), + maxDeletedRowsRatio = Some(0d), + icebergCompatVersion = icebergCompatVersion + ) + )(zOrderBy = Nil) + command.run(sparkSession) + } + + override def run(sparkSession: SparkSession): Seq[Row] = { + reorgTableSpec match { + case DeltaReorgTableSpec(DeltaReorgTableMode.PURGE, None) => + optimizeByReorg(sparkSession, isPurge = true, icebergCompatVersion = None) + case DeltaReorgTableSpec(DeltaReorgTableMode.UNIFORM_ICEBERG, Some(icebergCompatVersion)) => + val table = getDeltaTable(target, "REORG") + upgradeUniformIcebergCompatVersion(table, sparkSession, icebergCompatVersion) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/DescribeDeltaDetailsCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DescribeDeltaDetailsCommand.scala new file mode 100644 index 00000000000..463e81a6f08 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DescribeDeltaDetailsCommand.scala @@ -0,0 +1,197 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import java.sql.Timestamp + +import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, Snapshot, UnresolvedPathOrIdentifier} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.util.FileNames +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, TableIdentifier} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes +import org.apache.spark.sql.execution.command.RunnableCommand +import org.apache.spark.sql.types.StructType + +/** The result returned by the `describe detail` command. */ +case class TableDetail( + format: String, + id: String, + name: String, + description: String, + location: String, + createdAt: Timestamp, + lastModified: Timestamp, + partitionColumns: Seq[String], + numFiles: java.lang.Long, + sizeInBytes: java.lang.Long, + properties: Map[String, String], + minReaderVersion: java.lang.Integer, + minWriterVersion: java.lang.Integer, + tableFeatures: Seq[String] + ) + +object TableDetail { + val schema = ScalaReflection.schemaFor[TableDetail].dataType.asInstanceOf[StructType] + + private lazy val converter: TableDetail => Row = { + val toInternalRow = CatalystTypeConverters.createToCatalystConverter(schema) + val toExternalRow = CatalystTypeConverters.createToScalaConverter(schema) + toInternalRow.andThen(toExternalRow).asInstanceOf[TableDetail => Row] + } + + def toRow(table: TableDetail): Row = converter(table) +} + +/** + * A command for describing the details of a table such as the format, name, and size. 
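+ *
+ * Illustrative usage through SQL (the table name and path are hypothetical):
+ * {{{
+ *   spark.sql("DESCRIBE DETAIL eventsTable")
+ *   spark.sql("DESCRIBE DETAIL delta.`/data/events`")
+ * }}}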
+ */ +case class DescribeDeltaDetailCommand( + override val child: LogicalPlan, + hadoopConf: Map[String, String]) + extends RunnableCommand + with UnaryNode + with DeltaLogging + with DeltaCommand +{ + override val output: Seq[Attribute] = toAttributes(TableDetail.schema) + + override protected def withNewChildInternal(newChild: LogicalPlan): DescribeDeltaDetailCommand = + copy(child = newChild) + + override def run(sparkSession: SparkSession): Seq[Row] = { + val tableMetadata = getTableCatalogTable(child, DescribeDeltaDetailCommand.CMD_NAME) + val (_, path) = getTablePathOrIdentifier(child, DescribeDeltaDetailCommand.CMD_NAME) + val basePath = tableMetadata match { + case Some(metadata) => new Path(metadata.location) + case _ if path.isDefined => new Path(path.get) + case _ => + throw DeltaErrors.missingTableIdentifierException(DescribeDeltaDetailCommand.CMD_NAME) + } + val deltaLog = DeltaLog.forTable(sparkSession, basePath, hadoopConf) + recordDeltaOperation(deltaLog, "delta.ddl.describeDetails") { + val snapshot = deltaLog.update() + if (snapshot.version == -1) { + if (path.nonEmpty) { + val fs = new Path(path.get).getFileSystem(deltaLog.newDeltaHadoopConf()) + // Throw FileNotFoundException when the path doesn't exist since there may be a typo + if (!fs.exists(new Path(path.get))) { + throw DeltaErrors.fileNotFoundException(path.get) + } + describeNonDeltaPath(path.get) + } else { + describeNonDeltaTable(tableMetadata.get) + } + } else { + describeDeltaTable(sparkSession, deltaLog, snapshot, tableMetadata) + } + } + } + + private def toRows(detail: TableDetail): Seq[Row] = TableDetail.toRow(detail) :: Nil + + private def describeNonDeltaTable(table: CatalogTable): Seq[Row] = { + toRows( + TableDetail( + format = table.provider.orNull, + id = null, + name = table.qualifiedName, + description = table.comment.getOrElse(""), + location = table.storage.locationUri.map(new Path(_).toString).orNull, + createdAt = new Timestamp(table.createTime), + lastModified = null, + partitionColumns = table.partitionColumnNames, + numFiles = null, + sizeInBytes = null, + properties = table.properties, + minReaderVersion = null, + minWriterVersion = null, + tableFeatures = null + )) + } + + private def describeNonDeltaPath(path: String): Seq[Row] = { + toRows( + TableDetail( + format = null, + id = null, + name = null, + description = null, + location = path, + createdAt = null, + lastModified = null, + partitionColumns = null, + numFiles = null, + sizeInBytes = null, + properties = Map.empty, + minReaderVersion = null, + minWriterVersion = null, + tableFeatures = null)) + } + + private def describeDeltaTable( + sparkSession: SparkSession, + deltaLog: DeltaLog, + snapshot: Snapshot, + tableMetadata: Option[CatalogTable]): Seq[Row] = { + val currentVersionPath = FileNames.deltaFile(deltaLog.logPath, snapshot.version) + val fs = currentVersionPath.getFileSystem(deltaLog.newDeltaHadoopConf()) + val tableName = tableMetadata.map(_.qualifiedName).getOrElse(snapshot.metadata.name) + val featureNames = ( + snapshot.protocol.implicitlySupportedFeatures.map(_.name) ++ + snapshot.protocol.readerAndWriterFeatureNames).toSeq.sorted + toRows( + TableDetail( + format = "delta", + id = snapshot.metadata.id, + name = tableName, + description = snapshot.metadata.description, + location = deltaLog.dataPath.toString, + createdAt = snapshot.metadata.createdTime.map(new Timestamp(_)).orNull, + lastModified = new Timestamp(fs.getFileStatus(currentVersionPath).getModificationTime), + partitionColumns = 
snapshot.metadata.partitionColumns, + numFiles = snapshot.numOfFiles, + sizeInBytes = snapshot.sizeInBytes, + properties = snapshot.metadata.configuration, + minReaderVersion = snapshot.protocol.minReaderVersion, + minWriterVersion = snapshot.protocol.minWriterVersion, + tableFeatures = featureNames + )) + } +} + +object DescribeDeltaDetailCommand { + val CMD_NAME = "DESCRIBE DETAIL" + def apply( + path: Option[String], + tableIdentifier: Option[TableIdentifier], + hadoopConf: Map[String, String] + ): DescribeDeltaDetailCommand = { + val plan = UnresolvedPathOrIdentifier( + path, + tableIdentifier, + CMD_NAME + ) + DescribeDeltaDetailCommand(plan, hadoopConf) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/DescribeDeltaHistoryCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DescribeDeltaHistoryCommand.scala new file mode 100644 index 00000000000..5239562042a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/DescribeDeltaHistoryCommand.scala @@ -0,0 +1,120 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.{DeltaErrors, DeltaHistory, DeltaTableIdentifier, UnresolvedDeltaPathOrIdentifier, UnresolvedPathBasedDeltaTable} +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.metering.DeltaLogging + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, UnresolvedTable} +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, UnaryNode} +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes +import org.apache.spark.sql.execution.command.LeafRunnableCommand + +object DescribeDeltaHistory { + /** + * Alternate constructor that converts a provided path or table identifier into the + * correct child LogicalPlan node. If both path and tableIdentifier are specified (or + * if both are None), this method will throw an exception. If a table identifier is + * specified, the child LogicalPlan will be an [[UnresolvedTable]] whereas if a path + * is specified, it will be an [[UnresolvedPathBasedDeltaTable]]. + * + * Note that the returned command will have an *unresolved* child table and hence, the command + * needs to be analyzed before it can be executed. 
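+   *
+   * Illustrative usage through SQL (the path and limit are hypothetical):
+   * {{{
+   *   spark.sql("DESCRIBE HISTORY delta.`/data/events` LIMIT 10")
+   * }}}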
+ */ + def apply( + path: Option[String], + tableIdentifier: Option[TableIdentifier], + limit: Option[Int]): DescribeDeltaHistory = { + val plan = UnresolvedDeltaPathOrIdentifier(path, tableIdentifier, COMMAND_NAME) + DescribeDeltaHistory(plan, limit) + } + + val COMMAND_NAME = "DESCRIBE HISTORY" +} + +/** + * A logical placeholder for describing a Delta table's history, so that the history can be + * leveraged in subqueries. Replaced with `DescribeDeltaHistoryCommand` during planning. + * + * @param options: Hadoop file system options used for read and write. + */ +case class DescribeDeltaHistory( + override val child: LogicalPlan, + limit: Option[Int], + override val output: Seq[Attribute] = toAttributes(ExpressionEncoder[DeltaHistory]().schema)) + extends UnaryNode + with MultiInstanceRelation + with DeltaCommand { + + override def newInstance(): LogicalPlan = copy(output = output.map(_.newInstance())) + + override def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(child = newChild) + + /** + * Define this operator as having no attributes provided by children in order to prevent column + * pruning from trying to insert projections above the source relation. + */ + override lazy val references: AttributeSet = AttributeSet.empty + override def inputSet: AttributeSet = AttributeSet.empty + assert(!child.isInstanceOf[Project], + s"The child operator of DescribeDeltaHistory must not contain any projection: $child") + + /** Converts this operator into an executable command. */ + def toCommand: DescribeDeltaHistoryCommand = { + // Max array size + if (limit.exists(_ > Int.MaxValue - 8)) { + throw DeltaErrors.maxArraySizeExceeded() + } + val deltaTableV2: DeltaTableV2 = getDeltaTable(child, DescribeDeltaHistory.COMMAND_NAME) + DescribeDeltaHistoryCommand(table = deltaTableV2, limit = limit, output = output) + } +} + +/** + * A command for describing the history of a Delta table. + */ +case class DescribeDeltaHistoryCommand( + @transient table: DeltaTableV2, + limit: Option[Int], + override val output: Seq[Attribute] = toAttributes(ExpressionEncoder[DeltaHistory]().schema)) + extends LeafRunnableCommand + with MultiInstanceRelation + with DeltaLogging { + + override def newInstance(): LogicalPlan = copy(output = output.map(_.newInstance())) + + override def run(sparkSession: SparkSession): Seq[Row] = { + val deltaLog = table.deltaLog + recordDeltaOperation(deltaLog, "delta.ddl.describeHistory") { + if (!deltaLog.tableExists) { + throw DeltaErrors.notADeltaTableException( + DescribeDeltaHistory.COMMAND_NAME, + DeltaTableIdentifier(path = Some(table.path.toString)) + ) + } + import org.apache.spark.sql.delta.implicits._ + val commits = deltaLog.history.getHistory(limit) + sparkSession.implicits.localSeqToDatasetHolder(commits).toDF().collect().toSeq + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/MergeIntoCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/MergeIntoCommand.scala new file mode 100644 index 00000000000..c6b49f4a746 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/MergeIntoCommand.scala @@ -0,0 +1,210 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +import java.util.concurrent.TimeUnit + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.FileAction +import org.apache.spark.sql.delta.commands.merge.{ClassicMergeExecutor, InsertOnlyMergeExecutor, MergeIntoMaterializeSourceReason} +import org.apache.spark.sql.delta.files._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.types.{LongType, StructType} + +/** + * Performs a merge of a source query/table into a Delta table. + * + * Issues an error message when the ON search_condition of the MERGE statement can match + * a single row from the target table with multiple rows of the source table-reference. + * + * Algorithm: + * + * Phase 1: Find the input files in target that are touched by the rows that satisfy + * the condition and verify that no two source rows match with the same target row. + * This is implemented as an inner-join using the given condition. See [[ClassicMergeExecutor]] + * for more details. + * + * Phase 2: Read the touched files again and write new files with updated and/or inserted rows. + * + * Phase 3: Use the Delta protocol to atomically remove the touched files and add the new files. + * + * @param source Source data to merge from + * @param target Target table to merge into + * @param targetFileIndex TahoeFileIndex of the target table + * @param condition Condition for a source row to match with a target row + * @param matchedClauses All info related to matched clauses. + * @param notMatchedClauses All info related to not matched clauses. + * @param notMatchedBySourceClauses All info related to not matched by source clauses. + * @param migratedSchema The final schema of the target - may be changed by schema + * evolution. 
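+ * + * For illustration only (hypothetical `target`/`source` tables, not part of the original comment), + * the SQL shape this command executes is: + * {{{ + *   MERGE INTO target t USING source s ON t.key = s.key + *   WHEN MATCHED THEN UPDATE SET * + *   WHEN NOT MATCHED THEN INSERT * + *   WHEN NOT MATCHED BY SOURCE THEN DELETE + * }}}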
+ */ +case class MergeIntoCommand( + @transient source: LogicalPlan, + @transient target: LogicalPlan, + @transient catalogTable: Option[CatalogTable], + @transient targetFileIndex: TahoeFileIndex, + condition: Expression, + matchedClauses: Seq[DeltaMergeIntoMatchedClause], + notMatchedClauses: Seq[DeltaMergeIntoNotMatchedClause], + notMatchedBySourceClauses: Seq[DeltaMergeIntoNotMatchedBySourceClause], + migratedSchema: Option[StructType]) + extends MergeIntoCommandBase + with InsertOnlyMergeExecutor + with ClassicMergeExecutor { + + override val output: Seq[Attribute] = Seq( + AttributeReference("num_affected_rows", LongType)(), + AttributeReference("num_updated_rows", LongType)(), + AttributeReference("num_deleted_rows", LongType)(), + AttributeReference("num_inserted_rows", LongType)()) + + protected def runMerge(spark: SparkSession): Seq[Row] = { + recordDeltaOperation(targetDeltaLog, "delta.dml.merge") { + val startTime = System.nanoTime() + targetDeltaLog.withNewTransaction(catalogTable) { deltaTxn => + if (hasBeenExecuted(deltaTxn, spark)) { + sendDriverMetrics(spark, metrics) + return Seq.empty + } + if (target.schema.size != deltaTxn.metadata.schema.size) { + throw DeltaErrors.schemaChangedSinceAnalysis( + atAnalysis = target.schema, latestSchema = deltaTxn.metadata.schema) + } + + if (canMergeSchema) { + updateMetadata( + spark, deltaTxn, migratedSchema.getOrElse(target.schema), + deltaTxn.metadata.partitionColumns, deltaTxn.metadata.configuration, + isOverwriteMode = false, rearrangeOnly = false) + } + + // Materialize the source if needed. + prepareMergeSource( + spark, + source, + condition, + matchedClauses, + notMatchedClauses, + isInsertOnly) + + val mergeActions = { + if (isInsertOnly && spark.conf.get(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED)) { + // This is a single-job execution so there is no WriteChanges. 
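+ // The source is scanned only once on this path, so record that no second source scan was + // performed; checkNonDeterministicSource will then skip its row-count comparison.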
+ performedSecondSourceScan = false + writeOnlyInserts( + spark, deltaTxn, filterMatchedRows = true, numSourceRowsMetric = "numSourceRows") + } else { + val (filesToRewrite, deduplicateCDFDeletes) = findTouchedFiles(spark, deltaTxn) + if (filesToRewrite.nonEmpty) { + val shouldWriteDeletionVectors = shouldWritePersistentDeletionVectors(spark, deltaTxn) + if (shouldWriteDeletionVectors) { + val newWrittenFiles = withStatusCode("DELTA", "Writing modified data") { + writeAllChanges( + spark, + deltaTxn, + filesToRewrite, + deduplicateCDFDeletes, + writeUnmodifiedRows = false) + } + + val dvActions = withStatusCode( + "DELTA", + "Writing Deletion Vectors for modified data") { + writeDVs(spark, deltaTxn, filesToRewrite) + } + + newWrittenFiles ++ dvActions + } else { + val newWrittenFiles = withStatusCode("DELTA", "Writing modified data") { + writeAllChanges( + spark, + deltaTxn, + filesToRewrite, + deduplicateCDFDeletes, + writeUnmodifiedRows = true) + } + newWrittenFiles ++ filesToRewrite.map(_.remove) + } + } else { + // Run an insert-only job instead of WriteChanges + writeOnlyInserts( + spark, + deltaTxn, + filterMatchedRows = false, + numSourceRowsMetric = "numSourceRowsInSecondScan") + } + } + } + commitAndRecordStats( + spark, + deltaTxn, + mergeActions, + startTime, + getMergeSource.materializeReason) + } + spark.sharedState.cacheManager.recacheByPlan(spark, target) + } + sendDriverMetrics(spark, metrics) + val num_affected_rows = + metrics("numTargetRowsUpdated").value + + metrics("numTargetRowsDeleted").value + + metrics("numTargetRowsInserted").value + Seq(Row( + num_affected_rows, + metrics("numTargetRowsUpdated").value, + metrics("numTargetRowsDeleted").value, + metrics("numTargetRowsInserted").value)) + } + + /** + * Finalizes the merge operation before committing it to the delta log and records merge metrics: + * - Checks that the source table didn't change during the merge operation. + * - Register SQL metrics to be updated during commit. + * - Commit the operations. + * - Collects final merge stats and record them with a Delta event. + */ + private def commitAndRecordStats( + spark: SparkSession, + deltaTxn: OptimisticTransaction, + mergeActions: Seq[FileAction], + startTime: Long, + materializeSourceReason: MergeIntoMaterializeSourceReason.MergeIntoMaterializeSourceReason) + : Unit = { + checkNonDeterministicSource(spark) + + // Metrics should be recorded before commit (where they are written to delta logs). 
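+ // executionTimeMs covers the whole merge so far: from the start of runMerge up to just before + // this commit.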
+ metrics("executionTimeMs").set(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime)) + deltaTxn.registerSQLMetrics(spark, metrics) + + val finalActions = createSetTransaction(spark, targetDeltaLog).toSeq ++ mergeActions + deltaTxn.commitIfNeeded( + actions = finalActions, + DeltaOperations.Merge( + predicate = Option(condition), + matchedPredicates = matchedClauses.map(DeltaOperations.MergePredicate(_)), + notMatchedPredicates = notMatchedClauses.map(DeltaOperations.MergePredicate(_)), + notMatchedBySourcePredicates = + notMatchedBySourceClauses.map(DeltaOperations.MergePredicate(_)))) + val stats = collectMergeStats(deltaTxn, materializeSourceReason) + recordDeltaEvent(targetDeltaLog, "delta.dml.merge.stats", data = stats) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/MergeIntoCommandBase.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/MergeIntoCommandBase.scala new file mode 100644 index 00000000000..307e1008e4c --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/MergeIntoCommandBase.scala @@ -0,0 +1,503 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +import java.util.concurrent.TimeUnit + +import scala.collection.mutable + +import org.apache.spark.sql.delta.metric.IncrementMetric +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.{AddFile, FileAction} +import org.apache.spark.sql.delta.commands.merge.{MergeIntoMaterializeSource, MergeIntoMaterializeSourceReason, MergeStats} +import org.apache.spark.sql.delta.files.{TahoeBatchFileIndex, TahoeFileIndex, TransactionalWrite} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.{ImplicitMetadataOperation, SchemaUtils} +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.SparkContext +import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SparkSession} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.types.StructType + +trait MergeIntoCommandBase extends LeafRunnableCommand + with DeltaCommand + with DeltaLogging + with PredicateHelper + with ImplicitMetadataOperation + with MergeIntoMaterializeSource { + + @transient val source: LogicalPlan + @transient val target: LogicalPlan + @transient val targetFileIndex: TahoeFileIndex + val condition: Expression + val matchedClauses: Seq[DeltaMergeIntoMatchedClause] + val notMatchedClauses: Seq[DeltaMergeIntoNotMatchedClause] + val notMatchedBySourceClauses: Seq[DeltaMergeIntoNotMatchedBySourceClause] + val migratedSchema: Option[StructType] + + protected def 
shouldWritePersistentDeletionVectors( + spark: SparkSession, + txn: OptimisticTransaction): Boolean = { + spark.conf.get(DeltaSQLConf.MERGE_USE_PERSISTENT_DELETION_VECTORS) && + DeletionVectorUtils.deletionVectorsWritable(txn.snapshot) + } + + override val (canMergeSchema, canOverwriteSchema) = { + // Delta options can't be passed to MERGE INTO currently, so they'll always be empty. + // The methods on these options check whether the auto schema migration flag is on, in which case + // schema evolution is allowed. + val options = new DeltaOptions(Map.empty[String, String], conf) + (options.canMergeSchema, options.canOverwriteSchema) + } + + @transient protected lazy val sc: SparkContext = SparkContext.getOrCreate() + @transient protected lazy val targetDeltaLog: DeltaLog = targetFileIndex.deltaLog + + /** + * Map to get target output attributes by name. + * The case sensitivity of the map is set according to the Spark configuration. + */ + @transient private lazy val targetOutputAttributesMap: Map[String, Attribute] = { + val attrMap: Map[String, Attribute] = target + .outputSet.view + .map(attr => attr.name -> attr).toMap + if (conf.caseSensitiveAnalysis) { + attrMap + } else { + CaseInsensitiveMap(attrMap) + } + } + + /** Whether this merge statement has only MATCHED clauses. */ + protected def isMatchedOnly: Boolean = notMatchedClauses.isEmpty && matchedClauses.nonEmpty && + notMatchedBySourceClauses.isEmpty + + /** Whether this merge statement has only insert (NOT MATCHED) clauses. */ + protected def isInsertOnly: Boolean = matchedClauses.isEmpty && notMatchedClauses.nonEmpty && + notMatchedBySourceClauses.isEmpty + + /** Whether this merge statement includes insert clauses. */ + protected def includesInserts: Boolean = notMatchedClauses.nonEmpty + + /** Whether this merge statement includes delete clauses. */ + protected def includesDeletes: Boolean = { + matchedClauses.exists(_.isInstanceOf[DeltaMergeIntoMatchedDeleteClause]) || + notMatchedBySourceClauses.exists(_.isInstanceOf[DeltaMergeIntoNotMatchedBySourceDeleteClause]) + } + + protected def isCdcEnabled(deltaTxn: OptimisticTransaction): Boolean = + DeltaConfigs.CHANGE_DATA_FEED.fromMetaData(deltaTxn.metadata) + + protected def runMerge(spark: SparkSession): Seq[Row] + + override def run(spark: SparkSession): Seq[Row] = { + metrics("executionTimeMs").set(0) + metrics("scanTimeMs").set(0) + metrics("rewriteTimeMs").set(0) + if (migratedSchema.isDefined) { + // Block writes of void columns in the Delta log. Currently void columns are not properly + // supported and are dropped on read, but this is not enough for the merge command, which also + // reads the schema from the Delta log. Until proper support is added, we prefer to fail merge + // queries that add void columns. + val newNullColumn = SchemaUtils.findNullTypeColumn(migratedSchema.get) + if (newNullColumn.isDefined) { + throw new AnalysisException( + s"""Cannot add column '${newNullColumn.get}' with type 'void'. Please explicitly specify a + |non-void type.""".stripMargin.replaceAll("\n", " ") + ) + } + } + + val (materializeSource, _) = shouldMaterializeSource(spark, source, isInsertOnly) + if (!materializeSource) { + runMerge(spark) + } else { + // If it is determined that the source should be materialized, wrap the execution with retries, + // in case the data of the materialized source is lost.
+ runWithMaterializedSourceLostRetries( + spark, targetFileIndex.deltaLog, metrics, runMerge) + } + } + + import SQLMetrics._ + + override lazy val metrics: Map[String, SQLMetric] = baseMetrics + + lazy val baseMetrics: Map[String, SQLMetric] = Map( + "numSourceRows" -> createMetric(sc, "number of source rows"), + "numSourceRowsInSecondScan" -> + createMetric(sc, "number of source rows (during repeated scan)"), + "numTargetRowsCopied" -> createMetric(sc, "number of target rows rewritten unmodified"), + "numTargetRowsInserted" -> createMetric(sc, "number of inserted rows"), + "numTargetRowsUpdated" -> createMetric(sc, "number of updated rows"), + "numTargetRowsMatchedUpdated" -> createMetric(sc, "number of rows updated by a matched clause"), + "numTargetRowsNotMatchedBySourceUpdated" -> + createMetric(sc, "number of rows updated by a not matched by source clause"), + "numTargetRowsDeleted" -> createMetric(sc, "number of deleted rows"), + "numTargetRowsMatchedDeleted" -> createMetric(sc, "number of rows deleted by a matched clause"), + "numTargetRowsNotMatchedBySourceDeleted" -> + createMetric(sc, "number of rows deleted by a not matched by source clause"), + "numTargetFilesBeforeSkipping" -> createMetric(sc, "number of target files before skipping"), + "numTargetFilesAfterSkipping" -> createMetric(sc, "number of target files after skipping"), + "numTargetFilesRemoved" -> createMetric(sc, "number of files removed to target"), + "numTargetFilesAdded" -> createMetric(sc, "number of files added to target"), + "numTargetChangeFilesAdded" -> + createMetric(sc, "number of change data capture files generated"), + "numTargetChangeFileBytes" -> + createMetric(sc, "total size of change data capture files generated"), + "numTargetBytesBeforeSkipping" -> createMetric(sc, "number of target bytes before skipping"), + "numTargetBytesAfterSkipping" -> createMetric(sc, "number of target bytes after skipping"), + "numTargetBytesRemoved" -> createMetric(sc, "number of target bytes removed"), + "numTargetBytesAdded" -> createMetric(sc, "number of target bytes added"), + "numTargetPartitionsAfterSkipping" -> + createMetric(sc, "number of target partitions after skipping"), + "numTargetPartitionsRemovedFrom" -> + createMetric(sc, "number of target partitions from which files were removed"), + "numTargetPartitionsAddedTo" -> + createMetric(sc, "number of target partitions to which files were added"), + "executionTimeMs" -> + createTimingMetric(sc, "time taken to execute the entire operation"), + "scanTimeMs" -> + createTimingMetric(sc, "time taken to scan the files for matches"), + "rewriteTimeMs" -> + createTimingMetric(sc, "time taken to rewrite the matched files"), + "numTargetDeletionVectorsAdded" -> createMetric(sc, "number of deletion vectors added"), + "numTargetDeletionVectorsRemoved" -> createMetric(sc, "number of deletion vectors removed"), + "numTargetDeletionVectorsUpdated" -> createMetric(sc, "number of deletion vectors updated") + ) + + /** + * Collects the merge operation stats and metrics into a [[MergeStats]] object that can be + * recorded with `recordDeltaEvent`. Merge stats should be collected after committing all new + * actions as metrics may still be updated during commit. 
+ */ + protected def collectMergeStats( + deltaTxn: OptimisticTransaction, + materializeSourceReason: MergeIntoMaterializeSourceReason.MergeIntoMaterializeSourceReason) + : MergeStats = { + val stats = MergeStats.fromMergeSQLMetrics( + metrics, + condition, + matchedClauses, + notMatchedClauses, + notMatchedBySourceClauses, + isPartitioned = deltaTxn.metadata.partitionColumns.nonEmpty, + performedSecondSourceScan = performedSecondSourceScan) + stats.copy( + materializeSourceReason = Some(materializeSourceReason.toString), + materializeSourceAttempts = Some(attempt)) + } + + protected def shouldOptimizeMatchedOnlyMerge(spark: SparkSession): Boolean = { + isMatchedOnly && spark.conf.get(DeltaSQLConf.MERGE_MATCHED_ONLY_ENABLED) + } + + // There is only one when matched clause and it's a Delete and it does not have a condition. + protected val isOnlyOneUnconditionalDelete: Boolean = + matchedClauses == Seq(DeltaMergeIntoMatchedDeleteClause(None)) + + // We over-count numTargetRowsDeleted when there are multiple matches; + // this is the amount of the overcount, so we can subtract it to get a correct final metric. + protected var multipleMatchDeleteOnlyOvercount: Option[Long] = None + + // Throw error if multiple matches are ambiguous or cannot be computed correctly. + protected def throwErrorOnMultipleMatches( + hasMultipleMatches: Boolean, spark: SparkSession): Unit = { + // Multiple matches are not ambiguous when there is only one unconditional delete as + // all the matched row pairs in the 2nd join in `writeAllChanges` will get deleted. + if (hasMultipleMatches && !isOnlyOneUnconditionalDelete) { + throw DeltaErrors.multipleSourceRowMatchingTargetRowInMergeException(spark) + } + } + + /** + * Write the output data to files, repartitioning the output DataFrame by the partition columns + * if table is partitioned and `merge.repartitionBeforeWrite.enabled` is set to true. + */ + protected def writeFiles( + spark: SparkSession, + txn: OptimisticTransaction, + outputDF: DataFrame): Seq[FileAction] = { + val partitionColumns = txn.metadata.partitionColumns + // If the write will be an optimized write, which shuffles the data anyway, then don't + // repartition. Optimized writes can handle both splitting very large tasks and coalescing + // very small ones. + if (partitionColumns.nonEmpty && spark.conf.get(DeltaSQLConf.MERGE_REPARTITION_BEFORE_WRITE) + && !TransactionalWrite.shouldOptimizeWrite(txn.metadata, spark.sessionState.conf)) { + txn.writeFiles(outputDF.repartition(partitionColumns.map(col): _*)) + } else { + txn.writeFiles(outputDF) + } + } + + /** + * Builds a new logical plan to read the given `files` instead of the whole target table. + * The plan returned has the same output columns (exprIds) as the `target` logical plan, so that + * existing update/insert expressions can be applied on this new plan. Unneeded non-partition + * columns may be dropped. + */ + protected def buildTargetPlanWithFiles( + spark: SparkSession, + deltaTxn: OptimisticTransaction, + files: Seq[AddFile], + columnsToDrop: Seq[String]): LogicalPlan = { + // Action type "batch" is a historical artifact; the original implementation used it. + val fileIndex = new TahoeBatchFileIndex( + spark, + actionType = "batch", + files, + deltaTxn.deltaLog, + targetFileIndex.path, + deltaTxn.snapshot) + + buildTargetPlanWithIndex( + spark, + deltaTxn, + fileIndex, + columnsToDrop + ) + } + + /** + * Builds a new logical plan to read the target table using the given `fileIndex`. 
+ * The plan returned has the same output columns (exprIds) as the `target` logical plan, so that + * existing update/insert expressions can be applied on this new plan. + * + * @param columnsToDrop unneeded non-partition columns to be dropped + */ + protected def buildTargetPlanWithIndex( + spark: SparkSession, + deltaTxn: OptimisticTransaction, + fileIndex: TahoeFileIndex, + columnsToDrop: Seq[String]): LogicalPlan = { + + val targetOutputCols = getTargetOutputCols(deltaTxn) + + val plan = { + + // In case of schema evolution & column mapping, we need to rebuild the file format + // because under column mapping, the reference schema within DeltaParquetFileFormat + // that is used to populate metadata needs to be updated. + // + // WARNING: We must do this before replacing the file index, or we risk invalidating the + // metadata column expression ids that replaceFileIndex might inject into the plan. + val planWithReplacedFileFormat = if (deltaTxn.metadata.columnMappingMode != NoMapping) { + val updatedFileFormat = deltaTxn.deltaLog.fileFormat(deltaTxn.protocol, deltaTxn.metadata) + DeltaTableUtils.replaceFileFormat(target, updatedFileFormat) + } else { + target + } + + // We have to do surgery to use the attributes from `targetOutputCols` to scan the table. + // In cases of schema evolution, they may not be the same type as the original attributes. + // We can ignore the new columns which aren't yet AttributeReferences. + val newReadCols = targetOutputCols.collect { case a: AttributeReference => a } + DeltaTableUtils.replaceFileIndex( + spark, + planWithReplacedFileFormat, + fileIndex, + columnsToDrop, + newOutput = Some(newReadCols)) + } + + // Add back the null expression aliases for columns that are new to the target schema + // and don't exist in the input snapshot. + // These have been added in `getTargetOutputCols` but have been removed in `newReadCols` above + // and are thus not in `plan.output`. + val newColumnsWithNulls = targetOutputCols.filter(_.isInstanceOf[Alias]) + Project(plan.output ++ newColumnsWithNulls, plan) + } + + /** + * Get the expression references for the output columns of the target table relative to + * the transaction. Due to schema evolution, there are two kinds of expressions here: + * * References to columns in the target dataframe. Note that these references may have a + * different data type than they originally did due to schema evolution, but the exprId + * will be the same. These references will be marked as nullable if `makeNullable` is set + * to true. + * * Literal nulls, for new columns which are being added to the target table as part of + * this transaction, since new columns will have a value of null for all existing rows. 
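+ * + * Illustrative example (hypothetical column names, not part of the original comment): with a + * transaction schema of (id, value, extra) where `extra` was just added by schema evolution, this + * returns the existing references for `id` and `value` plus `Alias(Literal(null), "extra")()` for + * the new column.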
+ */ + protected def getTargetOutputCols( + txn: OptimisticTransaction, makeNullable: Boolean = false): Seq[NamedExpression] = { + txn.metadata.schema.map { col => + targetOutputAttributesMap + .get(col.name) + .map { a => + AttributeReference(col.name, col.dataType, makeNullable || col.nullable)(a.exprId) + } + .getOrElse(Alias(Literal(null), col.name)()) + } + } + + /** @return An `Expression` to increment a SQL metric */ + protected def incrementMetricAndReturnBool( + name: String, + valueToReturn: Boolean): Expression = { + IncrementMetric(Literal(valueToReturn), metrics(name)) + } + + /** @return An `Expression` to increment SQL metrics */ + protected def incrementMetricsAndReturnBool( + names: Seq[String], + valueToReturn: Boolean): Expression = { + val incExpr = incrementMetricAndReturnBool(names.head, valueToReturn) + names.tail.foldLeft(incExpr) { case (expr, name) => + IncrementMetric(expr, metrics(name)) + } + } + + protected def getTargetOnlyPredicates(spark: SparkSession): Seq[Expression] = { + val targetOnlyPredicatesOnCondition = + splitConjunctivePredicates(condition).filter(_.references.subsetOf(target.outputSet)) + + if (!isMatchedOnly) { + targetOnlyPredicatesOnCondition + } else { + val targetOnlyMatchedPredicate = matchedClauses + .map(_.condition.getOrElse(Literal.TrueLiteral)) + .map { condition => + splitConjunctivePredicates(condition) + .filter(_.references.subsetOf(target.outputSet)) + .reduceOption(And) + .getOrElse(Literal.TrueLiteral) + } + .reduceOption(Or) + targetOnlyPredicatesOnCondition ++ targetOnlyMatchedPredicate + } + } + + protected def seqToString(exprs: Seq[Expression]): String = exprs.map(_.sql).mkString("\n\t") + + /** + * Execute the given `thunk` and return its result while recording the time taken to do it + * and setting additional local properties for better UI visibility. + * + * @param extraOpType extra operation name recorded in the logs + * @param status human readable status string describing what the thunk is doing + * @param sqlMetricName name of SQL metric to update with the time taken by the thunk + * @param thunk the code to execute + */ + protected def recordMergeOperation[A]( + extraOpType: String = "", + status: String = null, + sqlMetricName: String = null)( + thunk: => A): A = { + val changedOpType = if (extraOpType == "") { + "delta.dml.merge" + } else { + s"delta.dml.merge.$extraOpType" + } + + val prevDesc = sc.getLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION) + val newDesc = Option(status).map { s => + // Append the status to existing description if any + val prefix = Option(prevDesc).filter(_.nonEmpty).map(_ + " - ").getOrElse("") + prefix + s + } + + def executeThunk(): A = { + try { + val startTimeNs = System.nanoTime() + newDesc.foreach { d => sc.setJobDescription(d) } + val r = thunk + val timeTakenMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNs) + if (sqlMetricName != null && timeTakenMs > 0) { + metrics(sqlMetricName) += timeTakenMs + } + r + } finally { + if (newDesc.isDefined) { + sc.setJobDescription(prevDesc) + } + } + } + + recordDeltaOperation(targetDeltaLog, changedOpType) { + if (status == null) { + executeThunk() + } else { + withStatusCode("DELTA", status) { executeThunk() } + } + } + } + + // Whether we actually scanned the source twice or the value in numSourceRowsInSecondScan is + // uninitialised. 
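+ // Defaults to true; the insert-only path in runMerge resets this to false because that path + // never performs the second source scan.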
+ protected var performedSecondSourceScan: Boolean = true + + /** + * Throws an exception if merge metrics indicate that the source table changed between the first + * and the second source table scans. + */ + protected def checkNonDeterministicSource(spark: SparkSession): Unit = { + // We only detect changes in the number of source rows. This is a best-effort detection; a + // more comprehensive solution would be to checksum the values for the columns that we read + // in both jobs. + // If numSourceRowsInSecondScan is < 0 then it hasn't run, e.g. for insert-only merges. + // In that case we have only read the source table once. + if (performedSecondSourceScan && + metrics("numSourceRows").value != metrics("numSourceRowsInSecondScan").value) { + log.warn(s"Merge source has ${metrics("numSourceRows")} rows in initial scan but " + + s"${metrics("numSourceRowsInSecondScan")} rows in second scan") + if (conf.getConf(DeltaSQLConf.MERGE_FAIL_IF_SOURCE_CHANGED)) { + throw DeltaErrors.sourceNotDeterministicInMergeException(spark) + } + } + } +} + +object MergeIntoCommandBase { + val ROW_ID_COL = "_row_id_" + val FILE_NAME_COL = "_file_name_" + val SOURCE_ROW_PRESENT_COL = "_source_row_present_" + val TARGET_ROW_PRESENT_COL = "_target_row_present_" + val ROW_DROPPED_COL = "_row_dropped_" + val PRECOMPUTED_CONDITION_COL = "_condition_" + + /** + * Spark UI will track all normal accumulators along with Spark tasks to show them on Web UI. + * However, the accumulator used by `MergeIntoCommand` can store a very large value since it + * tracks all files that need to be rewritten. We should ask Spark UI to not remember it, + * otherwise, the UI data may consume lots of memory. Hence, we use the prefix `internal.metrics.` + * to make this accumulator become an internal accumulator, so that it will not be tracked by + * Spark UI. + */ + val TOUCHED_FILES_ACCUM_NAME = "internal.metrics.MergeIntoDelta.touchedFiles" + + + /** Count the number of distinct partition values among the AddFiles in the given set. */ + def totalBytesAndDistinctPartitionValues(files: Seq[FileAction]): (Long, Int) = { + val distinctValues = new mutable.HashSet[Map[String, String]]() + var bytes = 0L + files.collect { case file: AddFile => + distinctValues += file.partitionValues + bytes += file.size + }.toList + // If the only distinct value map is an empty map, then it must be an unpartitioned table. + // Return 0 in that case. + val numDistinctValues = + if (distinctValues.size == 1 && distinctValues.head.isEmpty) 0 else distinctValues.size + (bytes, numDistinctValues) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/OptimizeTableCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/OptimizeTableCommand.scala new file mode 100644 index 00000000000..2e84ec52460 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/OptimizeTableCommand.scala @@ -0,0 +1,581 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +import java.util.ConcurrentModificationException + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.delta.skipping.MultiDimClustering +import org.apache.spark.sql.delta.skipping.clustering.{ClusteredTableUtils, ClusteringColumnInfo} +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.DeltaOperations.Operation +import org.apache.spark.sql.delta.actions.{Action, AddFile, DeletionVectorDescriptor, FileAction, RemoveFile} +import org.apache.spark.sql.delta.commands.optimize._ +import org.apache.spark.sql.delta.files.SQLMetricsReporting +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext.SPARK_JOB_GROUP_ID +import org.apache.spark.sql.{AnalysisException, Encoders, Row, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedTable} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} +import org.apache.spark.sql.execution.command.RunnableCommand +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.execution.metric.SQLMetrics.createMetric +import org.apache.spark.sql.types._ +import org.apache.spark.util.{SystemClock, ThreadUtils} + +/** Base class defining abstract optimize command */ +abstract class OptimizeTableCommandBase extends RunnableCommand with DeltaCommand { + + override val output: Seq[Attribute] = Seq( + AttributeReference("path", StringType)(), + AttributeReference("metrics", Encoders.product[OptimizeMetrics].schema)()) + + /** + * Validates ZOrderBy columns + * - validates that partitions columns are not used in `unresolvedZOrderByCols` + * - validates that we already collect stats for all the columns used in `unresolvedZOrderByCols` + * + * @param spark [[SparkSession]] to use + * @param txn the [[OptimisticTransaction]] being used to optimize + * @param unresolvedZOrderByCols Seq of [[UnresolvedAttribute]] corresponding to zOrderBy columns + */ + def validateZorderByColumns( + spark: SparkSession, + txn: OptimisticTransaction, + unresolvedZOrderByCols: Seq[UnresolvedAttribute]): Unit = { + if (unresolvedZOrderByCols.isEmpty) return + val metadata = txn.snapshot.metadata + val partitionColumns = metadata.partitionColumns.toSet + val dataSchema = + StructType(metadata.schema.filterNot(c => partitionColumns.contains(c.name))) + val df = spark.createDataFrame(new java.util.ArrayList[Row](), dataSchema) + val checkColStat = spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_OPTIMIZE_ZORDER_COL_STAT_CHECK) + val statCollectionSchema = txn.snapshot.statCollectionLogicalSchema + val colsWithoutStats = ArrayBuffer[String]() + + unresolvedZOrderByCols.foreach { colAttribute => + val colName = colAttribute.name + if (checkColStat) { + try { + SchemaUtils.findColumnPosition(colAttribute.nameParts, statCollectionSchema) + } catch { + case e: AnalysisException if e.getMessage.contains("Couldn't find column") => + colsWithoutStats.append(colName) + } + } + val isNameEqual = spark.sessionState.conf.resolver + if (partitionColumns.find(isNameEqual(_, colName)).nonEmpty) { + throw 
DeltaErrors.zOrderingOnPartitionColumnException(colName) + } + if (df.queryExecution.analyzed.resolve(colAttribute.nameParts, isNameEqual).isEmpty) { + throw DeltaErrors.zOrderingColumnDoesNotExistException(colName) + } + } + if (checkColStat && colsWithoutStats.nonEmpty) { + throw DeltaErrors.zOrderingOnColumnWithNoStatsException( + colsWithoutStats.toSeq, spark) + } + } +} + +object OptimizeTableCommand { + /** + * Alternate constructor that converts a provided path or table identifier into the + * correct child LogicalPlan node. If both path and tableIdentifier are specified (or + * if both are None), this method will throw an exception. If a table identifier is + * specified, the child LogicalPlan will be an [[UnresolvedTable]] whereas if a path + * is specified, it will be an [[UnresolvedPathBasedDeltaTable]]. + * + * Note that the returned OptimizeTableCommand will have an *unresolved* child table + * and hence, the command needs to be analyzed before it can be executed. + */ + def apply( + path: Option[String], + tableIdentifier: Option[TableIdentifier], + userPartitionPredicates: Seq[String], + optimizeContext: DeltaOptimizeContext = DeltaOptimizeContext())( + zOrderBy: Seq[UnresolvedAttribute]): OptimizeTableCommand = { + val plan = UnresolvedDeltaPathOrIdentifier(path, tableIdentifier, "OPTIMIZE") + OptimizeTableCommand(plan, userPartitionPredicates, optimizeContext)(zOrderBy) + } +} + +/** + * The `optimize` command implementation for Spark SQL. Example SQL: + * {{{ + * OPTIMIZE ('/path/to/dir' | delta.table) [WHERE part = 25]; + * }}} + */ +case class OptimizeTableCommand( + override val child: LogicalPlan, + userPartitionPredicates: Seq[String], + optimizeContext: DeltaOptimizeContext +)(val zOrderBy: Seq[UnresolvedAttribute]) + extends OptimizeTableCommandBase with RunnableCommand with UnaryNode { + + override val otherCopyArgs: Seq[AnyRef] = zOrderBy :: Nil + + override protected def withNewChildInternal(newChild: LogicalPlan): OptimizeTableCommand = + copy(child = newChild)(zOrderBy) + + override def run(sparkSession: SparkSession): Seq[Row] = { + val table = getDeltaTable(child, "OPTIMIZE") + val txn = table.startTransaction() + if (txn.readVersion == -1) { + throw DeltaErrors.notADeltaTableException(table.deltaLog.dataPath.toString) + } + + if (ClusteredTableUtils.isSupported(txn.protocol)) { + // Validate that the preview is enabled if we are optimizing a clustered table. + ClusteredTableUtils.validatePreviewEnabled(txn.snapshot.protocol) + if (userPartitionPredicates.nonEmpty) { + throw DeltaErrors.clusteringWithPartitionPredicatesException(userPartitionPredicates) + } + if (zOrderBy.nonEmpty) { + throw DeltaErrors.clusteringWithZOrderByException(zOrderBy) + } + } + + val partitionColumns = txn.snapshot.metadata.partitionColumns + // Parse the predicate expression into Catalyst expression and verify only simple filters + // on partition columns are present + + val partitionPredicates = userPartitionPredicates.flatMap { predicate => + val predicates = parsePredicates(sparkSession, predicate) + verifyPartitionPredicates( + sparkSession, + partitionColumns, + predicates) + predicates + } + + validateZorderByColumns(sparkSession, txn, zOrderBy) + val zOrderByColumns = zOrderBy.map(_.name).toSeq + + new OptimizeExecutor( + sparkSession, + txn, + partitionPredicates, + zOrderByColumns, + isAutoCompact = false, + optimizeContext + ).optimize() + } +} + +/** + * Stored all runtime context information that can control the execution of optimize. 
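+ * + * For illustration (not part of the original comment), a purge-style rewrite could be configured + * as `DeltaOptimizeContext(isPurge = true, minFileSize = Some(0L), maxDeletedRowsRatio = Some(0d))`, + * which satisfies the `require` check declared in the class body.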
+ * + * @param isPurge Whether the rewriting task is only for purging soft-deleted data instead of + * for compaction. If [[isPurge]] is true, only files with DVs will be selected + * for compaction. + * @param minFileSize Files which are smaller than this threshold will be selected for compaction. + * If not specified, [[DeltaSQLConf.DELTA_OPTIMIZE_MIN_FILE_SIZE]] will be used. + * This parameter must be set to `0` when [[isPurge]] is true. + * @param maxDeletedRowsRatio Files with a ratio of soft-deleted rows to the total rows larger than + * this threshold will be rewritten by the OPTIMIZE command. If not + * specified, [[DeltaSQLConf.DELTA_OPTIMIZE_MAX_DELETED_ROWS_RATIO]] + * will be used. This parameter must be set to `0` when [[isPurge]] is + * true. + * @param icebergCompatVersion The iceberg compatibility version used to rewrite data for + * uniform tables. + */ +case class DeltaOptimizeContext( + isPurge: Boolean = false, + minFileSize: Option[Long] = None, + maxFileSize: Option[Long] = None, + maxDeletedRowsRatio: Option[Double] = None, + icebergCompatVersion: Option[Int] = None) { + if (isPurge || icebergCompatVersion.isDefined) { + require( + minFileSize.contains(0L) && maxDeletedRowsRatio.contains(0d), + "minFileSize and maxDeletedRowsRatio must be 0 when running PURGE.") + } +} + +/** + * Optimize job which compacts small files into larger files to reduce + * the number of files and potentially allow more efficient reads. + * + * @param sparkSession Spark environment reference. + * @param txn The transaction used to optimize this table + * @param partitionPredicate List of partition predicates to select subset of files to optimize. + */ +class OptimizeExecutor( + sparkSession: SparkSession, + txn: OptimisticTransaction, + partitionPredicate: Seq[Expression], + zOrderByColumns: Seq[String], + isAutoCompact: Boolean, + optimizeContext: DeltaOptimizeContext) + extends DeltaCommand with SQLMetricsReporting with Serializable { + + /** Timestamp to use in [[FileAction]] */ + private val operationTimestamp = new SystemClock().getTimeMillis() + + private val isClusteredTable = ClusteredTableUtils.isSupported(txn.snapshot.protocol) + + private val isMultiDimClustering = isClusteredTable || zOrderByColumns.nonEmpty + + private val clusteringColumns: Seq[String] = { + if (zOrderByColumns.nonEmpty) { + zOrderByColumns + } else if (isClusteredTable) { + ClusteringColumnInfo.extractLogicalNames(txn.snapshot) + } else { + Nil + } + } + + private lazy val curve: String = { + if (zOrderByColumns.nonEmpty) { + "zorder" + } else { + assert(isClusteredTable) + "hilbert" + } + } + + def optimize(): Seq[Row] = { + recordDeltaOperation(txn.deltaLog, "delta.optimize") { + val minFileSize = optimizeContext.minFileSize.getOrElse( + sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_MIN_FILE_SIZE)) + val maxFileSize = optimizeContext.maxFileSize.getOrElse( + sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_MAX_FILE_SIZE)) + val maxDeletedRowsRatio = optimizeContext.maxDeletedRowsRatio.getOrElse( + sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_MAX_DELETED_ROWS_RATIO)) + + val candidateFiles = txn.filterFiles(partitionPredicate, keepNumRecords = true) + val partitionSchema = txn.metadata.partitionSchema + + val filesToProcess = pruneCandidateFileList(minFileSize, maxDeletedRowsRatio, candidateFiles) + val partitionsToCompact = filesToProcess.groupBy(_.partitionValues).toSeq + + val jobs = groupFilesIntoBins(partitionsToCompact, maxFileSize) + + val 
maxThreads = + sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_MAX_THREADS) + val updates = ThreadUtils.parmap(jobs, "OptimizeJob", maxThreads) { partitionBinGroup => + runOptimizeBinJob(txn, partitionBinGroup._1, partitionBinGroup._2, maxFileSize) + }.flatten + + val addedFiles = updates.collect { case a: AddFile => a } + val removedFiles = updates.collect { case r: RemoveFile => r } + val removedDVs = filesToProcess.filter(_.deletionVector != null).map(_.deletionVector).toSeq + if (addedFiles.size > 0) { + val metrics = createMetrics(sparkSession.sparkContext, addedFiles, removedFiles, removedDVs) + commitAndRetry(txn, getOperation(), updates, metrics) { newTxn => + val newPartitionSchema = newTxn.metadata.partitionSchema + val candidateSetOld = candidateFiles.map(_.path).toSet + val candidateSetNew = newTxn.filterFiles(partitionPredicate).map(_.path).toSet + + // As long as all of the files that we compacted are still part of the table, + // and the partitioning has not changed, it is valid to continue to try + // and commit this checkpoint. + if (candidateSetOld.subsetOf(candidateSetNew) && partitionSchema == newPartitionSchema) { + true + } else { + val deleted = candidateSetOld -- candidateSetNew + logWarning(s"The following compacted files were deleted " + + s"during checkpoint ${deleted.mkString(",")}. Aborting the compaction.") + false + } + } + } + + val optimizeStats = OptimizeStats() + optimizeStats.addedFilesSizeStats.merge(addedFiles) + optimizeStats.removedFilesSizeStats.merge(removedFiles) + optimizeStats.numPartitionsOptimized = jobs.map(j => j._1).distinct.size + optimizeStats.numBatches = jobs.size + optimizeStats.totalConsideredFiles = candidateFiles.size + optimizeStats.totalFilesSkipped = optimizeStats.totalConsideredFiles - removedFiles.size + optimizeStats.totalClusterParallelism = sparkSession.sparkContext.defaultParallelism + val numTableColumns = txn.snapshot.metadata.schema.size + optimizeStats.numTableColumns = numTableColumns + optimizeStats.numTableColumnsWithStats = + DeltaConfigs.DATA_SKIPPING_NUM_INDEXED_COLS.fromMetaData(txn.snapshot.metadata) + .min(numTableColumns) + if (removedDVs.size > 0) { + optimizeStats.deletionVectorStats = Some(DeletionVectorStats( + numDeletionVectorsRemoved = removedDVs.size, + numDeletionVectorRowsRemoved = removedDVs.map(_.cardinality).sum)) + } + + if (isMultiDimClustering) { + val inputFileStats = + ZOrderFileStats(removedFiles.size, removedFiles.map(_.size.getOrElse(0L)).sum) + optimizeStats.zOrderStats = Some(ZOrderStats( + strategyName = "all", // means process all files in a partition + inputCubeFiles = ZOrderFileStats(0, 0), + inputOtherFiles = inputFileStats, + inputNumCubes = 0, + mergedFiles = inputFileStats, + // There will be one z-cube for each partition + numOutputCubes = optimizeStats.numPartitionsOptimized)) + } + + return Seq(Row(txn.deltaLog.dataPath.toString, optimizeStats.toOptimizeMetrics)) + } + } + + /** + * Helper method to prune the list of selected files based on file size and the ratio of + * deleted rows according to the deletion vector in [[AddFile]]. + */ + private def pruneCandidateFileList( + minFileSize: Long, maxDeletedRowsRatio: Double, files: Seq[AddFile]): Seq[AddFile] = { + + // Select all files in case of multi-dimensional clustering + if (isMultiDimClustering) return files + + def shouldCompactBecauseOfDeletedRows(file: AddFile): Boolean = { + // Always compact files with DVs but without numRecords stats.
+ // This may be overly aggressive, but it fixes the problem in the long-term, + // as the compacted files will have stats. + (file.deletionVector != null && file.numPhysicalRecords.isEmpty) || + file.deletedToPhysicalRecordsRatio.getOrElse(0d) > maxDeletedRowsRatio + } + + def shouldRewriteToBeIcebergCompatible(file: AddFile): Boolean = { + if (optimizeContext.icebergCompatVersion.isEmpty) return false + if (file.tags == null) return true + val icebergCompatVersion = file.tags.getOrElse(AddFile.Tags.ICEBERG_COMPAT_VERSION.name, "0") + !optimizeContext.icebergCompatVersion.exists(_.toString == icebergCompatVersion) + } + + // Select files that are small, have too many deleted rows, + // or need to be made iceberg compatible + files.filter( + addFile => addFile.size < minFileSize || shouldCompactBecauseOfDeletedRows(addFile) || + shouldRewriteToBeIcebergCompatible(addFile)) + } + + /** + * Utility methods to group files into bins for optimize. + * + * @param partitionsToCompact List of files to compact group by partition. + * Partition is defined by the partition values (partCol -> partValue) + * @param maxTargetFileSize Max size (in bytes) of the compaction output file. + * @return Sequence of bins. Each bin contains one or more files from the same + * partition and targeted for one output file. + */ + private def groupFilesIntoBins( + partitionsToCompact: Seq[(Map[String, String], Seq[AddFile])], + maxTargetFileSize: Long): Seq[(Map[String, String], Seq[AddFile])] = { + partitionsToCompact.flatMap { + case (partition, files) => + val bins = new ArrayBuffer[Seq[AddFile]]() + + val currentBin = new ArrayBuffer[AddFile]() + var currentBinSize = 0L + + files.sortBy(_.size).foreach { file => + // Generally, a bin is a group of existing files, whose total size does not exceed the + // desired maxFileSize. They will be coalesced into a single output file. + // However, if isMultiDimClustering = true, all files in a partition will be read by the + // same job, the data will be range-partitioned and numFiles = totalFileSize / maxFileSize + // will be produced. See below. + if (file.size + currentBinSize > maxTargetFileSize && !isMultiDimClustering) { + bins += currentBin.toVector + currentBin.clear() + currentBin += file + currentBinSize = file.size + } else { + currentBin += file + currentBinSize += file.size + } + } + + if (currentBin.nonEmpty) { + bins += currentBin.toVector + } + + bins.filter { bin => + bin.size > 1 || // bin has more than one file or + (bin.size == 1 && bin(0).deletionVector != null) || // single file in the bin has a DV or + (bin.size == 1 && optimizeContext.icebergCompatVersion.isDefined) || // uniform reorg + isMultiDimClustering // multi-clustering + }.map(b => (partition, b)) + } + } + + /** + * Utility method to run a Spark job to compact the files in given bin + * + * @param txn [[OptimisticTransaction]] instance in use to commit the changes to DeltaLog. + * @param partition Partition values of the partition that files in [[bin]] belongs to. + * @param bin List of files to compact into one large file. 
+ * @param maxFileSize Targeted output file size in bytes + */ + private def runOptimizeBinJob( + txn: OptimisticTransaction, + partition: Map[String, String], + bin: Seq[AddFile], + maxFileSize: Long): Seq[FileAction] = { + val baseTablePath = txn.deltaLog.dataPath + + val input = txn.deltaLog.createDataFrame(txn.snapshot, bin, actionTypeOpt = Some("Optimize")) + val repartitionDF = if (isMultiDimClustering) { + val totalSize = bin.map(_.size).sum + val approxNumFiles = Math.max(1, totalSize / maxFileSize).toInt + MultiDimClustering.cluster( + input, + approxNumFiles, + clusteringColumns, + curve) + } else { + val useRepartition = sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_OPTIMIZE_REPARTITION_ENABLED) + if (useRepartition) { + input.repartition(numPartitions = 1) + } else { + input.coalesce(numPartitions = 1) + } + } + + val partitionDesc = partition.toSeq.map(entry => entry._1 + "=" + entry._2).mkString(",") + + val partitionName = if (partition.isEmpty) "" else s" in partition ($partitionDesc)" + val description = s"$baseTablePath
Optimizing ${bin.size} files" + partitionName + sparkSession.sparkContext.setJobGroup( + sparkSession.sparkContext.getLocalProperty(SPARK_JOB_GROUP_ID), + description) + + val addFiles = txn.writeFiles(repartitionDF, None, isOptimize = true, Nil).collect { + case a: AddFile => + (if (isClusteredTable) { + a.copy(clusteringProvider = Some(ClusteredTableUtils.clusteringProvider)) + } else { + a + }).copy(dataChange = false) + case other => + throw new IllegalStateException( + s"Unexpected action $other with type ${other.getClass}. File compaction job output " + + s"should only have AddFiles") + } + val removeFiles = bin.map(f => f.removeWithTimestamp(operationTimestamp, dataChange = false)) + val updates = addFiles ++ removeFiles + updates + } + + /** + * Attempts to commit the given actions to the log. In the case of a concurrent update, + * the given function will be invoked with a new transaction to allow custom conflict + * detection logic to indicate it is safe to try again, by returning `true`. + * + * This function will continue to try to commit to the log as long as `f` returns `true`, + * otherwise it throws a subclass of [[ConcurrentModificationException]]. + */ + private def commitAndRetry( + txn: OptimisticTransaction, + optimizeOperation: Operation, + actions: Seq[Action], + metrics: Map[String, SQLMetric])(f: OptimisticTransaction => Boolean): Unit = { + try { + txn.registerSQLMetrics(sparkSession, metrics) + txn.commit(actions, optimizeOperation) + } catch { + case e: ConcurrentModificationException => + val newTxn = txn.deltaLog.startTransaction(txn.catalogTable) + if (f(newTxn)) { + logInfo("Retrying commit after checking for semantic conflicts with concurrent updates.") + commitAndRetry(newTxn, optimizeOperation, actions, metrics)(f) + } else { + logWarning("Semantic conflicts detected. Aborting operation.") + throw e + } + } + } + + /** Create the appropriate [[Operation]] object for the txn commit history. */ + private def getOperation(): Operation = { + if (optimizeContext.isPurge) { + DeltaOperations.Reorg(partitionPredicate) + } else { + DeltaOperations.Optimize(partitionPredicate, clusteringColumns, auto = isAutoCompact) + } + } + + /** Create a map of SQL metrics for adding to the commit history.
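+ * The returned map covers added/removed file counts and byte totals, file-size percentiles for + * the newly added files, and deletion vector removal metrics.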
*/ + private def createMetrics( + sparkContext: SparkContext, + addedFiles: Seq[AddFile], + removedFiles: Seq[RemoveFile], + removedDVs: Seq[DeletionVectorDescriptor]): Map[String, SQLMetric] = { + + def setAndReturnMetric(description: String, value: Long) = { + val metric = createMetric(sparkContext, description) + metric.set(value) + metric + } + + def totalSize(actions: Seq[FileAction]): Long = { + var totalSize = 0L + actions.foreach { file => + val fileSize = file match { + case addFile: AddFile => addFile.size + case removeFile: RemoveFile => removeFile.size.getOrElse(0L) + case default => + throw new IllegalArgumentException(s"Unknown FileAction type: ${default.getClass}") + } + totalSize += fileSize + } + totalSize + } + + val (deletionVectorRowsRemoved, deletionVectorBytesRemoved) = + removedDVs.map(dv => (dv.cardinality, dv.sizeInBytes.toLong)) + .reduceLeftOption((dv1, dv2) => (dv1._1 + dv2._1, dv1._2 + dv2._2)) + .getOrElse((0L, 0L)) + + val dvMetrics: Map[String, SQLMetric] = Map( + "numDeletionVectorsRemoved" -> + setAndReturnMetric( + "total number of deletion vectors removed", + removedDVs.size), + "numDeletionVectorRowsRemoved" -> + setAndReturnMetric( + "total number of deletion vector rows removed", + deletionVectorRowsRemoved), + "numDeletionVectorBytesRemoved" -> + setAndReturnMetric( + "total number of bytes of removed deletion vectors", + deletionVectorBytesRemoved)) + + val sizeStats = FileSizeStatsWithHistogram.create(addedFiles.map(_.size).sorted) + Map[String, SQLMetric]( + "minFileSize" -> setAndReturnMetric("minimum file size", sizeStats.get.min), + "p25FileSize" -> setAndReturnMetric("25th percentile file size", sizeStats.get.p25), + "p50FileSize" -> setAndReturnMetric("50th percentile file size", sizeStats.get.p50), + "p75FileSize" -> setAndReturnMetric("75th percentile file size", sizeStats.get.p75), + "maxFileSize" -> setAndReturnMetric("maximum file size", sizeStats.get.max), + "numAddedFiles" -> setAndReturnMetric("total number of files added.", addedFiles.size), + "numRemovedFiles" -> setAndReturnMetric("total number of files removed.", removedFiles.size), + "numAddedBytes" -> setAndReturnMetric("total number of bytes added", totalSize(addedFiles)), + "numRemovedBytes" -> + setAndReturnMetric("total number of bytes removed", totalSize(removedFiles)) + ) ++ dvMetrics + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/ReorgTableForUpgradeUniformHelper.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/ReorgTableForUpgradeUniformHelper.scala new file mode 100644 index 00000000000..c15831b761a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/ReorgTableForUpgradeUniformHelper.scala @@ -0,0 +1,246 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.{DeltaConfig, DeltaConfigs, DeltaErrors, DeltaOperations, Snapshot} +import org.apache.spark.sql.delta.IcebergCompat.{getEnabledVersion, getIcebergCompatVersionConfigForValidVersion} +import org.apache.spark.sql.delta.UniversalFormat.{icebergEnabled, ICEBERG_FORMAT} +import org.apache.spark.sql.delta.actions.{AddFile, Protocol} +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.util.Utils.try_element_at + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.functions.col + +/** + * Helper trait for ReorgTableCommand to rewrite the table to be Iceberg compatible. + */ +trait ReorgTableForUpgradeUniformHelper extends DeltaLogging { + + private val versionChangesRequireRewrite: Map[Int, Set[Int]] = + Map(0 -> Set(2), 1 -> Set(2), 2 -> Set(2)) + + /** + * Helper function to check if the table data may need to be rewritten to be iceberg compatible. + * Only if not all addFiles has the tag, Rewriting would be performed. + */ + private def reorgMayNeedRewrite(oldVersion: Int, newVersion: Int): Boolean = { + versionChangesRequireRewrite.getOrElse(oldVersion, Set.empty[Int]).contains(newVersion) + } + + /** + * Helper function to rewrite the table. Implemented by Reorg Table Command. + */ + def optimizeByReorg( + sparkSession: SparkSession, + isPurge: Boolean, + icebergCompatVersion: Option[Int]): Seq[Row] + + /** + * Helper function to update the table icebergCompat properties. + * We can not use AlterTableSetPropertiesDeltaCommand here because we don't allow customer to + * change icebergCompatVersion by using Alter Table command. + */ + private def enableIcebergCompat( + target: DeltaTableV2, + currIcebergCompatVersionOpt: Option[Int], + targetVersionDeltaConfig: DeltaConfig[Option[Boolean]]): Unit = { + var enableIcebergCompatConf = Map( + targetVersionDeltaConfig.key -> "true", + DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key -> "false", + DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name" + ) + if (currIcebergCompatVersionOpt.nonEmpty) { + val currIcebergCompatVersionDeltaConfig = getIcebergCompatVersionConfigForValidVersion( + currIcebergCompatVersionOpt.get) + enableIcebergCompatConf ++= Map(currIcebergCompatVersionDeltaConfig.key -> "false") + } + + val alterConfTxn = target.startTransaction() + + if (alterConfTxn.protocol.minWriterVersion < 7) { + enableIcebergCompatConf += Protocol.MIN_WRITER_VERSION_PROP -> "7" + } + if (alterConfTxn.protocol.minReaderVersion < 3) { + enableIcebergCompatConf += Protocol.MIN_READER_VERSION_PROP -> "3" + } + + val metadata = alterConfTxn.metadata + val newMetadata = metadata.copy( + description = metadata.description, + configuration = metadata.configuration ++ enableIcebergCompatConf) + alterConfTxn.updateMetadata(newMetadata) + alterConfTxn.commit( + Nil, + DeltaOperations.UpgradeUniformProperties(enableIcebergCompatConf) + ) + } + + /** + * Helper function to get the num of addFiles as well as + * num of addFiles with ICEBERG_COMPAT_VERSION tag. 
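// The rewrite decision above is a lookup in versionChangesRequireRewrite keyed by the current
// version (0 meaning "not enabled"). Small sketch of which (current, target) transitions may
// trigger a data rewrite under that map; this mirrors the map in this file, nothing more.
object IcebergCompatRewriteCheck {
  // Only transitions *to* version 2 may rewrite data, and even then only if some files lack the tag.
  private val versionChangesRequireRewrite: Map[Int, Set[Int]] =
    Map(0 -> Set(2), 1 -> Set(2), 2 -> Set(2))

  def mayNeedRewrite(oldVersion: Int, newVersion: Int): Boolean =
    versionChangesRequireRewrite.getOrElse(oldVersion, Set.empty[Int]).contains(newVersion)

  def main(args: Array[String]): Unit = {
    println(mayNeedRewrite(0, 2))  // true: enabling IcebergCompatV2 may require rewriting files
    println(mayNeedRewrite(1, 1))  // false: already on the target version, only step 3 applies
    println(mayNeedRewrite(2, 1))  // false: moving to v1 only flips table properties
  }
}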
+ * @param icebergCompatVersion target iceberg compat version + * @param snapshot current snapshot + * @return (NumOfAddFiles, NumOfAddFilesWithIcebergCompatTag) + */ + private def getNumOfAddFiles( + icebergCompatVersion: Int, + table: DeltaTableV2, + snapshot: Snapshot): (Long, Long) = { + val numOfAddFilesWithTag = snapshot.allFiles + .select("tags") + .where(try_element_at(col("tags"), AddFile.Tags.ICEBERG_COMPAT_VERSION.name) + === icebergCompatVersion.toString) + .count() + val numOfAddFiles = snapshot.numOfFiles + logInfo(s"For table ${table.tableIdentifier} at version ${snapshot.version}, there are " + + s"$numOfAddFiles addFiles, and $numOfAddFilesWithTag addFiles with ICEBERG_COMPAT_VERSION=" + + s"$icebergCompatVersion tag.") + (numOfAddFiles, numOfAddFilesWithTag) + } + + /** + * Helper function to rewrite the table data files in Iceberg compatible way. + * This method would do following things: + * 1. Update the table properties to enable the target iceberg compat version and disable the + * existing iceberg compat version. + * 2. If target iceberg compat version require rewriting and not all addFiles has + * ICEBERG_COMPAT_VERSION=version tag, rewrite the table data files to be iceberg compatible + * and adding tag to all addFiles. + * 3. If universal format not enabled, alter the table properties to enable + * universalFormat = Iceberg. + * + * * There are six possible write combinations: + * | CurrentIcebergCompatVersion | TargetIcebergCompatVersion | Required steps| + * | --------------- | --------------- | --------------- | + * | None | 1 | 1, 3 | + * | None | 2 | 1, 2, 3 | + * | 1 | 1 | 3 | + * | 1 | 2 | 1, 2, 3 | + * | 2 | 1 | 1, 3 | + * | 2 | 2 | 2, 3 | + */ + private def doRewrite( + target: DeltaTableV2, + sparkSession: SparkSession, + targetIcebergCompatVersion: Int): Seq[Row] = { + + val snapshot = target.deltaLog.update() + val currIcebergCompatVersionOpt = getEnabledVersion(snapshot.metadata) + val targetVersionDeltaConfig = getIcebergCompatVersionConfigForValidVersion( + targetIcebergCompatVersion) + val versionChangeMayNeedRewrite = reorgMayNeedRewrite( + currIcebergCompatVersionOpt.getOrElse(0), targetIcebergCompatVersion) + + // Step 1: Update the table properties to enable the target iceberg compat version + val didUpdateIcebergCompatVersion = + if (!currIcebergCompatVersionOpt.contains(targetIcebergCompatVersion)) { + enableIcebergCompat(target, currIcebergCompatVersionOpt, targetVersionDeltaConfig) + logInfo(s"Update table ${target.tableIdentifier} to iceberg compat version = " + + s"$targetIcebergCompatVersion successfully.") + true + } else { + false + } + + // Step 2: Rewrite the table data files to be Iceberg compatible. + val (numOfAddFilesBefore, numOfAddFilesWithTagBefore) = getNumOfAddFiles( + targetIcebergCompatVersion, target, snapshot) + val allAddFilesHaveTag = numOfAddFilesWithTagBefore == numOfAddFilesBefore + // The table needs to be rewritten if: + // 1. The target iceberg compat version requires rewrite. + // 2. 
Not all addFile have ICEBERG_COMPAT_VERSION=targetVersion tag + val (metricsOpt, didRewrite) = if (versionChangeMayNeedRewrite && !allAddFilesHaveTag) { + logInfo(s"Reorg Table ${target.tableIdentifier} to iceberg compat version = " + + s"$targetIcebergCompatVersion need rewrite data files.") + val metrics = try { + optimizeByReorg( + sparkSession, + isPurge = false, + icebergCompatVersion = Some(targetIcebergCompatVersion) + ) + } catch { + case NonFatal(e) => + throw DeltaErrors.icebergCompatDataFileRewriteFailedException( + targetIcebergCompatVersion, e) + } + logInfo(s"Rewrite table ${target.tableIdentifier} to iceberg compat version = " + + s"$targetIcebergCompatVersion successfully.") + (Some(metrics), true) + } else { + (None, false) + } + val updatedSnapshot = target.deltaLog.update() + val (numOfAddFiles, numOfAddFilesWithIcebergCompatTag) = getNumOfAddFiles( + targetIcebergCompatVersion, target, updatedSnapshot) + if (versionChangeMayNeedRewrite && numOfAddFilesWithIcebergCompatTag != numOfAddFiles) { + throw DeltaErrors.icebergCompatReorgAddFileTagsMissingException( + updatedSnapshot.version, + targetIcebergCompatVersion, + numOfAddFiles, + numOfAddFilesWithIcebergCompatTag + ) + } + + // Step 3: Update the table properties to enable the universalFormat = Iceberg. + if (!icebergEnabled(updatedSnapshot.metadata)) { + val enableUniformConf = Map( + DeltaConfigs.UNIVERSAL_FORMAT_ENABLED_FORMATS.key -> ICEBERG_FORMAT) + AlterTableSetPropertiesDeltaCommand(target, enableUniformConf).run(sparkSession) + logInfo(s"Enabling universal format with iceberg compat version = " + + s"$targetIcebergCompatVersion for table ${target.tableIdentifier} succeeded.") + } + + recordDeltaEvent(updatedSnapshot.deltaLog, "delta.upgradeUniform.success", data = Map( + "currIcebergCompatVersion" -> currIcebergCompatVersionOpt.toString, + "targetIcebergCompatVersion" -> targetIcebergCompatVersion.toString, + "metrics" -> metricsOpt.toString, + "didUpdateIcebergCompatVersion" -> didUpdateIcebergCompatVersion.toString, + "needRewrite" -> versionChangeMayNeedRewrite.toString, + "didRewrite" -> didRewrite.toString, + "numOfAddFilesBefore" -> numOfAddFilesBefore.toString, + "numOfAddFilesWithIcebergCompatTagBefore" -> numOfAddFilesWithTagBefore.toString, + "numOfAddFilesAfter" -> numOfAddFiles.toString, + "numOfAddFilesWithIcebergCompatTagAfter" -> numOfAddFilesWithIcebergCompatTag.toString, + "universalFormatIcebergEnabled" -> icebergEnabled(target.deltaLog.update().metadata).toString + )) + metricsOpt.getOrElse(Seq.empty[Row]) + } + + /** + * Helper function to upgrade the table to uniform iceberg compat version. + */ + protected def upgradeUniformIcebergCompatVersion( + target: DeltaTableV2, + sparkSession: SparkSession, + targetIcebergCompatVersion: Int): Seq[Row] = { + try { + doRewrite(target, sparkSession, targetIcebergCompatVersion) + } catch { + case NonFatal(e) => + recordDeltaEvent(target.deltaLog, "delta.upgradeUniform.exception", data = Map( + "targetIcebergCompatVersion" -> targetIcebergCompatVersion.toString, + "exception" -> e.toString + )) + throw e + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/RestoreTableCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/RestoreTableCommand.scala new file mode 100644 index 00000000000..163c6e4742e --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/RestoreTableCommand.scala @@ -0,0 +1,309 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
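// The tag bookkeeping used above (all AddFiles vs. AddFiles carrying ICEBERG_COMPAT_VERSION)
// is a filter over the `tags` map column. Self-contained approximation using plain Spark's
// element_at in place of Delta's try_element_at helper; file names and tag values are invented.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, element_at}

object CountTaggedAddFilesSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("count-tagged").getOrCreate()
    import spark.implicits._

    // Stand-in for snapshot.allFiles: one row per AddFile with its tags map.
    val allFiles = Seq(
      ("part-0001.parquet", Map("ICEBERG_COMPAT_VERSION" -> "2")),
      ("part-0002.parquet", Map.empty[String, String]),
      ("part-0003.parquet", Map("ICEBERG_COMPAT_VERSION" -> "2"))
    ).toDF("path", "tags")

    val targetVersion = 2
    // element_at returns null for a missing key, so untagged files simply drop out of the filter.
    val numWithTag = allFiles
      .where(element_at(col("tags"), "ICEBERG_COMPAT_VERSION") === targetVersion.toString)
      .count()

    println(s"${allFiles.count()} AddFiles total, $numWithTag tagged with version $targetVersion")
    spark.stop()
  }
}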
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +import java.sql.Timestamp + +import scala.collection.JavaConverters._ +import scala.util.{Success, Try} + +import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaOperations, Snapshot} +import org.apache.spark.sql.delta.actions.{AddFile, DeletionVectorDescriptor, RemoveFile} +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.DeltaFileOperations.absolutePath +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{Column, DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Literal} +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.functions.{column, lit} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.IGNORE_MISSING_FILES +import org.apache.spark.sql.types.LongType +import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.SerializableConfiguration + +/** Base trait class for RESTORE. Defines command output schema and metrics. */ +trait RestoreTableCommandBase { + + // RESTORE operation metrics + val TABLE_SIZE_AFTER_RESTORE = "tableSizeAfterRestore" + val NUM_OF_FILES_AFTER_RESTORE = "numOfFilesAfterRestore" + val NUM_REMOVED_FILES = "numRemovedFiles" + val NUM_RESTORED_FILES = "numRestoredFiles" + val REMOVED_FILES_SIZE = "removedFilesSize" + val RESTORED_FILES_SIZE = "restoredFilesSize" + + // SQL way column names for RESTORE command output + private val COLUMN_TABLE_SIZE_AFTER_RESTORE = "table_size_after_restore" + private val COLUMN_NUM_OF_FILES_AFTER_RESTORE = "num_of_files_after_restore" + private val COLUMN_NUM_REMOVED_FILES = "num_removed_files" + private val COLUMN_NUM_RESTORED_FILES = "num_restored_files" + private val COLUMN_REMOVED_FILES_SIZE = "removed_files_size" + private val COLUMN_RESTORED_FILES_SIZE = "restored_files_size" + + val outputSchema: Seq[Attribute] = Seq( + AttributeReference(COLUMN_TABLE_SIZE_AFTER_RESTORE, LongType)(), + AttributeReference(COLUMN_NUM_OF_FILES_AFTER_RESTORE, LongType)(), + AttributeReference(COLUMN_NUM_REMOVED_FILES, LongType)(), + AttributeReference(COLUMN_NUM_RESTORED_FILES, LongType)(), + AttributeReference(COLUMN_REMOVED_FILES_SIZE, LongType)(), + AttributeReference(COLUMN_RESTORED_FILES_SIZE, LongType)() + ) +} + +/** + * Perform restore of delta table to a specified version or timestamp + * + * Algorithm: + * 1) Read the latest snapshot of the table. + * 2) Read snapshot for version or timestamp to restore + * 3) Compute files available in snapshot for restoring (files were removed by some commit) + * but missed in the latest. Add these files into commit as AddFile action. 
+ * 4) Compute files available in the latest snapshot (files were added after version to restore) + * but missed in the snapshot to restore. Add these files into commit as RemoveFile action. + * 5) If SQLConf.IGNORE_MISSING_FILES option is false (default value) check availability of AddFile + * in file system. + * 6) Commit metadata, Protocol, all RemoveFile and AddFile actions + * into delta log using `commitLarge` (commit will be failed in case of parallel transaction) + * 7) If table was modified in parallel then ignore restore and raise exception. + * + */ +case class RestoreTableCommand(sourceTable: DeltaTableV2) + extends LeafRunnableCommand with DeltaCommand with RestoreTableCommandBase { + + override val output: Seq[Attribute] = outputSchema + + override def run(spark: SparkSession): Seq[Row] = { + val deltaLog = sourceTable.deltaLog + val version = sourceTable.timeTravelOpt.get.version + val timestamp = getTimestamp() + recordDeltaOperation(deltaLog, "delta.restore") { + require(version.isEmpty ^ timestamp.isEmpty, + "Either the version or timestamp should be provided for restore") + + val versionToRestore = version.getOrElse { + deltaLog + .history + .getActiveCommitAtTime(parseStringToTs(timestamp), canReturnLastCommit = true) + .version + } + + val latestVersion = deltaLog.update().version + + require(versionToRestore < latestVersion, s"Version to restore ($versionToRestore)" + + s"should be less then last available version ($latestVersion)") + + deltaLog.withNewTransaction(sourceTable.catalogTable) { txn => + val latestSnapshot = txn.snapshot + val snapshotToRestore = deltaLog.getSnapshotAt(versionToRestore) + val latestSnapshotFiles = latestSnapshot.allFiles + val snapshotToRestoreFiles = snapshotToRestore.allFiles + + import org.apache.spark.sql.delta.implicits._ + + // If either source version or destination version contains DVs, + // we have to take them into account during deduplication. 
+ val targetMayHaveDVs = DeletionVectorUtils.deletionVectorsReadable(latestSnapshot) + val sourceMayHaveDVs = DeletionVectorUtils.deletionVectorsReadable(snapshotToRestore) + + val normalizedSourceWithoutDVs = snapshotToRestoreFiles.mapPartitions { files => + files.map(file => (file, file.path)) + }.toDF("srcAddFile", "srcPath") + val normalizedTargetWithoutDVs = latestSnapshotFiles.mapPartitions { files => + files.map(file => (file, file.path)) + }.toDF("tgtAddFile", "tgtPath") + + def addDVsToNormalizedDF( + mayHaveDVs: Boolean, + dvIdColumnName: String, + dvAccessColumn: Column, + normalizedDf: DataFrame): DataFrame = { + if (mayHaveDVs) { + normalizedDf.withColumn( + dvIdColumnName, + DeletionVectorDescriptor.uniqueIdExpression(dvAccessColumn)) + } else { + normalizedDf.withColumn(dvIdColumnName, lit(null)) + } + } + + val normalizedSource = addDVsToNormalizedDF( + mayHaveDVs = sourceMayHaveDVs, + dvIdColumnName = "srcDeletionVectorId", + dvAccessColumn = column("srcAddFile.deletionVector"), + normalizedDf = normalizedSourceWithoutDVs) + + val normalizedTarget = addDVsToNormalizedDF( + mayHaveDVs = targetMayHaveDVs, + dvIdColumnName = "tgtDeletionVectorId", + dvAccessColumn = column("tgtAddFile.deletionVector"), + normalizedDf = normalizedTargetWithoutDVs) + + val joinExprs = + column("srcPath") === column("tgtPath") and + // Use comparison operator where NULL == NULL + column("srcDeletionVectorId") <=> column("tgtDeletionVectorId") + + val filesToAdd = normalizedSource + .join(normalizedTarget, joinExprs, "left_anti") + .select(column("srcAddFile").as[AddFile]) + .map(_.copy(dataChange = true)) + + val filesToRemove = normalizedTarget + .join(normalizedSource, joinExprs, "left_anti") + .select(column("tgtAddFile").as[AddFile]) + .map(_.removeWithTimestamp()) + + val ignoreMissingFiles = spark + .sessionState + .conf + .getConf(IGNORE_MISSING_FILES) + + if (!ignoreMissingFiles) { + checkSnapshotFilesAvailability(deltaLog, filesToAdd, versionToRestore) + } + + // Commit files, metrics, protocol and metadata to delta log + val metrics = withDescription("metrics") { + computeMetrics(filesToAdd, filesToRemove, snapshotToRestore) + } + val addActions = withDescription("add actions") { + filesToAdd.toLocalIterator().asScala + } + val removeActions = withDescription("remove actions") { + filesToRemove.toLocalIterator().asScala + } + + txn.updateMetadata(snapshotToRestore.metadata) + + val sourceProtocol = snapshotToRestore.protocol + val targetProtocol = latestSnapshot.protocol + // Only upgrade the protocol, never downgrade (unless allowed by flag), since that may break + // time travel. 
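// The two left_anti joins above are the heart of RESTORE's diffing: anything present in the
// snapshot being restored but absent from the latest snapshot comes back as an AddFile, and the
// reverse set becomes RemoveFiles, with <=> making "no deletion vector" compare equal to
// "no deletion vector". Standalone sketch with toy (path, deletionVectorId) pairs.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

object RestoreDiffSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("restore-diff").getOrCreate()
    import spark.implicits._

    // None models a file without a deletion vector.
    val toRestore = Seq(("a.parquet", None), ("b.parquet", Some("dv-1"))).toDF("srcPath", "srcDvId")
    val latest    = Seq(("b.parquet", Some("dv-2")), ("c.parquet", None)).toDF("tgtPath", "tgtDvId")

    val joinExprs = col("srcPath") === col("tgtPath") && col("srcDvId") <=> col("tgtDvId")

    val filesToAdd    = toRestore.join(latest, joinExprs, "left_anti")  // a.parquet and b.parquet@dv-1
    val filesToRemove = latest.join(toRestore, joinExprs, "left_anti")  // b.parquet@dv-2 and c.parquet

    filesToAdd.show(false)
    filesToRemove.show(false)
    spark.stop()
  }
}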
+ val protocolDowngradeAllowed = + conf.getConf(DeltaSQLConf.RESTORE_TABLE_PROTOCOL_DOWNGRADE_ALLOWED) + val newProtocol = if (protocolDowngradeAllowed) { + sourceProtocol + } else { + sourceProtocol.merge(targetProtocol) + } + + txn.commitLarge( + spark, + Iterator.single(newProtocol) ++ addActions ++ removeActions, + DeltaOperations.Restore(version, timestamp), + Map.empty, + metrics.mapValues(_.toString).toMap) + + Seq(Row( + metrics.get(TABLE_SIZE_AFTER_RESTORE), + metrics.get(NUM_OF_FILES_AFTER_RESTORE), + metrics.get(NUM_REMOVED_FILES), + metrics.get(NUM_RESTORED_FILES), + metrics.get(REMOVED_FILES_SIZE), + metrics.get(RESTORED_FILES_SIZE))) + } + } + } + + private def withDescription[T](action: String)(f: => T): T = + withStatusCode("DELTA", + s"RestoreTableCommand: compute $action (table path ${sourceTable.deltaLog.dataPath})") { + f + } + + private def parseStringToTs(timestamp: Option[String]): Timestamp = { + Try { + timestamp.flatMap { tsStr => + val tz = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone) + val utfStr = UTF8String.fromString(tsStr) + DateTimeUtils.stringToTimestamp(utfStr, tz) + } + } match { + case Success(Some(tsMicroseconds)) => new Timestamp(tsMicroseconds / 1000) + case _ => throw DeltaErrors.timestampInvalid(Literal(timestamp.get)) + } + } + + private def computeMetrics( + toAdd: Dataset[AddFile], + toRemove: Dataset[RemoveFile], + snapshot: Snapshot + ): Map[String, Long] = { + // scalastyle:off sparkimplicits + import toAdd.sparkSession.implicits._ + // scalastyle:on sparkimplicits + + val (numRestoredFiles, restoredFilesSize) = toAdd + .agg("size" -> "count", "size" -> "sum").as[(Long, Option[Long])].head() + + val (numRemovedFiles, removedFilesSize) = toRemove + .agg("size" -> "count", "size" -> "sum").as[(Long, Option[Long])].head() + + Map( + NUM_RESTORED_FILES -> numRestoredFiles, + RESTORED_FILES_SIZE -> restoredFilesSize.getOrElse(0), + NUM_REMOVED_FILES -> numRemovedFiles, + REMOVED_FILES_SIZE -> removedFilesSize.getOrElse(0), + NUM_OF_FILES_AFTER_RESTORE -> snapshot.numOfFiles, + TABLE_SIZE_AFTER_RESTORE -> snapshot.sizeInBytes + ) + } + + /* Prevent users from running restore to table version with missed + * data files (manually deleted or vacuumed). 
Restoring to this version partially + * is still possible if spark.sql.files.ignoreMissingFiles is set to true + */ + private def checkSnapshotFilesAvailability( + deltaLog: DeltaLog, + files: Dataset[AddFile], + version: Long): Unit = withDescription("missing files validation") { + + val spark: SparkSession = files.sparkSession + + val pathString = deltaLog.dataPath.toString + val hadoopConf = spark.sparkContext.broadcast( + new SerializableConfiguration(deltaLog.newDeltaHadoopConf())) + + import org.apache.spark.sql.delta.implicits._ + + val missedFiles = files + .mapPartitions { files => + val path = new Path(pathString) + val fs = path.getFileSystem(hadoopConf.value.value) + files.filterNot(f => fs.exists(absolutePath(pathString, f.path))) + } + .map(_.path) + .head(100) + + if (missedFiles.nonEmpty) { + throw DeltaErrors.restoreMissedDataFilesError(missedFiles, version) + } + } + + /** If available get the timestamp referring to a snapshot in the source table timeline */ + private def getTimestamp(): Option[String] = { + if (sourceTable.timeTravelOpt.get.timestamp.isDefined) { + Some(sourceTable.timeTravelOpt.get.getTimestamp(conf).toString) + } else { + None + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/ShowDeltaTableColumnsCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/ShowDeltaTableColumnsCommand.scala new file mode 100644 index 00000000000..0b88f83fcc0 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/ShowDeltaTableColumnsCommand.scala @@ -0,0 +1,54 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +import org.apache.spark.sql.delta.catalog.DeltaTableV2 + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes +import org.apache.spark.sql.execution.command.RunnableCommand + +/** + * The column format of the result returned by the `SHOW COLUMNS` command. + */ +case class TableColumns(col_name: String) + +/** + * A command for listing all column names of a Delta table. + * + * @param child The resolved Delta table + */ +case class ShowDeltaTableColumnsCommand(child: LogicalPlan) + extends RunnableCommand with UnaryNode with DeltaCommand { + + override val output: Seq[Attribute] = toAttributes(ExpressionEncoder[TableColumns]().schema) + + override protected def withNewChildInternal(newChild: LogicalPlan): ShowDeltaTableColumnsCommand = + copy(child = newChild) + + override def run(sparkSession: SparkSession): Seq[Row] = { + // Return the schema from snapshot if it is an Delta table. Or raise + // `DeltaErrors.notADeltaTableException` if it is a non-Delta table. 
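// What SHOW COLUMNS ultimately returns is the latest schema's field names wrapped in Rows, one
// per column. Minimal sketch of that shape with a hand-built schema standing in for
// deltaLog.update().schema.
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

object ShowColumnsShapeSketch {
  def main(args: Array[String]): Unit = {
    val schema = StructType(Seq(
      StructField("id", IntegerType),
      StructField("name", StringType)))

    val rows: Seq[Row] = schema.fieldNames.map(Row(_)).toSeq
    rows.foreach(println)  // [id] then [name], matching the single col_name output column
  }
}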
+ val deltaLog = getDeltaTable(child, "SHOW COLUMNS").deltaLog + recordDeltaOperation(deltaLog, "delta.ddl.showColumns") { + deltaLog.update().schema.fieldNames.map { x => Row(x) }.toSeq + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/UpdateCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/UpdateCommand.scala new file mode 100644 index 00000000000..5c7f755e14b --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/UpdateCommand.scala @@ -0,0 +1,500 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import java.util.concurrent.TimeUnit + +import org.apache.spark.sql.delta.metric.IncrementMetric +import org.apache.spark.sql.delta.{DeltaConfigs, DeltaLog, DeltaOperations, DeltaTableUtils, OptimisticTransaction} +import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile, FileAction} +import org.apache.spark.sql.delta.commands.cdc.CDCReader.{CDC_TYPE_COLUMN_NAME, CDC_TYPE_NOT_CDC, CDC_TYPE_UPDATE_POSTIMAGE, CDC_TYPE_UPDATE_PREIMAGE} +import org.apache.spark.sql.delta.files.{TahoeBatchFileIndex, TahoeFileIndex} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkContext +import org.apache.spark.sql.{Column, DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, If, Literal} +import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral +import org.apache.spark.sql.catalyst.plans.QueryPlan +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.execution.metric.SQLMetrics.{createMetric, createTimingMetric} +import org.apache.spark.sql.functions.{array, col, explode, input_file_name, lit, struct} +import org.apache.spark.sql.types.LongType + +/** + * Performs an Update using `updateExpression` on the rows that match `condition` + * + * Algorithm: + * 1) Identify the affected files, i.e., the files that may have the rows to be updated. + * 2) Scan affected files, apply the updates, and generate a new DF with updated rows. + * 3) Use the Delta protocol to atomically write the new DF as new files and remove + * the affected files that are identified in step 1. 
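// Conceptually, step 2 above rewrites every row of a touched file, applying the update
// expressions only where the condition holds and copying the row otherwise. Simplified,
// self-contained rendering of that idea with plain DataFrame when/otherwise; table contents
// and column names are invented and Delta internals are not involved.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, lit, when}

object UpdateRewriteSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("update-rewrite").getOrCreate()
    import spark.implicits._

    val rowsOfTouchedFiles = Seq((1, "fresh"), (2, "stale"), (3, "stale")).toDF("id", "status")
    val condition = col("status") === "stale"          // UPDATE ... WHERE status = 'stale'

    val rewritten = rowsOfTouchedFiles.withColumn(
      "status", when(condition, lit("refreshed")).otherwise(col("status")))

    rewritten.show()  // rows 2 and 3 are updated, row 1 is copied unchanged into the new file
    spark.stop()
  }
}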
+ */ +case class UpdateCommand( + tahoeFileIndex: TahoeFileIndex, + catalogTable: Option[CatalogTable], + target: LogicalPlan, + updateExpressions: Seq[Expression], + condition: Option[Expression]) + extends LeafRunnableCommand with DeltaCommand { + + override val output: Seq[Attribute] = { + Seq(AttributeReference("num_affected_rows", LongType)()) + } + + override def innerChildren: Seq[QueryPlan[_]] = Seq(target) + + @transient private lazy val sc: SparkContext = SparkContext.getOrCreate() + + override lazy val metrics = Map[String, SQLMetric]( + "numAddedFiles" -> createMetric(sc, "number of files added."), + "numAddedBytes" -> createMetric(sc, "number of bytes added"), + "numRemovedFiles" -> createMetric(sc, "number of files removed."), + "numRemovedBytes" -> createMetric(sc, "number of bytes removed"), + "numUpdatedRows" -> createMetric(sc, "number of rows updated."), + "numCopiedRows" -> createMetric(sc, "number of rows copied."), + "executionTimeMs" -> + createTimingMetric(sc, "time taken to execute the entire operation"), + "scanTimeMs" -> + createTimingMetric(sc, "time taken to scan the files for matches"), + "rewriteTimeMs" -> + createTimingMetric(sc, "time taken to rewrite the matched files"), + "numAddedChangeFiles" -> createMetric(sc, "number of change data capture files generated"), + "changeFileBytes" -> createMetric(sc, "total size of change data capture files generated"), + "numTouchedRows" -> createMetric(sc, "number of rows touched (copied + updated)"), + "numDeletionVectorsAdded" -> createMetric(sc, "number of deletion vectors added"), + "numDeletionVectorsRemoved" -> createMetric(sc, "number of deletion vectors removed"), + "numDeletionVectorsUpdated" -> createMetric(sc, "number of deletion vectors updated") + ) + + final override def run(sparkSession: SparkSession): Seq[Row] = { + recordDeltaOperation(tahoeFileIndex.deltaLog, "delta.dml.update") { + val deltaLog = tahoeFileIndex.deltaLog + deltaLog.withNewTransaction(catalogTable) { txn => + DeltaLog.assertRemovable(txn.snapshot) + if (hasBeenExecuted(txn, sparkSession)) { + sendDriverMetrics(sparkSession, metrics) + return Seq.empty + } + performUpdate(sparkSession, deltaLog, txn) + } + // Re-cache all cached plans(including this relation itself, if it's cached) that refer to + // this data source relation. + sparkSession.sharedState.cacheManager.recacheByPlan(sparkSession, target) + } + Seq(Row(metrics("numUpdatedRows").value)) + } + + private def performUpdate( + sparkSession: SparkSession, deltaLog: DeltaLog, txn: OptimisticTransaction): Unit = { + import org.apache.spark.sql.delta.implicits._ + + var numTouchedFiles: Long = 0 + var numRewrittenFiles: Long = 0 + var numAddedBytes: Long = 0 + var numRemovedBytes: Long = 0 + var numAddedChangeFiles: Long = 0 + var changeFileBytes: Long = 0 + var scanTimeMs: Long = 0 + var rewriteTimeMs: Long = 0 + var numDeletionVectorsAdded: Long = 0 + var numDeletionVectorsRemoved: Long = 0 + var numDeletionVectorsUpdated: Long = 0 + + val startTime = System.nanoTime() + val numFilesTotal = txn.snapshot.numOfFiles + + val updateCondition = condition.getOrElse(Literal.TrueLiteral) + val (metadataPredicates, dataPredicates) = + DeltaTableUtils.splitMetadataAndDataPredicates( + updateCondition, txn.metadata.partitionColumns, sparkSession) + + // Should we write the DVs to represent updated rows? 
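// The metadata/data predicate split used above matters because predicates over partition
// columns alone can prune candidate files from file-level metadata, while the rest must be
// evaluated against the data. Rough, non-Catalyst model of that partitioning; the Predicate
// type here is a simplified stand-in, not a Spark expression.
object PredicateSplitSketch {
  final case class Predicate(sql: String, referencedColumns: Set[String])

  def split(
      predicates: Seq[Predicate],
      partitionColumns: Set[String]): (Seq[Predicate], Seq[Predicate]) =
    predicates.partition(_.referencedColumns.subsetOf(partitionColumns))

  def main(args: Array[String]): Unit = {
    val (metadataPreds, dataPreds) = split(
      Seq(
        Predicate("date = '2024-01-01'", Set("date")),
        Predicate("value > 10", Set("value"))),
      partitionColumns = Set("date"))
    println(s"metadata-only: ${metadataPreds.map(_.sql)}, data: ${dataPreds.map(_.sql)}")
  }
}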
+ val shouldWriteDeletionVectors = shouldWritePersistentDeletionVectors(sparkSession, txn) + val candidateFiles = txn.filterFiles( + metadataPredicates ++ dataPredicates, + keepNumRecords = shouldWriteDeletionVectors) + + val nameToAddFile = generateCandidateFileMap(deltaLog.dataPath, candidateFiles) + + scanTimeMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime) + + val filesToRewrite: Seq[TouchedFileWithDV] = if (candidateFiles.isEmpty) { + // Case 1: Do nothing if no row qualifies the partition predicates + // that are part of Update condition + Nil + } else if (dataPredicates.isEmpty) { + // Case 2: Update all the rows from the files that are in the specified partitions + // when the data filter is empty + candidateFiles + .map(f => TouchedFileWithDV(f.path, f, newDeletionVector = null, deletedRows = 0L)) + } else { + // Case 3: Find all the affected files using the user-specified condition + val fileIndex = new TahoeBatchFileIndex( + sparkSession, "update", candidateFiles, deltaLog, tahoeFileIndex.path, txn.snapshot) + + val touchedFilesWithDV = if (shouldWriteDeletionVectors) { + // Case 3.1: Find all the affected files via DV path + val targetDf = DMLWithDeletionVectorsHelper.createTargetDfForScanningForMatches( + sparkSession, + target, + fileIndex) + + // Does the target table already has DVs enabled? If so, we need to read the table + // with deletion vectors. + val mustReadDeletionVectors = DeletionVectorUtils.deletionVectorsReadable(txn.snapshot) + + DMLWithDeletionVectorsHelper.findTouchedFiles( + sparkSession, + txn, + mustReadDeletionVectors, + deltaLog, + targetDf, + fileIndex, + updateCondition, + opName = "UPDATE") + } else { + // Case 3.2: Find all the affected files using the non-DV path + // Keep everything from the resolved target except a new TahoeFileIndex + // that only involves the affected files instead of all files. + val newTarget = DeltaTableUtils.replaceFileIndex(target, fileIndex) + val data = Dataset.ofRows(sparkSession, newTarget) + val incrUpdatedCountExpr = IncrementMetric(TrueLiteral, metrics("numUpdatedRows")) + val pathsToRewrite = + withStatusCode("DELTA", UpdateCommand.FINDING_TOUCHED_FILES_MSG) { + data.filter(new Column(updateCondition)) + .select(input_file_name()) + .filter(new Column(incrUpdatedCountExpr)) + .distinct() + .as[String] + .collect() + } + + // Wrap AddFile into TouchedFileWithDV that has empty DV. + pathsToRewrite + .map(getTouchedFile(deltaLog.dataPath, _, nameToAddFile)) + .map(f => TouchedFileWithDV(f.path, f, newDeletionVector = null, deletedRows = 0L)) + .toSeq + } + // Refresh scan time for Case 3, since we performed scan here. + scanTimeMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime) + touchedFilesWithDV + } + + val totalActions = { + // When DV is on, we first mask removed rows with DVs and generate (remove, add) pairs. + val actionsForExistingFiles = if (shouldWriteDeletionVectors) { + // When there's no data predicate, all matched files are removed. + if (dataPredicates.isEmpty) { + val operationTimestamp = System.currentTimeMillis() + filesToRewrite.map(_.fileLogEntry.removeWithTimestamp(operationTimestamp)) + } else { + // When there is data predicate, we generate (remove, add) pairs. 
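// Standalone sketch of the non-DV "find touched files" idea in Case 3.2: filter rows by the
// update condition and collect the distinct files they came from via input_file_name().
// The scratch directory under /tmp is hypothetical.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, input_file_name}

object TouchedFilesSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("touched-files").getOrCreate()
    import spark.implicits._

    val dir = "/tmp/touched-files-demo"
    Seq((1, "old"), (2, "new"), (3, "old")).toDF("id", "status")
      .repartition(3).write.mode("overwrite").parquet(dir)

    val touched = spark.read.parquet(dir)
      .filter(col("status") === "old")       // the UPDATE condition
      .select(input_file_name())
      .distinct()
      .as[String]
      .collect()

    touched.foreach(println)                 // only the files that contain matching rows
    spark.stop()
  }
}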
+ val filesToRewriteWithDV = filesToRewrite.filter(_.newDeletionVector != null) + val (dvActions, metricMap) = DMLWithDeletionVectorsHelper.processUnmodifiedData( + sparkSession, + filesToRewriteWithDV, + txn.snapshot) + metrics("numUpdatedRows").set(metricMap("numModifiedRows")) + numDeletionVectorsAdded = metricMap("numDeletionVectorsAdded") + numDeletionVectorsRemoved = metricMap("numDeletionVectorsRemoved") + numDeletionVectorsUpdated = metricMap("numDeletionVectorsUpdated") + numTouchedFiles = metricMap("numRemovedFiles") + dvActions + } + } else { + // Without DV we'll leave the job to `rewriteFiles`. + Nil + } + + // When DV is on, we write out updated rows only. The return value will be only `add` actions. + // When DV is off, we write out updated rows plus unmodified rows from the same file, then + // return `add` and `remove` actions. + val rewriteStartNs = System.nanoTime() + val actionsForNewFiles = + withStatusCode("DELTA", UpdateCommand.rewritingFilesMsg(filesToRewrite.size)) { + if (filesToRewrite.nonEmpty) { + rewriteFiles( + sparkSession, + txn, + rootPath = tahoeFileIndex.path, + inputLeafFiles = filesToRewrite.map(_.fileLogEntry), + nameToAddFileMap = nameToAddFile, + condition = updateCondition, + generateRemoveFileActions = !shouldWriteDeletionVectors, + copyUnmodifiedRows = !shouldWriteDeletionVectors) + } else { + Nil + } + } + rewriteTimeMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - rewriteStartNs) + + numTouchedFiles = filesToRewrite.length + val (addActions, removeActions) = actionsForNewFiles.partition(_.isInstanceOf[AddFile]) + numRewrittenFiles = addActions.size + numAddedBytes = addActions.map(_.getFileSize).sum + numRemovedBytes = removeActions.map(_.getFileSize).sum + + actionsForExistingFiles ++ actionsForNewFiles + } + + val changeActions = totalActions.collect { case f: AddCDCFile => f } + numAddedChangeFiles = changeActions.size + changeFileBytes = changeActions.map(_.size).sum + + metrics("numAddedFiles").set(numRewrittenFiles) + metrics("numAddedBytes").set(numAddedBytes) + metrics("numAddedChangeFiles").set(numAddedChangeFiles) + metrics("changeFileBytes").set(changeFileBytes) + metrics("numRemovedFiles").set(numTouchedFiles) + metrics("numRemovedBytes").set(numRemovedBytes) + metrics("executionTimeMs").set(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime)) + metrics("scanTimeMs").set(scanTimeMs) + metrics("rewriteTimeMs").set(rewriteTimeMs) + // In the case where the numUpdatedRows is not captured, we can siphon out the metrics from + // the BasicWriteStatsTracker. This is for case 2 where the update condition contains only + // metadata predicates and so the entire partition is re-written. + val outputRows = txn.getMetric("numOutputRows").map(_.value).getOrElse(-1L) + if (metrics("numUpdatedRows").value == 0 && outputRows != 0 && + metrics("numCopiedRows").value == 0) { + // We know that numTouchedRows = numCopiedRows + numUpdatedRows. + // Since an entire partition was re-written, no rows were copied. + // So numTouchedRows == numUpdateRows + metrics("numUpdatedRows").set(metrics("numTouchedRows").value) + } else { + // This is for case 3 where the update condition contains both metadata and data predicates + // so relevant files will have some rows updated and some rows copied. We don't need to + // consider case 1 here, where no files match the update condition, as we know that + // `totalActions` is empty. 
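// The copied-row arithmetic used below, in isolation: every rewritten row is counted as
// touched, so whatever was touched but not updated must have been copied unchanged.
object UpdateRowAccountingSketch {
  def copiedRows(numTouchedRows: Long, numUpdatedRows: Long): Long =
    numTouchedRows - numUpdatedRows

  def main(args: Array[String]): Unit = {
    // e.g. 1000 rows rewritten from the touched files, 240 of which matched the UPDATE condition
    println(copiedRows(numTouchedRows = 1000L, numUpdatedRows = 240L))  // 760 rows copied unchanged
  }
}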
+ metrics("numCopiedRows").set( + metrics("numTouchedRows").value - metrics("numUpdatedRows").value) + metrics("numDeletionVectorsAdded").set(numDeletionVectorsAdded) + metrics("numDeletionVectorsRemoved").set(numDeletionVectorsRemoved) + metrics("numDeletionVectorsUpdated").set(numDeletionVectorsUpdated) + } + txn.registerSQLMetrics(sparkSession, metrics) + + val finalActions = createSetTransaction(sparkSession, deltaLog).toSeq ++ totalActions + txn.commitIfNeeded(finalActions, DeltaOperations.Update(condition)) + sendDriverMetrics(sparkSession, metrics) + + recordDeltaEvent( + deltaLog, + "delta.dml.update.stats", + data = UpdateMetric( + condition = condition.map(_.sql).getOrElse("true"), + numFilesTotal, + numTouchedFiles, + numRewrittenFiles, + numAddedChangeFiles, + changeFileBytes, + scanTimeMs, + rewriteTimeMs, + numDeletionVectorsAdded, + numDeletionVectorsRemoved, + numDeletionVectorsUpdated) + ) + } + + /** + * Scan all the affected files and write out the updated files. + * + * When CDF is enabled, includes the generation of CDC preimage and postimage columns for + * changed rows. + * + * @return a list of [[FileAction]]s, consisting of newly-written data and CDC files and old + * files that have been removed. + */ + private def rewriteFiles( + spark: SparkSession, + txn: OptimisticTransaction, + rootPath: Path, + inputLeafFiles: Seq[AddFile], + nameToAddFileMap: Map[String, AddFile], + condition: Expression, + generateRemoveFileActions: Boolean, + copyUnmodifiedRows: Boolean): Seq[FileAction] = { + // Number of total rows that we have seen, i.e. are either copying or updating (sum of both). + // This will be used later, along with numUpdatedRows, to determine numCopiedRows. + val incrTouchedCountExpr = IncrementMetric(TrueLiteral, metrics("numTouchedRows")) + + // Containing the map from the relative file path to AddFile + val baseRelation = buildBaseRelation( + spark, txn, "update", rootPath, inputLeafFiles.map(_.path), nameToAddFileMap) + val newTarget = DeltaTableUtils.replaceFileIndex(target, baseRelation.location) + val targetDf = Dataset.ofRows(spark, newTarget) + val targetDfWithEvaluatedCondition = { + val evalDf = targetDf.withColumn(UpdateCommand.CONDITION_COLUMN_NAME, new Column(condition)) + val copyAndUpdateRowsDf = if (copyUnmodifiedRows) { + evalDf + } else { + evalDf.filter(new Column(UpdateCommand.CONDITION_COLUMN_NAME)) + } + copyAndUpdateRowsDf.filter(new Column(incrTouchedCountExpr)) + } + + val updatedDataFrame = UpdateCommand.withUpdatedColumns( + target.output, + updateExpressions, + condition, + targetDfWithEvaluatedCondition, + UpdateCommand.shouldOutputCdc(txn)) + + val addFiles = txn.writeFiles(updatedDataFrame) + + val removeFiles = if (generateRemoveFileActions) { + val operationTimestamp = System.currentTimeMillis() + inputLeafFiles.map(_.removeWithTimestamp(operationTimestamp)) + } else { + Nil + } + + addFiles ++ removeFiles + } + + def shouldWritePersistentDeletionVectors( + spark: SparkSession, txn: OptimisticTransaction): Boolean = { + spark.conf.get(DeltaSQLConf.UPDATE_USE_PERSISTENT_DELETION_VECTORS) && + DeletionVectorUtils.deletionVectorsWritable(txn.snapshot) + } +} + +object UpdateCommand { + val FILE_NAME_COLUMN = "_input_file_name_" + val CONDITION_COLUMN_NAME = "__condition__" + val FINDING_TOUCHED_FILES_MSG: String = "Finding files to rewrite for UPDATE operation" + + def rewritingFilesMsg(numFilesToRewrite: Long): String = + s"Rewriting $numFilesToRewrite files for UPDATE operation" + + /** + * Whether or not CDC is enabled on this 
table and, thus, if we should output CDC data during this + * UPDATE operation. + */ + def shouldOutputCdc(txn: OptimisticTransaction): Boolean = { + DeltaConfigs.CHANGE_DATA_FEED.fromMetaData(txn.metadata) + } + + /** + * Build the new columns. If the condition matches, generate the new value using + * the corresponding UPDATE EXPRESSION; otherwise, keep the original column value. + * + * When CDC is enabled, includes the generation of CDC pre-image and post-image columns for + * changed rows. + * + * @param originalExpressions the original column values + * @param updateExpressions the update transformation to perform on the input DataFrame + * @param dfWithEvaluatedCondition source DataFrame on which we will apply the update expressions + * with an additional column CONDITION_COLUMN_NAME which is the + * true/false value of if the update condition is satisfied + * @param condition update condition + * @param shouldOutputCdc if we should output CDC data during this UPDATE operation. + * @return the updated DataFrame, with extra CDC columns if CDC is enabled + */ + def withUpdatedColumns( + originalExpressions: Seq[Attribute], + updateExpressions: Seq[Expression], + condition: Expression, + dfWithEvaluatedCondition: DataFrame, + shouldOutputCdc: Boolean): DataFrame = { + val resultDf = if (shouldOutputCdc) { + val namedUpdateCols = updateExpressions.zip(originalExpressions).map { + case (expr, targetCol) => new Column(expr).as(targetCol.name, targetCol.metadata) + } + + // Build an array of output rows to be unpacked later. If the condition is matched, we + // generate CDC pre and postimages in addition to the final output row; if the condition + // isn't matched, we just generate a rewritten no-op row without any CDC events. + val preimageCols = originalExpressions.map(new Column(_)) :+ + lit(CDC_TYPE_UPDATE_PREIMAGE).as(CDC_TYPE_COLUMN_NAME) + val postimageCols = namedUpdateCols :+ + lit(CDC_TYPE_UPDATE_POSTIMAGE).as(CDC_TYPE_COLUMN_NAME) + val notCdcCol = new Column(CDC_TYPE_NOT_CDC).as(CDC_TYPE_COLUMN_NAME) + val updatedDataCols = namedUpdateCols :+ notCdcCol + val noopRewriteCols = originalExpressions.map(new Column(_)) :+ notCdcCol + val packedUpdates = array( + struct(preimageCols: _*), + struct(postimageCols: _*), + struct(updatedDataCols: _*) + ).expr + + val packedData = if (condition == Literal.TrueLiteral) { + packedUpdates + } else { + If( + UnresolvedAttribute(CONDITION_COLUMN_NAME), + packedUpdates, // if it should be updated, then use `packagedUpdates` + array(struct(noopRewriteCols: _*)).expr) // else, this is a noop rewrite + } + + // Explode the packed array, and project back out the final data columns. + val finalColumns = (originalExpressions :+ UnresolvedAttribute(CDC_TYPE_COLUMN_NAME)).map { + a => col(s"packedData.`${a.name}`").as(a.name, a.metadata) + } + dfWithEvaluatedCondition + .select(explode(new Column(packedData)).as("packedData")) + .select(finalColumns: _*) + } else { + val finalCols = updateExpressions.zip(originalExpressions).map { case (update, original) => + val updated = if (condition == Literal.TrueLiteral) { + update + } else { + If(UnresolvedAttribute(CONDITION_COLUMN_NAME), update, original) + } + new Column(updated).as(original.name, original.metadata) + } + + dfWithEvaluatedCondition.select(finalCols: _*) + } + + resultDf.drop(CONDITION_COLUMN_NAME) + } +} + +/** + * Used to report details about update. 
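// The CDC branch of withUpdatedColumns relies on a pack-then-explode trick: each matched row
// yields an array of three structs (pre-image, post-image, final data row) which explode turns
// into three output rows. Simplified standalone version with invented column names; Delta's
// CDC_TYPE_NOT_CDC marker is modeled here as a null change type.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{array, col, explode, lit, struct}

object CdcPackingSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("cdc-packing").getOrCreate()
    import spark.implicits._

    val matchedRows = Seq((1, "old", "new")).toDF("id", "value", "updatedValue")

    val packed = matchedRows.select(explode(array(
      struct(col("id"), col("value"), lit("update_preimage").as("_change_type")),
      struct(col("id"), col("updatedValue").as("value"), lit("update_postimage").as("_change_type")),
      struct(col("id"), col("updatedValue").as("value"), lit(null).cast("string").as("_change_type"))
    )).as("packedData"))
      .select(col("packedData.id"), col("packedData.value"), col("packedData._change_type"))

    packed.show()  // one matched input row fans out into preimage, postimage, and the data row
    spark.stop()
  }
}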
+ * + * @param condition: what was the update condition + * @param numFilesTotal: how big is the table + * @param numTouchedFiles: how many files did we touch + * @param numRewrittenFiles: how many files had to be rewritten + * @param numAddedChangeFiles: how many change files were generated + * @param changeFileBytes: total size of change files generated + * @param scanTimeMs: how long did finding take + * @param rewriteTimeMs: how long did rewriting take + * + * @note All the time units are milliseconds. + */ +case class UpdateMetric( + condition: String, + numFilesTotal: Long, + numTouchedFiles: Long, + numRewrittenFiles: Long, + numAddedChangeFiles: Long, + changeFileBytes: Long, + scanTimeMs: Long, + rewriteTimeMs: Long, + numDeletionVectorsAdded: Long, + numDeletionVectorsRemoved: Long, + numDeletionVectorsUpdated: Long +) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/VacuumCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/VacuumCommand.scala new file mode 100644 index 00000000000..45cc79cd782 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/VacuumCommand.scala @@ -0,0 +1,603 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import java.net.URI +import java.util.Date +import java.util.concurrent.TimeUnit + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.{AddFile, FileAction, RemoveFile} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.DeltaFileOperations +import org.apache.spark.sql.delta.util.DeltaFileOperations.tryDeleteNonRecursive +import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.sql.{Column, DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.execution.metric.SQLMetrics.createMetric +import org.apache.spark.sql.functions.{col, count, sum} +import org.apache.spark.util.{Clock, SerializableConfiguration, SystemClock} + +/** + * Vacuums the table by clearing all untracked files and folders within this table. + * First lists all the files and directories in the table, and gets the relative paths with + * respect to the base of the table. Then it gets the list of all tracked files for this table, + * which may or may not be within the table base path, and gets the relative paths of + * all the tracked files with respect to the base of the table. Files outside of the table path + * will be ignored. Then we take a diff of the files and delete directories that were already empty, + * and all files that are within the table that are no longer tracked. 
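// Stripped of listing, retention, and directory handling, the core of the algorithm described
// above is a set difference: paths physically present under the table root minus paths the
// Delta log still tracks. Toy model only; paths are invented and the real command also
// respects the retention window and keeps non-empty directories.
object VacuumDiffSketch {
  def untrackedPaths(listedRelativePaths: Set[String], trackedRelativePaths: Set[String]): Set[String] =
    listedRelativePaths.diff(trackedRelativePaths)

  def main(args: Array[String]): Unit = {
    val listed = Set(
      "date=2024-01-01/part-0001.parquet",
      "date=2024-01-01/part-0002.parquet",
      "_tmp/leftover.parquet")
    val tracked = Set("date=2024-01-01/part-0001.parquet")
    println(untrackedPaths(listed, tracked))  // the other two paths are deletion candidates
  }
}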
+ */ +object VacuumCommand extends VacuumCommandImpl with Serializable { + + case class FileNameAndSize(path: String, length: Long) + /** + * Additional check on retention duration to prevent people from shooting themselves in the foot. + */ + protected def checkRetentionPeriodSafety( + spark: SparkSession, + retentionMs: Option[Long], + configuredRetention: Long): Unit = { + require(retentionMs.forall(_ >= 0), "Retention for Vacuum can't be less than 0.") + val checkEnabled = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED) + val retentionSafe = retentionMs.forall(_ >= configuredRetention) + var configuredRetentionHours = TimeUnit.MILLISECONDS.toHours(configuredRetention) + if (TimeUnit.HOURS.toMillis(configuredRetentionHours) < configuredRetention) { + configuredRetentionHours += 1 + } + require(!checkEnabled || retentionSafe, + s"""Are you sure you would like to vacuum files with such a low retention period? If you have + |writers that are currently writing to this table, there is a risk that you may corrupt the + |state of your Delta table. + | + |If you are certain that there are no operations being performed on this table, such as + |insert/upsert/delete/optimize, then you may turn off this check by setting: + |spark.databricks.delta.retentionDurationCheck.enabled = false + | + |If you are not sure, please use a value not less than "$configuredRetentionHours hours". + """.stripMargin) + } + + /** + * Helper to compute all valid files based on basePath and Snapshot provided. + */ + private def getValidFilesFromSnapshot( + spark: SparkSession, + basePath: String, + snapshot: Snapshot, + retentionMillis: Option[Long], + hadoopConf: Broadcast[SerializableConfiguration], + clock: Clock, + checkAbsolutePathOnly: Boolean): DataFrame = { + import org.apache.spark.sql.delta.implicits._ + require(snapshot.version >= 0, "No state defined for this table. Is this really " + + "a Delta table? Refusing to garbage collect.") + + val snapshotTombstoneRetentionMillis = DeltaLog.tombstoneRetentionMillis(snapshot.metadata) + checkRetentionPeriodSafety(spark, retentionMillis, snapshotTombstoneRetentionMillis) + val deleteBeforeTimestamp = retentionMillis match { + case Some(millis) => clock.getTimeMillis() - millis + case _ => snapshot.minFileRetentionTimestamp + } + val relativizeIgnoreError = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_RELATIVIZE_IGNORE_ERROR) + + snapshot.stateDS.mapPartitions { actions => + val reservoirBase = new Path(basePath) + val fs = reservoirBase.getFileSystem(hadoopConf.value.value) + actions.flatMap { + _.unwrap match { + case fa: FileAction if checkAbsolutePathOnly && !fa.path.contains(basePath) => + Nil + case tombstone: RemoveFile if tombstone.delTimestamp < deleteBeforeTimestamp => + Nil + case fa: FileAction => + getValidRelativePathsAndSubdirs( + fa, + fs, + reservoirBase, + relativizeIgnoreError + ) + case _ => Nil + } + } + }.toDF("path") + } + + /** + * Clears all untracked files and folders within this table. First lists all the files and + * directories in the table, and gets the relative paths with respect to the base of the + * table. Then it gets the list of all tracked files for this table, which may or may not + * be within the table base path, and gets the relative paths of all the tracked files with + * respect to the base of the table. Files outside of the table path will be ignored. 
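// The retention safety check below rejects a user-supplied retention shorter than the table's
// configured tombstone retention (unless the check is disabled) and rounds the configured value
// up to whole hours for the message it prints. The same two pieces in isolation; the 7-day
// default used in main is just an example value.
import java.util.concurrent.TimeUnit

object RetentionCheckSketch {
  def isRetentionSafe(retentionMs: Option[Long], configuredRetentionMs: Long): Boolean =
    retentionMs.forall(_ >= configuredRetentionMs)

  def configuredRetentionHoursRoundedUp(configuredRetentionMs: Long): Long = {
    var hours = TimeUnit.MILLISECONDS.toHours(configuredRetentionMs)
    if (TimeUnit.HOURS.toMillis(hours) < configuredRetentionMs) hours += 1
    hours
  }

  def main(args: Array[String]): Unit = {
    val configured = TimeUnit.DAYS.toMillis(7)
    println(isRetentionSafe(Some(TimeUnit.HOURS.toMillis(1)), configured))  // false: too aggressive
    println(isRetentionSafe(None, configured))                              // true: use the default
    println(configuredRetentionHoursRoundedUp(configured))                  // 168
  }
}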
+ * Then we take a diff of the files and delete directories that were already empty, and all files + * that are within the table that are no longer tracked. + * + * @param dryRun If set to true, no files will be deleted. Instead, we will list all files and + * directories that will be cleared. + * @param retentionHours An optional parameter to override the default Delta tombstone retention + * period + * @return A Dataset containing the paths of the files/folders to delete in dryRun mode. Otherwise + * returns the base path of the table. + */ + def gc( + spark: SparkSession, + deltaLog: DeltaLog, + dryRun: Boolean = true, + retentionHours: Option[Double] = None, + clock: Clock = new SystemClock): DataFrame = { + recordDeltaOperation(deltaLog, "delta.gc") { + + val vacuumStartTime = System.currentTimeMillis() + val path = deltaLog.dataPath + val deltaHadoopConf = deltaLog.newDeltaHadoopConf() + val fs = path.getFileSystem(deltaHadoopConf) + + import org.apache.spark.sql.delta.implicits._ + + val snapshot = deltaLog.update() + deltaLog.protocolWrite(snapshot.protocol) + + val snapshotTombstoneRetentionMillis = DeltaLog.tombstoneRetentionMillis(snapshot.metadata) + val retentionMillis = retentionHours.map(h => TimeUnit.HOURS.toMillis(math.round(h))) + val deleteBeforeTimestamp = retentionMillis match { + case Some(millis) => clock.getTimeMillis() - millis + case _ => snapshot.minFileRetentionTimestamp + } + logInfo(s"Starting garbage collection (dryRun = $dryRun) of untracked files older than " + + s"${new Date(deleteBeforeTimestamp).toGMTString} in $path") + val hadoopConf = spark.sparkContext.broadcast( + new SerializableConfiguration(deltaHadoopConf)) + val basePath = fs.makeQualified(path).toString + val parallelDeleteEnabled = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_PARALLEL_DELETE_ENABLED) + val parallelDeletePartitions = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_PARALLEL_DELETE_PARALLELISM) + .getOrElse(spark.sessionState.conf.numShufflePartitions) + val startTimeToIdentifyEligibleFiles = System.currentTimeMillis() + + val validFiles = + getValidFilesFromSnapshot( + spark, + basePath, + snapshot, + retentionMillis, + hadoopConf, + clock, + checkAbsolutePathOnly = false) + + val partitionColumns = snapshot.metadata.partitionSchema.fieldNames + val parallelism = spark.sessionState.conf.parallelPartitionDiscoveryParallelism + + val allFilesAndDirs = DeltaFileOperations.recursiveListDirs( + spark, + Seq(basePath), + hadoopConf, + hiddenDirNameFilter = DeltaTableUtils.isHiddenDirectory(partitionColumns, _), + hiddenFileNameFilter = DeltaTableUtils.isHiddenDirectory(partitionColumns, _), + fileListingParallelism = Option(parallelism) + ) + .groupByKey(_.path) + .mapGroups { (k, v) => + val duplicates = v.toSeq + // of all the duplicates we can return the newest file. + duplicates.maxBy(_.modificationTime) + } + + recordFrameProfile("Delta", "VacuumCommand.gc") { + try { + allFilesAndDirs.cache() + + implicit val fileNameAndSizeEncoder = + org.apache.spark.sql.Encoders.product[FileNameAndSize] + + val dirCounts = allFilesAndDirs.where(col("isDir")).count() + 1 // +1 for the base path + val filesAndDirsPresentBeforeDelete = allFilesAndDirs.count() + + // The logic below is as follows: + // 1. We take all the files and directories listed in our reservoir + // 2. We filter all files older than our tombstone retention period and directories + // 3. We get the subdirectories of all files so that we can find non-empty directories + // 4. 
We groupBy each path, and count to get how many files are in each sub-directory + // 5. We subtract all the valid files and tombstones in our state + // 6. We filter all paths with a count of 1, which will correspond to files not in the + // state, and empty directories. We can safely delete all of these + val diff = allFilesAndDirs + .where(col("modificationTime") < deleteBeforeTimestamp || col("isDir")) + .mapPartitions { fileStatusIterator => + val reservoirBase = new Path(basePath) + val fs = reservoirBase.getFileSystem(hadoopConf.value.value) + fileStatusIterator.flatMap { fileStatus => + if (fileStatus.isDir) { + Iterator.single(FileNameAndSize( + relativize(fileStatus.getHadoopPath, fs, reservoirBase, isDir = true), 0L)) + } else { + val dirs = getAllSubdirs(basePath, fileStatus.path, fs) + val dirsWithSlash = dirs.map { p => + val relativizedPath = relativize(new Path(p), fs, reservoirBase, isDir = true) + FileNameAndSize(relativizedPath, 0L) + } + dirsWithSlash ++ Iterator( + FileNameAndSize(relativize( + fileStatus.getHadoopPath, fs, reservoirBase, isDir = false), + fileStatus.length)) + } + } + }.groupBy(col("path")).agg(count(new Column("*")).as("count"), + sum("length").as("length")) + .join(validFiles, Seq("path"), "leftanti") + .where(col("count") === 1) + + + val sizeOfDataToDeleteRow = diff.agg(sum("length").cast("long")).first() + val sizeOfDataToDelete = if (sizeOfDataToDeleteRow.isNullAt(0)) { + 0L + } else { + sizeOfDataToDeleteRow.getLong(0) + } + + val diffFiles = diff + .select(col("path")) + .as[String] + .map { relativePath => + assert(!stringToPath(relativePath).isAbsolute, + "Shouldn't have any absolute paths for deletion here.") + pathToString(DeltaFileOperations.absolutePath(basePath, relativePath)) + } + val timeTakenToIdentifyEligibleFiles = + System.currentTimeMillis() - startTimeToIdentifyEligibleFiles + + + val numFiles = diffFiles.count() + if (dryRun) { + val stats = DeltaVacuumStats( + isDryRun = true, + specifiedRetentionMillis = retentionMillis, + defaultRetentionMillis = snapshotTombstoneRetentionMillis, + minRetainedTimestamp = deleteBeforeTimestamp, + dirsPresentBeforeDelete = dirCounts, + filesAndDirsPresentBeforeDelete = filesAndDirsPresentBeforeDelete, + objectsDeleted = numFiles, + sizeOfDataToDelete = sizeOfDataToDelete, + timeTakenToIdentifyEligibleFiles = timeTakenToIdentifyEligibleFiles, + timeTakenForDelete = 0L, + vacuumStartTime = vacuumStartTime, + vacuumEndTime = System.currentTimeMillis, + numPartitionColumns = partitionColumns.size + ) + + recordDeltaEvent(deltaLog, "delta.gc.stats", data = stats) + logInfo(s"Found $numFiles files ($sizeOfDataToDelete bytes) and directories in " + + s"a total of $dirCounts directories that are safe to delete. 
Vacuum stats: $stats") + + return diffFiles.map(f => stringToPath(f).toString).toDF("path") + } + logVacuumStart( + spark, + deltaLog, + path, + diffFiles, + sizeOfDataToDelete, + retentionMillis, + snapshotTombstoneRetentionMillis) + + val deleteStartTime = System.currentTimeMillis() + val filesDeleted = try { + delete(diffFiles, spark, basePath, + hadoopConf, parallelDeleteEnabled, parallelDeletePartitions) + } catch { + case t: Throwable => + logVacuumEnd(deltaLog, spark, path) + throw t + } + val timeTakenForDelete = System.currentTimeMillis() - deleteStartTime + val stats = DeltaVacuumStats( + isDryRun = false, + specifiedRetentionMillis = retentionMillis, + defaultRetentionMillis = snapshotTombstoneRetentionMillis, + minRetainedTimestamp = deleteBeforeTimestamp, + dirsPresentBeforeDelete = dirCounts, + filesAndDirsPresentBeforeDelete = filesAndDirsPresentBeforeDelete, + objectsDeleted = filesDeleted, + sizeOfDataToDelete = sizeOfDataToDelete, + timeTakenToIdentifyEligibleFiles = timeTakenToIdentifyEligibleFiles, + timeTakenForDelete = timeTakenForDelete, + vacuumStartTime = vacuumStartTime, + vacuumEndTime = System.currentTimeMillis, + numPartitionColumns = partitionColumns.size) + recordDeltaEvent(deltaLog, "delta.gc.stats", data = stats) + logVacuumEnd(deltaLog, spark, path, Some(filesDeleted), Some(dirCounts)) + logInfo(s"Deleted $filesDeleted files ($sizeOfDataToDelete bytes) and directories in " + + s"a total of $dirCounts directories. Vacuum stats: $stats") + + + spark.createDataset(Seq(basePath)).toDF("path") + } finally { + allFilesAndDirs.unpersist() + } + } + } + } +} + +trait VacuumCommandImpl extends DeltaCommand { + + private val supportedFsForLogging = Seq( + "wasbs", "wasbss", "abfs", "abfss", "adl", "gs", "file", "hdfs" + ) + + /** + * Returns whether we should record vacuum metrics in the delta log. + */ + private def shouldLogVacuum( + spark: SparkSession, + deltaLog: DeltaLog, + hadoopConf: Configuration, + path: Path): Boolean = { + val logVacuumConf = spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_LOGGING_ENABLED) + + if (logVacuumConf.nonEmpty) { + return logVacuumConf.get + } + + val logStore = deltaLog.store + + try { + val rawResolvedUri: URI = logStore.resolvePathOnPhysicalStorage(path, hadoopConf).toUri + val scheme = rawResolvedUri.getScheme + supportedFsForLogging.contains(scheme) + } catch { + case _: UnsupportedOperationException => + logWarning("Vacuum event logging" + + " not enabled on this file system because we cannot detect your cloud storage type.") + false + } + } + + /** + * Record Vacuum specific metrics in the commit log at the START of vacuum. + * + * @param spark - spark session + * @param deltaLog - DeltaLog of the table + * @param path - the (data) path to the root of the table + * @param diff - the list of paths (files, directories) that are safe to delete + * @param sizeOfDataToDelete - the amount of data (bytes) to be deleted + * @param specifiedRetentionMillis - the optional override retention period (millis) to keep + * logically removed files before deleting them + * @param defaultRetentionMillis - the default retention period (millis) + */ + protected def logVacuumStart( + spark: SparkSession, + deltaLog: DeltaLog, + path: Path, + diff: Dataset[String], + sizeOfDataToDelete: Long, + specifiedRetentionMillis: Option[Long], + defaultRetentionMillis: Long): Unit = { + logInfo(s"Deleting untracked files and empty directories in $path. 
The amount of data to be " + + s"deleted is $sizeOfDataToDelete (in bytes)") + + // We perform an empty commit in order to record information about the Vacuum + if (shouldLogVacuum(spark, deltaLog, deltaLog.newDeltaHadoopConf(), path)) { + val checkEnabled = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED) + val txn = deltaLog.startTransaction() + val metrics = Map[String, SQLMetric]( + "numFilesToDelete" -> createMetric(spark.sparkContext, "number of files to deleted"), + "sizeOfDataToDelete" -> createMetric(spark.sparkContext, + "The total amount of data to be deleted in bytes") + ) + metrics("numFilesToDelete").set(diff.count()) + metrics("sizeOfDataToDelete").set(sizeOfDataToDelete) + txn.registerSQLMetrics(spark, metrics) + txn.commit(actions = Seq(), DeltaOperations.VacuumStart( + checkEnabled, + specifiedRetentionMillis, + defaultRetentionMillis + )) + } + } + + /** + * Record Vacuum specific metrics in the commit log at the END of vacuum. + * + * @param deltaLog - DeltaLog of the table + * @param spark - spark session + * @param path - the (data) path to the root of the table + * @param filesDeleted - if the vacuum completed this will contain the number of files deleted. + * if the vacuum failed, this will be None. + * @param dirCounts - if the vacuum completed this will contain the number of directories + * vacuumed. if the vacuum failed, this will be None. + */ + protected def logVacuumEnd( + deltaLog: DeltaLog, + spark: SparkSession, + path: Path, + filesDeleted: Option[Long] = None, + dirCounts: Option[Long] = None): Unit = { + if (shouldLogVacuum(spark, deltaLog, deltaLog.newDeltaHadoopConf(), path)) { + val txn = deltaLog.startTransaction() + val status = if (filesDeleted.isEmpty && dirCounts.isEmpty) { "FAILED" } else { "COMPLETED" } + if (filesDeleted.nonEmpty && dirCounts.nonEmpty) { + val metrics = Map[String, SQLMetric]( + "numDeletedFiles" -> createMetric(spark.sparkContext, "number of files deleted."), + "numVacuumedDirectories" -> + createMetric(spark.sparkContext, "num of directories vacuumed."), + "status" -> createMetric(spark.sparkContext, "status of vacuum") + ) + metrics("numDeletedFiles").set(filesDeleted.get) + metrics("numVacuumedDirectories").set(dirCounts.get) + txn.registerSQLMetrics(spark, metrics) + } + txn.commit(actions = Seq(), DeltaOperations.VacuumEnd( + status + )) + } + + if (filesDeleted.nonEmpty) { + logConsole(s"Deleted ${filesDeleted.get} files and directories in a total " + + s"of ${dirCounts.get} directories.") + } + } + + /** + * Attempts to relativize the `path` with respect to the `reservoirBase` and converts the path to + * a string. + */ + protected def relativize( + path: Path, + fs: FileSystem, + reservoirBase: Path, + isDir: Boolean): String = { + pathToString(DeltaFileOperations.tryRelativizePath(fs, reservoirBase, path)) + } + + /** + * Wrapper function for DeltaFileOperations.getAllSubDirectories + * returns all subdirectories that `file` has with respect to `base`. + */ + protected def getAllSubdirs(base: String, file: String, fs: FileSystem): Iterator[String] = { + DeltaFileOperations.getAllSubDirectories(base, file)._1 + } + + /** + * Attempts to delete the list of candidate files. Returns the number of files deleted. 
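Editor's note, a usage illustration of the dry-run path of gc above (not part of the patch): it assumes an existing SparkSession named spark, a made-up table path, and that gc lives on the VacuumCommand object in org.apache.spark.sql.delta.commands, as the surrounding file suggests.

// Hypothetical sketch: preview what a real VACUUM run would delete, without deleting anything.
import org.apache.spark.sql.delta.DeltaLog
import org.apache.spark.sql.delta.commands.VacuumCommand

val deltaLog = DeltaLog.forTable(spark, "/tmp/delta/events")  // path is illustrative
val candidates = VacuumCommand.gc(spark, deltaLog, dryRun = true, retentionHours = Some(168.0))
candidates.show(truncate = false)  // one row per file/directory eligible for deletion

With dryRun = false the same call deletes those objects and returns a single-row DataFrame containing the table's base path, as documented above.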
+ */ + protected def delete( + diff: Dataset[String], + spark: SparkSession, + basePath: String, + hadoopConf: Broadcast[SerializableConfiguration], + parallel: Boolean, + parallelPartitions: Int): Long = { + import org.apache.spark.sql.delta.implicits._ + + if (parallel) { + diff.repartition(parallelPartitions).mapPartitions { files => + val fs = new Path(basePath).getFileSystem(hadoopConf.value.value) + val filesDeletedPerPartition = + files.map(p => stringToPath(p)).count(f => tryDeleteNonRecursive(fs, f)) + Iterator(filesDeletedPerPartition) + }.collect().sum + } else { + val fs = new Path(basePath).getFileSystem(hadoopConf.value.value) + val fileResultSet = diff.toLocalIterator().asScala + fileResultSet.map(p => stringToPath(p)).count(f => tryDeleteNonRecursive(fs, f)) + } + } + + // scalastyle:off pathfromuri + protected def stringToPath(path: String): Path = new Path(new URI(path)) + // scalastyle:on pathfromuri + + protected def pathToString(path: Path): String = path.toUri.toString + + /** Returns the relative path of a file action or None if the file lives outside of the table. */ + protected def getActionRelativePath( + action: FileAction, + fs: FileSystem, + basePath: Path, + relativizeIgnoreError: Boolean): Option[String] = { + val filePath = stringToPath(action.path) + if (filePath.isAbsolute) { + val maybeRelative = + DeltaFileOperations.tryRelativizePath(fs, basePath, filePath, relativizeIgnoreError) + if (maybeRelative.isAbsolute) { + // This file lives outside the directory of the table. + None + } else { + Some(pathToString(maybeRelative)) + } + } else { + Some(pathToString(filePath)) + } + } + + + /** + * Returns the relative paths of all files and subdirectories for this action that must be + * retained during GC. + */ + protected def getValidRelativePathsAndSubdirs( + action: FileAction, + fs: FileSystem, + basePath: Path, + relativizeIgnoreError: Boolean + ): Seq[String] = { + val paths = getActionRelativePath(action, fs, basePath, relativizeIgnoreError) + .map { + relativePath => + Seq(relativePath) ++ getAllSubdirs("/", relativePath, fs) + }.getOrElse(Seq.empty) + + val deletionVectorPath = + getDeletionVectorRelativePath(action).map(pathToString) + + paths ++ deletionVectorPath.toSeq + } + + /** + * Returns the path of the on-disk deletion vector if it is stored relative to the + * `basePath` otherwise `None`. + */ + protected def getDeletionVectorRelativePath(action: FileAction): Option[Path] = { + val dv = action match { + case a: AddFile if a.deletionVector != null => + Some(a.deletionVector) + case r: RemoveFile if r.deletionVector != null => + Some(r.deletionVector) + case _ => None + } + + dv match { + case Some(dv) if dv.isOnDisk => + if (dv.isRelative) { + // We actually want a relative path here. + Some(dv.absolutePath(new Path("."))) + } else { + assert(dv.isAbsolute) + // This is never going to be a path relative to `basePath` for DVs. 
+ None + } + case None => None + } + } +} + +case class DeltaVacuumStats( + isDryRun: Boolean, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + specifiedRetentionMillis: Option[Long], + defaultRetentionMillis: Long, + minRetainedTimestamp: Long, + dirsPresentBeforeDelete: Long, + filesAndDirsPresentBeforeDelete: Long, + objectsDeleted: Long, + sizeOfDataToDelete: Long, + timeTakenToIdentifyEligibleFiles: Long, + timeTakenForDelete: Long, + vacuumStartTime: Long, + vacuumEndTime: Long, + numPartitionColumns: Long +) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/WriteIntoDelta.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/WriteIntoDelta.scala new file mode 100644 index 00000000000..eb70e96c6fa --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/WriteIntoDelta.scala @@ -0,0 +1,366 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.constraints.Constraint +import org.apache.spark.sql.delta.constraints.Constraints.Check +import org.apache.spark.sql.delta.constraints.Invariants.ArbitraryExpression +import org.apache.spark.sql.delta.schema.{ImplicitMetadataOperation, InvariantViolationException, SchemaUtils} +import org.apache.spark.sql.delta.skipping.clustering.ClusteredTableUtils +import org.apache.spark.sql.delta.skipping.clustering.temp.ClusterBySpec +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Expression, Literal} +import org.apache.spark.sql.catalyst.plans.logical.DeleteFromTable +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.functions.{array, col, explode, lit, struct} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{StringType, StructType} + +/** + * Used to write a [[DataFrame]] into a delta table. + * + * New Table Semantics + * - The schema of the [[DataFrame]] is used to initialize the table. + * - The partition columns will be used to partition the table. + * + * Existing Table Semantics + * - The save mode will control how existing data is handled (i.e. overwrite, append, etc) + * - The schema of the DataFrame will be checked and if there are new columns present + * they will be added to the tables schema. Conflicting columns (i.e. 
a INT, and a STRING) + * will result in an exception + * - The partition columns, if present are validated against the existing metadata. If not + * present, then the partitioning of the table is respected. + * + * In combination with `Overwrite`, a `replaceWhere` option can be used to transactionally + * replace data that matches a predicate. + * + * In combination with `Overwrite` dynamic partition overwrite mode (option `partitionOverwriteMode` + * set to `dynamic`, or in spark conf `spark.sql.sources.partitionOverwriteMode` set to `dynamic`) + * is also supported. + * + * Dynamic partition overwrite mode conflicts with `replaceWhere`: + * - If a `replaceWhere` option is provided, and dynamic partition overwrite mode is enabled in + * the DataFrameWriter options, an error will be thrown. + * - If a `replaceWhere` option is provided, and dynamic partition overwrite mode is enabled in + * the spark conf, data will be overwritten according to the `replaceWhere` expression + * + * @param catalogTableOpt Should explicitly be set when table is accessed from catalog + * @param schemaInCatalog The schema created in Catalog. We will use this schema to update metadata + * when it is set (in CTAS code path), and otherwise use schema from `data`. + */ +case class WriteIntoDelta( + override val deltaLog: DeltaLog, + mode: SaveMode, + options: DeltaOptions, + partitionColumns: Seq[String], + override val configuration: Map[String, String], + override val data: DataFrame, + val catalogTableOpt: Option[CatalogTable] = None, + schemaInCatalog: Option[StructType] = None + ) + extends LeafRunnableCommand + with ImplicitMetadataOperation + with DeltaCommand + with WriteIntoDeltaLike { + + override protected val canMergeSchema: Boolean = options.canMergeSchema + + private def isOverwriteOperation: Boolean = mode == SaveMode.Overwrite + + override protected val canOverwriteSchema: Boolean = + options.canOverwriteSchema && isOverwriteOperation && options.replaceWhere.isEmpty + + + override def run(sparkSession: SparkSession): Seq[Row] = { + deltaLog.withNewTransaction(catalogTableOpt) { txn => + if (hasBeenExecuted(txn, sparkSession, Some(options))) { + return Seq.empty + } + + val actions = write( + txn, sparkSession + ) + val operation = DeltaOperations.Write( + mode, Option(partitionColumns), + options.replaceWhere, options.userMetadata + ) + txn.commitIfNeeded(actions, operation) + } + Seq.empty + } + + override def write( + txn: OptimisticTransaction, + sparkSession: SparkSession, + clusterBySpecOpt: Option[ClusterBySpec] = None, + isTableReplace: Boolean = false): Seq[Action] = { + import org.apache.spark.sql.delta.implicits._ + if (txn.readVersion > -1) { + // This table already exists, check if the insert is valid. + if (mode == SaveMode.ErrorIfExists) { + throw DeltaErrors.pathAlreadyExistsException(deltaLog.dataPath) + } else if (mode == SaveMode.Ignore) { + return Nil + } else if (mode == SaveMode.Overwrite) { + DeltaLog.assertRemovable(txn.snapshot) + } + } + val isReplaceWhere = mode == SaveMode.Overwrite && options.replaceWhere.nonEmpty + // Validate that the preview is enabled if we are writing to a clustered table. + ClusteredTableUtils.validatePreviewEnabled(txn.snapshot.protocol) + val finalClusterBySpecOpt = + if (mode == SaveMode.Append || isReplaceWhere) { + clusterBySpecOpt.foreach { clusterBySpec => + ClusteredTableUtils.validateClusteringColumnsInSnapshot(txn.snapshot, clusterBySpec) + } + // Append mode and replaceWhere cannot update the clustering columns. 
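Editor's note, a writer-side illustration of the Overwrite plus replaceWhere semantics described in the class comment above (a hedged sketch, not part of the patch; df, the date column, and the table path are made up):

// Transactionally replace only the rows matching the predicate; all other data is untouched.
df.write
  .format("delta")
  .mode("overwrite")
  .option("replaceWhere", "date >= '2024-01-01' AND date < '2024-02-01'")
  .save("/tmp/delta/events")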
+ None + } else { + clusterBySpecOpt + } + val rearrangeOnly = options.rearrangeOnly + val charPadding = sparkSession.conf.get(SQLConf.READ_SIDE_CHAR_PADDING.key, "false") == "true" + val charAsVarchar = sparkSession.conf.get(SQLConf.CHAR_AS_VARCHAR) + val dataSchema = if (!charAsVarchar && charPadding) { + data.schema + } else { + // If READ_SIDE_CHAR_PADDING is not enabled, CHAR type is the same as VARCHAR. The change + // below makes DESC TABLE to show VARCHAR instead of CHAR. + CharVarcharUtils.replaceCharVarcharWithStringInSchema( + replaceCharWithVarchar(CharVarcharUtils.getRawSchema(data.schema)).asInstanceOf[StructType]) + } + val finalSchema = schemaInCatalog.getOrElse(dataSchema) + // We need to cache this canUpdateMetadata before calling updateMetadata, as that will update + // it to true. This is unavoidable as getNewDomainMetadata uses information generated by + // updateMetadata, so it needs to be run after that. + val canUpdateMetadata = txn.canUpdateMetadata + updateMetadata(data.sparkSession, txn, finalSchema, + partitionColumns, configuration, isOverwriteOperation, rearrangeOnly + ) + val newDomainMetadata = getNewDomainMetadata( + txn, + canUpdateMetadata, + isReplacingTable = isOverwriteOperation && options.replaceWhere.isEmpty, + finalClusterBySpecOpt + ) + + val replaceOnDataColsEnabled = + sparkSession.conf.get(DeltaSQLConf.REPLACEWHERE_DATACOLUMNS_ENABLED) + + val useDynamicPartitionOverwriteMode = { + if (txn.metadata.partitionColumns.isEmpty) { + // We ignore dynamic partition overwrite mode for non-partitioned tables + false + } else if (isTableReplace) { + // A replace table command should always replace the table, not just some partitions. + false + } else if (options.replaceWhere.nonEmpty) { + if (options.partitionOverwriteModeInOptions && options.isDynamicPartitionOverwriteMode) { + // replaceWhere and dynamic partition overwrite conflict because they both specify which + // data to overwrite. We throw an error when: + // 1. replaceWhere is provided in a DataFrameWriter option + // 2. 
partitionOverwriteMode is set to "dynamic" in a DataFrameWriter option + throw DeltaErrors.replaceWhereUsedWithDynamicPartitionOverwrite() + } else { + // If replaceWhere is provided, we do not use dynamic partition overwrite, even if it's + // enabled in the spark session configuration, since generally query-specific configs take + // precedence over session configs + false + } + } else { + options.isDynamicPartitionOverwriteMode + } + } + + if (useDynamicPartitionOverwriteMode && canOverwriteSchema) { + throw DeltaErrors.overwriteSchemaUsedWithDynamicPartitionOverwrite() + } + + // Validate partition predicates + var containsDataFilters = false + val replaceWhere = options.replaceWhere.flatMap { replace => + val parsed = parsePredicates(sparkSession, replace) + if (replaceOnDataColsEnabled) { + // Helps split the predicate into separate expressions + val (metadataPredicates, dataFilters) = DeltaTableUtils.splitMetadataAndDataPredicates( + parsed.head, txn.metadata.partitionColumns, sparkSession) + if (rearrangeOnly && dataFilters.nonEmpty) { + throw DeltaErrors.replaceWhereWithFilterDataChangeUnset(dataFilters.mkString(",")) + } + containsDataFilters = dataFilters.nonEmpty + Some(metadataPredicates ++ dataFilters) + } else if (mode == SaveMode.Overwrite) { + verifyPartitionPredicates(sparkSession, txn.metadata.partitionColumns, parsed) + Some(parsed) + } else { + None + } + } + + if (txn.readVersion < 0) { + // Initialize the log path + deltaLog.createLogDirectory() + } + + val (newFiles, addFiles, deletedFiles) = (mode, replaceWhere) match { + case (SaveMode.Overwrite, Some(predicates)) if !replaceOnDataColsEnabled => + // fall back to match on partition cols only when replaceArbitrary is disabled. + val newFiles = txn.writeFiles(data, Some(options)) + val addFiles = newFiles.collect { case a: AddFile => a } + // Check to make sure the files we wrote out were actually valid. + val matchingFiles = DeltaLog.filterFileList( + txn.metadata.partitionSchema, addFiles.toDF(sparkSession), predicates).as[AddFile] + .collect() + val invalidFiles = addFiles.toSet -- matchingFiles + if (invalidFiles.nonEmpty) { + val badPartitions = invalidFiles + .map(_.partitionValues) + .map { _.map { case (k, v) => s"$k=$v" }.mkString("/") } + .mkString(", ") + throw DeltaErrors.replaceWhereMismatchException(options.replaceWhere.get, badPartitions) + } + (newFiles, addFiles, txn.filterFiles(predicates).map(_.remove)) + case (SaveMode.Overwrite, Some(condition)) if txn.snapshot.version >= 0 => + val constraints = extractConstraints(sparkSession, condition) + + val removedFileActions = removeFiles(sparkSession, txn, condition) + val cdcExistsInRemoveOp = removedFileActions.exists(_.isInstanceOf[AddCDCFile]) + + // The above REMOVE will not produce explicit CDF data when persistent DV is enabled. + // Therefore here we need to decide whether to produce explicit CDF for INSERTs, because + // the CDF protocol requires either (i) all CDF data are generated explicitly as AddCDCFile, + // or (ii) all CDF data can be deduced from [[AddFile]] and [[RemoveFile]]. + val dataToWrite = + if (containsDataFilters && + CDCReader.isCDCEnabledOnTable(txn.metadata, sparkSession) && + sparkSession.conf.get(DeltaSQLConf.REPLACEWHERE_DATACOLUMNS_WITH_CDF_ENABLED) && + cdcExistsInRemoveOp) { + var dataWithDefaultExprs = data + + // pack new data and cdc data into an array of structs and unpack them into rows + // to share values in outputCols on both branches, avoiding re-evaluating + // non-deterministic expression twice. 
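Editor's note, a minimal writer-side sketch of the dynamic partition overwrite path handled earlier in this method (not part of the patch; df, its partitioning, and the path are made up):

// Overwrite only the partitions that receive new rows; untouched partitions are preserved.
df.write
  .format("delta")
  .mode("overwrite")
  .option("partitionOverwriteMode", "dynamic")
  .save("/tmp/delta/events")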
+ val outputCols = dataWithDefaultExprs.schema.map(SchemaUtils.fieldToColumn(_)) + val insertCols = outputCols :+ + lit(CDCReader.CDC_TYPE_INSERT).as(CDCReader.CDC_TYPE_COLUMN_NAME) + val insertDataCols = outputCols :+ + new Column(CDCReader.CDC_TYPE_NOT_CDC) + .as(CDCReader.CDC_TYPE_COLUMN_NAME) + val packedInserts = array( + struct(insertCols: _*), + struct(insertDataCols: _*) + ).expr + + dataWithDefaultExprs + .select(explode(new Column(packedInserts)).as("packedData")) + .select( + (dataWithDefaultExprs.schema.map(_.name) :+ CDCReader.CDC_TYPE_COLUMN_NAME) + .map { n => col(s"packedData.`$n`").as(n) }: _*) + } else { + data + } + val newFiles = try txn.writeFiles(dataToWrite, Some(options), constraints) catch { + case e: InvariantViolationException => + throw DeltaErrors.replaceWhereMismatchException( + options.replaceWhere.get, + e) + } + (newFiles, + newFiles.collect { case a: AddFile => a }, + removedFileActions) + case (SaveMode.Overwrite, None) => + val newFiles = writeFiles( + txn, data, options + ) + val addFiles = newFiles.collect { case a: AddFile => a } + val deletedFiles = if (useDynamicPartitionOverwriteMode) { + // with dynamic partition overwrite for any partition that is being written to all + // existing data in that partition will be deleted. + // the selection what to delete is on the next two lines + val updatePartitions = addFiles.map(_.partitionValues).toSet + txn.filterFiles(updatePartitions).map(_.remove) + } else { + txn.filterFiles().map(_.remove) + } + (newFiles, addFiles, deletedFiles) + case _ => + val newFiles = writeFiles( + txn, data, options + ) + (newFiles, newFiles.collect { case a: AddFile => a }, Nil) + } + + // Need to handle replace where metrics separately. + if (replaceWhere.nonEmpty && replaceOnDataColsEnabled && + sparkSession.conf.get(DeltaSQLConf.REPLACEWHERE_METRICS_ENABLED)) { + registerReplaceWhereMetrics(sparkSession, txn, newFiles, deletedFiles) + } + + val fileActions = if (rearrangeOnly) { + val changeFiles = newFiles.collect { case c: AddCDCFile => c } + if (changeFiles.nonEmpty) { + throw DeltaErrors.unexpectedChangeFilesFound(changeFiles.mkString("\n")) + } + addFiles.map(_.copy(dataChange = !rearrangeOnly)) ++ + deletedFiles.map { + case add: AddFile => add.copy(dataChange = !rearrangeOnly) + case remove: RemoveFile => remove.copy(dataChange = !rearrangeOnly) + case other => throw DeltaErrors.illegalFilesFound(other.toString) + } + } else { + newFiles ++ deletedFiles + } + newDomainMetadata ++ + createSetTransaction(sparkSession, deltaLog, Some(options)).toSeq ++ fileActions + } + + private def writeFiles( + txn: OptimisticTransaction, + data: DataFrame, + options: DeltaOptions + ): Seq[FileAction] = { + txn.writeFiles(data, Some(options)) + } + + private def removeFiles( + spark: SparkSession, + txn: OptimisticTransaction, + condition: Seq[Expression]): Seq[Action] = { + val relation = LogicalRelation( + txn.deltaLog.createRelation(snapshotToUseOpt = Some(txn.snapshot), + catalogTableOpt = txn.catalogTable)) + val processedCondition = condition.reduceOption(And) + val command = spark.sessionState.analyzer.execute( + DeleteFromTable(relation, processedCondition.getOrElse(Literal.TrueLiteral))) + spark.sessionState.analyzer.checkAnalysis(command) + command.asInstanceOf[DeleteCommand].performDelete(spark, txn.deltaLog, txn) + } + + override def withNewWriterConfiguration(updatedConfiguration: Map[String, String]) + : WriteIntoDeltaLike = this.copy(configuration = updatedConfiguration) +} diff --git 
a/spark/src/main/scala/org/apache/spark/sql/delta/commands/WriteIntoDeltaLike.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/WriteIntoDeltaLike.scala new file mode 100644 index 00000000000..1a035473ce0 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/WriteIntoDeltaLike.scala @@ -0,0 +1,181 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.skipping.clustering.temp.ClusterBySpec +import org.apache.spark.sql.delta.DeltaLog +import org.apache.spark.sql.delta.OptimisticTransaction +import org.apache.spark.sql.delta.actions.Action +import org.apache.spark.sql.delta.actions.AddCDCFile +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.constraints.Constraint +import org.apache.spark.sql.delta.constraints.Constraints.Check +import org.apache.spark.sql.delta.constraints.Invariants.ArbitraryExpression +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} +import org.apache.spark.sql.types.StructType + +/** + * An interface for writing [[data]] into Delta tables. + */ +trait WriteIntoDeltaLike { + /** + * A helper method to create a new instances of [[WriteIntoDeltaLike]] with + * updated [[configuration]]. + */ + def withNewWriterConfiguration(updatedConfiguration: Map[String, String]): WriteIntoDeltaLike + + /** + * The configuration to be used for writing [[data]] into Delta table. + */ + val configuration: Map[String, String] + + /** + * Data to be written into Delta table. + */ + val data: DataFrame + + /** + * Write [[data]] into Delta table as part of [[txn]] and @return the actions to be committed. + */ + def write( + txn: OptimisticTransaction, + sparkSession: SparkSession, + clusterBySpecOpt: Option[ClusterBySpec] = None, + isTableReplace: Boolean = false): Seq[Action] + + val deltaLog: DeltaLog + + + + /** + * Replace where operationMetrics need to be recorded separately. 
+ * @param newFiles - AddFile and AddCDCFile added by write job + * @param deleteActions - AddFile, RemoveFile, AddCDCFile added by Delete job + */ + protected def registerReplaceWhereMetrics( + spark: SparkSession, + txn: OptimisticTransaction, + newFiles: Seq[Action], + deleteActions: Seq[Action]): Unit = { + var numFiles = 0L + var numCopiedRows = 0L + var numOutputBytes = 0L + var numNewRows = 0L + var numAddedChangedFiles = 0L + var hasRowLevelMetrics = true + + newFiles.foreach { + case a: AddFile => + numFiles += 1 + numOutputBytes += a.size + if (a.numLogicalRecords.isEmpty) { + hasRowLevelMetrics = false + } else { + numNewRows += a.numLogicalRecords.get + } + case cdc: AddCDCFile => + numAddedChangedFiles += 1 + case _ => + } + + deleteActions.foreach { + case a: AddFile => + numFiles += 1 + numOutputBytes += a.size + if (a.numLogicalRecords.isEmpty) { + hasRowLevelMetrics = false + } else { + numCopiedRows += a.numLogicalRecords.get + } + case _: AddCDCFile => + numAddedChangedFiles += 1 + // Remove metrics will be handled by the delete command. + case _ => + } + + // Helper for creating a SQLMetric and setting its value, since it isn't valid to create a + // SQLMetric with a positive `initValue`. + def createSumMetricWithValue(name: String, value: Long): SQLMetric = { + val metric = new SQLMetric("sum") + metric.register(spark.sparkContext, Some(name)) + metric.set(value) + metric + } + + var sqlMetrics = Map( + "numFiles" -> createSumMetricWithValue("number of files written", numFiles), + "numOutputBytes" -> createSumMetricWithValue("number of output bytes", numOutputBytes), + "numAddedChangeFiles" -> createSumMetricWithValue( + "number of change files added", numAddedChangedFiles) + ) + if (hasRowLevelMetrics) { + sqlMetrics ++= Map( + "numOutputRows" -> createSumMetricWithValue( + "number of rows added", numNewRows + numCopiedRows), + "numCopiedRows" -> createSumMetricWithValue("number of copied rows", numCopiedRows) + ) + } else { + // this will get filtered out in DeltaOperations.WRITE transformMetrics + sqlMetrics ++= Map( + "numOutputRows" -> createSumMetricWithValue("number of rows added", 0L), + "numCopiedRows" -> createSumMetricWithValue("number of copied rows", 0L) + ) + } + txn.registerSQLMetrics(spark, sqlMetrics) + } + + import org.apache.spark.sql.types.{ArrayType, CharType, DataType, MapType, VarcharType} + protected def replaceCharWithVarchar(dt: DataType): DataType = dt match { + case ArrayType(et, nullable) => + ArrayType(replaceCharWithVarchar(et), nullable) + case MapType(kt, vt, nullable) => + MapType(replaceCharWithVarchar(kt), replaceCharWithVarchar(vt), nullable) + case StructType(fields) => + StructType(fields.map { field => + field.copy(dataType = replaceCharWithVarchar(field.dataType)) + }) + case CharType(length) => VarcharType(length) + case _ => dt + } + + protected def extractConstraints( + sparkSession: SparkSession, + expr: Seq[Expression]): Seq[Constraint] = { + if (!sparkSession.conf.get(DeltaSQLConf.REPLACEWHERE_CONSTRAINT_CHECK_ENABLED)) { + Seq.empty + } else { + expr.flatMap { e => + // While writing out the new data, we only want to enforce constraint on expressions + // with UnresolvedAttribute, that is, containing column name. Because we parse a + // predicate string without analyzing it, if there's a column name, it has to be + // unresolved. 
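Editor's note, a small sketch of the transformation performed by the replaceCharWithVarchar helper defined just above (hypothetical schema, not part of the patch):

import org.apache.spark.sql.types._

// A schema with CHAR columns, including a nested one.
val before = StructType(Seq(
  StructField("name", CharType(10)),
  StructField("tags", ArrayType(CharType(4)))))
// replaceCharWithVarchar(before) is expected to produce the equivalent of:
val after = StructType(Seq(
  StructField("name", VarcharType(10)),
  StructField("tags", ArrayType(VarcharType(4)))))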
+ e.collectFirst { + case _: UnresolvedAttribute => + val arbitraryExpression = ArbitraryExpression(e) + Check(arbitraryExpression.name, arbitraryExpression.expression) + } + } + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/alterDeltaTableCommands.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/alterDeltaTableCommands.scala new file mode 100644 index 00000000000..b23feee86b9 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/alterDeltaTableCommands.scala @@ -0,0 +1,932 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +// scalastyle:off import.ordering.noEmptyLine +import java.util.Locale +import java.util.concurrent.TimeUnit + +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.skipping.clustering.ClusteringColumnInfo +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.Protocol +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.constraints.{CharVarcharConstraint, Constraints} +import org.apache.spark.sql.delta.schema.{SchemaMergingUtils, SchemaUtils} +import org.apache.spark.sql.delta.schema.SchemaUtils.transformColumnsStructs +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.StatisticsCollection +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{AnalysisException, Column, Row, SparkSession} +import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.catalog.CatalogUtils +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, QualifiedColType} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.connector.catalog.TableCatalog +import org.apache.spark.sql.connector.catalog.TableChange.{After, ColumnPosition, First} +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.types._ + +/** + * A super trait for alter table commands that modify Delta tables. + */ +trait AlterDeltaTableCommand extends DeltaCommand { + + def table: DeltaTableV2 + + protected def startTransaction(): OptimisticTransaction = { + // WARNING: It's not safe to use startTransactionWithInitialSnapshot here. Some commands call + // this method more than once, and some commands can be created with a stale table. 
+ val txn = table.startTransaction() + if (txn.readVersion == -1) { + throw DeltaErrors.notADeltaTableException(table.name()) + } + txn + } + + /** + * Check if the column to change has any dependent expressions: + * - generated column expressions + * - check constraints + */ + protected def checkDependentExpressions( + sparkSession: SparkSession, + columnParts: Seq[String], + newMetadata: actions.Metadata, + protocol: Protocol, + operationName: String): Unit = { + if (!sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_ALTER_TABLE_CHANGE_COLUMN_CHECK_EXPRESSIONS)) { + return + } + // check if the column to change is referenced by check constraints + val dependentConstraints = + Constraints.findDependentConstraints(sparkSession, columnParts, newMetadata) + if (dependentConstraints.nonEmpty) { + throw DeltaErrors.foundViolatingConstraintsForColumnChange( + operationName, UnresolvedAttribute(columnParts).name, dependentConstraints) + } + // check if the column to change is referenced by any generated columns + val dependentGenCols = SchemaUtils.findDependentGeneratedColumns( + sparkSession, columnParts, protocol, newMetadata.schema) + if (dependentGenCols.nonEmpty) { + throw DeltaErrors.foundViolatingGeneratedColumnsForColumnChange( + operationName, UnresolvedAttribute(columnParts).name, dependentGenCols.toList) + } + } +} + +/** + * A command that sets Delta table configuration. + * + * The syntax of this command is: + * {{{ + * ALTER TABLE table1 SET TBLPROPERTIES ('key1' = 'val1', 'key2' = 'val2', ...); + * }}} + */ +case class AlterTableSetPropertiesDeltaCommand( + table: DeltaTableV2, + configuration: Map[String, String]) + extends LeafRunnableCommand with AlterDeltaTableCommand with IgnoreCachedData { + + override def run(sparkSession: SparkSession): Seq[Row] = { + val deltaLog = table.deltaLog + recordDeltaOperation(deltaLog, "delta.ddl.alter.setProperties") { + val txn = startTransaction() + + val metadata = txn.metadata + val filteredConfs = configuration.filterKeys { + case k if k.toLowerCase(Locale.ROOT).startsWith("delta.constraints.") => + throw DeltaErrors.useAddConstraints + case k if k == TableCatalog.PROP_LOCATION => + throw DeltaErrors.useSetLocation() + case k if k == TableCatalog.PROP_COMMENT => + false + case k if k == TableCatalog.PROP_PROVIDER => + throw DeltaErrors.cannotChangeProvider() + case k if k == TableFeatureProtocolUtils.propertyKey(ClusteringTableFeature) => + throw DeltaErrors.alterTableSetClusteringTableFeatureException( + ClusteringTableFeature.name) + case _ => + true + } + + val newMetadata = metadata.copy( + description = configuration.getOrElse(TableCatalog.PROP_COMMENT, metadata.description), + configuration = metadata.configuration ++ filteredConfs) + + txn.updateMetadata(newMetadata) + + txn.commit(Nil, DeltaOperations.SetTableProperties(configuration)) + + Seq.empty[Row] + } + } +} + +/** + * A command that unsets Delta table configuration. + * If ifExists is false, each individual key will be checked if it exists or not, it's a + * one-by-one operation, not an all or nothing check. Otherwise, non-existent keys will be ignored. 
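Editor's note, a hedged SQL-level sketch of the two property commands in this file (the table name is made up; delta.logRetentionDuration is a standard Delta table property):

// Set a property, then remove it; IF EXISTS makes UNSET ignore keys that are not present.
spark.sql("ALTER TABLE events SET TBLPROPERTIES ('delta.logRetentionDuration' = 'interval 30 days')")
spark.sql("ALTER TABLE events UNSET TBLPROPERTIES IF EXISTS ('delta.logRetentionDuration')")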
+ * + * The syntax of this command is: + * {{{ + * ALTER TABLE table1 UNSET TBLPROPERTIES [IF EXISTS] ('key1', 'key2', ...); + * }}} + */ +case class AlterTableUnsetPropertiesDeltaCommand( + table: DeltaTableV2, + propKeys: Seq[String], + ifExists: Boolean) + extends LeafRunnableCommand with AlterDeltaTableCommand with IgnoreCachedData { + + override def run(sparkSession: SparkSession): Seq[Row] = { + val deltaLog = table.deltaLog + recordDeltaOperation(deltaLog, "delta.ddl.alter.unsetProperties") { + val txn = startTransaction() + val metadata = txn.metadata + + val normalizedKeys = DeltaConfigs.normalizeConfigKeys(propKeys) + if (!ifExists) { + normalizedKeys.foreach { k => + if (!metadata.configuration.contains(k)) { + throw DeltaErrors.unsetNonExistentProperty(k, table.name()) + } + } + } + + val newConfiguration = metadata.configuration.filterNot { + case (key, _) => normalizedKeys.contains(key) + } + val description = if (normalizedKeys.contains(TableCatalog.PROP_COMMENT)) null else { + metadata.description + } + val newMetadata = metadata.copy( + description = description, + configuration = newConfiguration) + txn.updateMetadata(newMetadata) + txn.commit(Nil, DeltaOperations.UnsetTableProperties(normalizedKeys, ifExists)) + + Seq.empty[Row] + } + } +} + +/** + * A command that removes an existing feature from the table. The feature needs to implement the + * [[RemovableFeature]] trait. + * + * The syntax of the command is: + * {{{ + * ALTER TABLE t DROP FEATURE f [TRUNCATE HISTORY] + * }}} + * + * The operation consists of two stages (see [[RemovableFeature]]): + * 1) preDowngradeCommand. This command is responsible for removing any data and metadata + * related to the feature. + * 2) Protocol downgrade. Removes the feature from the current version's protocol. + * During this stage we also validate whether all traces of the feature-to-be-removed are gone. + * + * For removing writer features the 2 steps above are sufficient. However, for removing + * reader+writer features we also need to ensure the history does not contain any traces of the + * removed feature. The user journey is the following: + * + * 1) The user runs the remove feature command which removes any traces of the feature from + * the latest version. The removal command throws a message that there was partial success + * and the retention period must pass before a protocol downgrade is possible. + * 2) The user runs again the command after the retention period is over. The command checks the + * current state again and the history. If everything is clean, it proceeds with the protocol + * downgrade. The TRUNCATE HISTORY option may be used here to automatically set + * the log retention period to a minimum of 24 hours before clearing the logs. The minimum + * value is based on the expected duration of the longest running transaction. This is the + * lowest retention period we can set without endangering concurrent transactions. + * If transactions do run for longer than this period while this command is run, then this + * can lead to data corruption. + * + * Note, legacy features can be removed as well, as long as the protocol supports Table Features. + * This will not downgrade protocol versions but only remove the feature from the + * supported features list. For example, removing legacyRWFeature from + * (3, 7, [legacyRWFeature], [legacyRWFeature]) will result in (3, 7, [], []) and not (1, 1). 
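Editor's note, a sketch of the two-stage user journey described above (hypothetical; the table name is made up and deletionVectors stands in for any removable reader+writer feature):

// First run: removes feature traces, then reports that the retention period must pass.
spark.sql("ALTER TABLE events DROP FEATURE deletionVectors")
// Second run, after the retention period, optionally truncating the retained history:
spark.sql("ALTER TABLE events DROP FEATURE deletionVectors TRUNCATE HISTORY")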
+ */ +case class AlterTableDropFeatureDeltaCommand( + table: DeltaTableV2, + featureName: String, + truncateHistory: Boolean = false) + extends LeafRunnableCommand with AlterDeltaTableCommand with IgnoreCachedData { + + def createEmptyCommitAndCheckpoint(snapshotRefreshStartTime: Long): Unit = { + val log = table.deltaLog + val snapshot = log.update(checkIfUpdatedSinceTs = Some(snapshotRefreshStartTime)) + val emptyCommitTS = System.nanoTime() + log.startTransaction(table.catalogTable, Some(snapshot)) + .commit(Nil, DeltaOperations.EmptyCommit) + log.checkpoint(log.update(checkIfUpdatedSinceTs = Some(emptyCommitTS))) + } + + def truncateHistoryLogRetentionMillis(txn: OptimisticTransaction): Option[Long] = { + if (!truncateHistory) return None + + val truncateHistoryLogRetention = DeltaConfigs + .TABLE_FEATURE_DROP_TRUNCATE_HISTORY_LOG_RETENTION + .fromMetaData(txn.metadata) + + Some(DeltaConfigs.getMilliSeconds(truncateHistoryLogRetention)) + } + + override def run(sparkSession: SparkSession): Seq[Row] = { + val deltaLog = table.deltaLog + recordDeltaOperation(deltaLog, "delta.ddl.alter.dropFeature") { + val removableFeature = TableFeature.featureNameToFeature(featureName) match { + case Some(feature: RemovableFeature) => feature + case Some(_) => throw DeltaErrors.dropTableFeatureNonRemovableFeature(featureName) + case None => throw DeltaErrors.dropTableFeatureFeatureNotSupportedByClient(featureName) + } + + // Check whether the protocol contains the feature in either the writer features list or + // the reader+writer features list. + if (!table.initialSnapshot.protocol.readerAndWriterFeatureNames.contains(featureName)) { + throw DeltaErrors.dropTableFeatureFeatureNotSupportedByProtocol(featureName) + } + + if (truncateHistory && !removableFeature.isReaderWriterFeature) { + throw DeltaErrors.tableFeatureDropHistoryTruncationNotAllowed() + } + + // The removableFeature.preDowngradeCommand needs to adhere to the following requirements: + // + // a) Bring the table to a state the validation passes. + // b) To not allow concurrent commands to alter the table in a way the validation does not + // pass. This can be done by first disabling the relevant metadata property. + // c) Undoing (b) should cause the preDowngrade command to fail. + // + // Note, for features that cannot be disabled we solely rely for correctness on + // validateRemoval. + val isReaderWriterFeature = removableFeature.isReaderWriterFeature + val startTimeNs = System.nanoTime() + val preDowngradeMadeChanges = + removableFeature.preDowngradeCommand(table).removeFeatureTracesIfNeeded() + if (preDowngradeMadeChanges && isReaderWriterFeature) { + // Generate a checkpoint after the cleanup that is based on commits that do not use + // the feature. This intends to help slow-moving tables to qualify for history truncation + // asap. The checkpoint is based on a new commit to avoid creating a checkpoint + // on a commit that still contains traces of the removed feature. + createEmptyCommitAndCheckpoint(startTimeNs) + + // If the pre-downgrade command made changes, then the table's historical versions + // certainly still contain traces of the feature. We don't have to run an expensive + // explicit check, but instead we fail straight away. + throw DeltaErrors.dropTableFeatureWaitForRetentionPeriod( + featureName, table.initialSnapshot.metadata) + } + + val txn = table.startTransaction() + val snapshot = txn.snapshot + + // Verify whether all requirements hold before performing the protocol downgrade. 
+ // If any concurrent transactions interfere with the protocol downgrade txn we + // revalidate the requirements against the snapshot of the winning txn. + if (!removableFeature.validateRemoval(snapshot)) { + throw DeltaErrors.dropTableFeatureConflictRevalidationFailed() + } + + // For reader+writer features, before downgrading the protocol we need to ensure there are no + // traces of the feature in past versions. If traces are found, the user is advised to wait + // until the retention period is over. This is a slow operation. + // Note, if this txn conflicts, we check all winning commits for traces of the feature. + // Therefore, we do not need to check again for historical versions during conflict + // resolution. + if (isReaderWriterFeature) { + // Clean up expired logs before checking history. This also makes sure there is no + // concurrent metadataCleanup during findEarliestReliableCheckpoint. Note, this + // cleanUpExpiredLogs call truncates the cutoff at a minute granularity. + deltaLog.cleanUpExpiredLogs( + snapshot, + truncateHistoryLogRetentionMillis(txn), + TruncationGranularity.MINUTE) + + val historyContainsFeature = removableFeature.historyContainsFeature( + spark = sparkSession, + downgradeTxnReadSnapshot = snapshot) + if (historyContainsFeature) { + throw DeltaErrors.dropTableFeatureHistoricalVersionsExist(featureName, snapshot.metadata) + } + } + + txn.updateProtocol(txn.protocol.removeFeature(removableFeature)) + txn.commit(Nil, DeltaOperations.DropTableFeature(featureName, truncateHistory)) + Nil + } + } +} + +/** + * A command that add columns to a Delta table. + * The syntax of using this command in SQL is: + * {{{ + * ALTER TABLE table_identifier + * ADD COLUMNS (col_name data_type [COMMENT col_comment], ...); + * }}} +*/ +case class AlterTableAddColumnsDeltaCommand( + table: DeltaTableV2, + colsToAddWithPosition: Seq[QualifiedColType]) + extends LeafRunnableCommand with AlterDeltaTableCommand with IgnoreCachedData { + + override def run(sparkSession: SparkSession): Seq[Row] = { + val deltaLog = table.deltaLog + recordDeltaOperation(deltaLog, "delta.ddl.alter.addColumns") { + val txn = startTransaction() + + if (SchemaUtils.filterRecursively( + StructType(colsToAddWithPosition.map { + case QualifiedColTypeWithPosition(_, column, _) => column + }), true)(!_.nullable).nonEmpty) { + throw DeltaErrors.operationNotSupportedException("NOT NULL in ALTER TABLE ADD COLUMNS") + } + + // TODO: remove this after auto cache refresh is merged. 
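Editor's note, a SQL-level sketch of the ADD COLUMNS command documented above (hypothetical table and column names; note that the check above rejects NOT NULL columns):

spark.sql("ALTER TABLE events ADD COLUMNS (event_source STRING COMMENT 'origin system', ingest_ts TIMESTAMP)")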
+ table.tableIdentifier.foreach { identifier => + try sparkSession.catalog.uncacheTable(identifier) catch { + case NonFatal(e) => + log.warn(s"Exception when attempting to uncache table $identifier", e) + } + } + + val metadata = txn.metadata + val oldSchema = metadata.schema + + val resolver = sparkSession.sessionState.conf.resolver + val newSchema = colsToAddWithPosition.foldLeft(oldSchema) { + case (schema, QualifiedColTypeWithPosition(columnPath, column, None)) => + val parentPosition = SchemaUtils.findColumnPosition(columnPath, schema, resolver) + val insertPosition = SchemaUtils.getNestedTypeFromPosition(schema, parentPosition) match { + case s: StructType => s.size + case other => + throw DeltaErrors.addColumnParentNotStructException(column, other) + } + SchemaUtils.addColumn(schema, column, parentPosition :+ insertPosition) + case (schema, QualifiedColTypeWithPosition(columnPath, column, Some(_: First))) => + val parentPosition = SchemaUtils.findColumnPosition(columnPath, schema, resolver) + SchemaUtils.addColumn(schema, column, parentPosition :+ 0) + case (schema, + QualifiedColTypeWithPosition(columnPath, column, Some(after: After))) => + val prevPosition = + SchemaUtils.findColumnPosition(columnPath :+ after.column, schema, resolver) + val position = prevPosition.init :+ (prevPosition.last + 1) + SchemaUtils.addColumn(schema, column, position) + } + + SchemaMergingUtils.checkColumnNameDuplication(newSchema, "in adding columns") + SchemaUtils.checkSchemaFieldNames(newSchema, metadata.columnMappingMode) + + val newMetadata = metadata.copy(schemaString = newSchema.json) + txn.updateMetadata(newMetadata) + txn.commit(Nil, DeltaOperations.AddColumns( + colsToAddWithPosition.map { + case QualifiedColTypeWithPosition(path, col, colPosition) => + DeltaOperations.QualifiedColTypeWithPositionForLog( + path, col, colPosition.map(_.toString)) + })) + + Seq.empty[Row] + } + } + + object QualifiedColTypeWithPosition { + + private def toV2Position(input: Any): ColumnPosition = { + input.asInstanceOf[org.apache.spark.sql.catalyst.analysis.FieldPosition].position + } + + def unapply( + col: QualifiedColType): Option[(Seq[String], StructField, Option[ColumnPosition])] = { + val builder = new MetadataBuilder + col.comment.foreach(builder.putString("comment", _)) + + val field = StructField(col.name.last, col.dataType, col.nullable, builder.build()) + + col.default.map { value => + Some((col.name.init, field.withCurrentDefaultValue(value), col.position.map(toV2Position))) + }.getOrElse { + Some((col.name.init, field, col.position.map(toV2Position))) + } + } + } +} + +/** + * A command that drops columns from a Delta table. + * The syntax of using this command in SQL is: + * {{{ + * ALTER TABLE table_identifier + * DROP COLUMN(S) (col_name_1, col_name_2, ...); + * }}} + */ +case class AlterTableDropColumnsDeltaCommand( + table: DeltaTableV2, + columnsToDrop: Seq[Seq[String]]) + extends LeafRunnableCommand with AlterDeltaTableCommand with IgnoreCachedData { + + override def run(sparkSession: SparkSession): Seq[Row] = { + if (!sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_ALTER_TABLE_DROP_COLUMN_ENABLED)) { + // This feature is still behind the flag and not ready for release.
+ throw DeltaErrors.dropColumnNotSupported(suggestUpgrade = false) + } + val deltaLog = table.deltaLog + recordDeltaOperation(deltaLog, "delta.ddl.alter.dropColumns") { + val txn = startTransaction() + val metadata = txn.metadata + if (txn.metadata.columnMappingMode == NoMapping) { + throw DeltaErrors.dropColumnNotSupported(suggestUpgrade = true) + } + val newSchema = columnsToDrop.foldLeft(metadata.schema) { case (schema, columnPath) => + val parentPosition = + SchemaUtils.findColumnPosition( + columnPath, schema, sparkSession.sessionState.conf.resolver) + SchemaUtils.dropColumn(schema, parentPosition)._1 + } + + // in case any of the dropped column is partition columns + val droppedColumnSet = columnsToDrop.map(UnresolvedAttribute(_).name).toSet + val droppingPartitionCols = metadata.partitionColumns.filter(droppedColumnSet.contains(_)) + if (droppingPartitionCols.nonEmpty) { + throw DeltaErrors.dropPartitionColumnNotSupported(droppingPartitionCols) + } + // Disallow dropping clustering columns. + val clusteringCols = ClusteringColumnInfo.extractLogicalNames(txn.snapshot) + val droppingClusteringCols = clusteringCols.filter(droppedColumnSet.contains(_)) + if (droppingClusteringCols.nonEmpty) { + throw DeltaErrors.dropClusteringColumnNotSupported(droppingClusteringCols) + } + // Updates the delta statistics column list by removing the dropped columns from it. + val newConfiguration = metadata.configuration ++ + StatisticsCollection.dropDeltaStatsColumns(metadata, columnsToDrop) + val newMetadata = metadata.copy( + schemaString = newSchema.json, + configuration = newConfiguration + ) + columnsToDrop.foreach { columnParts => + checkDependentExpressions(sparkSession, columnParts, newMetadata, txn.protocol, "drop") + } + + txn.updateMetadata(newMetadata) + txn.commit(Nil, DeltaOperations.DropColumns(columnsToDrop)) + + Seq.empty[Row] + } + } +} + +/** + * A command to change the column for a Delta table, support changing the comment of a column and + * reordering columns. + * + * The syntax of using this command in SQL is: + * {{{ + * ALTER TABLE table_identifier + * CHANGE [COLUMN] column_old_name column_new_name column_dataType [COMMENT column_comment] + * [FIRST | AFTER column_name]; + * }}} + */ +case class AlterTableChangeColumnDeltaCommand( + table: DeltaTableV2, + columnPath: Seq[String], + columnName: String, + newColumn: StructField, + colPosition: Option[ColumnPosition], + syncIdentity: Boolean) + extends LeafRunnableCommand with AlterDeltaTableCommand with IgnoreCachedData { + + override def run(sparkSession: SparkSession): Seq[Row] = { + val deltaLog = table.deltaLog + recordDeltaOperation(deltaLog, "delta.ddl.alter.changeColumns") { + val txn = startTransaction() + val metadata = txn.metadata + val oldSchema = metadata.schema + val resolver = sparkSession.sessionState.conf.resolver + + // Verify that the columnName provided actually exists in the schema + SchemaUtils.findColumnPosition(columnPath :+ columnName, oldSchema, resolver) + + val newSchema = transformColumnsStructs(oldSchema, Some(columnName)) { + case (`columnPath`, struct @ StructType(fields), _) => + val oldColumn = struct(columnName) + verifyColumnChange(sparkSession, struct(columnName), resolver, txn) + + val newField = { + // Take the name, comment, nullability and data type from newField + // It's crucial to keep the old column's metadata, which may contain column mapping + // metadata. 
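Editor's note, a SQL-level sketch of the CHANGE COLUMN syntax documented above (hypothetical names; the declared type must match the column's existing type, and renaming additionally requires column mapping, as verifyColumnChange below enforces):

// Keep the name and type; update only the comment and the position.
spark.sql("ALTER TABLE events CHANGE COLUMN ts ts TIMESTAMP COMMENT 'event time' AFTER id")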
+ var result = newColumn.getComment().map(oldColumn.withComment).getOrElse(oldColumn) + // Apply the current default value as well, if any. + result = newColumn.getCurrentDefaultValue() match { + case Some(newDefaultValue) => result.withCurrentDefaultValue(newDefaultValue) + case None => result.clearCurrentDefaultValue() + } + result + .copy( + name = newColumn.name, + dataType = + SchemaUtils.changeDataType(oldColumn.dataType, newColumn.dataType, resolver), + nullable = newColumn.nullable) + } + + // Replace existing field with new field + val newFieldList = fields.map { field => + if (DeltaColumnMapping.getPhysicalName(field) == + DeltaColumnMapping.getPhysicalName(newField)) { + newField + } else field + } + + // Reorder new field to correct position if necessary + colPosition.map { position => + reorderFieldList(struct, newFieldList, newField, position, resolver) + }.getOrElse(newFieldList.toSeq) + + case (_, _ @ StructType(fields), _) => fields + } + + // update `partitionColumns` if the changed column is a partition column + val newPartitionColumns = if (columnPath.isEmpty) { + metadata.partitionColumns.map { partCol => + if (partCol == columnName) newColumn.name else partCol + } + } else metadata.partitionColumns + + val oldColumnPath = columnPath :+ columnName + val newColumnPath = columnPath :+ newColumn.name + // Rename the column in the delta statistics columns configuration, if present. + val newConfiguration = metadata.configuration ++ + StatisticsCollection.renameDeltaStatsColumn(metadata, oldColumnPath, newColumnPath) + + val newMetadata = metadata.copy( + schemaString = newSchema.json, + partitionColumns = newPartitionColumns, + configuration = newConfiguration + ) + + if (newColumn.name != columnName) { + // need to validate the changes if the column is renamed + checkDependentExpressions( + sparkSession, columnPath :+ columnName, newMetadata, txn.protocol, "rename") + } + + + txn.updateMetadata(newMetadata) + + if (newColumn.name != columnName) { + // record column rename separately + txn.commit(Nil, DeltaOperations.RenameColumn(oldColumnPath, newColumnPath)) + } else { + txn.commit(Nil, DeltaOperations.ChangeColumn( + columnPath, columnName, newColumn, colPosition.map(_.toString))) + } + + Seq.empty[Row] + } + } + + /** + * Reorder the given fieldList to place `field` at the given `position` in `fieldList` + * + * @param struct The initial StructType with the original field at its original position + * @param fieldList List of fields with the changed field in the original position + * @param field The field that is to be added + * @param position Position where the field is to be placed + * @return Returns a new list of fields with the changed field in the new position + */ + private def reorderFieldList( + struct: StructType, + fieldList: Array[StructField], + field: StructField, + position: ColumnPosition, + resolver: Resolver): Seq[StructField] = { + val startIndex = struct.fieldIndex(columnName) + val filtered = fieldList.filterNot(_.name == columnName) + val newFieldList = position match { + case _: First => + field +: filtered + + case after: After if after.column() == columnName => + filtered.slice(0, startIndex)++ + Seq(field) ++ + filtered.slice(startIndex, filtered.length) + + case after: After => + val endIndex = filtered.indexWhere(i => resolver(i.name, after.column())) + if (endIndex < 0) { + throw DeltaErrors.columnNotInSchemaException(after.column(), struct) + } + + filtered.slice(0, endIndex + 1) ++ + Seq(field) ++ + filtered.slice(endIndex + 1, 
filtered.length) + } + newFieldList.toSeq + } + + /** + * Given two columns, verify whether replacing the original column with the new column is a valid + * operation. + * + * Note that this requires a full table scan in the case of SET NOT NULL to verify that all + * existing values are valid. + * + * @param originalField The existing column + */ + private def verifyColumnChange( + spark: SparkSession, + originalField: StructField, + resolver: Resolver, + txn: OptimisticTransaction): Unit = { + + originalField.dataType match { + case same if same == newColumn.dataType => + // just changing comment or position so this is fine + case s: StructType if s != newColumn.dataType => + val fieldName = UnresolvedAttribute(columnPath :+ columnName).name + throw DeltaErrors.cannotUpdateStructField(table.name(), fieldName) + case m: MapType if m != newColumn.dataType => + val fieldName = UnresolvedAttribute(columnPath :+ columnName).name + throw DeltaErrors.cannotUpdateMapField(table.name(), fieldName) + case a: ArrayType if a != newColumn.dataType => + val fieldName = UnresolvedAttribute(columnPath :+ columnName).name + throw DeltaErrors.cannotUpdateArrayField(table.name(), fieldName) + case _: AtomicType => + // update is okay + case o => + throw DeltaErrors.cannotUpdateOtherField(table.name(), o) + } + + // Analyzer already validates the char/varchar type change of ALTER COLUMN in + // `CheckAnalysis.checkAlterTableCommand`. We should normalize char/varchar type to string type + // first (original data type is already normalized as we store char/varchar as string type with + // special metadata in the Delta log), then apply Delta-specific checks. + val newType = CharVarcharUtils.replaceCharVarcharWithString(newColumn.dataType) + if (SchemaUtils.canChangeDataType(originalField.dataType, newType, resolver, + txn.metadata.columnMappingMode, columnPath :+ originalField.name).nonEmpty) { + throw DeltaErrors.alterTableChangeColumnException( + s"'${UnresolvedAttribute(columnPath :+ originalField.name).name}' with type " + + s"'${originalField.dataType}" + + s" (nullable = ${originalField.nullable})'", + s"'${UnresolvedAttribute(Seq(newColumn.name)).name}' with type " + + s"'$newType" + + s" (nullable = ${newColumn.nullable})'") + } + + if (columnName != newColumn.name) { + if (txn.metadata.columnMappingMode == NoMapping) { + throw DeltaErrors.columnRenameNotSupported + } + } + + if (originalField.nullable && !newColumn.nullable) { + throw DeltaErrors.alterTableChangeColumnException( + s"'${UnresolvedAttribute(columnPath :+ originalField.name).name}' with type " + + s"'${originalField.dataType}" + + s" (nullable = ${originalField.nullable})'", + s"'${UnresolvedAttribute(Seq(newColumn.name)).name}' with type " + + s"'${newColumn.dataType}" + + s" (nullable = ${newColumn.nullable})'") + } + } +} + +/** + * A command to replace columns for a Delta table, support changing the comment of a column, + * reordering columns, and loosening nullabilities. 
+ * + * The syntax of using this command in SQL is: + * {{{ + * ALTER TABLE table_identifier REPLACE COLUMNS (col_spec[, col_spec ...]); + * }}} + */ +case class AlterTableReplaceColumnsDeltaCommand( + table: DeltaTableV2, + columns: Seq[StructField]) + extends LeafRunnableCommand with AlterDeltaTableCommand with IgnoreCachedData { + + override def run(sparkSession: SparkSession): Seq[Row] = { + recordDeltaOperation(table.deltaLog, "delta.ddl.alter.replaceColumns") { + val txn = startTransaction() + + val metadata = txn.metadata + val existingSchema = metadata.schema + + val resolver = sparkSession.sessionState.conf.resolver + val changingSchema = StructType(columns) + + SchemaUtils.canChangeDataType(existingSchema, changingSchema, resolver, + txn.metadata.columnMappingMode, failOnAmbiguousChanges = true).foreach { operation => + throw DeltaErrors.alterTableReplaceColumnsException( + existingSchema, changingSchema, operation) + } + + val newSchema = SchemaUtils.changeDataType(existingSchema, changingSchema, resolver) + .asInstanceOf[StructType] + + SchemaMergingUtils.checkColumnNameDuplication(newSchema, "in replacing columns") + SchemaUtils.checkSchemaFieldNames(newSchema, metadata.columnMappingMode) + + val newMetadata = metadata.copy(schemaString = newSchema.json) + txn.updateMetadata(newMetadata) + txn.commit(Nil, DeltaOperations.ReplaceColumns(columns)) + + Nil + } + } +} + +/** + * A command to change the location of a Delta table. Effectively, this only changes the symlink + * in the Hive MetaStore from one Delta table to another. + * + * This command errors out if the new location is not a Delta table. By default, the new Delta + * table must have the same schema as the old table, but we have a SQL conf that allows users + * to bypass this schema check. 
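+ *
+ * For example, the schema check can be skipped before changing the location (the conf key is
+ * shown for illustration and may differ across versions):
+ * {{{
+ *   SET spark.databricks.delta.alterLocation.bypassSchemaCheck = true;
+ * }}}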
+ * + * The syntax of using this command in SQL is: + * {{{ + * ALTER TABLE table_identifier SET LOCATION 'path/to/new/delta/table'; + * }}} + */ +case class AlterTableSetLocationDeltaCommand( + table: DeltaTableV2, + location: String) + extends LeafRunnableCommand + with AlterDeltaTableCommand + with IgnoreCachedData { + + override def run(sparkSession: SparkSession): Seq[Row] = { + val catalog = sparkSession.sessionState.catalog + if (table.catalogTable.isEmpty) { + throw DeltaErrors.setLocationNotSupportedOnPathIdentifiers() + } + val catalogTable = table.catalogTable.get + val locUri = CatalogUtils.stringToURI(location) + + val oldTable = table.deltaLog.update() + if (oldTable.version == -1) { + throw DeltaErrors.notADeltaTableException(table.name()) + } + val oldMetadata = oldTable.metadata + + var updatedTable = catalogTable.withNewStorage(locationUri = Some(locUri)) + + val (_, newTable) = DeltaLog.forTableWithSnapshot(sparkSession, new Path(location)) + if (newTable.version == -1) { + throw DeltaErrors.notADeltaTableException(DeltaTableIdentifier(path = Some(location))) + } + val newMetadata = newTable.metadata + val bypassSchemaCheck = sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_ALTER_LOCATION_BYPASS_SCHEMA_CHECK) + + if (!bypassSchemaCheck && !schemasEqual(oldMetadata, newMetadata)) { + throw DeltaErrors.alterTableSetLocationSchemaMismatchException( + oldMetadata.schema, newMetadata.schema) + } + catalog.alterTable(updatedTable) + + Seq.empty[Row] + } + + private def schemasEqual( + oldMetadata: actions.Metadata, newMetadata: actions.Metadata): Boolean = { + import DeltaColumnMapping._ + dropColumnMappingMetadata(oldMetadata.schema) == + dropColumnMappingMetadata(newMetadata.schema) && + dropColumnMappingMetadata(oldMetadata.partitionSchema) == + dropColumnMappingMetadata(newMetadata.partitionSchema) + } +} + +trait AlterTableConstraintDeltaCommand + extends LeafRunnableCommand with AlterDeltaTableCommand with IgnoreCachedData { + + def getConstraintWithName( + table: DeltaTableV2, + name: String, + metadata: actions.Metadata, + sparkSession: SparkSession): Option[String] = { + val expr = Constraints.getExprTextByName(name, metadata, sparkSession) + if (expr.nonEmpty) { + return expr + } + None + } +} + +/** + * Command to add a constraint to a Delta table. Currently only CHECK constraints are supported. + * + * Adding a constraint will scan all data in the table to verify the constraint currently holds. + * + * @param table The table to which the constraint should be added. + * @param name The name of the new constraint. + * @param exprText The contents of the new CHECK constraint, to be parsed and evaluated. 
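+ *
+ * For example (illustrative table and column names):
+ * {{{
+ *   ALTER TABLE events ADD CONSTRAINT validDate CHECK (eventDate >= '2020-01-01');
+ * }}}
+ * If any existing row violates the expression, the constraint is not added and the command fails.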
+ */ +case class AlterTableAddConstraintDeltaCommand( + table: DeltaTableV2, + name: String, + exprText: String) + extends AlterTableConstraintDeltaCommand { + + override def run(sparkSession: SparkSession): Seq[Row] = { + val deltaLog = table.deltaLog + if (name == CharVarcharConstraint.INVARIANT_NAME) { + throw DeltaErrors.invalidConstraintName(name) + } + recordDeltaOperation(deltaLog, "delta.ddl.alter.addConstraint") { + val txn = startTransaction() + + getConstraintWithName(table, name, txn.metadata, sparkSession).foreach { oldExpr => + throw DeltaErrors.constraintAlreadyExists(name, oldExpr) + } + + val newMetadata = txn.metadata.copy( + configuration = txn.metadata.configuration + + (Constraints.checkConstraintPropertyName(name) -> exprText) + ) + + val expr = sparkSession.sessionState.sqlParser.parseExpression(exprText) + if (expr.dataType != BooleanType) { + throw DeltaErrors.checkConstraintNotBoolean(name, exprText) + } + logInfo(s"Checking that $exprText is satisfied for existing data. " + + "This will require a full table scan.") + recordDeltaOperation( + txn.snapshot.deltaLog, + "delta.ddl.alter.addConstraint.checkExisting") { + val df = txn.snapshot.deltaLog.createDataFrame(txn.snapshot, txn.filterFiles()) + val n = df.where(new Column(Or(Not(expr), IsUnknown(expr)))).count() + + if (n > 0) { + throw DeltaErrors.newCheckConstraintViolated(n, table.name(), exprText) + } + } + + txn.commit(newMetadata :: Nil, DeltaOperations.AddConstraint(name, exprText)) + } + Seq() + } +} + +/** + * Command to drop a constraint from a Delta table. No-op if a constraint with the given name + * doesn't exist. + * + * Currently only CHECK constraints are supported. + * + * @param table The table from which the constraint should be dropped + * @param name The name of the constraint to drop + */ +case class AlterTableDropConstraintDeltaCommand( + table: DeltaTableV2, + name: String, + ifExists: Boolean) + extends AlterTableConstraintDeltaCommand { + + override def run(sparkSession: SparkSession): Seq[Row] = { + val deltaLog = table.deltaLog + recordDeltaOperation(deltaLog, "delta.ddl.alter.dropConstraint") { + val txn = startTransaction() + + val oldExprText = Constraints.getExprTextByName(name, txn.metadata, sparkSession) + if (oldExprText.isEmpty && !ifExists && !sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_ASSUMES_DROP_CONSTRAINT_IF_EXISTS)) { + val quotedTableName = table.getTableIdentifierIfExists.map(_.quotedString) + .orElse(table.catalogTable.map(_.identifier.quotedString)) + .getOrElse(table.name()) + throw DeltaErrors.nonexistentConstraint(name, quotedTableName) + } + + val newMetadata = txn.metadata.copy( + configuration = txn.metadata.configuration - Constraints.checkConstraintPropertyName(name)) + + txn.commit(newMetadata :: Nil, DeltaOperations.DropConstraint(name, oldExprText)) + } + + Seq() + } +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/cdc/CDCReader.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/cdc/CDCReader.scala new file mode 100644 index 00000000000..10713cea655 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/cdc/CDCReader.scala @@ -0,0 +1,1045 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.cdc + +import java.sql.Timestamp + +import scala.collection.mutable.{ListBuffer, Map => MutableMap} + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArray, RoaringBitmapArrayFormat} +import org.apache.spark.sql.delta.files.{CdcAddFileIndex, TahoeChangeFileIndex, TahoeFileIndexWithSnapshotDescriptor, TahoeRemoveFileIndex} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.{DeltaDataSource, DeltaSource, DeltaSQLConf} +import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore +import org.apache.spark.sql.util.ScalaExtensions.OptionExt + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession, SQLContext} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Literal} +import org.apache.spark.sql.catalyst.plans.logical.Statistics +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes +import org.apache.spark.sql.execution.LogicalRDD +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.{BaseRelation, Filter, PrunedFilteredScan} +import org.apache.spark.sql.types.{LongType, StringType, StructType, TimestampType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** + * The API that allows reading Change data between two versions of a table. + * + * The basic abstraction here is the CDC type column defined by [[CDCReader.CDC_TYPE_COLUMN_NAME]]. + * When CDC is enabled, our writer will treat this column as a special partition column even though + * it's not part of the table. Writers should generate a query that has two types of rows in it: + * the main data in partition CDC_TYPE_NOT_CDC and the CDC data with the appropriate CDC type value. + * + * [[org.apache.spark.sql.delta.files.DelayedCommitProtocol]] + * does special handling for this column, dispatching the main data to its normal location while the + * CDC data is sent to [[AddCDCFile]] entries. + */ +object CDCReader extends CDCReaderImpl +{ + // Definitions for the CDC type column. Delta writers will write data with a non-null value for + // this column into [[AddCDCFile]] actions separate from the main table, and the CDC reader will + // read this column to determine what type of change it was. + val CDC_TYPE_COLUMN_NAME = "_change_type" // emitted from data + val CDC_COMMIT_VERSION = "_commit_version" // inferred by reader + val CDC_COMMIT_TIMESTAMP = "_commit_timestamp" // inferred by reader + val CDC_TYPE_DELETE_STRING = "delete" + val CDC_TYPE_DELETE = Literal(CDC_TYPE_DELETE_STRING) + val CDC_TYPE_INSERT = "insert" + val CDC_TYPE_UPDATE_PREIMAGE = "update_preimage" + val CDC_TYPE_UPDATE_POSTIMAGE = "update_postimage" + + /** + * Append CDC metadata columns to the provided schema. 
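+ * The metadata columns are, in order:
+ * {{{
+ *   _change_type      STRING     (e.g. insert, delete, update_preimage, update_postimage)
+ *   _commit_version   LONG
+ *   _commit_timestamp TIMESTAMP
+ * }}}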
+ */ + def cdcAttributes: Seq[Attribute] = Seq( + AttributeReference(CDC_TYPE_COLUMN_NAME, StringType)(), + AttributeReference(CDC_COMMIT_VERSION, LongType)(), + AttributeReference(CDC_COMMIT_TIMESTAMP, TimestampType)()) + + // A special sentinel value indicating rows which are part of the main table rather than change + // data. Delta writers will partition rows with this value away from the CDC data and + // write them as normal to the main table. + // Note that we specifically avoid using `null` here, because partition values of `null` are in + // some scenarios mapped to a special string for Hive compatibility. + val CDC_TYPE_NOT_CDC: Literal = Literal(null, StringType) + + // The virtual column name used for dividing CDC data from main table data. Delta writers should + // permit this column through even though it's not part of the main table, and the + // [[DelayedCommitProtocol]] will apply some special handling, ensuring there's only a + // subfolder with __is_cdc = true and writing data with __is_cdc = false to the base location + // as it would with CDC output off. + // This is a bit redundant with CDC_TYPE_COL, but partitioning directly on the type would mean + // that CDC of each type is partitioned away separately, exacerbating small file problems. + val CDC_PARTITION_COL = "__is_cdc" // emitted by data + + // The top-level folder within the Delta table containing change data. This folder may contain + // partitions within itself. + val CDC_LOCATION = "_change_data" + + // CDC specific columns in data written by operations + val CDC_COLUMNS_IN_DATA = Seq(CDC_PARTITION_COL, CDC_TYPE_COLUMN_NAME) + + // A snapshot coupled with a schema mode that user specified + case class SnapshotWithSchemaMode(snapshot: Snapshot, schemaMode: DeltaBatchCDFSchemaMode) + + /** + * A special BaseRelation wrapper for CDF reads. + */ + case class DeltaCDFRelation( + snapshotWithSchemaMode: SnapshotWithSchemaMode, + sqlContext: SQLContext, + startingVersion: Option[Long], + endingVersion: Option[Long]) extends BaseRelation with PrunedFilteredScan { + + private val deltaLog = snapshotWithSchemaMode.snapshot.deltaLog + + private lazy val latestVersionOfTableDuringAnalysis: Long = deltaLog.update().version + + /** + * There may be a slight divergence here in terms of what schema is in the latest data vs what + * schema we have captured during analysis, but this is an inherent limitation of Spark. + * + * However, if there are schema changes between analysis and execution, since we froze this + * schema, our schema incompatibility checks will kick in during the scan so we will always + * be safe - Although it is a notable caveat that user should be aware of because the CDC query + * may break. + */ + private lazy val endingVersionForBatchSchema: Long = endingVersion.map { v => + // As defined in the method doc, if ending version is greater than the latest version, we will + // just use the latest version to find the schema. + latestVersionOfTableDuringAnalysis min v + }.getOrElse { + // Or if endingVersion is not specified, we just use the latest schema. 
+ latestVersionOfTableDuringAnalysis + } + + // The final snapshot whose schema is going to be used as this CDF relation's schema + private val snapshotForBatchSchema: Snapshot = snapshotWithSchemaMode.schemaMode match { + case BatchCDFSchemaEndVersion => + // Fetch the ending version and its schema + deltaLog.getSnapshotAt(endingVersionForBatchSchema) + case _ => + // Apply the default, either latest generated by DeltaTableV2 or specified by Time-travel + // options. + snapshotWithSchemaMode.snapshot + } + + override val schema: StructType = cdcReadSchema(snapshotForBatchSchema.metadata.schema) + + override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { + val df = changesToBatchDF( + deltaLog, + startingVersion.get, + // The actual ending version we should scan until during execution, as it might have changed + endingVersion.getOrElse { + deltaLog.update().version + }, + sqlContext.sparkSession, + readSchemaSnapshot = Some(snapshotForBatchSchema)) + + df.select(requiredColumns.map(SchemaUtils.fieldNameToColumn): _*).rdd + } + } + + case class CDCDataSpec[T <: FileAction]( + version: Long, + timestamp: Timestamp, + actions: Seq[T], + commitInfo: Option[CommitInfo]) { + def this( + tableVersion: TableVersion, + actions: Seq[T], + commitInfo: Option[CommitInfo]) = { + this( + tableVersion.version, + tableVersion.timestamp, + actions, + commitInfo) + } + } + + /** A version number of a Delta table, with the version's timestamp. */ + case class TableVersion(version: Long, timestamp: Timestamp) { + def this(wp: FilePathWithTableVersion) = this(wp.version, wp.timestamp) + } + + /** Path of a file of a Delta table, together with it's origin table version & timestamp. */ + case class FilePathWithTableVersion( + path: String, + commitInfo: Option[CommitInfo], + version: Long, + timestamp: Timestamp) +} + +trait CDCReaderImpl extends DeltaLogging { + + import org.apache.spark.sql.delta.commands.cdc.CDCReader._ + + /** + * Given timestamp or version, this method returns the corresponding version for that timestamp + * or the version itself, as well as how the return version is obtained: by `version` or + * `timestamp`. + */ + private def getVersionForCDC( + spark: SparkSession, + deltaLog: DeltaLog, + conf: SQLConf, + options: CaseInsensitiveStringMap, + versionKey: String, + timestampKey: String): Option[ResolvedCDFVersion] = { + if (options.containsKey(versionKey)) { + Some(ResolvedCDFVersion(options.get(versionKey).toLong, timestamp = None)) + } else if (options.containsKey(timestampKey)) { + val ts = options.get(timestampKey) + val spec = DeltaTimeTravelSpec(Some(Literal(ts)), None, Some("cdcReader")) + val timestamp = spec.getTimestamp(spark.sessionState.conf) + val allowOutOfRange = conf.getConf(DeltaSQLConf.DELTA_CDF_ALLOW_OUT_OF_RANGE_TIMESTAMP) + val resolvedVersion = if (timestampKey == DeltaDataSource.CDC_START_TIMESTAMP_KEY) { + // For the starting timestamp we need to find a version after the provided timestamp + // we can use the same semantics as streaming. + DeltaSource.getStartingVersionFromTimestamp(spark, deltaLog, timestamp, allowOutOfRange) + } else { + // For ending timestamp the version should be before the provided timestamp. + DeltaTableUtils.resolveTimeTravelVersion(conf, deltaLog, spec, allowOutOfRange)._1 + } + Some(ResolvedCDFVersion(resolvedVersion, Some(timestamp))) + } else { + None + } + } + + /** + * Get the batch cdf schema mode for a table, considering whether it has column mapping enabled + * or not. 
+ */ + def getBatchSchemaModeForTable( + spark: SparkSession, + snapshot: Snapshot): DeltaBatchCDFSchemaMode = { + if (snapshot.metadata.columnMappingMode != NoMapping) { + // Column-mapping table uses exact schema by default, but can be overridden by conf + DeltaBatchCDFSchemaMode(spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_CDF_DEFAULT_SCHEMA_MODE_FOR_COLUMN_MAPPING_TABLE + )) + } else { + // Non column-mapping table uses the current default, which is typically `legacy` - usually + // the latest schema is used, but it can depend on time-travel arguments as well. + // Using time-travel arguments with CDF is default blocked right now as it is an non-expected + // use case, users can unblock themselves with `DeltaSQLConf.DELTA_CDF_ENABLE_TIME_TRAVEL`. + BatchCDFSchemaLegacy + } + } + + /** + * Get a Relation that represents change data between two snapshots of the table. + * + * @param spark Spark session + * @param snapshotToUse Snapshot to use to provide read schema and version + * @param isTimeTravelQuery Whether this CDC scan is used in conjunction with time-travel args + * @param conf SQL conf + * @param options CDC specific options + */ + def getCDCRelation( + spark: SparkSession, + snapshotToUse: Snapshot, + isTimeTravelQuery: Boolean, + conf: SQLConf, + options: CaseInsensitiveStringMap): BaseRelation = { + + val startingVersion = getVersionForCDC( + spark, + snapshotToUse.deltaLog, + conf, + options, + DeltaDataSource.CDC_START_VERSION_KEY, + DeltaDataSource.CDC_START_TIMESTAMP_KEY).getOrElse { + throw DeltaErrors.noStartVersionForCDC() + } + + val schemaMode = getBatchSchemaModeForTable(spark, snapshotToUse) + + // Non-legacy schema mode options cannot be used with time-travel because the schema to use + // will be confusing. + if (isTimeTravelQuery && schemaMode != BatchCDFSchemaLegacy) { + throw DeltaErrors.illegalDeltaOptionException( + DeltaSQLConf.DELTA_CDF_DEFAULT_SCHEMA_MODE_FOR_COLUMN_MAPPING_TABLE.key, + schemaMode.name, + s"${DeltaSQLConf.DELTA_CDF_DEFAULT_SCHEMA_MODE_FOR_COLUMN_MAPPING_TABLE.key} " + + s"cannot be used with time travel options.") + } + + def emptyCDFRelation() = { + new DeltaCDFRelation( + SnapshotWithSchemaMode(snapshotToUse, schemaMode), + spark.sqlContext, + startingVersion = None, + endingVersion = None) { + override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = + sqlContext.sparkSession.sparkContext.emptyRDD[Row] + } + } + + // add a version check here that is cheap instead of after trying to list a large version + // that doesn't exist + if (startingVersion.version > snapshotToUse.version) { + val allowOutOfRange = conf.getConf(DeltaSQLConf.DELTA_CDF_ALLOW_OUT_OF_RANGE_TIMESTAMP) + // LS-129: return an empty relation if start version passed in is beyond latest commit version + if (allowOutOfRange) { + return emptyCDFRelation() + } + throw DeltaErrors.startVersionAfterLatestVersion( + startingVersion.version, snapshotToUse.version) + } + + val endingVersionOpt = getVersionForCDC( + spark, + snapshotToUse.deltaLog, + conf, + options, + DeltaDataSource.CDC_END_VERSION_KEY, + DeltaDataSource.CDC_END_TIMESTAMP_KEY + ) + + // Given two timestamps, there is a case when both of them lay closely between two versions: + // version: 4 5 + // ---------|-------------------------------------------------|-------- + // ^ start timestamp ^ end timestamp + // In this case the starting version will be 5 and ending version will be 4. We must not + // throw `endBeforeStartVersionInCDC` but return empty result. 
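+ // Concretely (illustrative timestamps): with commits at version 4 (10:00) and version 5
+ // (11:00), a start timestamp of 10:20 resolves to version 5 while an end timestamp of 10:40
+ // resolves to version 4, so endingVersion == startingVersion - 1 and an empty relation is
+ // returned below.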
+ endingVersionOpt.foreach { endingVersion => + if (startingVersion.resolvedByTimestamp && endingVersion.resolvedByTimestamp) { + // The next `if` is true when end is less than start but no commit is in between. + // We need to capture such a case and throw early. + if (startingVersion.timestamp.get.after(endingVersion.timestamp.get)) { + throw DeltaErrors.endBeforeStartVersionInCDC( + startingVersion.version, + endingVersion.version) + } + if (endingVersion.version == startingVersion.version - 1) { + return emptyCDFRelation() + } + } + } + + if (endingVersionOpt.exists(_.version < startingVersion.version)) { + throw DeltaErrors.endBeforeStartVersionInCDC( + startingVersion.version, + endingVersionOpt.get.version) + } + + logInfo( + s"startingVersion: ${startingVersion.version}, " + + s"endingVersion: ${endingVersionOpt.map(_.version)}") + + DeltaCDFRelation( + SnapshotWithSchemaMode(snapshotToUse, schemaMode), + spark.sqlContext, + Some(startingVersion.version), + endingVersionOpt.map(_.version)) + } + + /** + * Function to check if file actions should be skipped for no-op merges based on + * CommitInfo metrics. + * MERGE will sometimes rewrite files in a way which *could* have changed data + * (so dataChange = true) but did not actually do so (so no CDC will be produced). + * In this case the correct CDC output is empty - we shouldn't serve it from + * those files. This should be handled within the command, but as a hotfix-safe fix, we check + * the metrics. If the command reported 0 rows inserted, updated, or deleted, then CDC + * shouldn't be produced. + */ + def shouldSkipFileActionsInCommit(commitInfo: CommitInfo): Boolean = { + val isMerge = commitInfo.operation == DeltaOperations.OP_MERGE + val knownToHaveNoChangedRows = { + val metrics = commitInfo.operationMetrics.getOrElse(Map.empty) + // Note that if any metrics are missing, this condition will be false and we won't skip. + // Unfortunately there are no predefined constants for these metric values. + Seq("numTargetRowsInserted", "numTargetRowsUpdated", "numTargetRowsDeleted").forall { + metrics.get(_).contains("0") + } + } + isMerge && knownToHaveNoChangedRows + } + + + /** + * For a sequence of changes(AddFile, RemoveFile, AddCDCFile) create a DataFrame that represents + * that captured change data between start and end inclusive. + * + * Builds the DataFrame using the following logic: Per each change of type (Long, Seq[Action]) in + * `changes`, iterates over the actions and handles two cases. + * - If there are any CDC actions, then we ignore the AddFile and RemoveFile actions in that + * version and create an AddCDCFile instead. + * - If there are no CDC actions, then we must infer the CDC data from the AddFile and RemoveFile + * actions, taking only those with `dataChange = true`. + * + * These buffers of AddFile, RemoveFile, and AddCDCFile actions are then used to create + * corresponding FileIndexes (e.g. [[TahoeChangeFileIndex]]), where each is suited to use the + * given action type to read CDC data. These FileIndexes are then unioned to produce the final + * DataFrame. + * + * @param readSchemaSnapshot - Snapshot for the table for which we are creating a CDF + * Dataframe, the schema of the snapshot is expected to be + * the change DF's schema. We have already adjusted this + * snapshot with the schema mode if there's any. We don't use + * its data actually. 
+ * @param start - startingVersion of the changes + * @param end - endingVersion of the changes + * @param changes - changes is an iterator of all FileActions for a particular commit version. + * @param spark - SparkSession + * @param isStreaming - indicates whether the DataFrame returned is a streaming DataFrame + * @param useCoarseGrainedCDC - ignores checks related to CDC being disabled in any of the + * versions and computes CDC entirely from AddFiles/RemoveFiles (ignoring + * AddCDCFile actions) + * @param startVersionSnapshot - The snapshot of the starting version. + * @return CDCInfo which contains the DataFrame of the changes as well as the statistics + * related to the changes + */ + def changesToDF( + readSchemaSnapshot: SnapshotDescriptor, + start: Long, + end: Long, + changes: Iterator[(Long, Seq[Action])], + spark: SparkSession, + isStreaming: Boolean = false, + useCoarseGrainedCDC: Boolean = false, + startVersionSnapshot: Option[SnapshotDescriptor] = None): CDCVersionDiffInfo = { + val deltaLog = readSchemaSnapshot.deltaLog + + if (end < start) { + throw DeltaErrors.endBeforeStartVersionInCDC(start, end) + } + + // A map from change version to associated commit timestamp. + val timestampsByVersion: Map[Long, Timestamp] = + getTimestampsByVersion(deltaLog, start, end, spark) + + val changeFiles = ListBuffer[CDCDataSpec[AddCDCFile]]() + val addFiles = ListBuffer[CDCDataSpec[AddFile]]() + val removeFiles = ListBuffer[CDCDataSpec[RemoveFile]]() + + val startVersionMetadata = startVersionSnapshot.map(_.metadata).getOrElse { + deltaLog.getSnapshotAt(start).metadata + } + if (!useCoarseGrainedCDC && !isCDCEnabledOnTable(startVersionMetadata, spark)) { + throw DeltaErrors.changeDataNotRecordedException(start, start, end) + } + + // Check schema read-compatibility + val allowUnsafeBatchReadOnIncompatibleSchemaChanges = + spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_CDF_UNSAFE_BATCH_READ_ON_INCOMPATIBLE_SCHEMA_CHANGES) + + if (allowUnsafeBatchReadOnIncompatibleSchemaChanges) { + recordDeltaEvent(deltaLog, "delta.unsafe.cdf.readOnColumnMappingSchemaChanges") + } + + val shouldCheckSchemaToBlockBatchRead = + !isStreaming && !allowUnsafeBatchReadOnIncompatibleSchemaChanges + /** + * Check metadata (which may contain schema change)'s read compatibility with read schema. + */ + def checkBatchCdfReadSchemaIncompatibility( + metadata: Metadata, metadataVer: Long, isSchemaChange: Boolean): Unit = { + // We do not check for any incompatibility if the global "I don't care" flag is turned on + if (shouldCheckSchemaToBlockBatchRead) { + // Column mapping incompatibilities + val compatible = { + // For column mapping schema change, the order matters because we don't want to treat + // an ADD COLUMN as an inverse DROP COLUMN. + if (metadataVer <= readSchemaSnapshot.version) { + DeltaColumnMapping.hasNoColumnMappingSchemaChanges( + newMetadata = readSchemaSnapshot.metadata, oldMetadata = metadata) + } else { + DeltaColumnMapping.hasNoColumnMappingSchemaChanges( + newMetadata = metadata, oldMetadata = readSchemaSnapshot.metadata) + } + } && { + // Other standard read incompatibilities + if (metadataVer <= readSchemaSnapshot.version) { + // If the metadata is before the read schema version, make sure: + // a) metadata schema is a part of the read schema, i.e. only ADD COLUMN can evolve + // metadata schema into read schema + // b) data type for common fields remain the same + // c) metadata schema should not contain field that is nullable=true but the read schema + // is nullable=false. 
+ SchemaUtils.isReadCompatible( + existingSchema = metadata.schema, + readSchema = readSchemaSnapshot.schema, + forbidTightenNullability = true) + } else { + // If the metadata is POST the read schema version, which can happen due to time-travel + // or simply a divergence between analyzed version and the actual latest + // version during scan, we will make sure the other way around: + // a) the metadata must be a super set of the read schema, i.e. only ADD COLUMN can + // evolve read schema into metadata schema + // b) data type for common fields remain the same + // c) read schema should not contain field that is nullable=false but the metadata + // schema has nullable=true. + SchemaUtils.isReadCompatible( + existingSchema = readSchemaSnapshot.schema, + readSchema = metadata.schema, + forbidTightenNullability = false) + } + } + + if (!compatible) { + throw DeltaErrors.blockBatchCdfReadWithIncompatibleSchemaChange( + start, end, + // The consistent read schema + readSchemaSnapshot.metadata.schema, readSchemaSnapshot.version, + // The conflicting schema or schema change version + metadataVer, + isSchemaChange + ) + } + } + } + + var totalBytes = 0L + var totalFiles = 0L + + changes.foreach { + case (v, actions) => + // Check whether CDC was newly disabled in this version. (We should have already checked + // that it's enabled for the starting version, so checking this for each version + // incrementally is sufficient to ensure that it's enabled for the entire range.) + val cdcDisabled = actions.exists { + case m: Metadata => !isCDCEnabledOnTable(m, spark) + case _ => false + } + + if (cdcDisabled && !useCoarseGrainedCDC) { + throw DeltaErrors.changeDataNotRecordedException(v, start, end) + } + + // Check all intermediary metadata schema changes, this guarantees that there will be no + // read-incompatible schema changes across the querying range. + // Note that we don't have to check the schema change if it's at the start version, because: + // 1. If it's an initialization, e.g. CREATE AS SELECT, we don't have to consider this + // as a schema change and report weird error messages. + // 2. If it's indeed a schema change, as we won't be reading any data prior to it that + // falls back to the previous (possibly incorrect) schema, we will be safe. Also if there + // are any data file residing in the same commit, it will follow the new schema as well. + if (v > start) { + actions.collect { case a: Metadata => a }.foreach { metadata => + // Verify with start snapshot to check for any read-incompatible changes + // This also detects the corner case in that there's only one schema change between + // start and end, which looks exactly like the end schema. + checkBatchCdfReadSchemaIncompatibility(metadata, v, isSchemaChange = true) + } + } + + // Set up buffers for all action types to avoid multiple passes. + val cdcActions = ListBuffer[AddCDCFile]() + val ts = timestampsByVersion.get(v).orNull + + // Note that the CommitInfo is *not* guaranteed to be generated in 100% of cases. + // We are using it only for a hotfix-safe mitigation/defense-in-depth - the value + // extracted here cannot be relied on for correctness. 
+ var commitInfo: Option[CommitInfo] = None + actions.foreach { + case c: AddCDCFile => + cdcActions.append(c) + totalFiles += 1L + totalBytes += c.size + case a: AddFile => + totalFiles += 1L + totalBytes += a.size + case r: RemoveFile => + totalFiles += 1L + totalBytes += r.size.getOrElse(0L) + case i: CommitInfo => commitInfo = Some(i) + case _ => // do nothing + } + + // If there are CDC actions, we read them exclusively if we should not use the + // Add and RemoveFiles. + if (cdcActions.nonEmpty && !useCoarseGrainedCDC) { + changeFiles.append(CDCDataSpec(v, ts, cdcActions.toSeq, commitInfo)) + } else { + val shouldSkipIndexedFile = commitInfo.exists(CDCReader.shouldSkipFileActionsInCommit) + if (shouldSkipIndexedFile) { + // This was introduced for a hotfix, so we're mirroring the existing logic as closely + // as possible - it'd likely be safe to just return an empty dataframe here. + addFiles.append(CDCDataSpec(v, ts, Nil, commitInfo)) + removeFiles.append(CDCDataSpec(v, ts, Nil, commitInfo)) + } else { + // Otherwise, we take the AddFile and RemoveFile actions with dataChange = true and + // infer CDC from them. + val addActions = actions.collect { case a: AddFile if a.dataChange => a } + val removeActions = actions.collect { case r: RemoveFile if r.dataChange => r } + addFiles.append( + CDCDataSpec( + version = v, + timestamp = ts, + actions = addActions, + commitInfo = commitInfo) + ) + removeFiles.append( + CDCDataSpec( + version = v, + timestamp = ts, + actions = removeActions, + commitInfo = commitInfo) + ) + } + } + } + + // Verify the final read schema with the start snapshot version once again + // This is needed to: + // 1. Handle the case in that there are no read-incompatible schema change with the range, BUT + // the latest schema may still be incompatible as it COULD be arbitrary. + // 2. Similarly, handle the corner case when there are no read-incompatible schema change with + // the range, BUT time-travel is used so the read schema could also be arbitrary. + // It is sufficient to just verify with the start version schema because we have already + // verified that all data being queries is read-compatible with start schema. + checkBatchCdfReadSchemaIncompatibility(startVersionMetadata, start, isSchemaChange = false) + + val dfs = ListBuffer[DataFrame]() + if (changeFiles.nonEmpty) { + dfs.append(scanIndex( + spark, + new TahoeChangeFileIndex( + spark, changeFiles.toSeq, deltaLog, deltaLog.dataPath, readSchemaSnapshot), + isStreaming)) + } + + val deletedAndAddedRows = getDeletedAndAddedRows( + addFiles.toSeq, removeFiles.toSeq, deltaLog, + readSchemaSnapshot, isStreaming, spark) + dfs.append(deletedAndAddedRows: _*) + + val readSchema = cdcReadSchema(readSchemaSnapshot.metadata.schema) + // build an empty DS. This DS retains the table schema and the isStreaming property + // NOTE: We need to manually set the stats to 0 otherwise we will use default stats of INT_MAX, + // which causes lots of optimizations to be applied wrong. + val emptyRdd = LogicalRDD( + toAttributes(readSchema), + spark.sparkContext.emptyRDD[InternalRow], + isStreaming = isStreaming + )(spark.sqlContext.sparkSession, Some(Statistics(0, Some(0)))) + val emptyDf = + Dataset.ofRows(spark.sqlContext.sparkSession, emptyRdd) + + CDCVersionDiffInfo( + (emptyDf +: dfs).reduce((df1, df2) => df1.union( + df2 + )), + totalFiles, + totalBytes) + } + + /** + * Generate CDC rows by looking at added and removed files, together with Deletion Vectors they + * may have. 
+ * + * When DV is used, the same file can be removed then added in the same version, and the only + * difference is the assigned DVs. The base method does not consider DVs in this case, and would thus + * produce CDC saying that *all* rows in the file were removed and then *some* were re-added. The + * correct answer, however, is to compare the two DVs and apply the diff to the file to get the + * removed and re-added rows. + * + * Currently it is always the case that in the log "remove" comes first, followed by "add" -- + * which means that the file stays alive with a new DV. There's another possibility, though it does + * not make much sense, that a file is "added" to the log and then "removed" in the same version. If + * this becomes possible in the future, we would have to reconstruct the timeline considering the + * order of actions rather than simply matching files by path. + */ + protected def getDeletedAndAddedRows( + addFileSpecs: Seq[CDCDataSpec[AddFile]], + removeFileSpecs: Seq[CDCDataSpec[RemoveFile]], + deltaLog: DeltaLog, + snapshot: SnapshotDescriptor, + isStreaming: Boolean, + spark: SparkSession): Seq[DataFrame] = { + // Transform inputs to maps indexed by version and path and map each version to a CommitInfo + // object. + val versionToCommitInfo = MutableMap.empty[Long, CommitInfo] + val addFilesMap = addFileSpecs.flatMap { spec => + spec.commitInfo.ifDefined { ci => versionToCommitInfo(spec.version) = ci } + spec.actions.map { action => + val key = + FilePathWithTableVersion(action.path, spec.commitInfo, spec.version, spec.timestamp) + key -> action + } + }.toMap + val removeFilesMap = removeFileSpecs.flatMap { spec => + spec.commitInfo.ifDefined { ci => versionToCommitInfo(spec.version) = ci } + spec.actions.map { action => + val key = + FilePathWithTableVersion(action.path, spec.commitInfo, spec.version, spec.timestamp) + key -> action + } + }.toMap + + val finalAddFiles = MutableMap[TableVersion, ListBuffer[AddFile]]() + val finalRemoveFiles = MutableMap[TableVersion, ListBuffer[RemoveFile]]() + + // If a path is only being added, then scan it normally as inserted rows + (addFilesMap.keySet -- removeFilesMap.keySet).foreach { addKey => + finalAddFiles + .getOrElseUpdate(new TableVersion(addKey), ListBuffer()) + .append(addFilesMap(addKey)) + } + + // If a path is only being removed, then scan it normally as removed rows + (removeFilesMap.keySet -- addFilesMap.keySet).foreach { removeKey => + finalRemoveFiles + .getOrElseUpdate(new TableVersion(removeKey), ListBuffer()) + .append(removeFilesMap(removeKey)) + } + + // Convert maps back into Seq[CDCDataSpec] and feed it into a single scan. This will greatly + // reduce the number of tasks.
+ val finalAddFilesSpecs = buildCDCDataSpecSeq(finalAddFiles, versionToCommitInfo) + val finalRemoveFilesSpecs = buildCDCDataSpecSeq(finalRemoveFiles, versionToCommitInfo) + + val dfAddsAndRemoves = ListBuffer[DataFrame]() + + if (finalAddFilesSpecs.nonEmpty) { + dfAddsAndRemoves.append( + scanIndex( + spark, + new CdcAddFileIndex(spark, finalAddFilesSpecs, deltaLog, deltaLog.dataPath, snapshot), + isStreaming)) + } + + if (finalRemoveFilesSpecs.nonEmpty) { + dfAddsAndRemoves.append( + scanIndex( + spark, + new TahoeRemoveFileIndex( + spark, + finalRemoveFilesSpecs, + deltaLog, + deltaLog.dataPath, + snapshot), + isStreaming)) + } + + val dfGeneratedDvScanActions = processDeletionVectorActions( + addFilesMap, + removeFilesMap, + versionToCommitInfo.toMap, + deltaLog, + snapshot, + isStreaming, + spark) + + dfAddsAndRemoves.toSeq ++ dfGeneratedDvScanActions + } + + def processDeletionVectorActions( + addFilesMap: Map[FilePathWithTableVersion, AddFile], + removeFilesMap: Map[FilePathWithTableVersion, RemoveFile], + versionToCommitInfo: Map[Long, CommitInfo], + deltaLog: DeltaLog, + snapshot: SnapshotDescriptor, + isStreaming: Boolean, + spark: SparkSession): Seq[DataFrame] = { + val finalReplaceAddFiles = MutableMap[TableVersion, ListBuffer[AddFile]]() + val finalReplaceRemoveFiles = MutableMap[TableVersion, ListBuffer[RemoveFile]]() + + val dvStore = DeletionVectorStore.createInstance(deltaLog.newDeltaHadoopConf()) + (addFilesMap.keySet intersect removeFilesMap.keySet).foreach { key => + val add = addFilesMap(key) + val remove = removeFilesMap(key) + val generatedActions = generateFileActionsWithInlineDv(add, remove, dvStore, deltaLog) + generatedActions.foreach { + case action: AddFile => + finalReplaceAddFiles + .getOrElseUpdate(new TableVersion(key), ListBuffer()) + .append(action) + case action: RemoveFile => + finalReplaceRemoveFiles + .getOrElseUpdate(new TableVersion(key), ListBuffer()) + .append(action) + case _ => + throw new Exception("Expecting AddFile or RemoveFile.") + } + } + + // We have to build one scan for each version because DVs attached to actions will be + // broadcasted in [[ScanWithDeletionVectors.createBroadcastDVMap]] which is not version-aware. + // Here, one file can have different row index filters in different versions. + val dfs = ListBuffer[DataFrame]() + // Scan for masked rows as change_type = "insert", + // see explanation in [[generateFileActionsWithInlineDv]]. + finalReplaceAddFiles.foreach { case (tableVersion, addFiles) => + val commitInfo = versionToCommitInfo.get(tableVersion.version) + dfs.append( + scanIndex( + spark, + new CdcAddFileIndex( + spark, + Seq(new CDCDataSpec(tableVersion, addFiles.toSeq, commitInfo)), + deltaLog, + deltaLog.dataPath, + snapshot, + rowIndexFilters = + Some(fileActionsToIfNotContainedRowIndexFilters(addFiles.toSeq))), + isStreaming)) + } + + // Scan for masked rows as change_type = "delete", + // see explanation in [[generateFileActionsWithInlineDv]]. + finalReplaceRemoveFiles.foreach { case (tableVersion, removeFiles) => + val commitInfo = versionToCommitInfo.get(tableVersion.version) + dfs.append( + scanIndex( + spark, + new TahoeRemoveFileIndex( + spark, + Seq(new CDCDataSpec(tableVersion, removeFiles.toSeq, commitInfo)), + deltaLog, + deltaLog.dataPath, + snapshot, + rowIndexFilters = + Some(fileActionsToIfNotContainedRowIndexFilters(removeFiles.toSeq))), + isStreaming)) + } + + dfs.toSeq + } + + /** + * Builds a map from commit versions to associated commit timestamps. 
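+ * For example (illustrative values), for start = 4 and end = 5 the result could be
+ * {{{
+ *   Map(4L -> Timestamp.valueOf("2021-01-01 10:00:00"), 5L -> Timestamp.valueOf("2021-01-01 11:00:00"))
+ * }}}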
+ * @param start start commit version + * @param end end commit version + */ + def getTimestampsByVersion( + deltaLog: DeltaLog, + start: Long, + end: Long, + spark: SparkSession): Map[Long, Timestamp] = { + // Correct timestamp values are only available through DeltaHistoryManager.getCommits(). Commit + // info timestamps are wrong, and file modification times are wrong because they need to be + // monotonized first. This just performs a list (we don't read the contents of the files in + // getCommits()) so the performance overhead is minimal. + val monotonizationStart = + math.max(start - DeltaHistoryManager.POTENTIALLY_UNMONOTONIZED_TIMESTAMPS, 0) + val commits = DeltaHistoryManager.getCommits( + deltaLog.store, + deltaLog.logPath, + monotonizationStart, + Some(end + 1), + deltaLog.newDeltaHadoopConf()) + + // Note that the timestamps come from filesystem modification timestamps, so they're + // milliseconds since epoch and we don't need to deal with timezones. + commits.map(f => (f.version -> new Timestamp(f.timestamp))).toMap + } + + /** + * Get the block of change data from start to end Delta log versions (both sides inclusive). + * The returned DataFrame has isStreaming set to false. + * + * @param readSchemaSnapshot The snapshot with the desired schema that will be used to + * serve this CDF batch. It is usually passed upstream from + * e.g. DeltaTableV2 as an effort to stablize the schema used for the + * batch DF. We don't actually use its data. + * If not set, it will fallback to the legacy behavior of using + * whatever deltaLog.unsafeVolatileSnapshot is. This should be + * avoided in production. + */ + def changesToBatchDF( + deltaLog: DeltaLog, + start: Long, + end: Long, + spark: SparkSession, + readSchemaSnapshot: Option[Snapshot] = None, + useCoarseGrainedCDC: Boolean = false, + startVersionSnapshot: Option[SnapshotDescriptor] = None): DataFrame = { + + val changesWithinRange = deltaLog.getChanges(start).takeWhile { case (version, _) => + version <= end + } + changesToDF( + readSchemaSnapshot.getOrElse(deltaLog.unsafeVolatileSnapshot), + start, + end, + changesWithinRange, + spark, + isStreaming = false, + useCoarseGrainedCDC = useCoarseGrainedCDC, + startVersionSnapshot = startVersionSnapshot) + .fileChangeDf + } + + /** + * Build a dataframe from the specified file index. We can't use a DataFrame scan directly on the + * file names because that scan wouldn't include partition columns. + * + * It can optionally take a customReadSchema for the dataframe generated. + */ + protected def scanIndex( + spark: SparkSession, + index: TahoeFileIndexWithSnapshotDescriptor, + isStreaming: Boolean = false): DataFrame = { + + val relation = HadoopFsRelation( + index, + index.partitionSchema, + cdcReadSchema(index.schema), + bucketSpec = None, + new DeltaParquetFileFormat(index.protocol, index.metadata), + options = index.deltaLog.options)(spark) + val plan = LogicalRelation(relation, isStreaming = isStreaming) + Dataset.ofRows(spark, plan) + } + + /** + * Append CDC metadata columns to the provided schema. + */ + def cdcReadSchema(deltaSchema: StructType): StructType = { + deltaSchema + .add(CDC_TYPE_COLUMN_NAME, StringType) + .add(CDC_COMMIT_VERSION, LongType) + .add(CDC_COMMIT_TIMESTAMP, TimestampType) + } + + /** + * Based on the read options passed it indicates whether the read was a cdc read or not. 
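+ *
+ * For example, a batch read such as the following is treated as a CDC read (option names shown
+ * as commonly documented; `readChangeData` is the legacy spelling):
+ * {{{
+ *   spark.read.format("delta")
+ *     .option("readChangeFeed", "true")
+ *     .option("startingVersion", 0)
+ *     .load("/path/to/table")
+ * }}}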
+ */ + def isCDCRead(options: CaseInsensitiveStringMap): Boolean = { + val cdcEnabled = options.containsKey(DeltaDataSource.CDC_ENABLED_KEY) && + options.get(DeltaDataSource.CDC_ENABLED_KEY) == "true" + + val cdcLegacyConfEnabled = options.containsKey(DeltaDataSource.CDC_ENABLED_KEY_LEGACY) && + options.get(DeltaDataSource.CDC_ENABLED_KEY_LEGACY) == "true" + + cdcEnabled || cdcLegacyConfEnabled + } + + /** + * Determine if the metadata provided has cdc enabled or not. + */ + def isCDCEnabledOnTable(metadata: Metadata, spark: SparkSession): Boolean = { + ChangeDataFeedTableFeature.metadataRequiresFeatureToBeEnabled(metadata, spark) + } + + /** + * Given `add` and `remove` actions of the same file, manipulate DVs to get rows that are deleted + * and re-added from `add` to `remove`. + * + * @return One or more [[AddFile]] and [[RemoveFile]], corresponding to CDC change_type "insert" + * and "delete". Rows masked by inline DVs are changed rows. + */ + private def generateFileActionsWithInlineDv( + add: AddFile, + remove: RemoveFile, + dvStore: DeletionVectorStore, + deltaLog: DeltaLog): Seq[FileAction] = { + + val removeDvOpt = Option(remove.deletionVector) + val addDvOpt = Option(add.deletionVector) + + val newActions = ListBuffer[FileAction]() + + // Four cases: + // 1) Remove without DV, add without DV: + // Not possible. This case has been handled before. + // 2) Remove without DV, add with DV1: + // Rows masked by DV1 are deleted. + // 3) Remove with DV1, add without DV: + // Rows masked by DV1 are added. May happen when restoring a table. + // 4) Remove with DV1, add with DV2: + // a) Rows masked by DV2 but not DV1 are deleted. + // b) Rows masked by DV1 but not DV2 are re-added. May happen when restoring a table. + (removeDvOpt, addDvOpt) match { + case (None, None) => + throw new Exception("Expecting one or both of add and remove contain DV.") + case (None, Some(addDv)) => + newActions += remove.copy(deletionVector = addDv) + case (Some(removeDv), None) => + newActions += add.copy(deletionVector = removeDv) + case (Some(removeDv), Some(addDv)) => + val removeBitmap = dvStore.read(removeDv, deltaLog.dataPath) + val addBitmap = dvStore.read(addDv, deltaLog.dataPath) + + // Case 4a + val finalRemovedRowsBitmap = getDeletionVectorsDiff(addBitmap, removeBitmap) + // Case 4b + val finalReAddedRowsBitmap = getDeletionVectorsDiff(removeBitmap, addBitmap) + + val finalRemovedRowsDv = DeletionVectorDescriptor.inlineInLog( + finalRemovedRowsBitmap.serializeAsByteArray(RoaringBitmapArrayFormat.Portable), + finalRemovedRowsBitmap.cardinality) + val finalReAddedRowsDv = DeletionVectorDescriptor.inlineInLog( + finalReAddedRowsBitmap.serializeAsByteArray(RoaringBitmapArrayFormat.Portable), + finalReAddedRowsBitmap.cardinality) + + newActions += remove.copy(deletionVector = finalRemovedRowsDv) + newActions += add.copy(deletionVector = finalReAddedRowsDv) + } + + newActions.toSeq + } + + /** + * Return a map of file paths to IfNotContained row index filters, to keep only the marked rows. + */ + private def fileActionsToIfNotContainedRowIndexFilters( + actions: Seq[FileAction]): Map[String, RowIndexFilterType] = { + actions.map(f => f.path -> RowIndexFilterType.IF_NOT_CONTAINED).toMap + } + + /** + * Get a new [[RoaringBitmapArray]] copy storing values that are in `left` but not in `right`. 
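+ * For example, if `left` marks row indexes {0, 3, 7} and `right` marks {3}, the returned copy
+ * marks {0, 7}.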
+ */ + private def getDeletionVectorsDiff( + left: RoaringBitmapArray, + right: RoaringBitmapArray): RoaringBitmapArray = { + val leftCopy = left.copy() + leftCopy.diff(right) + leftCopy + } + + private def buildCDCDataSpecSeq[T <: FileAction]( + actionsByVersion: MutableMap[TableVersion, ListBuffer[T]], + versionToCommitInfo: MutableMap[Long, CommitInfo] + ): Seq[CDCDataSpec[T]] = actionsByVersion.map { case (fileVersion, addFiles) => + val commitInfo = versionToCommitInfo.get(fileVersion.version) + new CDCDataSpec(fileVersion, addFiles.toSeq, commitInfo) + }.toSeq + + /** + * Represents the changes between some start and end version of a Delta table + * @param fileChangeDf contains all of the file changes (AddFile, RemoveFile, AddCDCFile) + * @param numFiles the number of AddFile + RemoveFile + AddCDCFiles that are in the df + * @param numBytes the total size of the AddFile + RemoveFile + AddCDCFiles that are in the df + */ + case class CDCVersionDiffInfo(fileChangeDf: DataFrame, numFiles: Long, numBytes: Long) + + /** + * Represents a Delta log version, and how the version is determined. + * @param version the determined version. + * @param timestamp the commit timestamp of the determined version. Will be filled when the + * version is determined by timestamp. + */ + private case class ResolvedCDFVersion(version: Long, timestamp: Option[Timestamp]) { + /** Whether this version is resolved by timestamp. */ + def resolvedByTimestamp: Boolean = timestamp.isDefined + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/ConvertUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/ConvertUtils.scala new file mode 100644 index 00000000000..1b9adfb3344 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/ConvertUtils.scala @@ -0,0 +1,323 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands.convert + +import java.lang.reflect.InvocationTargetException + +import org.apache.spark.sql.delta.{DeltaColumnMapping, DeltaErrors, SerializableFileStatus} +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.{DateFormatter, DeltaFileOperations, PartitionUtils, TimestampFormatter} +import org.apache.commons.lang3.exception.ExceptionUtils +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.sql.{AnalysisException, Dataset, SparkSession} +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.Cast +import org.apache.spark.sql.connector.catalog.Table +import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.util.{SerializableConfiguration, Utils} + +object ConvertUtils extends ConvertUtilsBase + +trait ConvertUtilsBase extends DeltaLogging { + + val timestampPartitionPattern = "yyyy-MM-dd HH:mm:ss[.S]" + + var icebergSparkTableClassPath = + "org.apache.spark.sql.delta.commands.convert.IcebergTable" + var icebergLibTableClassPath = "org.apache.iceberg.Table" + + /** + * Creates a source Parquet table for conversion. + * + * @param spark: the spark session to use. + * @param targetDir: the target directory of the Parquet table. + * @param catalogTable: the optional catalog table of the Parquet table. + * @param partitionSchema: the user provided partition schema (if exists) of the Parquet table. + * @return a target Parquet table. + */ + def getParquetTable( + spark: SparkSession, + targetDir: String, + catalogTable: Option[CatalogTable], + partitionSchema: Option[StructType]): ConvertTargetTable = { + val qualifiedDir = getQualifiedPath(spark, new Path(targetDir)).toString + new ParquetTable(spark, qualifiedDir, catalogTable, partitionSchema) + } + + /** + * Creates a source Iceberg table for conversion. + * + * @param spark: the spark session to use. + * @param targetDir: the target directory of the Iceberg table. + * @param sparkTable: the optional V2 table interface of the Iceberg table. + * @param tableSchema: the existing converted Delta table schema (if exists) of the Iceberg table. + * @return a target Iceberg table. 
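+ *
+ * This path is typically exercised by conversions such as (illustrative location):
+ * {{{
+ *   CONVERT TO DELTA iceberg.`/data/events`
+ * }}}
+ * and requires the Iceberg support classes to be on the classpath; otherwise
+ * [[DeltaErrors.icebergClassMissing]] is thrown.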
+ */ + def getIcebergTable( + spark: SparkSession, + targetDir: String, + sparkTable: Option[Table], + tableSchema: Option[StructType]): ConvertTargetTable = { + try { + val clazz = Utils.classForName(icebergSparkTableClassPath) + if (sparkTable.isDefined) { + val constFromTable = clazz.getConstructor( + classOf[SparkSession], + Utils.classForName(icebergLibTableClassPath), + classOf[Option[StructType]]) + val method = sparkTable.get.getClass.getMethod("table") + constFromTable.newInstance(spark, method.invoke(sparkTable.get), tableSchema) + } else { + val baseDir = getQualifiedPath(spark, new Path(targetDir)).toString + val constFromPath = clazz.getConstructor( + classOf[SparkSession], classOf[String], classOf[Option[StructType]]) + constFromPath.newInstance(spark, baseDir, tableSchema) + } + } catch { + case e: ClassNotFoundException => + logError(s"Failed to find Iceberg class", e) + throw DeltaErrors.icebergClassMissing(spark.sparkContext.getConf, e) + case e: InvocationTargetException => + logError(s"Got error when creating an Iceberg Converter", e) + // The better error is within the cause + throw ExceptionUtils.getRootCause(e) + } + } + + /** + * Generates a qualified Hadoop path from a given path. + * + * @param spark: the spark session to use. + * @param path: the raw path used to generate the qualified path. + * @return the qualified path of the provided raw path. + */ + def getQualifiedPath(spark: SparkSession, path: Path): Path = { + // scalastyle:off deltahadoopconfiguration + val sessionHadoopConf = spark.sessionState.newHadoopConf() + // scalastyle:on deltahadoopconfiguration + val fs = path.getFileSystem(sessionHadoopConf) + val qualifiedPath = fs.makeQualified(path) + if (!fs.exists(qualifiedPath)) { + throw DeltaErrors.directoryNotFoundException(qualifiedPath.toString) + } + qualifiedPath + } + + /** + * Generates an AddFile from a ConvertTargetFile for conversion. + * + * @param targetFile: the target file to convert. + * @param basePath: the table directory of the target file. + * @param fs: the file system to access the target file. + * @param conf: the SQL configuration used for the conversion. + * @param partitionSchema: the partition schema of the target file, if it exists. + * @param useAbsolutePath: whether to use an absolute path instead of a relative path in the AddFile. + * @return an AddFile corresponding to the provided ConvertTargetFile.
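+ *
+ * For example (illustrative layout, no column mapping): a file at `base/part=1/data.parquet`
+ * with a user provided partition schema of `part INT` yields an AddFile with the relative path
+ * `part=1/data.parquet` and partitionValues of `Map("part" -> "1")`.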
+ */ + def createAddFile( + targetFile: ConvertTargetFile, + basePath: Path, + fs: FileSystem, + conf: SQLConf, + partitionSchema: Option[StructType], + useAbsolutePath: Boolean = false): AddFile = { + val partitionFields = partitionSchema.map(_.fields.toSeq).getOrElse(Nil) + val partitionColNames = partitionSchema.map(_.fieldNames.toSeq).getOrElse(Nil) + val physicalPartitionColNames = partitionSchema.map(_.map { f => + DeltaColumnMapping.getPhysicalName(f) + }).getOrElse(Nil) + val file = targetFile.fileStatus + val path = file.getHadoopPath + val partition = targetFile.partitionValues.getOrElse { + // partition values are not provided by the source table format, so infer from the file path + val pathStr = file.getHadoopPath.toUri.toString + val dateFormatter = DateFormatter() + val timestampFormatter = + TimestampFormatter(timestampPartitionPattern, java.util.TimeZone.getDefault) + val resolver = conf.resolver + val dir = if (file.isDir) file.getHadoopPath else file.getHadoopPath.getParent + val (partitionOpt, _) = PartitionUtils.parsePartition( + dir, + typeInference = false, + basePaths = Set(basePath), + userSpecifiedDataTypes = Map.empty, + validatePartitionColumns = false, + java.util.TimeZone.getDefault, + dateFormatter, + timestampFormatter) + + partitionOpt.map { partValues => + if (partitionColNames.size != partValues.columnNames.size) { + throw DeltaErrors.unexpectedNumPartitionColumnsFromFileNameException( + pathStr, partValues.columnNames, partitionColNames) + } + + val tz = Option(conf.sessionLocalTimeZone) + // Check if the partition value can be casted to the provided type + if (!conf.getConf(DeltaSQLConf.DELTA_CONVERT_PARTITION_VALUES_IGNORE_CAST_FAILURE)) { + partValues.literals.zip(partitionFields).foreach { case (literal, field) => + if (literal.eval() != null && + Cast(literal, field.dataType, tz, ansiEnabled = false).eval() == null) { + val partitionValue = Cast(literal, StringType, tz, ansiEnabled = false).eval() + val partitionValueStr = Option(partitionValue).map(_.toString).orNull + throw DeltaErrors.castPartitionValueException(partitionValueStr, field.dataType) + } + } + } + + val values = partValues + .literals + .map(l => Cast(l, StringType, tz, ansiEnabled = false).eval()) + .map(Option(_).map(_.toString).orNull) + + partitionColNames.zip(partValues.columnNames).foreach { case (expected, parsed) => + if (!resolver(expected, parsed)) { + throw DeltaErrors.unexpectedPartitionColumnFromFileNameException( + pathStr, parsed, expected) + } + } + physicalPartitionColNames.zip(values).toMap + }.getOrElse { + if (partitionColNames.nonEmpty) { + throw DeltaErrors.unexpectedNumPartitionColumnsFromFileNameException( + pathStr, Seq.empty, partitionColNames) + } + Map[String, String]() + } + } + + val pathStrForAddFile = if (!useAbsolutePath) { + val relativePath = DeltaFileOperations.tryRelativizePath(fs, basePath, path) + assert(!relativePath.isAbsolute, + s"Fail to relativize path $path against base path $basePath.") + relativePath.toUri.toString + } else { + path.toUri.toString + } + + AddFile(pathStrForAddFile, partition, file.length, file.modificationTime, dataChange = true) + } + + /** + * A helper function to check whether a directory should be skipped during conversion. + * + * @param dirName: the directory name to check. + * @return true if directory should be skipped for conversion, otherwise false. 
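+ *
+ * For example, assuming the default hidden-file filter flags names starting with `_` or `.`:
+ * {{{
+ *   ConvertUtils.dirNameFilter("_delta_log")  // true: hidden directory, skip it
+ *   ConvertUtils.dirNameFilter("_year=2021")  // false: partition dir, kept despite the prefix
+ *   ConvertUtils.dirNameFilter("year=2021")   // false: regular partition directory
+ * }}}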
+ */ + def dirNameFilter(dirName: String): Boolean = { + // Allow partition column name starting with underscore and dot + DeltaFileOperations.defaultHiddenFileFilter(dirName) && !dirName.contains("=") + } + + /** + * Lists directories non-recursively in the distributed manner. + * + * @param spark: the spark session to use. + * @param rootDir: the root directory of all directories to list + * @param dirs: the list of directories to list. + * @param serializableConf: the hadoop configure to use. + * @return a dataset of files from the listing. + */ + def listDirsInParallel( + spark: SparkSession, + rootDir: String, + dirs: Seq[String], + serializableConf: SerializableConfiguration): Dataset[SerializableFileStatus] = { + + import org.apache.spark.sql.delta.implicits._ + + val conf = spark.sparkContext.broadcast(serializableConf) + val parallelism = spark.sessionState.conf.parallelPartitionDiscoveryParallelism + + val rdd = spark.sparkContext.parallelize(dirs, math.min(parallelism, dirs.length)) + .mapPartitions { batch => + batch.flatMap { dir => + DeltaFileOperations + .localListDirs(conf.value.value, Seq(dir), recursive = false) + .filter(!_.isDir) + } + } + spark.createDataset(rdd) + } + + /** + * Merges the schemas of the ConvertTargetFiles. + * + * @param spark: the SparkSession used for schema merging. + * @param partitionSchema: the partition schema to be merged with the data schema. + * @param convertTargetFiles: the Dataset of ConvertTargetFiles to be merged. + * @return the merged StructType representing the combined schema of the Parquet files. + * @throws DeltaErrors.failedInferSchema If no schemas are found for merging. + */ + def mergeSchemasInParallel( + spark: SparkSession, + partitionSchema: StructType, + convertTargetFiles: Dataset[ConvertTargetFile]): StructType = { + import org.apache.spark.sql.delta.implicits._ + val partiallyMergedSchemas = + recordFrameProfile("Delta", "ConvertUtils.mergeSchemasInParallel") { + convertTargetFiles.mapPartitions { iterator => + var dataSchema: StructType = StructType(Seq()) + iterator.foreach { file => + try { + dataSchema = SchemaMergingUtils.mergeSchemas(dataSchema, + StructType.fromDDL(file.parquetSchemaDDL.get).asNullable) + } catch { + case cause: AnalysisException => + throw DeltaErrors.failedMergeSchemaFile( + file.fileStatus.path, StructType.fromDDL(file.parquetSchemaDDL.get).treeString, + cause) + } + } + Iterator.single(dataSchema.toDDL) + }.collect().filter(_.nonEmpty) + } + + if (partiallyMergedSchemas.isEmpty) { + throw DeltaErrors.failedInferSchema + } + var mergedSchema: StructType = StructType(Seq()) + partiallyMergedSchemas.foreach { schema => + mergedSchema = SchemaMergingUtils.mergeSchemas(mergedSchema, StructType.fromDDL(schema)) + } + PartitioningUtils.mergeDataAndPartitionSchema( + mergedSchema, + StructType(partitionSchema.fields.toSeq), + spark.sessionState.conf.caseSensitiveAnalysis)._1 + } +} + +/** + * Configuration for fetching Parquet schema. + * + * @param assumeBinaryIsString: whether unannotated BINARY fields should be assumed to be Spark + * SQL [[StringType]] fields. + * @param assumeInt96IsTimestamp: whether unannotated INT96 fields should be assumed to be Spark + * SQL [[TimestampType]] fields. + * @param ignoreCorruptFiles: a boolean indicating whether corrupt files should be ignored during + * schema retrieval. 
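+ *
+ * The config is typically derived from the active session, e.g. (a sketch mirroring how
+ * [[ParquetTable]] builds it):
+ * {{{
+ *   val fetchConfig = ParquetSchemaFetchConfig(
+ *     spark.sessionState.conf.isParquetBinaryAsString,
+ *     spark.sessionState.conf.isParquetINT96AsTimestamp,
+ *     spark.sessionState.conf.ignoreCorruptFiles)
+ * }}}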
+ */ +case class ParquetSchemaFetchConfig( + assumeBinaryIsString: Boolean, + assumeInt96IsTimestamp: Boolean, + ignoreCorruptFiles: Boolean) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/ParquetFileManifest.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/ParquetFileManifest.scala new file mode 100644 index 00000000000..a026278e5b0 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/ParquetFileManifest.scala @@ -0,0 +1,234 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.convert + +import org.apache.spark.sql.delta.{DeltaErrors, SerializableFileStatus} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.{DeltaFileOperations, PartitionUtils} +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetToSparkSchemaConverter} +import org.apache.spark.sql.execution.streaming.MetadataLogFileIndex +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.SerializableConfiguration + +/** A file manifest generated through recursively listing a base path. 
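+ *
+ * Each listed file's Parquet footer is read so its schema can be merged later. A construction
+ * sketch, with all constructor arguments assumed to be in scope:
+ * {{{
+ *   val manifest = new ManualListingFileManifest(
+ *     spark, basePath, partitionSchema, fetchConfig, serializableConf)
+ *   val files: Dataset[ConvertTargetFile] = manifest.allFiles
+ * }}}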
*/ +class ManualListingFileManifest( + spark: SparkSession, + override val basePath: String, + partitionSchema: StructType, + parquetSchemaFetchConfig: ParquetSchemaFetchConfig, + serializableConf: SerializableConfiguration) + extends ConvertTargetFileManifest with DeltaLogging { + + protected def doList(): Dataset[SerializableFileStatus] = { + val conf = spark.sparkContext.broadcast(serializableConf) + DeltaFileOperations + .recursiveListDirs(spark, Seq(basePath), conf, ConvertUtils.dirNameFilter) + .where("!isDir") + } + + override lazy val allFiles: Dataset[ConvertTargetFile] = { + import org.apache.spark.sql.delta.implicits._ + + val conf = spark.sparkContext.broadcast(serializableConf) + val fetchConfig = parquetSchemaFetchConfig + val files = doList().mapPartitions { iter => + val fileStatuses = iter.toSeq + val pathToStatusMapping = fileStatuses.map { fileStatus => + fileStatus.path -> fileStatus + }.toMap + val footerSeq = DeltaFileOperations.readParquetFootersInParallel( + conf.value.value, fileStatuses.map(_.toFileStatus), fetchConfig.ignoreCorruptFiles) + val schemaConverter = new ParquetToSparkSchemaConverter( + assumeBinaryIsString = fetchConfig.assumeBinaryIsString, + assumeInt96IsTimestamp = fetchConfig.assumeInt96IsTimestamp + ) + footerSeq.map { footer => + val fileStatus = pathToStatusMapping(footer.getFile.toString) + val schema = ParquetFileFormat.readSchemaFromFooter(footer, schemaConverter) + ConvertTargetFile(fileStatus, None, Some(schema.toDDL)) + }.toIterator + } + files.cache() + files + } + + override lazy val parquetSchema: Option[StructType] = { + recordDeltaOperationForTablePath(basePath, "delta.convert.schemaInference") { + Some(ConvertUtils.mergeSchemasInParallel(spark, partitionSchema, allFiles)) + } + } + + override def close(): Unit = allFiles.unpersist() +} + +/** A file manifest generated through listing partition paths from Metastore catalog. */ +class CatalogFileManifest( + spark: SparkSession, + override val basePath: String, + catalogTable: CatalogTable, + partitionSchema: StructType, + parquetSchemaFetchConfig: ParquetSchemaFetchConfig, + serializableConf: SerializableConfiguration) + extends ConvertTargetFileManifest with DeltaLogging { + + private val useCatalogSchema = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_CONVERT_USE_CATALOG_SCHEMA) + + // List of partition directories and corresponding partition values. + private lazy val partitionList = { + if (catalogTable.partitionSchema.isEmpty) { + // Not a partitioned table. + Seq(basePath -> Map.empty[String, String]) + } else { + val partitions = spark.sessionState.catalog.listPartitions(catalogTable.identifier) + partitions.map { partition => + val partitionDir = partition.storage.locationUri.map(_.toString()) + .getOrElse { + val partitionDir = + PartitionUtils.getPathFragment(partition.spec, catalogTable.partitionSchema) + basePath.stripSuffix("/") + "/" + partitionDir + } + partitionDir -> partition.spec + } + } + } + + protected def doList(): Dataset[SerializableFileStatus] = { + if (partitionList.isEmpty) { + throw DeltaErrors.convertToDeltaNoPartitionFound(catalogTable.identifier.unquotedString) + } + + ConvertUtils.listDirsInParallel(spark, basePath, partitionList.map(_._1), serializableConf) + } + + override lazy val allFiles: Dataset[ConvertTargetFile] = { + import org.apache.spark.sql.delta.implicits._ + + // Avoid the serialization of this CatalogFileManifest during distributed execution. 
+ val conf = spark.sparkContext.broadcast(serializableConf) + val useParquetSchema = !useCatalogSchema + val dirToPartitionSpec = partitionList.toMap + val fetchConfig = parquetSchemaFetchConfig + + val files = doList().mapPartitions { iter => + val fileStatuses = iter.toSeq + if (useParquetSchema) { + val pathToFile = fileStatuses.map { fileStatus => fileStatus.path -> fileStatus }.toMap + val footerSeq = DeltaFileOperations.readParquetFootersInParallel( + conf.value.value, + fileStatuses.map(_.toFileStatus), + fetchConfig.ignoreCorruptFiles) + val schemaConverter = new ParquetToSparkSchemaConverter( + assumeBinaryIsString = fetchConfig.assumeBinaryIsString, + assumeInt96IsTimestamp = fetchConfig.assumeInt96IsTimestamp + ) + footerSeq.map { footer => + val schema = ParquetFileFormat.readSchemaFromFooter(footer, schemaConverter) + val fileStatus = pathToFile(footer.getFile.toString) + ConvertTargetFile( + fileStatus, + dirToPartitionSpec.get(footer.getFile.getParent.toString), + Some(schema.toDDL)) + }.toIterator + } else { + // TODO: Currently "spark.sql.files.ignoreCorruptFiles" is not respected for + // CatalogFileManifest when catalog schema is used to avoid performance regression. + fileStatuses.map { fileStatus => + ConvertTargetFile( + fileStatus, + dirToPartitionSpec.get(fileStatus.getHadoopPath.getParent.toString), + None) + }.toIterator + } + } + files.cache() + files + } + + override lazy val parquetSchema: Option[StructType] = { + if (useCatalogSchema) { + Some(catalogTable.schema) + } else { + recordDeltaOperationForTablePath(basePath, "delta.convert.schemaInference") { + Some(ConvertUtils.mergeSchemasInParallel(spark, partitionSchema, allFiles)) + } + } + } + + override def close(): Unit = allFiles.unpersist() +} + +/** A file manifest generated from pre-existing parquet MetadataLog. 
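+ * The file list comes from the streaming sink's `_spark_metadata` log, read through
+ * [[MetadataLogFileIndex]], rather than from a directory listing.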
*/ +class MetadataLogFileManifest( + spark: SparkSession, + override val basePath: String, + partitionSchema: StructType, + parquetSchemaFetchConfig: ParquetSchemaFetchConfig, + serializableConf: SerializableConfiguration) + extends ConvertTargetFileManifest with DeltaLogging { + + val index = new MetadataLogFileIndex(spark, new Path(basePath), Map.empty, None) + + protected def doList(): Dataset[SerializableFileStatus] = { + import org.apache.spark.sql.delta.implicits._ + + val rdd = spark.sparkContext.parallelize(index.allFiles()).mapPartitions { _ + .map(SerializableFileStatus.fromStatus) + } + spark.createDataset(rdd) + } + + override lazy val allFiles: Dataset[ConvertTargetFile] = { + import org.apache.spark.sql.delta.implicits._ + + val conf = spark.sparkContext.broadcast(serializableConf) + val fetchConfig = parquetSchemaFetchConfig + + val files = doList().mapPartitions { iter => + val fileStatuses = iter.toSeq + val pathToStatusMapping = fileStatuses.map { fileStatus => + fileStatus.path -> fileStatus + }.toMap + val footerSeq = DeltaFileOperations.readParquetFootersInParallel( + conf.value.value, fileStatuses.map(_.toFileStatus), fetchConfig.ignoreCorruptFiles) + val schemaConverter = new ParquetToSparkSchemaConverter( + assumeBinaryIsString = fetchConfig.assumeBinaryIsString, + assumeInt96IsTimestamp = fetchConfig.assumeInt96IsTimestamp + ) + footerSeq.map { footer => + val fileStatus = pathToStatusMapping(footer.getFile.toString) + val schema = ParquetFileFormat.readSchemaFromFooter(footer, schemaConverter) + ConvertTargetFile(fileStatus, None, Some(schema.toDDL)) + }.toIterator + } + files.cache() + files + } + + override lazy val parquetSchema: Option[StructType] = { + recordDeltaOperationForTablePath(basePath, "delta.convert.schemaInference") { + Some(ConvertUtils.mergeSchemasInParallel(spark, partitionSchema, allFiles)) + } + } + + override def close(): Unit = allFiles.unpersist() +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/ParquetTable.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/ParquetTable.scala new file mode 100644 index 00000000000..7ba4f5897f8 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/ParquetTable.scala @@ -0,0 +1,93 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands.convert + +import org.apache.spark.sql.delta.{DeltaErrors, SerializableFileStatus} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.DeltaFileOperations +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetToSparkSchemaConverter} +import org.apache.spark.sql.execution.streaming.FileStreamSink +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.SerializableConfiguration + +/** + * A target Parquet table for conversion to a Delta table. + * + * @param spark: spark session to use. + * @param basePath: the root directory of the Parquet table. + * @param catalogTable: optional catalog table (if exists) of the Parquet table. + * @param userPartitionSchema: user provided partition schema of the Parquet table. + */ +class ParquetTable( + val spark: SparkSession, + val basePath: String, + val catalogTable: Option[CatalogTable], + val userPartitionSchema: Option[StructType]) extends ConvertTargetTable with DeltaLogging { + + // Validate user provided partition schema if catalogTable is available. + if (catalogTable.isDefined && userPartitionSchema.isDefined + && !catalogTable.get.partitionSchema.equals(userPartitionSchema.get)) { + throw DeltaErrors.unexpectedPartitionSchemaFromUserException( + catalogTable.get.partitionSchema, userPartitionSchema.get) + } + + protected lazy val serializableConf: SerializableConfiguration = { + // scalastyle:off deltahadoopconfiguration + new SerializableConfiguration(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + } + + override val partitionSchema: StructType = { + userPartitionSchema.orElse(catalogTable.map(_.partitionSchema)).getOrElse(new StructType()) + } + + override lazy val numFiles: Long = fileManifest.numFiles + + def tableSchema: StructType = fileManifest.parquetSchema.get + + override val format: String = "parquet" + + val fileManifest: ConvertTargetFileManifest = { + val fetchConfig = ParquetSchemaFetchConfig( + spark.sessionState.conf.isParquetBinaryAsString, + spark.sessionState.conf.isParquetINT96AsTimestamp, + spark.sessionState.conf.ignoreCorruptFiles + ) + if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_CONVERT_USE_METADATA_LOG) && + FileStreamSink.hasMetadata(Seq(basePath), serializableConf.value, spark.sessionState.conf)) { + new MetadataLogFileManifest(spark, basePath, partitionSchema, fetchConfig, serializableConf) + } else if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_CONVERT_USE_CATALOG_PARTITIONS) && + catalogTable.isDefined) { + new CatalogFileManifest( + spark, basePath, catalogTable.get, partitionSchema, fetchConfig, serializableConf) + } else { + new ManualListingFileManifest( + spark, + basePath, + partitionSchema, + fetchConfig, + serializableConf) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/interfaces.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/interfaces.scala new file mode 100644 index 00000000000..df752ecd77f --- /dev/null +++ 
b/spark/src/main/scala/org/apache/spark/sql/delta/commands/convert/interfaces.scala @@ -0,0 +1,87 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.convert + +import java.io.Closeable + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.{DeltaColumnMappingMode, NoMapping, SerializableFileStatus} + +import org.apache.spark.sql.Dataset +import org.apache.spark.sql.types.StructType + +/** + * An interface for the table to be converted to Delta. + */ +trait ConvertTargetTable { + /** The table schema of the target table */ + def tableSchema: StructType + + /** The table properties of the target table */ + def properties: Map[String, String] = Map.empty + + /** The partition schema of the target table */ + def partitionSchema: StructType + + /** The file manifest of the target table */ + def fileManifest: ConvertTargetFileManifest + + /** The number of files from the target table */ + def numFiles: Long + + /** Whether this table requires column mapping to be converted */ + def requiredColumnMappingMode: DeltaColumnMappingMode = NoMapping + + /* The format of the table */ + def format: String + +} + +/** An interface for providing an iterator of files for a table. */ +trait ConvertTargetFileManifest extends Closeable { + /** The base path of a table. Should be a qualified, normalized path. */ + val basePath: String + + /** Return all files as a Dataset for parallelized processing. */ + def allFiles: Dataset[ConvertTargetFile] + + /** Return the active files for a table in sequence */ + def getFiles: Iterator[ConvertTargetFile] = allFiles.toLocalIterator().asScala + + /** Return the number of files for the table */ + def numFiles: Long = allFiles.count() + + /** Return the parquet schema for the table. + * Defined only when the schema cannot be inferred from CatalogTable. + */ + def parquetSchema: Option[StructType] = None +} + +/** + * An interface for the file to be included during conversion. + * + * @param fileStatus the file info + * @param partitionValues partition values of this file that may be available from the source + * table format. If none, the converter will infer partition values from the + * file path, assuming the Hive directory format. + * @param parquetSchemaDDL the Parquet schema DDL associated with the file. + */ +case class ConvertTargetFile( + fileStatus: SerializableFileStatus, + partitionValues: Option[Map[String, String]] = None, + parquetSchemaDDL: Option[String] = None) extends Serializable diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/ClassicMergeExecutor.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/ClassicMergeExecutor.scala new file mode 100644 index 00000000000..dfdd4d76af5 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/ClassicMergeExecutor.scala @@ -0,0 +1,539 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.merge + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile, FileAction} +import org.apache.spark.sql.delta.commands.{DeletionVectorBitmapGenerator, DMLWithDeletionVectorsHelper, MergeIntoCommandBase} +import org.apache.spark.sql.delta.commands.cdc.CDCReader.{CDC_TYPE_COLUMN_NAME, CDC_TYPE_NOT_CDC} +import org.apache.spark.sql.delta.commands.merge.MergeOutputGeneration.{SOURCE_ROW_INDEX_COL, TARGET_ROW_INDEX_COL} +import org.apache.spark.sql.delta.files.TahoeBatchFileIndex +import org.apache.spark.sql.delta.util.SetAccumulator + +import org.apache.spark.sql.{Column, Dataset, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{And, Expression, Literal, Or} +import org.apache.spark.sql.catalyst.plans.logical.DeltaMergeIntoClause +import org.apache.spark.sql.functions.{coalesce, col, count, input_file_name, lit, monotonically_increasing_id, sum} + +/** + * Trait with merge execution in two phases: + * + * Phase 1: Find the input files in target that are touched by the rows that satisfy + * the condition and verify that no two source rows match with the same target row. + * This is implemented as an inner-join using the given condition (see [[findTouchedFiles]]). + * In the special case that there is no update clause we write all the non-matching + * source data as new files and skip phase 2. + * Issues an error message when the ON search_condition of the MERGE statement can match + * a single row from the target table with multiple rows of the source table-reference. + * + * Phase 2: Read the touched files again and write new files with updated and/or inserted rows. + * If there are updates, then use an outer join using the given condition to write the + * updates and inserts (see [[writeAllChanges()]]). If there are no matches for updates, + * only inserts, then write them directly (see [[writeInsertsOnlyWhenNoMatches()]]). + * + * Note, when deletion vectors are enabled, phase 2 is split into two parts: + * 2.a. Read the touched files again and only write modified and new + * rows (see [[writeAllChanges()]]). + * 2.b. Read the touched files and generate deletion vectors for the modified + * rows (see [[writeDVs()]]). + * + * If there are no matches for updates, only inserts, then write them directly + * (see [[writeInsertsOnlyWhenNoMatches()]]). This remains the same when DVs are enabled since there + * are no modified rows. Furthermore, see [[InsertOnlyMergeExecutor]] for the optimized executor + * used in case there are only inserts. + */ +trait ClassicMergeExecutor extends MergeOutputGeneration { + self: MergeIntoCommandBase => + import MergeIntoCommandBase._ + + /** + * Find the target table files that contain the rows that satisfy the merge condition. This is + * implemented as an inner-join between the source query/table and the target table using + * the merge condition.
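+ *
+ * In DataFrame terms the detection boils down to a join of the shape below (a simplified
+ * sketch: the column name is illustrative, and the real plan also attaches a row id and
+ * records the touched file names through an accumulator):
+ * {{{
+ *   sourceDF.join(
+ *     targetDF.withColumn("file_name", input_file_name()), Column(condition), "inner")
+ * }}}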
*/ + protected def findTouchedFiles( + spark: SparkSession, + deltaTxn: OptimisticTransaction + ): (Seq[AddFile], DeduplicateCDFDeletes) = recordMergeOperation( + extraOpType = "findTouchedFiles", + status = "MERGE operation - scanning files for matches", + sqlMetricName = "scanTimeMs") { + + val columnComparator = spark.sessionState.analyzer.resolver + + // Accumulator to collect all the distinct touched files + val touchedFilesAccum = new SetAccumulator[String]() + spark.sparkContext.register(touchedFilesAccum, TOUCHED_FILES_ACCUM_NAME) + + // Prune non-matching files if we don't need to collect them for NOT MATCHED BY SOURCE clauses. + val dataSkippedFiles = + if (notMatchedBySourceClauses.isEmpty) { + deltaTxn.filterFiles(getTargetOnlyPredicates(spark), keepNumRecords = true) + } else { + deltaTxn.filterFiles(filters = Seq(Literal.TrueLiteral), keepNumRecords = true) + } + + // Join the source and target table using the merge condition to find touched files. An inner + // join collects all candidate files for MATCHED clauses, a right outer join also includes + // candidates for NOT MATCHED BY SOURCE clauses. + // In addition, we attach two columns + // - a monotonically increasing row id for target rows to later identify whether the same + // target row is modified by multiple source rows or not + // - the target file name the row is from to later identify the files touched by matched rows + val joinType = if (notMatchedBySourceClauses.isEmpty) "inner" else "right_outer" + + // When there are only MATCHED clauses, after the join we prune files that have no rows that + // satisfy any of the clause conditions. + val matchedPredicate = + if (isMatchedOnly) { + matchedClauses + // An undefined condition (None) is implicitly true + .map(_.condition.getOrElse(Literal.TrueLiteral)) + .reduce((a, b) => Or(a, b)) + } else Literal.TrueLiteral + + // Compute the columns needed for the inner join. + val targetColsNeeded = { + condition.references.map(_.name) ++ deltaTxn.snapshot.metadata.partitionColumns ++ + matchedPredicate.references.map(_.name) + } + + val columnsToDrop = deltaTxn.snapshot.metadata.schema.map(_.name) + .filterNot { field => + targetColsNeeded.exists { name => columnComparator(name, field) } + } + val incrSourceRowCountExpr = incrementMetricAndReturnBool("numSourceRows", valueToReturn = true) + // We can't use filter() directly on the expression because that will prevent + // column pruning. We don't need the SOURCE_ROW_PRESENT_COL so we immediately drop it.
+ val sourceDF = getMergeSource.df + .withColumn(SOURCE_ROW_PRESENT_COL, Column(incrSourceRowCountExpr)) + .filter(SOURCE_ROW_PRESENT_COL) + .drop(SOURCE_ROW_PRESENT_COL) + val targetPlan = + buildTargetPlanWithFiles( + spark, + deltaTxn, + dataSkippedFiles, + columnsToDrop) + val targetDF = Dataset.ofRows(spark, targetPlan) + .withColumn(ROW_ID_COL, monotonically_increasing_id()) + .withColumn(FILE_NAME_COL, input_file_name()) + + val joinToFindTouchedFiles = + sourceDF.join(targetDF, Column(condition), joinType) + + // UDFs to records touched files names and add them to the accumulator + val recordTouchedFileName = + DeltaUDF.intFromStringBoolean { (fileName, shouldRecord) => + if (shouldRecord) { + touchedFilesAccum.add(fileName) + } + 1 + }.asNondeterministic() + + // Process the matches from the inner join to record touched files and find multiple matches + val collectTouchedFiles = joinToFindTouchedFiles + .select(col(ROW_ID_COL), + recordTouchedFileName(col(FILE_NAME_COL), Column(matchedPredicate)).as("one")) + + // Calculate frequency of matches per source row + val matchedRowCounts = collectTouchedFiles.groupBy(ROW_ID_COL).agg(sum("one").as("count")) + + // Get multiple matches and simultaneously collect (using touchedFilesAccum) the file names + import org.apache.spark.sql.delta.implicits._ + val (multipleMatchCount, multipleMatchSum) = matchedRowCounts + .filter("count > 1") + .select(coalesce(count(Column("*")), lit(0)), coalesce(sum("count"), lit(0))) + .as[(Long, Long)] + .collect() + .head + + val hasMultipleMatches = multipleMatchCount > 0 + throwErrorOnMultipleMatches(hasMultipleMatches, spark) + if (hasMultipleMatches) { + // This is only allowed for delete-only queries. + // This query will count the duplicates for numTargetRowsDeleted in Job 2, + // because we count matches after the join and not just the target rows. + // We have to compensate for this by subtracting the duplicates later, + // so we need to record them here. + val duplicateCount = multipleMatchSum - multipleMatchCount + multipleMatchDeleteOnlyOvercount = Some(duplicateCount) + } + + // Get the AddFiles using the touched file names. + val touchedFileNames = touchedFilesAccum.value.iterator().asScala.toSeq + logTrace(s"findTouchedFiles: matched files:\n\t${touchedFileNames.mkString("\n\t")}") + + val nameToAddFileMap = generateCandidateFileMap(targetDeltaLog.dataPath, dataSkippedFiles) + val touchedAddFiles = touchedFileNames.map( + getTouchedFile(targetDeltaLog.dataPath, _, nameToAddFileMap)) + + if (metrics("numSourceRows").value == 0 && (dataSkippedFiles.isEmpty || + dataSkippedFiles.forall(_.numLogicalRecords.getOrElse(0) == 0))) { + // The target table is empty, and the optimizer optimized away the join entirely OR the + // source table is truly empty. In that case, scanning the source table once is the only + // way to get the correct metric. 
+ val numSourceRows = sourceDF.count() + metrics("numSourceRows").set(numSourceRows) + } + + metrics("numTargetFilesBeforeSkipping") += deltaTxn.snapshot.numOfFiles + metrics("numTargetBytesBeforeSkipping") += deltaTxn.snapshot.sizeInBytes + val (afterSkippingBytes, afterSkippingPartitions) = + totalBytesAndDistinctPartitionValues(dataSkippedFiles) + metrics("numTargetFilesAfterSkipping") += dataSkippedFiles.size + metrics("numTargetBytesAfterSkipping") += afterSkippingBytes + metrics("numTargetPartitionsAfterSkipping") += afterSkippingPartitions + val (removedBytes, removedPartitions) = totalBytesAndDistinctPartitionValues(touchedAddFiles) + metrics("numTargetFilesRemoved") += touchedAddFiles.size + metrics("numTargetBytesRemoved") += removedBytes + metrics("numTargetPartitionsRemovedFrom") += removedPartitions + val dedupe = DeduplicateCDFDeletes( + hasMultipleMatches && isCdcEnabled(deltaTxn), + includesInserts) + (touchedAddFiles, dedupe) + } + + /** + * Helper function that produces an expression by combining a sequence of clauses with OR. + * Requires the sequence to be non-empty. + */ + protected def clauseDisjunction(clauses: Seq[DeltaMergeIntoClause]): Expression = { + require(clauses.nonEmpty) + clauses + .map(_.condition.getOrElse(Literal.TrueLiteral)) + .reduceLeft(Or) + } + + /** + * Returns the expression that can be used for selecting the modified rows generated + * by the merge operation. The expression is designed to work irrespective + * of the join type used between the source and target tables. + * + * The expression consists of two parts, one for each of the action clause types that produce + * row modifications: MATCHED, NOT MATCHED BY SOURCE. All actions of the same clause type form + * a disjunctive clause. The result is then conjoined with an expression that filters the rows + * of the particular action clause type. For example: + * + * MERGE INTO t + * USING s + * ON s.id = t.id + * WHEN MATCHED AND id < 5 THEN ... + * WHEN MATCHED AND id > 10 THEN ... + * WHEN NOT MATCHED BY SOURCE AND id > 20 THEN ... + * + * Produces the following expression: + * + * ((s.id = t.id) AND (id < 5 OR id > 10)) + * OR + * ((SOURCE TABLE IS NULL) AND (id > 20)) + */ + protected def generateFilterForModifiedRows(): Expression = { + val matchedExpression = if (matchedClauses.nonEmpty) { + And(Column(condition).expr, clauseDisjunction(matchedClauses)) + } else { + Literal.FalseLiteral + } + + val notMatchedBySourceExpression = if (notMatchedBySourceClauses.nonEmpty) { + val combinedClauses = clauseDisjunction(notMatchedBySourceClauses) + And(col(SOURCE_ROW_PRESENT_COL).isNull.expr, combinedClauses) + } else { + Literal.FalseLiteral + } + + Or(matchedExpression, notMatchedBySourceExpression) + } + + /** + * Returns the expression that can be used for selecting the new rows generated + * by the merge operation. + */ + protected def generateFilterForNewRows(): Expression = { + if (notMatchedClauses.nonEmpty) { + val combinedClauses = clauseDisjunction(notMatchedClauses) + And(col(TARGET_ROW_PRESENT_COL).isNull.expr, combinedClauses) + } else { + Literal.FalseLiteral + } + } + + /** + * Write new files by reading the touched files and updating/inserting data using the source + * query/table. This is implemented using a full-outer-join using the merge condition. + * + * Note that unlike the insert-only code paths with just one control column ROW_DROPPED_COL, this + * method has a second control column CDC_TYPE_COL_NAME used for handling CDC when enabled.
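+ *
+ * The join type is derived from the clauses present: a MATCHED-only merge that does not need
+ * to copy unmodified rows (the deletion-vector path) can use an inner join, whereas having
+ * both NOT MATCHED and NOT MATCHED BY SOURCE clauses requires a full outer join.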
+ */ + protected def writeAllChanges( + spark: SparkSession, + deltaTxn: OptimisticTransaction, + filesToRewrite: Seq[AddFile], + deduplicateCDFDeletes: DeduplicateCDFDeletes, + writeUnmodifiedRows: Boolean): Seq[FileAction] = recordMergeOperation( + extraOpType = if (!writeUnmodifiedRows) { + "writeModifiedRowsOnly" + } else if (shouldOptimizeMatchedOnlyMerge(spark)) { + "writeAllUpdatesAndDeletes" + } else { + "writeAllChanges" + }, + status = s"MERGE operation - Rewriting ${filesToRewrite.size} files", + sqlMetricName = "rewriteTimeMs") { + + val cdcEnabled = isCdcEnabled(deltaTxn) + + require( + !deduplicateCDFDeletes.enabled || cdcEnabled, + "CDF delete duplication is enabled but overall the CDF generation is disabled") + + // Generate a new target dataframe that has same output attributes exprIds as the target plan. + // This allows us to apply the existing resolved update/insert expressions. + val targetPlan = buildTargetPlanWithFiles( + spark, + deltaTxn, + filesToRewrite, + columnsToDrop = Nil) + val baseTargetDF = Dataset.ofRows(spark, targetPlan) + val joinType = if (writeUnmodifiedRows) { + if (shouldOptimizeMatchedOnlyMerge(spark)) { + "rightOuter" + } else { + "fullOuter" + } + } else { + // Since we do not need to write unmodified rows, we can perform stricter joins. + if (isMatchedOnly) { + "inner" + } else if (notMatchedBySourceClauses.isEmpty) { + "leftOuter" + } else if (notMatchedClauses.isEmpty) { + "rightOuter" + } else { + "fullOuter" + } + } + + logDebug(s"""writeAllChanges using $joinType join: + | source.output: ${source.outputSet} + | target.output: ${target.outputSet} + | condition: $condition + | newTarget.output: ${baseTargetDF.queryExecution.logical.outputSet} + """.stripMargin) + + // Expressions to update metrics + val incrSourceRowCountExpr = incrementMetricAndReturnBool( + "numSourceRowsInSecondScan", valueToReturn = true) + val incrNoopCountExpr = incrementMetricAndReturnBool( + "numTargetRowsCopied", valueToReturn = false) + + // Apply an outer join to find both, matches and non-matches. We are adding two boolean fields + // with value `true`, one to each side of the join. Whether this field is null or not after + // the outer join, will allow us to identify whether the joined row was a + // matched inner result or an unmatched result with null on one side. + val joinedBaseDF = { + var sourceDF = getMergeSource.df + if (deduplicateCDFDeletes.enabled && deduplicateCDFDeletes.includesInserts) { + // Add row index for the source rows to identify inserted rows during the cdf deleted rows + // deduplication. See [[deduplicateCDFDeletes()]] + sourceDF = sourceDF.withColumn(SOURCE_ROW_INDEX_COL, monotonically_increasing_id()) + } + val left = sourceDF + .withColumn(SOURCE_ROW_PRESENT_COL, Column(incrSourceRowCountExpr)) + // In some cases, the optimizer (incorrectly) decides to omit the metrics column. + // This causes issues in the source determinism validation. We work around the issue by + // adding a redundant dummy filter to make sure the column is not pruned. 
+ .filter(SOURCE_ROW_PRESENT_COL) + + val targetDF = baseTargetDF + .withColumn(TARGET_ROW_PRESENT_COL, lit(true)) + val right = if (deduplicateCDFDeletes.enabled) { + targetDF.withColumn(TARGET_ROW_INDEX_COL, monotonically_increasing_id()) + } else { + targetDF + } + left.join(right, Column(condition), joinType) + } + + val joinedDF = + if (writeUnmodifiedRows) { + joinedBaseDF + } else { + val filter = Or(generateFilterForModifiedRows(), generateFilterForNewRows()) + joinedBaseDF.filter(Column(filter)) + } + + // Precompute conditions in matched and not matched clauses and generate + // the joinedDF with precomputed columns and clauses with rewritten conditions. + val (joinedAndPrecomputedConditionsDF, clausesWithPrecompConditions) = + generatePrecomputedConditionsAndDF( + joinedDF, + clauses = matchedClauses ++ notMatchedClauses ++ notMatchedBySourceClauses) + + // The target output columns need to be marked as nullable here, as they are going to be used + // to reference the output of an outer join. + val targetOutputCols = getTargetOutputCols(deltaTxn, makeNullable = true) + + // If there are N columns in the target table, the full outer join output will have: + // - N columns for target table + // - ROW_DROPPED_COL to define whether the generated row should be dropped or written + // - if CDC is enabled, also CDC_TYPE_COLUMN_NAME with the type of change being performed + // in a particular row + // (N+1 or N+2 columns depending on CDC disabled / enabled) + val outputColNames = + targetOutputCols.map(_.name) ++ + Seq(ROW_DROPPED_COL) ++ + (if (cdcEnabled) Some(CDC_TYPE_COLUMN_NAME) else None) + + // Copy expressions to copy the existing target row and not drop it (ROW_DROPPED_COL=false), + // and in case CDC is enabled, set it to CDC_TYPE_NOT_CDC. + // (N+1 or N+2 or N+3 columns depending on CDC disabled / enabled and if Row IDs are preserved) + var noopCopyExprs = (targetOutputCols :+ incrNoopCountExpr) ++ + (if (cdcEnabled) Some(CDC_TYPE_NOT_CDC) else None) + + // Generate output columns. + val outputCols = generateWriteAllChangesOutputCols( + targetOutputCols, + outputColNames, + noopCopyExprs, + clausesWithPrecompConditions, + cdcEnabled + ) + + val preOutputDF = if (cdcEnabled) { + generateCdcAndOutputRows( + joinedAndPrecomputedConditionsDF, + outputCols, + outputColNames, + noopCopyExprs, + deduplicateCDFDeletes) + } else { + // change data capture is off, just output the normal data + joinedAndPrecomputedConditionsDF + .select(outputCols: _*) + } + // The filter ensures we only consider rows that are not dropped. + // The drop ensures that the dropped flag does not leak out to the output. + val outputDF = preOutputDF + .filter(s"$ROW_DROPPED_COL = false") + .drop(ROW_DROPPED_COL) + + logDebug("writeAllChanges: join output plan:\n" + outputDF.queryExecution) + + // Write to Delta + val newFiles = writeFiles(spark, deltaTxn, outputDF) + + // Update metrics + val (addedBytes, addedPartitions) = totalBytesAndDistinctPartitionValues(newFiles) + metrics("numTargetFilesAdded") += newFiles.count(_.isInstanceOf[AddFile]) + metrics("numTargetChangeFilesAdded") += newFiles.count(_.isInstanceOf[AddCDCFile]) + metrics("numTargetChangeFileBytes") += newFiles.collect{ case f: AddCDCFile => f.size }.sum + metrics("numTargetBytesAdded") += addedBytes + metrics("numTargetPartitionsAddedTo") += addedPartitions + if (multipleMatchDeleteOnlyOvercount.isDefined) { + // Compensate for counting duplicates during the query. 
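+ // Illustrative example: if 3 source rows matched the same target row in a delete-only merge,
+ // numTargetRowsDeleted was incremented 3 times for that single physical row, so the 2 extra
+ // counts are subtracted below.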
+ val actualRowsDeleted = + metrics("numTargetRowsDeleted").value - multipleMatchDeleteOnlyOvercount.get + assert(actualRowsDeleted >= 0) + metrics("numTargetRowsDeleted").set(actualRowsDeleted) + val actualRowsMatchedDeleted = + metrics("numTargetRowsMatchedDeleted").value - multipleMatchDeleteOnlyOvercount.get + assert(actualRowsMatchedDeleted >= 0) + metrics("numTargetRowsMatchedDeleted").set(actualRowsMatchedDeleted) + } + + newFiles + } + + /** + * Writes Deletion Vectors for rows modified by the merge operation. + */ + protected def writeDVs( + spark: SparkSession, + deltaTxn: OptimisticTransaction, + filesToRewrite: Seq[AddFile]): Seq[FileAction] = recordMergeOperation( + extraOpType = "writeDeletionVectors", + status = s"MERGE operation - Rewriting Deletion Vectors to ${filesToRewrite.size} files", + sqlMetricName = "rewriteTimeMs") { + + val fileIndex = new TahoeBatchFileIndex( + spark, + actionType = "merge", + addFiles = filesToRewrite, + deltaLog = deltaTxn.deltaLog, + path = deltaTxn.deltaLog.dataPath, + snapshot = deltaTxn.snapshot) + + val targetDF = DMLWithDeletionVectorsHelper.createTargetDfForScanningForMatches( + spark, + target, + fileIndex) + + // For writing DVs we are only interested in the target table. When there are no + // notMatchedBySource clauses an inner join is sufficient. Otherwise, we need an rightOuter + // join to include target rows that are not matched. + val joinType = if (notMatchedBySourceClauses.isEmpty) { + "inner" + } else { + "rightOuter" + } + + val joinedDF = getMergeSource.df + .withColumn(SOURCE_ROW_PRESENT_COL, lit(true)) + .join(targetDF, Column(condition), joinType) + + val modifiedRowsFilter = generateFilterForModifiedRows() + val matchedDVResult = + DeletionVectorBitmapGenerator.buildRowIndexSetsForFilesMatchingCondition( + spark, + deltaTxn, + tableHasDVs = true, + targetDf = joinedDF, + candidateFiles = filesToRewrite, + condition = modifiedRowsFilter + ) + + val nameToAddFileMap = generateCandidateFileMap(targetDeltaLog.dataPath, filesToRewrite) + + val touchedFilesWithDVs = DMLWithDeletionVectorsHelper + .findFilesWithMatchingRows(deltaTxn, nameToAddFileMap, matchedDVResult) + + val (dvActions, metricsMap) = DMLWithDeletionVectorsHelper.processUnmodifiedData( + spark, + touchedFilesWithDVs, + deltaTxn.snapshot) + + metrics("numTargetDeletionVectorsAdded") + .set(metricsMap.getOrElse("numDeletionVectorsAdded", 0L)) + metrics("numTargetDeletionVectorsRemoved") + .set(metricsMap.getOrElse("numDeletionVectorsRemoved", 0L)) + metrics("numTargetDeletionVectorsUpdated") + .set(metricsMap.getOrElse("numDeletionVectorsUpdated", 0L)) + + // When DVs are enabled we override metrics related to removed files. 
+ metrics("numTargetFilesRemoved").set(metricsMap.getOrElse("numRemovedFiles", 0L)) + + val fullyRemovedFiles = touchedFilesWithDVs.filter(_.isFullyReplaced()).map(_.fileLogEntry) + val (removedBytes, removedPartitions) = totalBytesAndDistinctPartitionValues(fullyRemovedFiles) + metrics("numTargetBytesRemoved").set(removedBytes) + metrics("numTargetPartitionsRemovedFrom").set(removedPartitions) + + dvActions + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/InsertOnlyMergeExecutor.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/InsertOnlyMergeExecutor.scala new file mode 100644 index 00000000000..7eb6a49345d --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/InsertOnlyMergeExecutor.scala @@ -0,0 +1,269 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.merge + +import org.apache.spark.sql.delta.metric.IncrementMetric +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.{AddFile, FileAction} +import org.apache.spark.sql.delta.commands.MergeIntoCommandBase + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.expressions.{Alias, CaseWhen, Expression, Literal} +import org.apache.spark.sql.catalyst.plans.logical._ + +/** + * Trait with optimized execution for merges that only inserts new data. + * There are two cases for inserts only: when there are no matched clauses for the merge command + * and when there is nothing matched for the merge command even if there are matched clauses. + */ +trait InsertOnlyMergeExecutor extends MergeOutputGeneration { + self: MergeIntoCommandBase => + import MergeIntoCommandBase._ + + /** + * Optimization to write new files by inserting only new data. + * + * When there are no matched clauses for the merge command, data is skipped + * based on the merge condition and left anti join is performed on the source + * data to find the rows to be inserted. + * + * When there is nothing matched for the merge command even if there are matched clauses, + * the source table is used to perform inserting. + * + * @param spark The spark session. + * @param deltaTxn The existing transaction. + * @param filterMatchedRows Whether to filter away matched data or not. 
+ * @param numSourceRowsMetric The name of the metric in which to record the number of source rows + */ + protected def writeOnlyInserts( + spark: SparkSession, + deltaTxn: OptimisticTransaction, + filterMatchedRows: Boolean, + numSourceRowsMetric: String): Seq[FileAction] = { + val extraOpType = if (filterMatchedRows) { + "writeInsertsOnlyWhenNoMatchedClauses" + } else "writeInsertsOnlyWhenNoMatches" + recordMergeOperation( + extraOpType = extraOpType, + status = "MERGE operation - writing new files for only inserts", + sqlMetricName = "rewriteTimeMs") { + + // If nothing to do when not matched, then nothing to insert, that is, no new files to write + if (!includesInserts && !filterMatchedRows) { + performedSecondSourceScan = false + return Seq.empty + } + + // source DataFrame + val mergeSource = getMergeSource + // Expression to update metrics. + val incrSourceRowCountExpr = incrementMetricAndReturnBool(numSourceRowsMetric, true) + val sourceDF = filterSource(mergeSource.df.filter(Column(incrSourceRowCountExpr))) + + var dataSkippedFiles: Option[Seq[AddFile]] = None + val preparedSourceDF = if (filterMatchedRows) { + // This is an optimization of the case when there is no update clause for the merge. + // We perform an left anti join on the source data to find the rows to be inserted. + + // Skip data based on the merge condition + val conjunctivePredicates = splitConjunctivePredicates(condition) + val targetOnlyPredicates = + conjunctivePredicates.filter(_.references.subsetOf(target.outputSet)) + dataSkippedFiles = Some(deltaTxn.filterFiles(targetOnlyPredicates)) + + val targetPlan = buildTargetPlanWithFiles( + spark, + deltaTxn, + dataSkippedFiles.get, + columnsToDrop = Nil) + val targetDF = Dataset.ofRows(spark, targetPlan) + sourceDF.join(targetDF, Column(condition), "leftanti") + } else { + sourceDF + } + + val outputDF = generateInsertsOnlyOutputDF(preparedSourceDF, deltaTxn) + logDebug(s"$extraOpType: output plan:\n" + outputDF.queryExecution) + + val newFiles = writeFiles(spark, deltaTxn, outputDF) + + // Update metrics + if (filterMatchedRows) { + metrics("numTargetFilesBeforeSkipping") += deltaTxn.snapshot.numOfFiles + metrics("numTargetBytesBeforeSkipping") += deltaTxn.snapshot.sizeInBytes + if (dataSkippedFiles.nonEmpty) { + val (afterSkippingBytes, afterSkippingPartitions) = + totalBytesAndDistinctPartitionValues(dataSkippedFiles.get) + metrics("numTargetFilesAfterSkipping") += dataSkippedFiles.get.size + metrics("numTargetBytesAfterSkipping") += afterSkippingBytes + metrics("numTargetPartitionsAfterSkipping") += afterSkippingPartitions + } + metrics("numTargetFilesRemoved") += 0 + metrics("numTargetBytesRemoved") += 0 + metrics("numTargetPartitionsRemovedFrom") += 0 + } + metrics("numTargetFilesAdded") += newFiles.count(_.isInstanceOf[AddFile]) + val (addedBytes, addedPartitions) = totalBytesAndDistinctPartitionValues(newFiles) + metrics("numTargetBytesAdded") += addedBytes + metrics("numTargetPartitionsAddedTo") += addedPartitions + newFiles + } + } + + private def filterSource(source: DataFrame): DataFrame = { + // If there is only one insert clause, then filter out the source rows that do not + // satisfy the clause condition because those rows will not be written out. 
+ if (notMatchedClauses.size == 1 && notMatchedClauses.head.condition.isDefined) { + source.filter(Column(notMatchedClauses.head.condition.get)) + } else { + source + } + } + + /** + * Generate the DataFrame to write out for merges that contains only inserts - either, insert-only + * clauses or inserts when no matches were found. + * + * Specifically, it handles insert clauses in two cases: when there is only one insert clause, + * and when there are multiple insert clauses. + */ + private def generateInsertsOnlyOutputDF( + preparedSourceDF: DataFrame, + deltaTxn: OptimisticTransaction): DataFrame = { + + val targetOutputColNames = getTargetOutputCols(deltaTxn).map(_.name) + + // When there is only one insert clause, there is no need for ROW_DROPPED_COL and + // output df can be generated without CaseWhen. + if (notMatchedClauses.size == 1) { + val outputCols = generateOneInsertOutputCols(targetOutputColNames) + return preparedSourceDF.select(outputCols: _*) + } + + // Precompute conditions in insert clauses and generate source data frame with precomputed + // boolean columns and insert clauses with rewritten conditions. + val (sourceWithPrecompConditions, insertClausesWithPrecompConditions) = + generatePrecomputedConditionsAndDF(preparedSourceDF, notMatchedClauses) + + // Generate output cols. + val outputCols = generateInsertsOnlyOutputCols( + targetOutputColNames, + insertClausesWithPrecompConditions + .collect { case c: DeltaMergeIntoNotMatchedInsertClause => c }) + + sourceWithPrecompConditions + .select(outputCols: _*) + .filter(s"$ROW_DROPPED_COL = false") + .drop(ROW_DROPPED_COL) + } + + /** + * Generate output columns when there is only one insert clause. + * + * It assumes that the caller has already filtered out the source rows (`preparedSourceDF`) + * that do not satisfy the insert clause condition (if any). + * Then it simply applies the insertion action expression to generate + * the output target table rows. + */ + private def generateOneInsertOutputCols( + targetOutputColNames: Seq[String] + ): Seq[Column] = { + + val outputColNames = targetOutputColNames + val outputExprs = notMatchedClauses.head.resolvedActions.map(_.expr) + assert(outputExprs.nonEmpty) + // generate the outputDF without `CaseWhen` expressions. + outputExprs.zip(outputColNames).zipWithIndex.map { case ((expr, name), i) => + val exprAfterPassthru = if (i == 0) { + IncrementMetric(expr, metrics("numTargetRowsInserted")) + } else { + expr + } + new Column(Alias(exprAfterPassthru, name)()) + } + } + + /** + * Generate the output columns for inserts only when there are multiple insert clauses. + * + * It combines all the conditions and corresponding actions expressions + * into complicated CaseWhen expressions - one CaseWhen expression for + * each column in the target row. If a source row does not match any of the clause conditions, + * then the row will be dropped. These CaseWhen expressions basically look like this. 
+ * + * For the i-th output column, + * CASE + * WHEN [insert condition 1] THEN [execute i-th expression of insert action 1] + * WHEN [insert condition 2] THEN [execute i-th expression of insert action 2] + * ELSE [mark the source row to be dropped] + */ + private def generateInsertsOnlyOutputCols( + targetOutputColNames: Seq[String], + insertClausesWithPrecompConditions: Seq[DeltaMergeIntoNotMatchedClause] + ): Seq[Column] = { + // ==== Generate the expressions to generate the target rows from the source rows ==== + // If there are N columns in the target table, there will be N + 1 columns generated + // - N columns for target table + // - ROW_DROPPED_COL to define whether the generated row should be dropped or written out + // To generate these N + 1 columns, we will generate N + 1 expressions + + val outputColNames = targetOutputColNames :+ ROW_DROPPED_COL + val numOutputCols = outputColNames.size + + // Generate the sequence of N + 1 expressions from the sequence of INSERT clauses + val allInsertExprs: Seq[Seq[Expression]] = + insertClausesWithPrecompConditions.map { clause => + clause.resolvedActions.map(_.expr) :+ incrementMetricAndReturnBool( + "numTargetRowsInserted", false) + } + + // Expressions to drop the source row when it does not match any of the insert clause + // conditions. Note that it sets the N+1-th column ROW_DROPPED_COL to true. + val dropSourceRowExprs = + targetOutputColNames.map { _ => Literal(null)} :+ Literal.TrueLiteral + + // Generate the final N + 1 expressions to generate the final target output rows. + // There are multiple not match clauses. Use `CaseWhen` to conditionally evaluate the right + // action expressions to output columns. + val outputExprs: Seq[Expression] = { + val allInsertConditions = + insertClausesWithPrecompConditions.map(_.condition.getOrElse(Literal.TrueLiteral)) + + (0 until numOutputCols).map { i => + // For the i-th output column, generate + // CASE + // WHEN THEN + // WHEN THEN + // ... + // + val conditionalBranches = allInsertConditions.zip(allInsertExprs).map { + case (notMatchCond, notMatchActionExprs) => notMatchCond -> notMatchActionExprs(i) + } + CaseWhen(conditionalBranches, dropSourceRowExprs(i)) + } + } + + assert(outputExprs.size == numOutputCols, + s"incorrect # not matched expressions:\n\t" + seqToString(outputExprs)) + logDebug("prepareInsertsOnlyOutputDF: not matched expressions\n\t" + + seqToString(outputExprs)) + + outputExprs.zip(outputColNames).map { case (expr, name) => + new Column(Alias(expr, name)()) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeIntoMaterializeSource.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeIntoMaterializeSource.scala new file mode 100644 index 00000000000..3ca6838257b --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeIntoMaterializeSource.scala @@ -0,0 +1,478 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.commands.merge + +import scala.annotation.tailrec +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.DeltaSparkPlanUtils + +import org.apache.spark.SparkException +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.catalyst.FileSourceOptions +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{AttributeSet, Expression} +import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.execution.LogicalRDD +import org.apache.spark.sql.execution.datasources.HadoopFsRelation +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.internal.SQLConf._ +import org.apache.spark.sql.sources.BaseRelation +import org.apache.spark.storage.StorageLevel + +/** + * Trait with logic and utilities used for materializing a snapshot of the MERGE source + * in case we can't guarantee deterministic repeated reads from it. + * + * We materialize the source if it is not safe to assume that it's deterministic + * (override with MERGE_MATERIALIZE_SOURCE). + * Otherwise, if the source changes between the phases of the MERGE, it can produce wrong results. + * We use local checkpointing for the materialization, which saves the source as a + * materialized RDD[InternalRow] on the executor local disks. + * + * The 1st concern is that if an executor is lost, this data can be lost. + * When the Spark executor decommissioning API is used, it should attempt to move this + * materialized data safely out before removing the executor. + * + * The 2nd concern is that if an executor is lost for another reason (e.g. spot kill), we will + * still lose that data. To mitigate that, we implement a retry loop. + * The whole Merge operation needs to be restarted from the beginning in this case. + * When we retry, we increase the replication level of the materialized data from 1 to 2 + * (override with MERGE_MATERIALIZE_SOURCE_RDD_STORAGE_LEVEL_RETRY). + * If it still fails after the maximum number of attempts (MERGE_MATERIALIZE_SOURCE_MAX_ATTEMPTS), + * we record the failure for tracking purposes. + * + * The 3rd concern is that executors may run out of disk space because of the extra materialization. + * We record such failures for tracking purposes. + */ +trait MergeIntoMaterializeSource extends DeltaLogging with DeltaSparkPlanUtils { + + import MergeIntoMaterializeSource._ + + /** + * Prepared DataFrame with the source data. + * If needed, it is materialized; @see prepareMergeSource + */ + private var mergeSource: Option[MergeSource] = None + + /** + * If the source was materialized, reference to the checkpointed RDD. + */ + protected var materializedSourceRDD: Option[RDD[InternalRow]] = None + + /** + * Tracks which attempt or retry it is in runWithMaterializedSourceLostRetries. + */ + protected var attempt: Int = 0 + + /** + * Run the Merge with retries in case it detects an RDD block lost error of the + * materialized source RDD. + * It will also record an out-of-disk error if one happens, possibly because of increased disk + * pressure from the materialized source RDD.
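 *
 * Illustrative usage sketch only (not part of the patch; `doRunMerge` is a hypothetical
 * stand-in for the merge body that the concrete command supplies):
 * {{{
 *   runWithMaterializedSourceLostRetries(spark, deltaLog, metrics,
 *     runMergeFunc = session => doRunMerge(session))
 * }}}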
+ */ + protected def runWithMaterializedSourceLostRetries( + spark: SparkSession, + deltaLog: DeltaLog, + metrics: Map[String, SQLMetric], + runMergeFunc: SparkSession => Seq[Row]): Seq[Row] = { + var doRetry = false + var runResult: Seq[Row] = null + attempt = 1 + do { + doRetry = false + metrics.values.foreach(_.reset()) + try { + runResult = runMergeFunc(spark) + } catch { + case NonFatal(ex) => + val isLastAttempt = + attempt == spark.conf.get(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_MAX_ATTEMPTS) + handleExceptionDuringAttempt(ex, isLastAttempt, deltaLog) match { + case RetryHandling.Retry => + logInfo(s"Retrying MERGE with materialized source. Attempt $attempt failed.") + doRetry = true + attempt += 1 + case RetryHandling.ExhaustedRetries => + logError(s"Exhausted retries after $attempt attempts in MERGE with" + + s" materialized source. Logging latest exception.", ex) + throw DeltaErrors.sourceMaterializationFailedRepeatedlyInMerge + case RetryHandling.RethrowException => + logError(s"Fatal error in MERGE with materialized source in attempt $attempt.", ex) + throw ex + } + } finally { + // Remove source from RDD cache (noop if wasn't cached) + materializedSourceRDD.foreach { rdd => + rdd.unpersist() + } + materializedSourceRDD = None + mergeSource = None + } + } while (doRetry) + + runResult + } + + object RetryHandling extends Enumeration { + type Result = Value + + val Retry, RethrowException, ExhaustedRetries = Value + } + + /** + * Handle exception that was thrown from runMerge(). + * Search for errors to log, or that can be handled by retry. + * It may need to descend into ex.getCause() to find the errors, as Spark may have wrapped them. + * @param isLastAttempt indicates that it's the last allowed attempt and there shall be no retry. + * @return true if the exception is handled and merge should retry + * false if the caller should rethrow the error + */ + @tailrec + private def handleExceptionDuringAttempt( + ex: Throwable, + isLastAttempt: Boolean, + deltaLog: DeltaLog): RetryHandling.Result = ex match { + // If Merge failed because the materialized source lost blocks from the + // locally checkpointed RDD, we want to retry the whole operation. + // If a checkpointed RDD block is lost, it throws + // SparkCoreErrors.checkpointRDDBlockIdNotFoundError from LocalCheckpointRDD.compute. + case s: SparkException + if materializedSourceRDD.nonEmpty && + s.getMessage.matches( + mergeMaterializedSourceRddBlockLostErrorRegex(materializedSourceRDD.get.id)) => + log.warn("Materialized Merge source RDD block lost. Merge needs to be restarted. " + + s"This was attempt number $attempt.") + if (!isLastAttempt) { + RetryHandling.Retry + } else { + // Record situations where we lost RDD materialized source blocks, despite retries. + recordDeltaEvent( + deltaLog, + MergeIntoMaterializeSourceError.OP_TYPE, + data = MergeIntoMaterializeSourceError( + errorType = MergeIntoMaterializeSourceErrorType.RDD_BLOCK_LOST.toString, + attempt = attempt, + materializedSourceRDDStorageLevel = + materializedSourceRDD.get.getStorageLevel.toString + ) + ) + RetryHandling.ExhaustedRetries + } + + // Record if we ran out of executor disk space. + case s: SparkException + if s.getMessage.contains("java.io.IOException: No space left on device") => + // Record situations where we ran out of disk space, possibly because of the space took + // by the materialized RDD. 
+ recordDeltaEvent( + deltaLog, + MergeIntoMaterializeSourceError.OP_TYPE, + data = MergeIntoMaterializeSourceError( + errorType = MergeIntoMaterializeSourceErrorType.OUT_OF_DISK.toString, + attempt = attempt, + materializedSourceRDDStorageLevel = + materializedSourceRDD.get.getStorageLevel.toString + ) + ) + RetryHandling.RethrowException + + // Descend into ex.getCause. + // The errors that we are looking for above might have been wrapped inside another exception. + case NonFatal(ex) if ex.getCause() != null => + handleExceptionDuringAttempt(ex.getCause(), isLastAttempt, deltaLog) + + // Descended to the bottom of the causes without finding a retryable error + case _ => RetryHandling.RethrowException + } + + private def planContainsIgnoreUnreadableFilesReadOptions(plan: LogicalPlan): Boolean = { + def relationContainsOptions(relation: BaseRelation): Boolean = { + relation match { + case hdpRelation: HadoopFsRelation => + hdpRelation.options.get(FileSourceOptions.IGNORE_CORRUPT_FILES).contains("true") || + hdpRelation.options.get(FileSourceOptions.IGNORE_MISSING_FILES).contains("true") + case _ => false + } + } + + val res = plan.collectFirst { + case lr: LogicalRelation if relationContainsOptions(lr.relation) => lr + } + res.nonEmpty + } + + private def ignoreUnreadableFilesConfigsAreSet(plan: LogicalPlan, spark: SparkSession) + : Boolean = { + spark.conf.get(IGNORE_MISSING_FILES) || spark.conf.get(IGNORE_CORRUPT_FILES) || + planContainsIgnoreUnreadableFilesReadOptions(plan) + } + + /** + * @return pair of boolean whether source should be materialized + * and the source materialization reason + */ + protected def shouldMaterializeSource( + spark: SparkSession, source: LogicalPlan, isInsertOnly: Boolean + ): (Boolean, MergeIntoMaterializeSourceReason.MergeIntoMaterializeSourceReason) = { + val materializeType = spark.conf.get(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE) + val forceMaterializationWithUnreadableFiles = + spark.conf.get(DeltaSQLConf.MERGE_FORCE_SOURCE_MATERIALIZATION_WITH_UNREADABLE_FILES) + import DeltaSQLConf.MergeMaterializeSource._ + val checkDeterministicOptions = + DeltaSparkPlanUtils.CheckDeterministicOptions(allowDeterministicUdf = true) + materializeType match { + case ALL => + (true, MergeIntoMaterializeSourceReason.MATERIALIZE_ALL) + case NONE => + (false, MergeIntoMaterializeSourceReason.NOT_MATERIALIZED_NONE) + case AUTO => + if (isInsertOnly && spark.conf.get(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED)) { + (false, MergeIntoMaterializeSourceReason.NOT_MATERIALIZED_AUTO_INSERT_ONLY) + } else if (!planContainsOnlyDeltaScans(source)) { + (true, MergeIntoMaterializeSourceReason.NON_DETERMINISTIC_SOURCE_NON_DELTA) + } else if (!planIsDeterministic(source, checkDeterministicOptions)) { + (true, MergeIntoMaterializeSourceReason.NON_DETERMINISTIC_SOURCE_OPERATORS) + // Force source materialization if Spark configs IGNORE_CORRUPT_FILES, + // IGNORE_MISSING_FILES or file source read options FileSourceOptions.IGNORE_CORRUPT_FILES + // FileSourceOptions.IGNORE_MISSING_FILES are enabled on the source. + // This is done so to prevent irrecoverable data loss or unexpected results. + } else if (forceMaterializationWithUnreadableFiles && + ignoreUnreadableFilesConfigsAreSet(source, spark)) { + (true, MergeIntoMaterializeSourceReason.IGNORE_UNREADABLE_FILES_CONFIGS_ARE_SET) + } else { + (false, MergeIntoMaterializeSourceReason.NOT_MATERIALIZED_AUTO) + } + case _ => + // If the config is invalidly set, also materialize. 
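// Editorial note (hedged): the recognized settings handled above are "auto", "all" and "none";
// for example, a session-level override such as
//   spark.conf.set(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key, "all")
// forces materialization regardless of the source plan, while any unrecognized value lands here.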
+ (true, MergeIntoMaterializeSourceReason.INVALID_CONFIG) + } + } + /** + * If source needs to be materialized, prepare the materialized dataframe in sourceDF + * Otherwise, prepare regular dataframe. + * @return the source materialization reason + */ + protected def prepareMergeSource( + spark: SparkSession, + source: LogicalPlan, + condition: Expression, + matchedClauses: Seq[DeltaMergeIntoMatchedClause], + notMatchedClauses: Seq[DeltaMergeIntoNotMatchedClause], + isInsertOnly: Boolean): Unit = { + val (materialize, materializeReason) = + shouldMaterializeSource(spark, source, isInsertOnly) + if (!materialize) { + // Does not materialize, simply return the dataframe from source plan + mergeSource = Some( + MergeSource( + df = Dataset.ofRows(spark, source), + isMaterialized = false, + materializeReason = materializeReason + ) + ) + return + } + + val referencedSourceColumns = + getReferencedSourceColumns(source, condition, matchedClauses, notMatchedClauses) + // When we materialize the source, we want to make sure that columns got pruned before caching. + val sourceWithSelectedColumns = Project(referencedSourceColumns, source) + val baseSourcePlanDF = Dataset.ofRows(spark, sourceWithSelectedColumns) + + // Caches the source in RDD cache using localCheckpoint, which cuts away the RDD lineage, + // which shall ensure that the source cannot be recomputed and thus become inconsistent. + val checkpointedSourcePlanDF = baseSourcePlanDF + // Set eager=false for now, even if we should be doing eager, so that we can set the storage + // level before executing. + .localCheckpoint(eager = false) + + // We have to reach through the crust and into the plan of the checkpointed DF + // to get the RDD that was actually checkpointed, to be able to unpersist it later... + var checkpointedPlan = checkpointedSourcePlanDF.queryExecution.analyzed + val rdd = checkpointedPlan.asInstanceOf[LogicalRDD].rdd + materializedSourceRDD = Some(rdd) + rdd.setName("mergeMaterializedSource") + + // We should still keep the hints from the input plan. + checkpointedPlan = addHintsToPlan(source, checkpointedPlan) + + mergeSource = Some( + MergeSource( + df = Dataset.ofRows(spark, checkpointedPlan), + isMaterialized = true, + materializeReason = materializeReason + ) + ) + + + // Sets appropriate StorageLevel + val storageLevel = StorageLevel.fromString( + if (attempt == 1) { + spark.conf.get(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_RDD_STORAGE_LEVEL) + } else { + // If it failed the first time, potentially use a different storage level on retry. + spark.conf.get(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_RDD_STORAGE_LEVEL_RETRY) + } + ) + rdd.persist(storageLevel) + + // WARNING: if eager == false, the source used during the first Spark Job that uses this may + // still be inconsistent with source materialized afterwards. + // This is because doCheckpoint that finalizes the lazy checkpoint is called after the Job + // that triggered the lazy checkpointing finished. + // If blocks were lost during that job, they may still get recomputed and changed compared + // to how they were used during the execution of the job. + if (spark.conf.get(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_EAGER)) { + // Force the evaluation of the `rdd`, since we cannot access `doCheckpoint()` from here. 
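// Editorial sketch (hedged) of the lazy-checkpoint pattern used above, shown with a throwaway
// DataFrame rather than the merge source:
//   val df = spark.range(10).toDF("id").localCheckpoint(eager = false)
//   df.queryExecution.analyzed.asInstanceOf[LogicalRDD].rdd.persist(StorageLevel.DISK_ONLY)
//   df.count() // the first action finalizes the checkpoint, like the no-op job below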
+ rdd + .mapPartitions(_ => Iterator.empty.asInstanceOf[Iterator[InternalRow]]) + .foreach((_: InternalRow) => ()) + assert(rdd.isCheckpointed) + } + + logDebug(s"Materializing MERGE with pruned columns $referencedSourceColumns.") + logDebug(s"Materialized MERGE source plan:\n${getMergeSource.df.queryExecution}") + } + + /** Returns the prepared merge source. */ + protected def getMergeSource: MergeSource = mergeSource match { + case Some(source) => source + case None => throw new IllegalStateException( + "mergeSource was not initialized! Call prepareMergeSource before.") + } + + private def addHintsToPlan(sourcePlan: LogicalPlan, plan: LogicalPlan): LogicalPlan = { + val hints = EliminateResolvedHint.extractHintsFromPlan(sourcePlan)._2 + // This follows similar code in CacheManager from https://github.com/apache/spark/pull/24580 + if (hints.nonEmpty) { + // The returned hint list is in top-down order, we should create the hint nodes from + // right to left. + val planWithHints = + hints.foldRight[LogicalPlan](plan) { case (hint, p) => + ResolvedHint(p, hint) + } + planWithHints + } else { + plan + } + } +} + +object MergeIntoMaterializeSource { + case class MergeSource( + df: DataFrame, + isMaterialized: Boolean, + materializeReason: MergeIntoMaterializeSourceReason.MergeIntoMaterializeSourceReason) { + assert(!isMaterialized || + MergeIntoMaterializeSourceReason.MATERIALIZED_REASONS.contains(materializeReason)) + } + + // This depends on SparkCoreErrors.checkpointRDDBlockIdNotFoundError msg + def mergeMaterializedSourceRddBlockLostErrorRegex(rddId: Int): String = + s"(?s).*Checkpoint block rdd_${rddId}_[0-9]+ not found!.*" + + /** + * @return The columns of the source plan that are used in this MERGE + */ + private def getReferencedSourceColumns( + source: LogicalPlan, + condition: Expression, + matchedClauses: Seq[DeltaMergeIntoMatchedClause], + notMatchedClauses: Seq[DeltaMergeIntoNotMatchedClause]) = { + val conditionCols = condition.references + val matchedCondCols = matchedClauses.flatMap(_.condition).flatMap(_.references) + val notMatchedCondCols = notMatchedClauses.flatMap(_.condition).flatMap(_.references) + val matchedActionsCols = matchedClauses + .flatMap(_.resolvedActions) + .flatMap(_.expr.references) + val notMatchedActionsCols = notMatchedClauses + .flatMap(_.resolvedActions) + .flatMap(_.expr.references) + val allCols = AttributeSet( + conditionCols ++ + matchedCondCols ++ + notMatchedCondCols ++ + matchedActionsCols ++ + notMatchedActionsCols) + + source.output.filter(allCols.contains) + } +} + +/** + * Enumeration with possible reasons that source may be materialized in a MERGE command. + */ +object MergeIntoMaterializeSourceReason extends Enumeration { + type MergeIntoMaterializeSourceReason = Value + // It was determined to not materialize on auto config. + val NOT_MATERIALIZED_AUTO = Value("notMaterializedAuto") + // Config was set to never materialize source. + val NOT_MATERIALIZED_NONE = Value("notMaterializedNone") + // Insert only merge is single pass, no need for materialization + val NOT_MATERIALIZED_AUTO_INSERT_ONLY = Value("notMaterializedAutoInsertOnly") + // Config was set to always materialize source. + val MATERIALIZE_ALL = Value("materializeAll") + // The source query is considered non-deterministic, because it contains a non-delta scan. + val NON_DETERMINISTIC_SOURCE_NON_DELTA = Value("materializeNonDeterministicSourceNonDelta") + // The source query is considered non-deterministic, because it contains non-deterministic + // operators. 
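// Editorial illustration (hedged): a source such as
//   spark.sql("SELECT * FROM source_tbl WHERE rand() < 0.5")
// falls under this reason, while a plain Parquet or JSON scan falls under the
// non-Delta reason above.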
+ val NON_DETERMINISTIC_SOURCE_OPERATORS = Value("materializeNonDeterministicSourceOperators") + // Either spark configs to ignore unreadable files are set or the source plan contains relations + // with ignore unreadable files options. + val IGNORE_UNREADABLE_FILES_CONFIGS_ARE_SET = + Value("materializeIgnoreUnreadableFilesConfigsAreSet") + // Materialize when the configuration is invalid + val INVALID_CONFIG = Value("invalidConfigurationFailsafe") + // Catch-all case. + val UNKNOWN = Value("unknown") + + // Set of reasons that result in source materialization. + final val MATERIALIZED_REASONS: Set[MergeIntoMaterializeSourceReason] = Set( + MATERIALIZE_ALL, + NON_DETERMINISTIC_SOURCE_NON_DELTA, + NON_DETERMINISTIC_SOURCE_OPERATORS, + IGNORE_UNREADABLE_FILES_CONFIGS_ARE_SET, + INVALID_CONFIG + ) +} + +/** + * Structure with data for "delta.dml.merge.materializeSourceError" event. + * Note: We log only errors that we want to track (out of disk or lost RDD blocks). + */ +case class MergeIntoMaterializeSourceError( + errorType: String, + attempt: Int, + materializedSourceRDDStorageLevel: String +) + +object MergeIntoMaterializeSourceError { + val OP_TYPE = "delta.dml.merge.materializeSourceError" +} + +object MergeIntoMaterializeSourceErrorType extends Enumeration { + type MergeIntoMaterializeSourceError = Value + val RDD_BLOCK_LOST = Value("materializeSourceRDDBlockLostRetriesFailure") + val OUT_OF_DISK = Value("materializeSourceOutOfDiskFailure") +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeOutputGeneration.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeOutputGeneration.scala new file mode 100644 index 00000000000..d5a74ed118e --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeOutputGeneration.scala @@ -0,0 +1,525 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.merge + +import scala.collection.mutable + +import org.apache.spark.sql.delta.commands.MergeIntoCommandBase +import org.apache.spark.sql.delta.commands.cdc.CDCReader + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.functions._ + +/** + * Contains logic to transform the merge clauses into expressions that can be evaluated to obtain + * the output of the merge operation. + */ +trait MergeOutputGeneration { self: MergeIntoCommandBase => + import CDCReader._ + import MergeIntoCommandBase._ + import MergeOutputGeneration._ + + /** + * Precompute conditions in MATCHED and NOT MATCHED clauses and generate the source + * data frame with precomputed boolean columns. + * @param sourceDF the source DataFrame. + * @param clauses the merge clauses to precompute. 
+ * @return Generated sourceDF with precomputed boolean columns, matched clauses with + * possible rewritten clause conditions, insert clauses with possible rewritten + * clause conditions + */ + protected def generatePrecomputedConditionsAndDF( + sourceDF: DataFrame, + clauses: Seq[DeltaMergeIntoClause]): (DataFrame, Seq[DeltaMergeIntoClause]) = { + // + // ==== Precompute conditions in MATCHED and NOT MATCHED clauses ==== + // If there are conditions in the clauses, each condition will be computed once for every + // column (and obviously for every row) within the per-column CaseWhen expressions. Since, the + // conditions can be arbitrarily expensive, it is likely to be more efficient to + // precompute them into boolean columns and use these new columns in the CaseWhen exprs. + // Then each condition will be computed only once per row, and the resultant boolean reused + // for all the columns in the row. + // + val preComputedClauseConditions = new mutable.ArrayBuffer[(String, Expression)]() + + // Rewrite clause condition into a simple lookup of precomputed column + def rewriteCondition[T <: DeltaMergeIntoClause](clause: T): T = { + clause.condition match { + case Some(conditionExpr) => + val colName = + s"""_${clause.clauseType}${PRECOMPUTED_CONDITION_COL} + |${preComputedClauseConditions.length}_ + |""".stripMargin.replaceAll("\n", "") // ex: _update_condition_0_ + preComputedClauseConditions += ((colName, conditionExpr)) + clause.makeCopy(Array(Some(UnresolvedAttribute(colName)), clause.actions)).asInstanceOf[T] + case None => clause + } + } + + // Get the clauses with possibly rewritten clause conditions. + // This will automatically populate the `preComputedClauseConditions` + // (as part of `rewriteCondition`) + val clausesWithPrecompConditions = clauses.map(rewriteCondition) + + // Add the columns to the given `sourceDF` to precompute clause conditions + val sourceWithPrecompConditions = { + val newCols = preComputedClauseConditions.map { case (colName, conditionExpr) => + Column(conditionExpr).as(colName) + }.toSeq + sourceDF.select(col("*") +: newCols: _*) + } + (sourceWithPrecompConditions, clausesWithPrecompConditions) + } + + /** + * Generate the expressions to process full-outer join output and generate target rows. + * + * To generate these N + 2 columns, we generate N + 2 expressions and apply them + * on the joinedDF. The CDC column will be either used for CDC generation or dropped before + * performing the final write, and the other column will always be dropped after executing the + * increment metric expression and filtering on ROW_DROPPED_COL. 
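 *
 * For illustration (a hedged sketch with a made-up target schema `(key, value)` and CDC enabled),
 * the generated output columns would be:
 * {{{
 *   key, value, <ROW_DROPPED_COL>, <CDC_TYPE_COLUMN_NAME>
 * }}}
 * i.e. the N target columns followed by the bookkeeping columns described above.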
+ */ + protected def generateWriteAllChangesOutputCols( + targetOutputCols: Seq[Expression], + outputColNames: Seq[String], + noopCopyExprs: Seq[Expression], + clausesWithPrecompConditions: Seq[DeltaMergeIntoClause], + cdcEnabled: Boolean, + shouldCountDeletedRows: Boolean = true): IndexedSeq[Column] = { + + val numOutputCols = outputColNames.size + + // ==== Generate N + 2 (N + 4 preserving Row Tracking) expressions for MATCHED clauses ==== + val processedMatchClauses: Seq[ProcessedClause] = generateAllActionExprs( + targetOutputCols, + clausesWithPrecompConditions.collect { case c: DeltaMergeIntoMatchedClause => c }, + cdcEnabled, + shouldCountDeletedRows) + val matchedExprs: Seq[Expression] = generateClauseOutputExprs( + numOutputCols, + processedMatchClauses, + noopCopyExprs) + + // N + 1 (or N + 2 with CDC, N + 4 preserving Row Tracking and CDC) expressions to delete the + // unmatched source row when it should not be inserted. `target.output` will produce NULLs + // which will get deleted eventually. + val deleteSourceRowExprs = (targetOutputCols :+ Literal.TrueLiteral) ++ + (if (cdcEnabled) Seq(CDC_TYPE_NOT_CDC) else Nil) + + // ==== Generate N + 2 (N + 4 preserving Row Tracking) expressions for NOT MATCHED clause ==== + val processedNotMatchClauses: Seq[ProcessedClause] = generateAllActionExprs( + targetOutputCols, + clausesWithPrecompConditions.collect { case c: DeltaMergeIntoNotMatchedClause => c }, + cdcEnabled, + shouldCountDeletedRows) + val notMatchedExprs: Seq[Expression] = generateClauseOutputExprs( + numOutputCols, + processedNotMatchClauses, + deleteSourceRowExprs) + + // === Generate N + 2 (N + 4 with Row Tracking) expressions for NOT MATCHED BY SOURCE clause === + val processedNotMatchBySourceClauses: Seq[ProcessedClause] = generateAllActionExprs( + targetOutputCols, + clausesWithPrecompConditions.collect { case c: DeltaMergeIntoNotMatchedBySourceClause => c }, + cdcEnabled, + shouldCountDeletedRows) + val notMatchedBySourceExprs: Seq[Expression] = generateClauseOutputExprs( + numOutputCols, + processedNotMatchBySourceClauses, + noopCopyExprs) + + // ==== Generate N + 2 (N + 4 preserving Row Tracking) expressions that invokes the MATCHED, + // NOT MATCHED and NOT MATCHED BY SOURCE expressions ==== + // That is, conditionally invokes them based on whether there was a match in the outer join. + + // Predicates to check whether there was a match in the full outer join. + val ifSourceRowNull = col(SOURCE_ROW_PRESENT_COL).isNull.expr + val ifTargetRowNull = col(TARGET_ROW_PRESENT_COL).isNull.expr + + val outputCols = outputColNames.zipWithIndex.map { case (name, i) => + // Coupled with the clause conditions, the resultant possibly-nested CaseWhens can + // be the following for every i-th column. (In the case with single matched/not-matched + // clauses, instead of nested CaseWhens, there will be If/Else.) + // + // CASE WHEN (source row is null) + // CASE WHEN + // THEN + // WHEN + // THEN + // ... + // ELSE + // + // WHEN (target row is null) + // THEN + // CASE WHEN + // THEN + // WHEN + // THEN + // ... + // ELSE + // + // ELSE (both source and target row are not null) + // CASE WHEN + // THEN + // WHEN + // THEN + // ... 
+ // ELSE + // + val caseWhen = CaseWhen(Seq( + ifSourceRowNull -> notMatchedBySourceExprs(i), + ifTargetRowNull -> notMatchedExprs(i)), + /* otherwise */ matchedExprs(i)) + Column(Alias(caseWhen, name)()) + } + logDebug("writeAllChanges: join output expressions\n\t" + seqToString(outputCols.map(_.expr))) + outputCols + }.toIndexedSeq + + /** + * Represents a merge clause after its condition and action expressions have been processed before + * generating the final output expression. + * @param condition Optional precomputed condition. + * @param actions List of output expressions generated from every action of the clause. + */ + protected case class ProcessedClause(condition: Option[Expression], actions: Seq[Expression]) + + /** + * Generate expressions for every output column and every merge clause based on the corresponding + * UPDATE, DELETE and/or INSERT action(s). + * @param targetOutputCols List of output column expressions from the target table. Used to + * generate CDC data for DELETE. + * @param clausesWithPrecompConditions List of merge clauses with precomputed conditions. Action + * expressions are generated for each of these clauses. + * @param cdcEnabled Whether the generated expressions should include CDC information. + * @param shouldCountDeletedRows Whether metrics for number of deleted rows should be incremented + * here. + * @return For each merge clause, a list of [[ProcessedClause]] each with a precomputed + * condition and N+2 action expressions (N output columns + [[ROW_DROPPED_COL]] + + * [[CDC_TYPE_COLUMN_NAME]]) to apply on a row when that clause matches. + */ + protected def generateAllActionExprs( + targetOutputCols: Seq[Expression], + clausesWithPrecompConditions: Seq[DeltaMergeIntoClause], + cdcEnabled: Boolean, + shouldCountDeletedRows: Boolean): Seq[ProcessedClause] = { + clausesWithPrecompConditions.map { clause => + val actions = clause match { + // Seq of up to N+3 expressions to generate output rows based on the UPDATE, DELETE and/or + // INSERT action(s) + case u: DeltaMergeIntoMatchedUpdateClause => + val incrCountExpr = incrementMetricsAndReturnBool( + names = Seq("numTargetRowsUpdated", "numTargetRowsMatchedUpdated"), + valueToReturn = false) + // Generate update expressions and set ROW_DROPPED_COL = false + u.resolvedActions.map(_.expr) ++ + Seq(incrCountExpr) ++ + (if (cdcEnabled) Some(Literal(CDC_TYPE_UPDATE_POSTIMAGE)) else None) + case u: DeltaMergeIntoNotMatchedBySourceUpdateClause => + val incrCountExpr = incrementMetricsAndReturnBool( + names = Seq("numTargetRowsUpdated", "numTargetRowsNotMatchedBySourceUpdated"), + valueToReturn = false) + // Generate update expressions and set ROW_DROPPED_COL = false + u.resolvedActions.map(_.expr) ++ + Seq(incrCountExpr) ++ + (if (cdcEnabled) Some(Literal(CDC_TYPE_UPDATE_POSTIMAGE)) else None) + case _: DeltaMergeIntoMatchedDeleteClause => + val incrCountExpr = { + if (shouldCountDeletedRows) { + incrementMetricsAndReturnBool( + names = Seq("numTargetRowsDeleted", "numTargetRowsMatchedDeleted"), + valueToReturn = true) + } else { + Literal.TrueLiteral + } + } + // Generate expressions to set the ROW_DROPPED_COL = true and mark as a DELETE + targetOutputCols ++ + Seq(incrCountExpr) ++ + (if (cdcEnabled) Some(CDC_TYPE_DELETE) else None) + case _: DeltaMergeIntoNotMatchedBySourceDeleteClause => + val incrCountExpr = { + if (shouldCountDeletedRows) { + incrementMetricsAndReturnBool( + names = Seq("numTargetRowsDeleted", "numTargetRowsNotMatchedBySourceDeleted"), + valueToReturn = true) + } else { + 
Literal.TrueLiteral + } + } + // Generate expressions to set the ROW_DROPPED_COL = true and mark as a DELETE + targetOutputCols ++ + Seq(incrCountExpr) ++ + (if (cdcEnabled) Some(CDC_TYPE_DELETE) else None) + case i: DeltaMergeIntoNotMatchedInsertClause => + val incrInsertedCountExpr = incrementMetricsAndReturnBool( + names = Seq("numTargetRowsInserted"), + valueToReturn = false) + i.resolvedActions.map(_.expr) ++ + Seq(incrInsertedCountExpr) ++ + (if (cdcEnabled) Some(Literal(CDC_TYPE_INSERT)) else None) + } + ProcessedClause(clause.condition, actions) + } + } + + /** + * Generate the output expression for each output column to apply the correct action for a type of + * merge clause. For each output column, the resulting expression dispatches the correct action + * based on all clause conditions. + * @param numOutputCols Number of output columns. + * @param clauses List of preprocessed merge clauses to bind together. + * @param noopExprs Default expression to apply when no condition holds. + * @return A list of one expression per output column to apply for a type of merge clause. + */ + protected def generateClauseOutputExprs( + numOutputCols: Int, + clauses: Seq[ProcessedClause], + noopExprs: Seq[Expression]): Seq[Expression] = { + val clauseExprs = if (clauses.isEmpty) { + // Nothing to update or delete + noopExprs + } else { + if (clauses.head.condition.isEmpty) { + // Only one clause without any condition, so the corresponding action expressions + // can be evaluated directly to generate the output columns. + clauses.head.actions + } else if (clauses.length == 1) { + // Only one clause _with_ a condition, so generate IF/THEN instead of CASE WHEN. + // + // For the i-th output column, generate + // IF THEN + // ELSE + // + val condition = clauses.head.condition.get + clauses.head.actions.zip(noopExprs).map { case (a, noop) => If(condition, a, noop) } + } else { + // There are multiple clauses. Use `CaseWhen` to conditionally evaluate the right + // action expressions to output columns + Seq.range(0, numOutputCols).map { i => + // For the i-th output column, generate + // CASE + // WHEN THEN + // WHEN THEN + // ... + // ELSE + // + val conditionalBranches = clauses.map { precomp => + precomp.condition.getOrElse(Literal.TrueLiteral) -> precomp.actions(i) + } + CaseWhen(conditionalBranches, Some(noopExprs(i))) + } + } + } + assert(clauseExprs.size == numOutputCols, + s"incorrect # expressions:\n\t" + seqToString(clauseExprs)) + logDebug(s"writeAllChanges: expressions\n\t" + seqToString(clauseExprs)) + clauseExprs + } + + /** + * Build the full output as an array of packed rows, then explode into the final result. Based + * on the CDC type as originally marked, we produce both rows for the CDC_TYPE_NOT_CDC partition + * to be written to the main table and rows for the CDC partitions to be written as CDC files. + * + * See [[CDCReader]] for general details on how partitioning on the CDC type column works. + */ + protected def generateCdcAndOutputRows( + sourceDf: DataFrame, + outputCols: Seq[Column], + outputColNames: Seq[String], + noopCopyExprs: Seq[Expression], + deduplicateDeletes: DeduplicateCDFDeletes): DataFrame = { + import org.apache.spark.sql.delta.commands.cdc.CDCReader._ + // The main partition just needs to swap in the CDC_TYPE_NOT_CDC value. + val mainDataOutput = + outputCols.dropRight(1) :+ Column(CDC_TYPE_NOT_CDC).as(CDC_TYPE_COLUMN_NAME) + + // Deleted rows are sent to the CDC partition instead of the main partition. 
These rows are + // marked as dropped, we need to retain them while incrementing the original metric column + // ourselves. + val keepRowAndIncrDeletedCountExpr = !outputCols(outputCols.length - 2) + val deleteCdcOutput = outputCols + .updated(outputCols.length - 2, keepRowAndIncrDeletedCountExpr.as(ROW_DROPPED_COL)) + + // Update preimages need special handling. This is conceptually the same as the + // transformation for cdcOutputCols, but we have to transform the noop exprs to columns + // ourselves because it hasn't already been done. + val cdcNoopExprs = noopCopyExprs.dropRight(2) :+ + Literal.FalseLiteral :+ Literal(CDC_TYPE_UPDATE_PREIMAGE) + val updatePreimageCdcOutput = cdcNoopExprs.zipWithIndex.map { + case (e, i) => Column(Alias(e, outputColNames(i))()) + } + + // To avoid duplicate evaluation of nondeterministic column values such as + // [[GenerateIdentityValues]], we EXPLODE CDC rows first, from which we EXPLODE again, + // and for each of "insert" and "update_postimage" rows, generate main data rows. + // The first EXPLODE will force evaluation all nondeterministic expressions, + // and the second EXPLODE will just copy the generated value from CDC rows + // to main data. By doing so we ensure nondeterministic column values in CDC and + // main data rows stay the same. + + val cdcTypeCol = outputCols.last + val cdcArray = Column(CaseWhen(Seq( + EqualNullSafe(cdcTypeCol.expr, Literal(CDC_TYPE_INSERT)) -> array( + struct(outputCols: _*)).expr, + + EqualNullSafe(cdcTypeCol.expr, Literal(CDC_TYPE_UPDATE_POSTIMAGE)) -> array( + struct(updatePreimageCdcOutput: _*), + struct(outputCols: _*)).expr, + + EqualNullSafe(cdcTypeCol.expr, CDC_TYPE_DELETE) -> array( + struct(deleteCdcOutput: _*)).expr), + + // If none of the CDC cases apply (true for purely rewritten target rows, dropped source + // rows, etc.) just stick to the normal output. + array(struct(mainDataOutput: _*)).expr + )) + + val cdcToMainDataArray = Column(If( + Or( + EqualNullSafe(col(s"packedCdc.$CDC_TYPE_COLUMN_NAME").expr, + Literal(CDC_TYPE_INSERT)), + EqualNullSafe(col(s"packedCdc.$CDC_TYPE_COLUMN_NAME").expr, + Literal(CDC_TYPE_UPDATE_POSTIMAGE))), + array( + col("packedCdc"), + struct( + outputColNames + .dropRight(1) + .map { n => col(s"packedCdc.`$n`") } + :+ Column(CDC_TYPE_NOT_CDC).as(CDC_TYPE_COLUMN_NAME): _*) + ).expr, + array(col("packedCdc")).expr + )) + + if (deduplicateDeletes.enabled) { + deduplicateCDFDeletes( + deduplicateDeletes, + sourceDf, + cdcArray, + cdcToMainDataArray, + outputColNames) + } else { + packAndExplodeCDCOutput( + sourceDf, + cdcArray, + cdcToMainDataArray, + outputColNames, + dedupColumns = Nil) + } + } + + /** + * Applies the transformations to generate the CDC output: + * 1. Transform each input row into its corresponding array of CDC rows, e.g. an updated row + * yields: array(update_preimage, update_postimage). + * 2. Add the main data output for inserted/updated rows to the previously packed CDC data. + * 3. Explode the result to flatten the packed arrays. + * + * @param sourceDf The dataframe generated after processing the merge output. + * @param cdcArray Transforms the merge output into the corresponding packed CDC data that will be + * written to the CDC partition. + * @param cdcToMainDataArray Transforms the packed CDC data to add the main data output, i.e. rows + * that are inserted or updated and will be written to the main + * partition. + * @param outputColNames All the main and CDC columns to use in the output. 
+ * @param dedupColumns Additional columns to add to enable deduplication. + */ + private def packAndExplodeCDCOutput( + sourceDf: DataFrame, + cdcArray: Column, + cdcToMainDataArray: Column, + outputColNames: Seq[String], + dedupColumns: Seq[Column]): DataFrame = { + val unpackedCols = outputColNames.map { name => + col(s"packedData.`$name`").as(name) + } + sourceDf + // `explode()` creates a [[Generator]] which can't handle non-deterministic expressions that + // we use to increment metric counters. We first project the CDC array so that the expressions + // are evaluated before we explode the array, + .select(cdcArray.as("projectedCDC") +: dedupColumns: _*) + .select(explode(col("projectedCDC")).as("packedCdc") +: dedupColumns: _*) + .select(explode(cdcToMainDataArray).as("packedData") +: dedupColumns: _*) + .select(unpackedCols ++ dedupColumns: _*) + } + + /** + * This method deduplicates CDF deletes where a target row has potentially multiple matches. It + * assumes that the input dataframe contains the [[TARGET_ROW_INDEX_COL]] and + * to detect inserts the [[SOURCE_ROW_INDEX_COL]] column to track the origin of the row. + * + * All duplicates of deleted rows have the same [[TARGET_ROW_INDEX_COL]] and + * [[CDC_TYPE_COLUMN_NAME]] therefore we use both columns as compound deduplication key. + * In case the input data frame contains additional insert rows we leave them untouched by using + * the [[SOURCE_ROW_INDEX_COL]] to fill the null values of the [[TARGET_ROW_INDEX_COL]]. This + * may lead to duplicates as part of the final row index but this is not a problem since if + * an insert and a delete have the same [[TARGET_ROW_INDEX_COL]] they definitely have a + * different [[CDC_TYPE_COLUMN_NAME]]. + */ + private def deduplicateCDFDeletes( + deduplicateDeletes: DeduplicateCDFDeletes, + df: DataFrame, + cdcArray: Column, + cdcToMainDataArray: Column, + outputColNames: Seq[String]): DataFrame = { + val dedupColumns = if (deduplicateDeletes.includesInserts) { + Seq(col(TARGET_ROW_INDEX_COL), col(SOURCE_ROW_INDEX_COL)) + } else { + Seq(col(TARGET_ROW_INDEX_COL)) + } + + val cdcDf = packAndExplodeCDCOutput( + df, + cdcArray, + cdcToMainDataArray, + outputColNames, + dedupColumns + ) + + val cdcDfWithIncreasingIds = if (deduplicateDeletes.includesInserts) { + cdcDf.withColumn( + TARGET_ROW_INDEX_COL, + coalesce(col(TARGET_ROW_INDEX_COL), col(SOURCE_ROW_INDEX_COL))) + } else { + cdcDf + } + cdcDfWithIncreasingIds + .dropDuplicates(TARGET_ROW_INDEX_COL, CDC_TYPE_COLUMN_NAME) + .drop(TARGET_ROW_INDEX_COL, SOURCE_ROW_INDEX_COL) + } +} + +/** + * This class enables and configures the deduplication of CDF deletes in case the merge statement + * contains an unconditional delete statement that matches multiple target rows. 
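 *
 * For illustration (hedged, with made-up values), deduplication keeps one row per
 * (TARGET_ROW_INDEX_COL, CDC_TYPE_COLUMN_NAME) pair:
 * {{{
 *   TARGET_ROW_INDEX_COL | CDC_TYPE_COLUMN_NAME | kept?
 *   42                   | delete               | yes (first occurrence)
 *   42                   | delete               | no  (duplicate match)
 *   42                   | insert               | yes (different change type)
 * }}}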
+ * + * @param enabled CDF generation should be enabled and duplicate target matches are detected + * @param includesInserts in addition to the unconditional deletes the merge also inserts rows + */ +case class DeduplicateCDFDeletes( + enabled: Boolean, + includesInserts: Boolean) + +object MergeOutputGeneration { + final val TARGET_ROW_INDEX_COL = "_target_row_index_" + final val SOURCE_ROW_INDEX_COL = "_source_row_index" +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeStats.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeStats.scala new file mode 100644 index 00000000000..3188b1eaf89 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeStats.scala @@ -0,0 +1,216 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.merge + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.util.ScalaExtensions._ +import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import org.apache.commons.lang3.StringUtils + +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.{DeltaMergeIntoClause, DeltaMergeIntoMatchedClause, DeltaMergeIntoNotMatchedBySourceClause, DeltaMergeIntoNotMatchedClause} +import org.apache.spark.sql.execution.metric.SQLMetric + +case class MergeDataSizes( + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + rows: Option[Long] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + files: Option[Long] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + bytes: Option[Long] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + partitions: Option[Long] = None) + +/** + * Represents the state of a single merge clause: + * - merge clause's (optional) predicate + * - action type (insert, update, delete) + * - action's expressions + */ +case class MergeClauseStats( + condition: Option[String], + actionType: String, + actionExpr: Seq[String]) + +object MergeClauseStats { + def apply(mergeClause: DeltaMergeIntoClause): MergeClauseStats = { + MergeClauseStats( + condition = mergeClause.condition.map(c => StringUtils.abbreviate(c.sql, 256)), + mergeClause.clauseType.toLowerCase(), + actionExpr = truncateSeq( + mergeClause.actions.map(a => StringUtils.abbreviate(a.sql, 256)), + maxLength = 512) + ) + } + + /** + * Truncate a list of items to be serialized to around 'maxLength' characters. + * Always include at least on item. + */ + private def truncateSeq(seq: Seq[String], maxLength: Long): Seq[String] = { + val buffer = ArrayBuffer.empty[String] + var length = 0L + for (x <- seq if length + x.length <= maxLength || buffer.isEmpty) { + length += x.length + 3 // quotes and comma + buffer.append(x) + } + val numTruncatedItems = seq.length - buffer.length + if (numTruncatedItems > 0) { + buffer.append("... 
" + numTruncatedItems + " more fields") + } + buffer.toSeq + } +} + +/** State for a merge operation */ +case class MergeStats( + // Merge condition expression + conditionExpr: String, + + // Expressions used in old MERGE stats, now always Null + updateConditionExpr: String, + updateExprs: Seq[String], + insertConditionExpr: String, + insertExprs: Seq[String], + deleteConditionExpr: String, + + // Newer expressions used in MERGE with any number of MATCHED/NOT MATCHED/NOT MATCHED BY SOURCE + matchedStats: Seq[MergeClauseStats], + notMatchedStats: Seq[MergeClauseStats], + notMatchedBySourceStats: Seq[MergeClauseStats], + + // Timings + executionTimeMs: Long, + scanTimeMs: Long, + rewriteTimeMs: Long, + + // Data sizes of source and target at different stages of processing + source: MergeDataSizes, + targetBeforeSkipping: MergeDataSizes, + targetAfterSkipping: MergeDataSizes, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + sourceRowsInSecondScan: Option[Long], + + // Data change sizes + targetFilesRemoved: Long, + targetFilesAdded: Long, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetChangeFilesAdded: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetChangeFileBytes: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetBytesRemoved: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetBytesAdded: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetPartitionsRemovedFrom: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetPartitionsAddedTo: Option[Long], + targetRowsCopied: Long, + targetRowsUpdated: Long, + targetRowsMatchedUpdated: Long, + targetRowsNotMatchedBySourceUpdated: Long, + targetRowsInserted: Long, + targetRowsDeleted: Long, + targetRowsMatchedDeleted: Long, + targetRowsNotMatchedBySourceDeleted: Long, + numTargetDeletionVectorsAdded: Long, + numTargetDeletionVectorsRemoved: Long, + numTargetDeletionVectorsUpdated: Long, + + // MergeMaterializeSource stats + materializeSourceReason: Option[String] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + materializeSourceAttempts: Option[Long] = None +) + +object MergeStats { + + def fromMergeSQLMetrics( + metrics: Map[String, SQLMetric], + condition: Expression, + matchedClauses: Seq[DeltaMergeIntoMatchedClause], + notMatchedClauses: Seq[DeltaMergeIntoNotMatchedClause], + notMatchedBySourceClauses: Seq[DeltaMergeIntoNotMatchedBySourceClause], + isPartitioned: Boolean, + performedSecondSourceScan: Boolean): MergeStats = { + + def metricValueIfPartitioned(metricName: String): Option[Long] = { + if (isPartitioned) Some(metrics(metricName).value) else None + } + + MergeStats( + // Merge condition expression + conditionExpr = StringUtils.abbreviate(condition.sql, 2048), + + // Newer expressions used in MERGE with any number of MATCHED/NOT MATCHED/ + // NOT MATCHED BY SOURCE + matchedStats = matchedClauses.map(MergeClauseStats(_)), + notMatchedStats = notMatchedClauses.map(MergeClauseStats(_)), + notMatchedBySourceStats = notMatchedBySourceClauses.map(MergeClauseStats(_)), + + // Timings + executionTimeMs = metrics("executionTimeMs").value, + scanTimeMs = metrics("scanTimeMs").value, + rewriteTimeMs = metrics("rewriteTimeMs").value, + + // Data sizes of source and target at different stages of processing + source = MergeDataSizes(rows = Some(metrics("numSourceRows").value)), + targetBeforeSkipping = + MergeDataSizes( + files = 
Some(metrics("numTargetFilesBeforeSkipping").value), + bytes = Some(metrics("numTargetBytesBeforeSkipping").value)), + targetAfterSkipping = + MergeDataSizes( + files = Some(metrics("numTargetFilesAfterSkipping").value), + bytes = Some(metrics("numTargetBytesAfterSkipping").value), + partitions = metricValueIfPartitioned("numTargetPartitionsAfterSkipping")), + sourceRowsInSecondScan = + Option.when(performedSecondSourceScan)(metrics("numSourceRowsInSecondScan").value), + + // Data change sizes + targetFilesAdded = metrics("numTargetFilesAdded").value, + targetChangeFilesAdded = metrics.get("numTargetChangeFilesAdded").map(_.value), + targetChangeFileBytes = metrics.get("numTargetChangeFileBytes").map(_.value), + targetFilesRemoved = metrics("numTargetFilesRemoved").value, + targetBytesAdded = Some(metrics("numTargetBytesAdded").value), + targetBytesRemoved = Some(metrics("numTargetBytesRemoved").value), + targetPartitionsRemovedFrom = metricValueIfPartitioned("numTargetPartitionsRemovedFrom"), + targetPartitionsAddedTo = metricValueIfPartitioned("numTargetPartitionsAddedTo"), + targetRowsCopied = metrics("numTargetRowsCopied").value, + targetRowsUpdated = metrics("numTargetRowsUpdated").value, + targetRowsMatchedUpdated = metrics("numTargetRowsMatchedUpdated").value, + targetRowsNotMatchedBySourceUpdated = metrics("numTargetRowsNotMatchedBySourceUpdated").value, + targetRowsInserted = metrics("numTargetRowsInserted").value, + targetRowsDeleted = metrics("numTargetRowsDeleted").value, + targetRowsMatchedDeleted = metrics("numTargetRowsMatchedDeleted").value, + targetRowsNotMatchedBySourceDeleted = metrics("numTargetRowsNotMatchedBySourceDeleted").value, + + // Deletion Vector metrics. + numTargetDeletionVectorsAdded = metrics("numTargetDeletionVectorsAdded").value, + numTargetDeletionVectorsRemoved = metrics("numTargetDeletionVectorsRemoved").value, + numTargetDeletionVectorsUpdated = metrics("numTargetDeletionVectorsUpdated").value, + + // Deprecated fields + updateConditionExpr = null, + updateExprs = null, + insertConditionExpr = null, + insertExprs = null, + deleteConditionExpr = null) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/optimize/OptimizeStats.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/optimize/OptimizeStats.scala new file mode 100644 index 00000000000..b02e39d94fb --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/optimize/OptimizeStats.scala @@ -0,0 +1,275 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.optimize + +import org.apache.spark.sql.delta.actions.{AddFile, FileAction, RemoveFile} + +// scalastyle:off import.ordering.noEmptyLine + +/** + * Stats for an OPTIMIZE operation accumulated across all batches. 
+ */ +case class OptimizeStats( + var addedFilesSizeStats: FileSizeStats = FileSizeStats(), + var removedFilesSizeStats: FileSizeStats = FileSizeStats(), + var numPartitionsOptimized: Long = 0, + var zOrderStats: Option[ZOrderStats] = None, + var numBatches: Long = 0, + var totalConsideredFiles: Long = 0, + var totalFilesSkipped: Long = 0, + var preserveInsertionOrder: Boolean = false, + var numFilesSkippedToReduceWriteAmplification: Long = 0, + var numBytesSkippedToReduceWriteAmplification: Long = 0, + startTimeMs: Long = System.currentTimeMillis(), + var endTimeMs: Long = 0, + var totalClusterParallelism: Long = 0, + var totalScheduledTasks: Long = 0, + var deletionVectorStats: Option[DeletionVectorStats] = None, + var numTableColumns: Long = 0, + var numTableColumnsWithStats: Long = 0, + var autoCompactParallelismStats: AutoCompactParallelismStats = AutoCompactParallelismStats()) { + + def toOptimizeMetrics: OptimizeMetrics = { + OptimizeMetrics( + numFilesAdded = addedFilesSizeStats.totalFiles, + numFilesRemoved = removedFilesSizeStats.totalFiles, + filesAdded = addedFilesSizeStats.toFileSizeMetrics, + filesRemoved = removedFilesSizeStats.toFileSizeMetrics, + partitionsOptimized = numPartitionsOptimized, + zOrderStats = zOrderStats, + numBatches = numBatches, + totalConsideredFiles = totalConsideredFiles, + totalFilesSkipped = totalFilesSkipped, + preserveInsertionOrder = preserveInsertionOrder, + numFilesSkippedToReduceWriteAmplification = numFilesSkippedToReduceWriteAmplification, + numBytesSkippedToReduceWriteAmplification = numBytesSkippedToReduceWriteAmplification, + startTimeMs = startTimeMs, + endTimeMs = endTimeMs, + totalClusterParallelism = totalClusterParallelism, + totalScheduledTasks = totalScheduledTasks, + deletionVectorStats = deletionVectorStats, + numTableColumns = numTableColumns, + numTableColumnsWithStats = numTableColumnsWithStats, + autoCompactParallelismStats = autoCompactParallelismStats.toMetrics) + } +} + +/** + * This statistics class keeps track of the parallelism usage of Auto Compaction. + * It collects the following metrics: + * -- the min/max parallelism used across the whole cluster for Auto Compact, + * -- the min/max parallelism occupied by the current Auto Compact session. + */ +case class AutoCompactParallelismStats( + var maxClusterUsedParallelism: Long = 0, + var minClusterUsedParallelism: Long = 0, + var maxSessionUsedParallelism: Long = 0, + var minSessionUsedParallelism: Long = 0) { + def toMetrics: Option[ParallelismMetrics] = { + if (maxSessionUsedParallelism == 0) { + return None + } + Some(ParallelismMetrics( + Some(maxClusterUsedParallelism), + Some(minClusterUsedParallelism), + Some(maxSessionUsedParallelism), + Some(minSessionUsedParallelism))) + } + + /** Updates the parallelism statistics of the current Auto Compact command.
*/ + def update(clusterUsedParallelism: Long, sessionUsedParallelism: Long): Unit = { + maxClusterUsedParallelism = Math.max(maxClusterUsedParallelism, clusterUsedParallelism) + minClusterUsedParallelism = if (minClusterUsedParallelism == 0) { + clusterUsedParallelism + } else { + Math.min(minClusterUsedParallelism, clusterUsedParallelism) + } + maxSessionUsedParallelism = Math.max(maxSessionUsedParallelism, sessionUsedParallelism) + minSessionUsedParallelism = if (minSessionUsedParallelism == 0) { + sessionUsedParallelism + } else { + Math.min(minSessionUsedParallelism, sessionUsedParallelism) + } + } +} + +case class FileSizeStats( + var minFileSize: Long = 0, + var maxFileSize: Long = 0, + var totalFiles: Long = 0, + var totalSize: Long = 0) { + + def avgFileSize: Double = if (totalFiles > 0) { + totalSize * 1.0 / totalFiles + } else { + 0.0 + } + + def merge(candidateFiles: Seq[FileAction]): Unit = { + if (totalFiles == 0 && candidateFiles.nonEmpty) { + minFileSize = Long.MaxValue + maxFileSize = Long.MinValue + } + candidateFiles.foreach { file => + val fileSize = file match { + case addFile: AddFile => addFile.size + case removeFile: RemoveFile => removeFile.size.getOrElse(0L) + case default => + throw new IllegalArgumentException(s"Unknown FileAction type: ${default.getClass}") + } + minFileSize = math.min(fileSize, minFileSize) + maxFileSize = math.max(fileSize, maxFileSize) + totalSize += fileSize + } + totalFiles += candidateFiles.length + } + + + def toFileSizeMetrics: FileSizeMetrics = { + if (totalFiles == 0) { + return FileSizeMetrics(min = None, max = None, avg = 0, totalFiles = 0, totalSize = 0) + } + FileSizeMetrics( + min = Some(minFileSize), + max = Some(maxFileSize), + avg = avgFileSize, + totalFiles = totalFiles, + totalSize = totalSize) + } +} +/** + * Percentiles on the file sizes in this batch. + * @param min Size of the smallest file + * @param p25 Size of the 25th percentile file + * @param p50 Size of the 50th percentile file + * @param p75 Size of the 75th percentile file + * @param max Size of the largest file + */ +case class FileSizeStatsWithHistogram( + min: Long, + p25: Long, + p50: Long, + p75: Long, + max: Long) + +object FileSizeStatsWithHistogram { + + /** + * Creates a [[FileSizeStatsWithHistogram]] based on the passed sorted file sizes + * @return Some(fileSizeStatsWithHistogram) if sizes are non-empty, else returns None + */ + def create(sizes: Seq[Long]): Option[FileSizeStatsWithHistogram] = { + if (sizes.isEmpty) { + return None + } + val count = sizes.length + Some(FileSizeStatsWithHistogram( + min = sizes.head, + // we do not need to ceil the computed index as arrays start at 0 + p25 = sizes(count / 4), + p50 = sizes(count / 2), + p75 = sizes(count * 3 / 4), + max = sizes.last)) + } +} + +/** + * Metrics returned by the optimize command. + * + * @param numFilesAdded number of files added by optimize + * @param numFilesRemoved number of files removed by optimize + * @param filesAdded Stats for the files added + * @param filesRemoved Stats for the files removed + * @param partitionsOptimized Number of partitions optimized + * @param zOrderStats Z-Order stats + * @param numBatches Number of batches + * @param totalConsideredFiles Number of files considered for the Optimize operation. + * @param totalFilesSkipped Number of files that are skipped from being Optimized. + * @param preserveInsertionOrder If optimize was run with insertion preservation enabled. 
+ * @param numFilesSkippedToReduceWriteAmplification Number of files skipped for reducing write + * amplification. + * @param numBytesSkippedToReduceWriteAmplification Number of bytes skipped for reducing write + * amplification. + * @param startTimeMs The start time of Optimize command. + * @param endTimeMs The end time of Optimize command. + * @param totalClusterParallelism The total number of parallelism of this cluster. + * @param totalScheduledTasks The total number of optimize task scheduled. + * @param autoCompactParallelismStats The metrics of cluster and session parallelism. + * @param deletionVectorStats Statistics related with Deletion Vectors. + * @param numTableColumns Number of columns in the table. + * @param numTableColumnsWithStats Number of table columns to collect data skipping stats. + */ +case class OptimizeMetrics( + numFilesAdded: Long, + numFilesRemoved: Long, + filesAdded: FileSizeMetrics = + FileSizeMetrics(min = None, max = None, avg = 0, totalFiles = 0, totalSize = 0), + filesRemoved: FileSizeMetrics = + FileSizeMetrics(min = None, max = None, avg = 0, totalFiles = 0, totalSize = 0), + partitionsOptimized: Long = 0, + zOrderStats: Option[ZOrderStats] = None, + numBatches: Long, + totalConsideredFiles: Long, + totalFilesSkipped: Long = 0, + preserveInsertionOrder: Boolean = false, + numFilesSkippedToReduceWriteAmplification: Long = 0, + numBytesSkippedToReduceWriteAmplification: Long = 0, + startTimeMs: Long = 0, + endTimeMs: Long = 0, + totalClusterParallelism: Long = 0, + totalScheduledTasks: Long = 0, + autoCompactParallelismStats: Option[ParallelismMetrics] = None, + deletionVectorStats: Option[DeletionVectorStats] = None, + numTableColumns: Long = 0, + numTableColumnsWithStats: Long = 0 + ) + +/** + * Basic Stats on file sizes. + * + * @param min Minimum file size + * @param max Maximum file size + * @param avg Average of the file size + * @param totalFiles Total number of files + * @param totalSize Total size of the files + */ +case class FileSizeMetrics( + min: Option[Long], + max: Option[Long], + avg: Double, + totalFiles: Long, + totalSize: Long) + +/** + * This statistics contains following metrics: + * -- the min/max parallelism among the whole cluster are used, + * -- the min/max parallelism occupied by current session, + */ +case class ParallelismMetrics( + maxClusterActiveParallelism: Option[Long] = None, + minClusterActiveParallelism: Option[Long] = None, + maxSessionActiveParallelism: Option[Long] = None, + minSessionActiveParallelism: Option[Long] = None) + +/** + * Accumulator for statistics related with Deletion Vectors. + * Note that this case class contains mutable variables and cannot be used in places where immutable + * case classes can be used (e.g. map/set keys). + */ +case class DeletionVectorStats( + var numDeletionVectorsRemoved: Long = 0, + var numDeletionVectorRowsRemoved: Long = 0) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/optimize/ZOrderMetrics.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/optimize/ZOrderMetrics.scala new file mode 100644 index 00000000000..f14f67cb8c2 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/optimize/ZOrderMetrics.scala @@ -0,0 +1,56 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands.optimize + +// scalastyle:off import.ordering.noEmptyLine + +/** + * Aggregated file stats for a category of ZCube files. + * @param num Total number of files. + * @param size Total size of files in bytes. + */ +case class ZOrderFileStats(num: Long, size: Long) + +object ZOrderFileStats { + def apply(v: Iterable[(Int, Long)]): ZOrderFileStats = { + v.foldLeft(ZOrderFileStats(0, 0)) { (a, b) => + ZOrderFileStats(a.num + b._1, a.size + b._2) + } + } +} + +/** + * Aggregated stats for OPTIMIZE ZORDERBY command. + * This is a public facing API, consider any change carefully. + * + * @param strategyName ZCubeMergeStrategy used. + * @param inputCubeFiles Files in the ZCube matching the current OPTIMIZE operation. + * @param inputOtherFiles Files not in any ZCube or in other ZCube orderings. + * @param inputNumCubes Number of different cubes among input files. + * @param mergedFiles Subset of input files merged by the current operation + * @param numOutputCubes Number of output ZCubes written out + * @param mergedNumCubes Number of different cubes among merged files. + */ +case class ZOrderStats( + strategyName: String, + inputCubeFiles: ZOrderFileStats, + inputOtherFiles: ZOrderFileStats, + inputNumCubes: Long, + mergedFiles: ZOrderFileStats, + numOutputCubes: Long, + mergedNumCubes: Option[Long] = None +) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/constraints/CharVarcharConstraint.scala b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/CharVarcharConstraint.scala new file mode 100644 index 00000000000..bbf4ed2ec9f --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/CharVarcharConstraint.scala @@ -0,0 +1,70 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.constraints + +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.types._ + +// Delta implements char/varchar length check with CONSTRAINTS, and needs to generate predicate +// expression which is different from the OSS version. 
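// Illustrative sketch of the generated predicate: for a field declared as `name VARCHAR(5)`,
// `stringConstraints` below yields roughly
//   Check("__CHAR_VARCHAR_STRING_LENGTH_CHECK__", name IS NULL OR length(name) <= 5)
// so NULL values always pass and only non-null strings longer than the declared length fail.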
+object CharVarcharConstraint { + final val INVARIANT_NAME = "__CHAR_VARCHAR_STRING_LENGTH_CHECK__" + + def stringConstraints(schema: StructType): Seq[Constraint] = { + schema.flatMap { f => + val targetType = CharVarcharUtils.getRawType(f.metadata).getOrElse(f.dataType) + val col = UnresolvedAttribute(Seq(f.name)) + checkStringLength(col, targetType).map { lengthCheckExpr => + Constraints.Check(INVARIANT_NAME, lengthCheckExpr) + } + } + } + + private def checkStringLength(expr: Expression, dt: DataType): Option[Expression] = dt match { + case VarcharType(length) => + Some(Or(IsNull(expr), LessThanOrEqual(Length(expr), Literal(length)))) + + case CharType(length) => + checkStringLength(expr, VarcharType(length)) + + case StructType(fields) => + fields.zipWithIndex.flatMap { case (f, i) => + checkStringLength(GetStructField(expr, i, Some(f.name)), f.dataType) + }.reduceOption(And(_, _)) + + case ArrayType(et, containsNull) => + checkStringLengthInArray(expr, et, containsNull) + + case MapType(kt, vt, valueContainsNull) => + (checkStringLengthInArray(MapKeys(expr), kt, false) ++ + checkStringLengthInArray(MapValues(expr), vt, valueContainsNull)) + .reduceOption(And(_, _)) + + case _ => None + } + + private def checkStringLengthInArray( + arr: Expression, et: DataType, containsNull: Boolean): Option[Expression] = { + val cleanedType = CharVarcharUtils.replaceCharVarcharWithString(et) + val param = NamedLambdaVariable("x", cleanedType, containsNull) + checkStringLength(param, et).map { checkExpr => + Or(IsNull(arr), ArrayForAll(arr, LambdaFunction(checkExpr, Seq(param)))) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/constraints/CheckDeltaInvariant.scala b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/CheckDeltaInvariant.scala new file mode 100644 index 00000000000..3983284c207 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/CheckDeltaInvariant.scala @@ -0,0 +1,140 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.constraints + +import org.apache.spark.sql.delta.constraints.Constraints.{Check, NotNull} +import org.apache.spark.sql.delta.schema.DeltaInvariantViolationException + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{AttributeSeq, BindReferences, Expression, NonSQLExpression, UnaryExpression} +import org.apache.spark.sql.catalyst.expressions.codegen._ +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.types.{DataType, NullType} + +/** + * An expression that validates a specific invariant on a column, before writing into Delta. + * + * @param child The fully resolved expression to be evaluated to check the constraint. + * @param columnExtractors Extractors for each referenced column. Used to generate readable errors. + * @param constraint The original constraint definition. 
+ */ +case class CheckDeltaInvariant( + child: Expression, + columnExtractors: Map[String, Expression], + constraint: Constraint) + extends UnaryExpression with NonSQLExpression with CodegenFallback { + + override def dataType: DataType = NullType + override def foldable: Boolean = false + override def nullable: Boolean = true + + def withBoundReferences(input: AttributeSeq): CheckDeltaInvariant = { + CheckDeltaInvariant( + BindReferences.bindReference(child, input), + columnExtractors.map { + case (column, extractor) => column -> BindReferences.bindReference(extractor, input) + }, + constraint) + } + + private def assertRule(input: InternalRow): Unit = constraint match { + case n: NotNull => + if (child.eval(input) == null) { + throw DeltaInvariantViolationException(n) + } + case c: Check => + val result = child.eval(input) + if (result == null || result == false) { + throw DeltaInvariantViolationException(c, columnExtractors.mapValues(_.eval(input)).toMap) + } + } + + override def eval(input: InternalRow): Any = { + assertRule(input) + null + } + + private def generateNotNullCode(ctx: CodegenContext): Block = { + val childGen = child.genCode(ctx) + val invariantField = ctx.addReferenceObj("errMsg", constraint) + code"""${childGen.code} + | + |if (${childGen.isNull}) { + | throw org.apache.spark.sql.delta.schema.DeltaInvariantViolationException.apply( + | $invariantField); + |} + """.stripMargin + } + + /** + * Generate the code to extract values for the columns referenced in a violated CHECK constraint. + * We build parallel lists of full column names and their extracted values in the row which + * violates the constraint, to be passed to the [[InvariantViolationException]] constructor + * in [[generateExpressionValidationCode()]]. + * + * Note that this code is a bit expensive, so it shouldn't be run until we already + * know the constraint has been violated. 
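 *
 * For a constraint referencing columns `a` and `b`, the generated snippet is roughly
 * (illustration only, placeholders in angle brackets):
 * {{{
 *   colList.add("a"); valList.add(<value of a, or null>);
 *   colList.add("b"); valList.add(<value of b, or null>);
 * }}}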
+ */ + private def generateColumnValuesCode( + colList: String, valList: String, ctx: CodegenContext): Block = { + val start = + code""" + |java.util.List $colList = new java.util.ArrayList(); + |java.util.List $valList = new java.util.ArrayList(); + |""".stripMargin + columnExtractors.map { + case (name, extractor) => + val colValue = extractor.genCode(ctx) + code""" + |$colList.add("$name"); + |${colValue.code} + |if (${colValue.isNull}) { + | $valList.add(null); + |} else { + | $valList.add(${colValue.value}); + |} + |""".stripMargin + }.fold(start)(_ + _) + } + + private def generateExpressionValidationCode( + constraintName: String, expr: Expression, ctx: CodegenContext): Block = { + val elementValue = child.genCode(ctx) + val invariantField = ctx.addReferenceObj("errMsg", constraint) + val colListName = ctx.freshName("colList") + val valListName = ctx.freshName("valList") + code"""${elementValue.code} + | + |if (${elementValue.isNull} || ${elementValue.value} == false) { + | ${generateColumnValuesCode(colListName, valListName, ctx)} + | throw org.apache.spark.sql.delta.schema.DeltaInvariantViolationException.apply( + | $invariantField, $colListName, $valListName); + |} + """.stripMargin + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val code = constraint match { + case NotNull(_) => generateNotNullCode(ctx) + case Check(name, expr) => generateExpressionValidationCode(name, expr, ctx) + } + ev.copy(code = code, isNull = TrueLiteral, value = JavaCode.literal("null", NullType)) + } + + override protected def withNewChildInternal(newChild: Expression): CheckDeltaInvariant = + copy(child = newChild) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/constraints/Constraints.scala b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/Constraints.scala new file mode 100644 index 00000000000..aae7524f3c2 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/Constraints.scala @@ -0,0 +1,105 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.constraints + +import java.util.Locale + +import org.apache.spark.sql.delta.actions.Metadata +import org.apache.spark.sql.delta.schema.SchemaUtils + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Expression + +/** + * A constraint defined on a Delta table, which writers must verify before writing. + */ +sealed trait Constraint { + val name: String +} + +/** + * Utilities for handling constraints. Right now this includes: + * - Column-level invariants delegated to [[Invariants]], including both NOT NULL constraints and + * an old style of CHECK constraint specified in the column metadata + * - Table-level CHECK constraints + */ +object Constraints { + /** + * A constraint that the specified column must not be NULL. Note that when the column is nested, + * this implies its parents must also not be NULL. 
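 *
 * For example (illustration only):
 * {{{
 *   // requires the nested field a.b, and hence the struct a itself, to be non-null
 *   Constraints.NotNull(Seq("a", "b"))
 * }}}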
+ */ + case class NotNull(column: Seq[String]) extends Constraint { + override val name: String = "NOT NULL" + } + + /** A SQL expression to check for when writing out data. */ + case class Check(name: String, expression: Expression) extends Constraint + + /** + * Extract CHECK constraints from the table properties. Note that some CHECK constraints may also + * come from schema metadata; these constraints were never released in a public API but are + * maintained for protocol compatibility. + */ + def getCheckConstraints(metadata: Metadata, spark: SparkSession): Seq[Constraint] = { + metadata.configuration.collect { + case (key, constraintText) if key.toLowerCase(Locale.ROOT).startsWith("delta.constraints.") => + val name = key.stripPrefix("delta.constraints.") + val expression = spark.sessionState.sqlParser.parseExpression(constraintText) + Check(name, expression) + }.toSeq + } + + /** Extract all constraints from the given Delta table metadata. */ + def getAll(metadata: Metadata, spark: SparkSession): Seq[Constraint] = { + val checkConstraints = getCheckConstraints(metadata, spark) + val constraintsFromSchema = Invariants.getFromSchema(metadata.schema, spark) + val charVarcharLengthChecks = if (spark.sessionState.conf.charVarcharAsString) { + Nil + } else { + CharVarcharConstraint.stringConstraints(metadata.schema) + } + + (checkConstraints ++ constraintsFromSchema ++ charVarcharLengthChecks).toSeq + } + + /** Get the expression text for a constraint with the given name, if present. */ + def getExprTextByName( + name: String, + metadata: Metadata, + spark: SparkSession): Option[String] = { + metadata.configuration.get(checkConstraintPropertyName(name)) + } + + def checkConstraintPropertyName(constraintName: String): String = { + "delta.constraints." + constraintName.toLowerCase(Locale.ROOT) + } + + /** + * Find all the check constraints that reference the given column name. + */ + def findDependentConstraints( + sparkSession: SparkSession, + columnName: Seq[String], + metadata: Metadata): Map[String, String] = { + metadata.configuration.filter { + case (key, constraint) if key.toLowerCase(Locale.ROOT).startsWith("delta.constraints.") => + SchemaUtils.containsDependentExpression( + sparkSession, columnName, constraint, sparkSession.sessionState.conf.resolver) + case _ => false + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/constraints/DeltaInvariantCheckerExec.scala b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/DeltaInvariantCheckerExec.scala new file mode 100644 index 00000000000..b3c99856922 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/DeltaInvariantCheckerExec.scala @@ -0,0 +1,179 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.constraints + +import scala.collection.mutable + +import org.apache.spark.sql.delta.{DeltaErrors, DeltaIllegalStateException} +import org.apache.spark.sql.delta.constraints.Constraints.{Check, NotNull} +import org.apache.spark.sql.delta.schema.SchemaUtils + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.optimizer.ReplaceExpressions +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.execution.{SparkPlan, SparkStrategy, UnaryExecNode} +import org.apache.spark.sql.types.StructType + +/** + * Operator that validates that records satisfy provided constraints before they are written into + * Delta. Each row is left unchanged after validations. + */ +case class DeltaInvariantChecker( + child: LogicalPlan, + deltaConstraints: Seq[Constraint]) extends UnaryNode { + assert(deltaConstraints.nonEmpty) + + override def output: Seq[Attribute] = child.output + + override protected def withNewChildInternal(newChild: LogicalPlan): DeltaInvariantChecker = + copy(child = newChild) +} + +object DeltaInvariantCheckerStrategy extends SparkStrategy { + override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { + case DeltaInvariantChecker(child, constraints) => + DeltaInvariantCheckerExec(planLater(child), constraints) :: Nil + case _ => Nil + } +} + +/** + * A physical operator that validates records, before they are written into Delta. Each row + * is left unchanged after validations. + */ +case class DeltaInvariantCheckerExec( + child: SparkPlan, + constraints: Seq[Constraint]) extends UnaryExecNode { + + override def output: Seq[Attribute] = child.output + + override protected def doExecute(): RDD[InternalRow] = { + if (constraints.isEmpty) return child.execute() + val invariantChecks = + DeltaInvariantCheckerExec.buildInvariantChecks(child.output, constraints, session) + val boundRefs = invariantChecks.map(_.withBoundReferences(child.output)) + + child.execute().mapPartitionsInternal { rows => + val assertions = UnsafeProjection.create(boundRefs) + rows.map { row => + assertions(row) + row + } + } + } + + override def outputOrdering: Seq[SortOrder] = child.outputOrdering + + override def outputPartitioning: Partitioning = child.outputPartitioning + + override protected def withNewChildInternal(newChild: SparkPlan): DeltaInvariantCheckerExec = + copy(child = newChild) +} + +object DeltaInvariantCheckerExec { + + // Specialized optimizer to run necessary rules so that the check expressions can be evaluated. + object DeltaInvariantCheckerOptimizer extends RuleExecutor[LogicalPlan] { + final override protected def batches = Seq( + Batch("Finish Analysis", Once, ReplaceExpressions) + ) + } + + /** Build the extractor for a particular column. 
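 * For example (illustrative), given `output = [a: struct<b: int>]`, the column `Seq("a", "b")`
 * resolves to `GetStructField(a, 0, Some("b"))`, whereas an unknown top-level column yields `None`.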
*/ + private def buildExtractor(output: Seq[Attribute], column: Seq[String]): Option[Expression] = { + assert(column.nonEmpty) + val topLevelColumn = column.head + val topLevelRefOpt = output.collectFirst { + case a: AttributeReference if SchemaUtils.DELTA_COL_RESOLVER(a.name, topLevelColumn) => a + } + + if (column.length == 1) { + topLevelRefOpt + } else { + topLevelRefOpt.flatMap { topLevelRef => + try { + val nested = column.tail.foldLeft[Expression](topLevelRef) { case (e, fieldName) => + e.dataType match { + case StructType(fields) => + val ordinal = fields.indexWhere(f => + SchemaUtils.DELTA_COL_RESOLVER(f.name, fieldName)) + if (ordinal == -1) { + throw DeltaErrors.notNullColumnNotFoundInStruct( + s"${fields.map(_.name).mkString("[", ",", "]")}") + } + GetStructField(e, ordinal, Some(fieldName)) + case _ => + // NOTE: We should also update `GeneratedColumn.validateGeneratedColumns` to enable + // `GetMapValue` and `GetArrayStructFields` expressions when this is supported. + throw DeltaErrors.unSupportedInvariantNonStructType + } + } + Some(nested) + } catch { + case _: IndexOutOfBoundsException => None + } + } + } + } + + def buildInvariantChecks( + output: Seq[Attribute], + constraints: Seq[Constraint], + spark: SparkSession): Seq[CheckDeltaInvariant] = { + constraints.map { constraint => + val columnExtractors = mutable.Map[String, Expression]() + val executableExpr = constraint match { + case n @ NotNull(column) => + buildExtractor(output, column).getOrElse { + throw DeltaErrors.notNullColumnMissingException(n) + } + case Check(name, expr) => + // We need to do two stages of resolution here: + // * Build the extractors to evaluate attribute references against input InternalRows. + // * Do logical analysis to handle nested field extractions, functions, etc. + + val attributesExtracted = expr.transformUp { + case a: UnresolvedAttribute => + val ex = buildExtractor(output, a.nameParts).getOrElse(Literal(null)) + columnExtractors(a.name) = ex + ex + } + + val wrappedPlan: LogicalPlan = ExpressionLogicalPlanWrapper(attributesExtracted) + val analyzedLogicalPlan = spark.sessionState.analyzer.execute(wrappedPlan) + val optimizedLogicalPlan = DeltaInvariantCheckerOptimizer.execute(analyzedLogicalPlan) + optimizedLogicalPlan match { + case ExpressionLogicalPlanWrapper(e) => e + // This should never happen. + case plan => throw new DeltaIllegalStateException( + errorClass = "INTERNAL_ERROR", + messageParameters = Array( + "Applying type casting resulted in a bad plan rather than a simple expression.\n" + + s"Plan:${plan.prettyJson}\n")) + } + } + + CheckDeltaInvariant(executableExpr, columnExtractors.toMap, constraint) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/constraints/ExpressionLogicalPlanWrapper.scala b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/ExpressionLogicalPlanWrapper.scala new file mode 100644 index 00000000000..ee7c8bcc3d4 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/ExpressionLogicalPlanWrapper.scala @@ -0,0 +1,27 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.constraints + +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.plans.logical.LeafNode + +/** + * A dummy wrapper for expressions so we can pass them to the [[Analyzer]]. + */ +private[constraints] case class ExpressionLogicalPlanWrapper(e: Expression) extends LeafNode { + override def output: Seq[Attribute] = Seq.empty +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/constraints/Invariants.scala b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/Invariants.scala new file mode 100644 index 00000000000..5cd5b1e07ed --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/Invariants.scala @@ -0,0 +1,96 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.constraints + +import org.apache.spark.sql.delta.DeltaErrors +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.util.JsonUtils + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.types.StructType + +/** + * List of invariants that can be defined on a Delta table that will allow us to perform + * validation checks during changes to the table. + */ +object Invariants { + sealed trait Rule { + val name: String + } + + /** Used for columns that should never be null. */ + case object NotNull extends Rule { override val name: String = "NOT NULL" } + + sealed trait RulePersistedInMetadata { + def wrap: PersistedRule + def json: String = JsonUtils.toJson(wrap) + } + + /** Rules that are persisted in the metadata field of a schema. */ + case class PersistedRule(expression: PersistedExpression = null) { + def unwrap: RulePersistedInMetadata = { + if (expression != null) { + expression + } else { + null + } + } + } + + /** A SQL expression to check for when writing out data. */ + case class ArbitraryExpression(expression: Expression) extends Rule { + override val name: String = s"EXPRESSION($expression)" + } + + object ArbitraryExpression { + def apply(sparkSession: SparkSession, exprString: String): ArbitraryExpression = { + val expr = sparkSession.sessionState.sqlParser.parseExpression(exprString) + ArbitraryExpression(expr) + } + } + + /** Persisted companion of the ArbitraryExpression rule. 
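 * Stored in the column metadata under the `delta.invariants` key; for an expression such as
 * `value < 3`, the wrapped rule serializes to JSON shaped roughly like
 * `{"expression": {"expression": "value < 3"}}` (shown for illustration).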
*/ + case class PersistedExpression(expression: String) extends RulePersistedInMetadata { + override def wrap: PersistedRule = PersistedRule(expression = this) + } + + /** Extract invariants from the given schema */ + def getFromSchema(schema: StructType, spark: SparkSession): Seq[Constraint] = { + val columns = SchemaUtils.filterRecursively(schema, checkComplexTypes = false) { field => + !field.nullable || field.metadata.contains(INVARIANTS_FIELD) + } + columns.map { + case (parents, field) if !field.nullable => + Constraints.NotNull(parents :+ field.name) + case (parents, field) => + val rule = field.metadata.getString(INVARIANTS_FIELD) + val invariant = Option(JsonUtils.mapper.readValue[PersistedRule](rule).unwrap) match { + case Some(PersistedExpression(exprString)) => + ArbitraryExpression(spark, exprString) + case _ => + throw DeltaErrors.unrecognizedInvariant() + } + Constraints.Check(invariant.name, invariant.expression) + } + } + + val INVARIANTS_FIELD = "delta.invariants" +} + +/** A rule applied on a column to ensure data hygiene. */ +case class Invariant(column: Seq[String], rule: Invariants.Rule) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/constraints/tableChanges.scala b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/tableChanges.scala new file mode 100644 index 00000000000..dc3868f9a45 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/tableChanges.scala @@ -0,0 +1,37 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.constraints + +import org.apache.spark.sql.connector.catalog.TableChange + +/** + * Change to add a CHECK constraint to a table. + * + * @param constraintName The name of the new constraint. Note that constraint names are + * case insensitive. + * @param expr The expression to add, as a SQL parseable string. + */ +case class AddConstraint(constraintName: String, expr: String) extends TableChange {} + +/** + * Change to drop a constraint from a table. Note that this is always idempotent - no error + * will be thrown if the constraint doesn't exist. + * + * @param constraintName the name of the constraint to drop - case insensitive + * @param ifExists if false, throws an error if the constraint to be dropped does not exist + */ +case class DropConstraint(constraintName: String, ifExists: Boolean) extends TableChange {} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/deletionvectors/RoaringBitmapArray.scala b/spark/src/main/scala/org/apache/spark/sql/delta/deletionvectors/RoaringBitmapArray.scala new file mode 100644 index 00000000000..5b840c7751c --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/deletionvectors/RoaringBitmapArray.scala @@ -0,0 +1,674 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.deletionvectors + +import java.io.IOException +import java.nio.{ByteBuffer, ByteOrder} + +import scala.collection.immutable.NumericRange + +import com.google.common.primitives.{Ints, UnsignedInts} +import org.roaringbitmap.{RelativeRangeConsumer, RoaringBitmap} + +/** + * A 64-bit extension of [[RoaringBitmap]] that is optimized for cases that usually fit within + * a 32-bit bitmap, but may run over by a few bits on occasion. + * + * This focus makes it different from [[org.roaringbitmap.longlong.Roaring64NavigableMap]] and + * [[org.roaringbitmap.longlong.Roaring64Bitmap]] which focus on sparse bitmaps over the whole + * 64-bit range. + * + * Structurally, this implementation simply uses the most-significant 4 bytes to index into + * an array of 32-bit [[RoaringBitmap]] instances. + * The array is grown as necessary to accommodate the largest value in the bitmap. + * + * *Note:* As opposed to the other two 64-bit bitmap implementations mentioned above, + * this implementation cannot accommodate `Long` values where the most significant + * bit is non-zero (i.e., negative `Long` values). + * It cannot even accommodate values where the 4 high-order bytes are `Int.MaxValue`, + * because then the length of the `bitmaps` array would be a negative number + * (`Int.MaxValue + 1`). + */ +final class RoaringBitmapArray extends Equals { + import RoaringBitmapArray._ + + private var bitmaps: Array[RoaringBitmap] = Array.empty + + /** + * Add the value to the container (set the value to `true`), + * whether it already appears or not. + */ + def add(value: Long): Unit = { + require(value >= 0 && value <= MAX_REPRESENTABLE_VALUE) + val (high, low) = decomposeHighLowBytes(value) + if (high >= bitmaps.length) { + extendBitmaps(newLength = high + 1) + } + val highBitmap = bitmaps(high) + highBitmap.add(low) + } + + /** Add all `values` to the container. For testing purposes only. */ + protected[delta] def addAll(values: Long*): Unit = values.foreach(add) + + /** Add all values in `range` to the container. */ + protected[delta] def addRange(range: Range): Unit = { + require(0 <= range.start && range.start <= range.end) + if (range.isEmpty) return // Nothing to do. + if (range.step != 1) { + // Can't optimize in this case. + range.foreach(i => add(UnsignedInts.toLong(i))) + return + } + // This is an Int range, so it must fit completely into the first bitmap. + if (bitmaps.isEmpty) { + extendBitmaps(newLength = 1) + } + val end = if (range.isInclusive) range.end + 1 else range.end + bitmaps.head.add(range.start, end) + } + + /** Add all values in `range` to the container. */ + protected[delta] def addRange(range: NumericRange[Long]): Unit = { + require(0L <= range.start && range.start <= range.end && range.end <= MAX_REPRESENTABLE_VALUE) + if (range.isEmpty) return // Nothing to do. + if (range.step != 1L) { + // Can't optimize in this case. + range.foreach(add) + return + } + // Decompose into sub-ranges that target a single bitmap, + // to use the range adds within a bitmap for efficiency. 
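    // Worked example (illustration): (4294967290L until 4294967300L) crosses the 1L << 32
    // boundary, so values 4294967290..4294967295 are added to bitmaps(0) and values
    // 4294967296..4294967299 are added to bitmaps(1) as low values 0..3.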
+ val (startHigh, startLow) = decomposeHighLowBytes(range.start) + val (endHigh, endLow) = decomposeHighLowBytes(range.end) + val lastHigh = if (endLow == 0 && !range.isInclusive) endHigh - 1 else endHigh + if (lastHigh >= bitmaps.length) { + extendBitmaps(newLength = lastHigh + 1) + } + var currentHigh = startHigh + while (currentHigh <= lastHigh) { + val start = if (currentHigh == startHigh) UnsignedInts.toLong(startLow) else 0L + // RoaringBitmap.add is exclusive the end boundary. + val end = if (currentHigh == endHigh) { + if (range.isInclusive) UnsignedInts.toLong(endLow) + 1L else UnsignedInts.toLong(endLow) + } else { + 0xFFFFFFFFL + 1L + } + bitmaps(currentHigh).add(start, end) + currentHigh += 1 + } + } + + /** + * If present, remove the `value` (effectively, sets its bit value to false). + * + * @param value The index in a bitmap. + */ + protected[deletionvectors] def remove(value: Long): Unit = { + require(value >= 0 && value <= MAX_REPRESENTABLE_VALUE) + val (high, low) = decomposeHighLowBytes(value) + if (high < bitmaps.length) { + val highBitmap = bitmaps(high) + highBitmap.remove(low) + if (highBitmap.isEmpty) { + // Clean up all bitmaps that are now empty (from the end). + var latestNonEmpty = bitmaps.length - 1 + var done = false + while (!done && latestNonEmpty >= 0) { + if (bitmaps(latestNonEmpty).isEmpty) { + latestNonEmpty -= 1 + } else { + done = true + } + } + shrinkBitmaps(latestNonEmpty + 1) + } + } + } + + /** Remove all values from the bitmap. */ + def clear(): Unit = { + bitmaps = Array.empty + } + + /** + * Checks whether the value is included, + * which is equivalent to checking if the corresponding bit is set. + */ + def contains(value: Long): Boolean = { + require(value >= 0 && value <= MAX_REPRESENTABLE_VALUE) + val high = highBytes(value) + if (high >= bitmaps.length) { + false + } else { + val highBitmap = bitmaps(high) + val low = lowBytes(value) + highBitmap.contains(low) + } + } + + /** + * Return the set values as an array, if the cardinality is smaller than 2147483648. + * + * The integer values are in sorted order. + */ + def toArray: Array[Long] = { + val cardinality = this.cardinality + require(cardinality <= Int.MaxValue) + val values = Array.ofDim[Long](cardinality.toInt) + var valuesIndex = 0 + for ((bitmap, bitmapIndex) <- bitmaps.zipWithIndex) { + bitmap.forEach((value: Int) => { + values(valuesIndex) = composeFromHighLowBytes(bitmapIndex, value) + valuesIndex += 1 + }) + } + values + } + + /** Materialise the whole set into an array */ + def values: Array[Long] = toArray + + /** Returns the number of distinct integers added to the bitmap (e.g., number of bits set). */ + def cardinality: Long = bitmaps.foldLeft(0L)((sum, bitmap) => sum + bitmap.getLongCardinality) + + /** Tests whether the bitmap is empty. */ + def isEmpty: Boolean = bitmaps.forall(_.isEmpty) + + /** + * Use a run-length encoding where it is more space efficient. + * + * @return `true` if a change was applied + */ + def runOptimize(): Boolean = { + var changeApplied = false + for (bitmap <- bitmaps) { + changeApplied |= bitmap.runOptimize() + } + changeApplied + } + + /** + * Remove run-length encoding even when it is more space efficient. + * + * @return `true` if a change was applied + */ + def removeRunCompression(): Boolean = { + var changeApplied = false + for (bitmap <- bitmaps) { + changeApplied |= bitmap.removeRunCompression() + } + changeApplied + } + + /** + * In-place bitwise OR (union) operation. + * + * The current bitmap is modified. 
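   *
   * For example (illustration only):
   * {{{
   *   val bitmap = RoaringBitmapArray(1L, 2L)
   *   bitmap.or(RoaringBitmapArray(2L, 3L))   // bitmap now contains {1, 2, 3}
   * }}}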
+ */ + def or(that: RoaringBitmapArray): Unit = { + if (this.bitmaps.length < that.bitmaps.length) { + extendBitmaps(newLength = that.bitmaps.length) + } + for (index <- that.bitmaps.indices) { + val thisBitmap = this.bitmaps(index) + val thatBitmap = that.bitmaps(index) + thisBitmap.or(thatBitmap) + } + } + + /** Merges the `other` set into this one. */ + def merge(other: RoaringBitmapArray): Unit = this.or(other) + + /** Get values in `this` but not `that`. */ + def diff(other: RoaringBitmapArray): Unit = this.andNot(other) + + /** Copy `this` along with underlying bitmaps to a new instance. */ + def copy(): RoaringBitmapArray = { + val newBitmap = new RoaringBitmapArray() + newBitmap.merge(this) + newBitmap + } + + /** + * In-place bitwise AND (this & that) operation. + * + * The current bitmap is modified. + */ + def and(that: RoaringBitmapArray): Unit = { + for (index <- 0 until this.bitmaps.length) { + val thisBitmap = this.bitmaps(index) + if (index < that.bitmaps.length) { + val thatBitmap = that.bitmaps(index) + thisBitmap.and(thatBitmap) + } else { + thisBitmap.clear() + } + } + } + + /** + * In-place bitwise AND-NOT (this & ~that) operation. + * + * The current bitmap is modified. + */ + def andNot(that: RoaringBitmapArray): Unit = { + val validLength = math.min(this.bitmaps.length, that.bitmaps.length) + for (index <- 0 until validLength) { + val thisBitmap = this.bitmaps(index) + val thatBitmap = that.bitmaps(index) + thisBitmap.andNot(thatBitmap) + } + } + + /** + * Report the number of bytes required to serialize this bitmap. + * + * This is the number of bytes written out when using the [[serialize]] method. + */ + def serializedSizeInBytes(format: RoaringBitmapArrayFormat.Value): Long = { + val magicNumberSize = 4 + + val serializedBitmapsSize = format.formatImpl.serializedSizeInBytes(bitmaps) + + magicNumberSize + serializedBitmapsSize + } + + /** + * Serialize this [[RoaringBitmapArray]] into the `buffer`. + * + * == Format == + * - A Magic Number indicating the format used (4 bytes) + * - The actual data as specified by the format. + * + */ + def serialize(buffer: ByteBuffer, format: RoaringBitmapArrayFormat.Value): Unit = { + require(ByteOrder.LITTLE_ENDIAN == buffer.order(), + "RoaringBitmapArray has to be serialized using a little endian buffer") + // Magic number to make sure we don't try to deserialize a simple RoaringBitmap or the wrong + // format later. + buffer.putInt(format.formatImpl.MAGIC_NUMBER) + format.formatImpl.serialize(bitmaps, buffer) + } + + /** Serializes this [[RoaringBitmapArray]] and returns the serialized form as a byte array. */ + def serializeAsByteArray(format: RoaringBitmapArrayFormat.Value): Array[Byte] = { + val size = serializedSizeInBytes(format) + if (!size.isValidInt) { + throw new IOException( + s"A bitmap was too big to be serialized into an array ($size bytes)") + } + val buffer = ByteBuffer.allocate(size.toInt) + buffer.order(ByteOrder.LITTLE_ENDIAN) + // This is faster than Java serialization. + // See: https://richardstartin.github.io/posts/roaringbitmap-performance-tricks#serialisation + serialize(buffer, format) + buffer.array() + } + + /** + * Deserialize the contents of `buffer` into this [[RoaringBitmapArray]]. + * + * All existing content will be discarded! + * + * See [[serialize]] for the expected serialization format. 
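   *
   * Round-trip sketch (illustration only):
   * {{{
   *   val bytes = bitmap.serializeAsByteArray(RoaringBitmapArrayFormat.Portable)
   *   val copy = RoaringBitmapArray.readFrom(bytes)   // equal to the original bitmap
   * }}}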
+ */ + def deserialize(buffer: ByteBuffer): Unit = { + require(ByteOrder.LITTLE_ENDIAN == buffer.order(), + "RoaringBitmapArray has to be deserialized using a little endian buffer") + + val magicNumber = buffer.getInt + val serializationFormat = magicNumber match { + case NativeRoaringBitmapArraySerializationFormat.MAGIC_NUMBER => + NativeRoaringBitmapArraySerializationFormat + case PortableRoaringBitmapArraySerializationFormat.MAGIC_NUMBER => + PortableRoaringBitmapArraySerializationFormat + case _ => + throw new IOException(s"Unexpected RoaringBitmapArray magic number $magicNumber") + } + bitmaps = serializationFormat.deserialize(buffer) + } + + /** + * Consume presence information for all values in the range `[start, start + length)`. + * + * @param start Lower bound of values to consume. + * @param length Maximum number of values to consume. + * @param rrc Code to be executed for each present or absent value. + */ + def forAllInRange(start: Long, length: Int, consumer: RelativeRangeConsumer): Unit = { + // This one is complicated and deserves its own PR, + // when we actually want to enable it. + throw new UnsupportedOperationException + } + + /** Execute the `consume` function for every value in the set represented by this bitmap. */ + def forEach(consume: Long => Unit): Unit = { + for ((bitmap, high) <- bitmaps.zipWithIndex) { + bitmap.forEach { low: Int => + val value = composeFromHighLowBytes(high, low) + consume(value) + } + } + } + + override def canEqual(that: Any): Boolean = that.isInstanceOf[RoaringBitmapArray] + + override def equals(other: Any): Boolean = { + other match { + case that: RoaringBitmapArray => + (this eq that) || // don't need to check canEqual because class is final + java.util.Arrays.deepEquals( + this.bitmaps.asInstanceOf[Array[AnyRef]], + that.bitmaps.asInstanceOf[Array[AnyRef]]) + case _ => false + } + } + + override def hashCode: Int = 131 * java.util.Arrays.deepHashCode( + bitmaps.asInstanceOf[Array[AnyRef]]) + + def mkString(start: String = "", sep: String = "", end: String = ""): String = + toArray.mkString(start, sep, end) + + def first: Option[Long] = { + for ((bitmap, high) <- bitmaps.zipWithIndex) { + if (!bitmap.isEmpty) { + val low = bitmap.first() + return Some(composeFromHighLowBytes(high, low)) + } + } + None + } + + def last: Option[Long] = { + for ((bitmap, high) <- bitmaps.zipWithIndex.reverse) { + if (!bitmap.isEmpty) { + val low = bitmap.last() + return Some(composeFromHighLowBytes(high, low)) + } + } + None + } + + /** + * Utility method to extend the array of [[RoaringBitmap]] to given length, keeping + * the existing elements in place. + */ + private def extendBitmaps(newLength: Int): Unit = { + // Optimization for the most common case + if (bitmaps.isEmpty && newLength == 1) { + bitmaps = Array(new RoaringBitmap()) + return + } + val newBitmaps = Array.ofDim[RoaringBitmap](newLength) + System.arraycopy( + bitmaps, // source + 0, // source start pos + newBitmaps, // dest + 0, // dest start pos + bitmaps.length) // number of entries to copy + for (i <- bitmaps.length until newLength) { + newBitmaps(i) = new RoaringBitmap() + } + bitmaps = newBitmaps + } + + /** Utility method to shrink the array of [[RoaringBitmap]] to given length. 
*/ + private def shrinkBitmaps(newLength: Int): Unit = { + if (newLength == 0) { + bitmaps = Array.empty + } else { + val newBitmaps = Array.ofDim[RoaringBitmap](newLength) + System.arraycopy( + bitmaps, // source + 0, // source start pos + newBitmaps, // dest + 0, // dest start pos + newLength) // number of entries to copy + bitmaps = newBitmaps + } + } + + // For testing purposes + protected[delta] def toBitmap32Bit(): RoaringBitmap = { + val bitmap32 = new RoaringBitmap() + forEach { value => + val value32 = Ints.checkedCast(value) + bitmap32.add(value32) + } + bitmap32.runOptimize() + bitmap32 + } +} + +object RoaringBitmapArray { + + /** The largest value a [[RoaringBitmapArray]] can possibly represent. */ + final val MAX_REPRESENTABLE_VALUE: Long = composeFromHighLowBytes(Int.MaxValue - 1, Int.MinValue) + final val MAX_BITMAP_CARDINALITY: Long = 1L << 32 + + /** Create a new [[RoaringBitmapArray]] with the given `values`. */ + def apply(values: Long*): RoaringBitmapArray = { + val bitmap = new RoaringBitmapArray + bitmap.addAll(values: _*) + bitmap + } + + /** + * + * @param value Any `Long`; positive or negative. + * @return An `Int` holding the 4 high-order bytes of information of the input `value`. + */ + def highBytes(value: Long): Int = (value >> 32).toInt + + /** + * + * @param value Any `Long`; positive or negative. + * @return An `Int` holding the 4 low-order bytes of information of the input `value`. + */ + def lowBytes(value: Long): Int = value.toInt + + /** Separate high and low 4 bytes into a pair of `Int`s (high, low). */ + def decomposeHighLowBytes(value: Long): (Int, Int) = (highBytes(value), lowBytes(value)) + + /** + * Combine high and low 4 bytes of a pair of `Int`s into a `Long`. + * + * This is essentially the inverse of [[decomposeHighLowBytes()]]. + * + * @param high An `Int` representing the 4 high-order bytes of the output `Long` + * @param low An `Int` representing the 4 low-order bytes of the output `Long` + * @return A `Long` composing the `high` and `low` bytes. + */ + def composeFromHighLowBytes(high: Int, low: Int): Long = + (high.toLong << 32) | (low.toLong & 0xFFFFFFFFL) // Must bitmask to avoid sign extension. + + /** Deserialize the right instance from the given bytes */ + def readFrom(bytes: Array[Byte]): RoaringBitmapArray = { + val buffer = ByteBuffer.wrap(bytes) + buffer.order(ByteOrder.LITTLE_ENDIAN) + val bitmap = new RoaringBitmapArray() + bitmap.deserialize(buffer) + bitmap + } +} + +/** + * Abstracts out how to (de-)serialize the array. + * + * All formats are indicated by a magic number in the first 4-bytes, + * which must be add/stripped by the *caller*. + */ +private[deletionvectors] sealed trait RoaringBitmapArraySerializationFormat { + /** Magic number prefix for serialization with this format. */ + val MAGIC_NUMBER: Int + /** The number of bytes written out when using the [[serialize]] method. */ + def serializedSizeInBytes(bitmaps: Array[RoaringBitmap]): Long + /** Serialize `bitmaps` into `buffer`. */ + def serialize(bitmaps: Array[RoaringBitmap], buffer: ByteBuffer): Unit + /** Deserialize all bitmaps from the `buffer` into a fresh array. */ + def deserialize(buffer: ByteBuffer): Array[RoaringBitmap] +} + +/** Legal values for the serialization format for [[RoaringBitmapArray]]. 
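 * Usage sketch (illustrative): pass one of these values wherever a format is required, e.g.
 * `bitmap.serializeAsByteArray(RoaringBitmapArrayFormat.Portable)` or
 * `bitmap.serializedSizeInBytes(RoaringBitmapArrayFormat.Native)`.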
*/ +object RoaringBitmapArrayFormat extends Enumeration { + protected case class Format(formatImpl: RoaringBitmapArraySerializationFormat) + extends super.Val + + import scala.language.implicitConversions + implicit def valueToFormat(x: Value): Format = x.asInstanceOf[Format] + + val Native = Format(NativeRoaringBitmapArraySerializationFormat) + val Portable = Format(PortableRoaringBitmapArraySerializationFormat) +} + +private[deletionvectors] object NativeRoaringBitmapArraySerializationFormat + extends RoaringBitmapArraySerializationFormat { + + override val MAGIC_NUMBER: Int = 1681511376 + + override def serializedSizeInBytes(bitmaps: Array[RoaringBitmap]): Long = { + val roaringBitmapsCountSize = 4 + + val roaringBitmapLengthSize = 4 + val roaringBitmapsSize = bitmaps.foldLeft(0L) { (sum, bitmap) => + sum + bitmap.serializedSizeInBytes() + roaringBitmapLengthSize + } + + roaringBitmapsCountSize + roaringBitmapsSize + } + + /** + * Serialize `bitmaps` into the `buffer`. + * + * == Format == + * - Number of bitmaps (4 bytes) + * - For each individual bitmap: + * - Length of the serialized bitmap + * - Serialized bitmap data using the standard format + * (see https://github.com/RoaringBitmap/RoaringFormatSpec) + */ + override def serialize(bitmaps: Array[RoaringBitmap], buffer: ByteBuffer): Unit = { + buffer.putInt(bitmaps.length) + for (bitmap <- bitmaps) { + val placeholderPos = buffer.position() + buffer.putInt(-1) // Placeholder for the serialized size + val startPos = placeholderPos + 4 + bitmap.serialize(buffer) + val endPos = buffer.position() + val writtenBytes = endPos - startPos + buffer.putInt(placeholderPos, writtenBytes) + } + } + + override def deserialize(buffer: ByteBuffer): Array[RoaringBitmap] = { + val numberOfBitmaps = buffer.getInt + if (numberOfBitmaps < 0) { + throw new IOException(s"Invalid RoaringBitmapArray length" + + s" ($numberOfBitmaps < 0)") + } + val bitmaps = Array.fill(numberOfBitmaps)(new RoaringBitmap()) + for (index <- 0 until numberOfBitmaps) { + val bitmapSize = buffer.getInt + bitmaps(index).deserialize(buffer) + // RoaringBitmap.deserialize doesn't move the buffer's pointer + buffer.position(buffer.position() + bitmapSize) + } + bitmaps + } +} + +/** + * This is the "official" portable format defined in the spec. + * + * See [[https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations]] + */ +private[sql] object PortableRoaringBitmapArraySerializationFormat + extends RoaringBitmapArraySerializationFormat { + + override val MAGIC_NUMBER: Int = 1681511377 + + override def serializedSizeInBytes(bitmaps: Array[RoaringBitmap]): Long = { + val bitmapCountSize = 8 + + val individualBitmapKeySize = 4 + val bitmapSizes = bitmaps.foldLeft(0L) { (sum, bitmap) => + sum + bitmap.serializedSizeInBytes() + individualBitmapKeySize + } + + bitmapCountSize + bitmapSizes + } + + /** + * Serialize `bitmaps` into the `buffer`. + * + * ==Format== + * - Number of bitmaps (8 bytes, upper 4 are basically padding) + * - For each individual bitmap, in increasing key order (unsigned, technically, but + * RoaringBitmapArray doesn't support negative keys anyway.): + * - key of the bitmap (upper 32 bit) + * - Serialized bitmap data using the standard format (see + * https://github.com/RoaringBitmap/RoaringFormatSpec) + */ + override def serialize(bitmaps: Array[RoaringBitmap], buffer: ByteBuffer): Unit = { + buffer.putLong(bitmaps.length.toLong) + // Iterate in index-order, so that the keys are ascending as required by spec. 
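    // Layout sketch for two bitmaps of serialized sizes s0 and s1 (illustration only):
    //   [count: 8 bytes][key 0: 4 bytes][bitmap 0: s0 bytes][key 1: 4 bytes][bitmap 1: s1 bytes]
    // which matches serializedSizeInBytes above: 8 + (s0 + 4) + (s1 + 4).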
+ for ((bitmap, index) <- bitmaps.zipWithIndex) { + // In our array-based implementation the index is the key. + buffer.putInt(index) + bitmap.serialize(buffer) + } + } + override def deserialize(buffer: ByteBuffer): Array[RoaringBitmap] = { + val numberOfBitmaps = buffer.getLong + // These cases are allowed by the format, but out implementation doesn't support them. + if (numberOfBitmaps < 0L) { + throw new IOException(s"Invalid RoaringBitmapArray length ($numberOfBitmaps < 0)") + } + if (numberOfBitmaps > Int.MaxValue) { + throw new IOException( + s"Invalid RoaringBitmapArray length ($numberOfBitmaps > ${Int.MaxValue})") + } + // This format is designed for sparse bitmaps, so numberOfBitmaps is only a lower bound for the + // actual size of the array. + val minimumArraySize = numberOfBitmaps.toInt + val bitmaps = Array.newBuilder[RoaringBitmap] + bitmaps.sizeHint(minimumArraySize) + var lastIndex = 0 + for (_ <- 0L until numberOfBitmaps) { + val key = buffer.getInt + if (key < 0L) { + throw new IOException(s"Invalid unsigned entry in RoaringBitmapArray ($key)") + } + assert(key >= lastIndex, "Keys are required to be sorted in ascending order.") + // Fill gaps in sparse data. + while (lastIndex < key) { + bitmaps += new RoaringBitmap() + lastIndex += 1 + } + val bitmap = new RoaringBitmap() + bitmap.deserialize(buffer) + bitmaps += bitmap + lastIndex += 1 + // RoaringBitmap.deserialize doesn't move the buffer's pointer + buffer.position(buffer.position() + bitmap.serializedSizeInBytes()) + } + bitmaps.result() + } +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/deletionvectors/RowIndexMarkingFilters.scala b/spark/src/main/scala/org/apache/spark/sql/delta/deletionvectors/RowIndexMarkingFilters.scala new file mode 100644 index 00000000000..c6cdc1868f0 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/deletionvectors/RowIndexMarkingFilters.scala @@ -0,0 +1,137 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.deletionvectors + +import org.apache.spark.sql.delta.RowIndexFilter +import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor +import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.execution.vectorized.WritableColumnVector + +/** + * Base class for row index filters. 
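 *
 * For example (illustration): with a bitmap containing row indices {2, 4},
 * `materializeIntoVector(0, 5, vec)` writes `valueWhenContained` at positions 2 and 4 of `vec`
 * and `valueWhenNotContained` at positions 0, 1 and 3.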
+ * @param bitmap Represents the deletion vector + */ +abstract sealed class RowIndexMarkingFilters(bitmap: RoaringBitmapArray) extends RowIndexFilter { + val valueWhenContained: Byte + val valueWhenNotContained: Byte + + override def materializeIntoVector(start: Long, end: Long, batch: WritableColumnVector): Unit = { + val batchSize = (end - start).toInt + var rowId = 0 + while (rowId < batchSize) { + val isContained = bitmap.contains(start + rowId.toLong) + val filterOutput = if (isContained) { + valueWhenContained + } else { + valueWhenNotContained + } + batch.putByte(rowId, filterOutput) + rowId += 1 + } + } +} + +sealed trait RowIndexMarkingFiltersBuilder { + def getFilterForEmptyDeletionVector(): RowIndexFilter + def getFilterForNonEmptyDeletionVector(bitmap: RoaringBitmapArray): RowIndexFilter + + def createInstance( + deletionVector: DeletionVectorDescriptor, + hadoopConf: Configuration, + tablePath: Option[Path]): RowIndexFilter = { + if (deletionVector.cardinality == 0) { + getFilterForEmptyDeletionVector() + } else { + require(tablePath.nonEmpty, "Table path is required for non-empty deletion vectors") + val dvStore = DeletionVectorStore.createInstance(hadoopConf) + val storedBitmap = StoredBitmap.create(deletionVector, tablePath.get) + val bitmap = storedBitmap.load(dvStore) + getFilterForNonEmptyDeletionVector(bitmap) + } + } +} + +/** + * Implementation of [[RowIndexFilter]] which checks, for a given row index and deletion vector, + * whether the row index is present in the deletion vector. If present, the row is marked for + * skipping. + * @param bitmap Represents the deletion vector + */ +final class DropMarkedRowsFilter(bitmap: RoaringBitmapArray) + extends RowIndexMarkingFilters(bitmap) { + override val valueWhenContained: Byte = RowIndexFilter.DROP_ROW_VALUE + override val valueWhenNotContained: Byte = RowIndexFilter.KEEP_ROW_VALUE +} + +/** + * Utility methods that creates [[DropMarkedRowsFilter]] to filter out row indices that are present + * in the given deletion vector. + */ +object DropMarkedRowsFilter extends RowIndexMarkingFiltersBuilder { + override def getFilterForEmptyDeletionVector(): RowIndexFilter = KeepAllRowsFilter + + override def getFilterForNonEmptyDeletionVector(bitmap: RoaringBitmapArray): RowIndexFilter = + new DropMarkedRowsFilter(bitmap) +} + +/** + * Implementation of [[RowIndexFilter]] which checks, for a given row index and deletion vector, + * whether the row index is present in the deletion vector. If not present, the row is marked for + * skipping. + * @param bitmap Represents the deletion vector + */ +final class KeepMarkedRowsFilter(bitmap: RoaringBitmapArray) + extends RowIndexMarkingFilters(bitmap) { + override val valueWhenContained: Byte = RowIndexFilter.KEEP_ROW_VALUE + override val valueWhenNotContained: Byte = RowIndexFilter.DROP_ROW_VALUE +} + +/** + * Utility methods that creates [[KeepMarkedRowsFilter]] to filter out row indices that are present + * in the given deletion vector. 
+ */ +object KeepMarkedRowsFilter extends RowIndexMarkingFiltersBuilder { + override def getFilterForEmptyDeletionVector(): RowIndexFilter = DropAllRowsFilter + + override def getFilterForNonEmptyDeletionVector(bitmap: RoaringBitmapArray): RowIndexFilter = + new KeepMarkedRowsFilter(bitmap) +} + +case object DropAllRowsFilter extends RowIndexFilter { + override def materializeIntoVector(start: Long, end: Long, batch: WritableColumnVector): Unit = { + val batchSize = (end - start).toInt + var rowId = 0 + while (rowId < batchSize) { + batch.putByte(rowId, RowIndexFilter.DROP_ROW_VALUE) + rowId += 1 + } + } +} + +case object KeepAllRowsFilter extends RowIndexFilter { + override def materializeIntoVector(start: Long, end: Long, batch: WritableColumnVector): Unit = { + val batchSize = (end - start).toInt + var rowId = 0 + while (rowId < batchSize) { + batch.putByte(rowId, RowIndexFilter.KEEP_ROW_VALUE) + rowId += 1 + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/deletionvectors/StoredBitmap.scala b/spark/src/main/scala/org/apache/spark/sql/delta/deletionvectors/StoredBitmap.scala new file mode 100644 index 00000000000..7ad4625e0c9 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/deletionvectors/StoredBitmap.scala @@ -0,0 +1,131 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.deletionvectors + +import java.io.{IOException, ObjectInputStream} + +import org.apache.spark.sql.delta.DeltaErrors +import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore +import org.apache.spark.sql.delta.util.JsonUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.util.Utils + +/** + * Interface for bitmaps that are stored as Deletion Vectors. + */ +trait StoredBitmap { + /** + * Read the bitmap into memory. + * Use `dvStore` if this variant is in cloud storage, otherwise just deserialize. + */ + def load(dvStore: DeletionVectorStore): RoaringBitmapArray + + /** + * The serialized size of the stored bitmap in bytes. + * Can be used for planning memory management without a round-trip to cloud storage. + */ + def size: Int + + /** + * The number of entries in the bitmap. + */ + def cardinality: Long + + /** + * Returns a unique identifier for this bitmap (Deletion Vector serialized as a JSON object). + */ + def getUniqueId: String +} + +/** + * Bitmap for a Deletion Vector, implemented as a thin wrapper around a Deletion Vector + * Descriptor. The bitmap can be empty, inline or on-disk. In case of on-disk deletion + * vectors, `tableDataPath` must be set to the data path of the Delta table, which is where + * deletion vectors are stored. 
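 *
 * Typical use (sketch): `StoredBitmap.create(descriptor, tablePath).load(dvStore)` yields the
 * deleted row indices as a [[RoaringBitmapArray]], hitting cloud storage only when the
 * descriptor is on-disk; empty and inline vectors are materialized directly.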
+ */ +case class DeletionVectorStoredBitmap( + dvDescriptor: DeletionVectorDescriptor, + tableDataPath: Option[Path] = None +) extends StoredBitmap with DeltaLogging { + require(tableDataPath.isDefined || !dvDescriptor.isOnDisk, + "Table path is required for on-disk deletion vectors") + + override def load(dvStore: DeletionVectorStore): RoaringBitmapArray = { + val bitmap = if (isEmpty) { + new RoaringBitmapArray() + } else if (isInline) { + RoaringBitmapArray.readFrom(dvDescriptor.inlineData) + } else { + assert(isOnDisk) + dvStore.read(onDiskPath.get, dvDescriptor.offset.getOrElse(0), dvDescriptor.sizeInBytes) + } + + // Verify that the cardinality in the bitmap matches the DV descriptor. + if (bitmap.cardinality != dvDescriptor.cardinality) { + recordDeltaEvent( + deltaLog = null, + opType = "delta.assertions.deletionVectorReadCardinalityMismatch", + data = Map( + "deletionVectorPath" -> onDiskPath, + "deletionVectorCardinality" -> bitmap.cardinality, + "deletionVectorDescriptor" -> dvDescriptor), + path = tableDataPath) + throw DeltaErrors.deletionVectorCardinalityMismatch() + } + + bitmap + } + + override def size: Int = dvDescriptor.sizeInBytes + + override def cardinality: Long = dvDescriptor.cardinality + + override lazy val getUniqueId: String = JsonUtils.toJson(dvDescriptor) + + private def isEmpty: Boolean = dvDescriptor.isEmpty + + private def isInline: Boolean = dvDescriptor.isInline + + private def isOnDisk: Boolean = dvDescriptor.isOnDisk + + /** The absolute path for on-disk deletion vectors. */ + private lazy val onDiskPath: Option[Path] = tableDataPath.map(dvDescriptor.absolutePath) +} + +object StoredBitmap { + /** The stored bitmap of an empty deletion vector. */ + final val EMPTY = DeletionVectorStoredBitmap(DeletionVectorDescriptor.EMPTY, None) + + + /** Factory for inline deletion vectors. */ + def inline(dvDescriptor: DeletionVectorDescriptor): StoredBitmap = { + require(dvDescriptor.isInline) + DeletionVectorStoredBitmap(dvDescriptor, None) + } + + /** Factory for deletion vectors. */ + def create(dvDescriptor: DeletionVectorDescriptor, tablePath: Path): StoredBitmap = { + if (dvDescriptor.isOnDisk) { + DeletionVectorStoredBitmap(dvDescriptor, Some(tablePath)) + } else { + DeletionVectorStoredBitmap(dvDescriptor, None) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/expressions/HilbertIndex.scala b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/HilbertIndex.scala new file mode 100644 index 00000000000..fd5f605b540 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/HilbertIndex.scala @@ -0,0 +1,403 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.expressions + +import java.util + +import scala.collection.mutable + +import org.apache.spark.sql.delta.expressions.HilbertUtils._ + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression} +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.types.{AbstractDataType, DataType, DataTypes} + +/** + * Represents a hilbert index built from the provided columns. + * The columns are expected to all be Ints and to have at most numBits individually. + * The points along the hilbert curve are represented by Longs. + */ +private[sql] case class HilbertLongIndex(numBits: Int, children: Seq[Expression]) + extends Expression with ExpectsInputTypes with CodegenFallback { + + private val n: Int = children.size + private val nullValue: Int = 0 + + override def nullable: Boolean = false + + // pre-initialize working set array + private val ints = new Array[Int](n) + + override def eval(input: InternalRow): Any = { + var i = 0 + while (i < n) { + ints(i) = children(i).eval(input) match { + case null => nullValue + case int: Integer => int + case any => throw new IllegalArgumentException( + s"${this.getClass.getSimpleName} expects only inputs of type Int, but got: " + + s"$any of type${any.getClass.getSimpleName}") + } + i += 1 + } + + HilbertStates.getStateList(n).translateNPointToDKey(ints, numBits) + } + + override def dataType: DataType = DataTypes.LongType + + override def inputTypes: Seq[AbstractDataType] = Seq.fill(n)(DataTypes.IntegerType) + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): HilbertLongIndex = copy(children = newChildren) +} + +/** + * Represents a hilbert index built from the provided columns. + * The columns are expected to all be Ints and to have at most numBits. + * The points along the hilbert curve are represented by Byte arrays. + */ +private[sql] case class HilbertByteArrayIndex(numBits: Int, children: Seq[Expression]) + extends Expression with ExpectsInputTypes with CodegenFallback { + + private val n: Int = children.size + private val nullValue: Int = 0 + + override def nullable: Boolean = false + + // pre-initialize working set array + private val ints = new Array[Int](n) + + override def eval(input: InternalRow): Any = { + var i = 0 + while (i < n) { + ints(i) = children(i).eval(input) match { + case null => nullValue + case int: Integer => int + case any => throw new IllegalArgumentException( + s"${this.getClass.getSimpleName} expects only inputs of type Int, but got: " + + s"$any of type${any.getClass.getSimpleName}") + } + i += 1 + } + + HilbertStates.getStateList(n).translateNPointToDKeyArray(ints, numBits) + } + + override def dataType: DataType = DataTypes.BinaryType + + override def inputTypes: Seq[AbstractDataType] = Seq.fill(n)(DataTypes.IntegerType) + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): HilbertByteArrayIndex = copy(children = newChildren) +} + +// scalastyle:off line.size.limit +/** + * The following code is based on this paper: + * https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=bfd6d94c98627756989b0147a68b7ab1f881a0d6 + * with optimizations around matrix manipulation taken from this one: + * https://pdfs.semanticscholar.org/4043/1c5c43a2121e1bc071fc035e90b8f4bb7164.pdf + * + * At a high level you construct a GeneratorTable with the getStateGenerator method. 
+ * That represents the information necessary to construct a state list for a given number + * of dimension, N. + * Once you have the generator table for your dimension you can construct a state list. + * You can then turn those state lists into compact state lists that store all the information + * in one large array of longs. + */ +// scalastyle:on line.size.limit +object HilbertIndex { + + private type CompactStateList = HilbertCompactStateList + + val SIZE_OF_INT = 32 + + /** + * Construct the generator table for a space of dimension n. + * This table consists of 2^n rows, each row containing Y, X1, and TY. + * Y The index in the array representing the table. (0 to (2^n - 1)) + * X1 A coordinate representing points on the curve expressed as an n-point. + * These are arranged such that if two rows differ by 1 in Y then the binary + * representation of their X1 values differ by exactly one bit. + * These are the "Gray-codes" of their Y value. + * TY A transformation matrix that transforms X2(1) to the X1 value where Y is zero and + * transforms X2(2) to the X1 value where Y is (2^n - 1) + */ + def getStateGenerator(n: Int): GeneratorTable = { + val x2s = getX2GrayCodes(n) + + val len = 1 << n + val rows = (0 until len).map { i => + // A pair of n-points corresponding to the first and last points on the first order curve to + // which X1 transforms in the construction of a second order curve. + val x21 = x2s(i << 1) + val x22 = x2s((i << 1) + 1) + // Represents the magnitude of difference between X2 values in this row. + val dy = x21 ^ x22 + + Row( + y = i, + x1 = i ^ (i >>> 1), + m = HilbertMatrix(n, x21, getSetColumn(n, dy)) + ) + } + + new GeneratorTable(n, rows) + } + + // scalastyle:off line.size.limit + /** + * This will construct an x2-gray-codes sequence of order n as described in + * https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=bfd6d94c98627756989b0147a68b7ab1f881a0d6 + * + * Each pair of values corresponds to the first and last coordinates of points on a first + * order curve to which a point taken from column X1 transforms to at the second order. 
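+ *
+ * For intuition (hand-traced from the recursion below, starting from the hard-coded n = 1 base
+ * case), n = 2 appears to yield the sequence (0, 1, 0, 2, 0, 2, 3, 2), i.e. the pairs
+ * (0, 1), (0, 2), (0, 2), (3, 2), where each pair differs in exactly one bit.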
+ */ + // scalastyle:on line.size.limit + private[this] def getX2GrayCodes(n: Int) : Array[Int] = { + if (n == 1) { + // hard code the base case + return Array(0, 1, 0, 1) + } + val mask = 1 << (n - 1) + val base = getX2GrayCodes(n - 1) + base(base.length - 1) = base(base.length - 2) + mask + val result = Array.fill(base.length * 2)(0) + base.indices.foreach { i => + result(i) = base(i) + result(result.length - 1 - i) = base(i) ^ mask + } + result + } + + private[this] case class Row(y: Int, x1: Int, m: HilbertMatrix) + + private[this] case class PointState(y: Int, var x1: Int = 0, var state: Int = 0) + + private[this] case class State(id: Int, matrix: HilbertMatrix, var pointStates: Seq[PointState]) + + private[sql] class StateList(n: Int, states: Map[Int, State]) { + def getNPointToDKeyStateMap: CompactStateList = { + val numNPoints = 1 << n + val array = new Array[Long](numNPoints * states.size) + + states.foreach { case (stateIdx, state) => + val stateStartIdx = stateIdx * numNPoints + + state.pointStates.foreach { ps => + val psLong = (ps.y.toLong << SIZE_OF_INT) | ps.state.toLong + array(stateStartIdx + ps.x1) = psLong + } + } + new CompactStateList(n, array) + } + def getDKeyToNPointStateMap: CompactStateList = { + val numNPoints = 1 << n + val array = new Array[Long](numNPoints * states.size) + + states.foreach { case (stateIdx, state) => + val stateStartIdx = stateIdx * numNPoints + + state.pointStates.foreach { ps => + val psLong = (ps.x1.toLong << SIZE_OF_INT) | ps.state.toLong + array(stateStartIdx + ps.y) = psLong + } + } + new CompactStateList(n, array) + } + } + + private[sql] class GeneratorTable(n: Int, rows: Seq[Row]) { + def generateStateList(): StateList = { + val result = mutable.Map[Int, State]() + val list = new util.LinkedList[State]() + + var nextStateNum = 1 + + val initialState = State(0, HilbertMatrix.identity(n), rows.map(r => PointState(r.y, r.x1))) + result.put(0, initialState) + + rows.foreach { row => + val matrix = row.m + result.find { case (_, s) => s.matrix == matrix } match { + case Some((_, s)) => + initialState.pointStates(row.y).state = s.id + case _ => + initialState.pointStates(row.y).state = nextStateNum + val newState = State(nextStateNum, matrix, Seq()) + result.put(nextStateNum, newState) + list.addLast(newState) + nextStateNum += 1 + } + } + + while (!list.isEmpty) { + val currentState = list.removeFirst() + currentState.pointStates = rows.indices.map(r => PointState(r)) + + rows.indices.foreach { i => + val j = currentState.matrix.transform(i) + val p = initialState.pointStates.find(_.x1 == j).get + val currentPointState = currentState.pointStates(p.y) + currentPointState.x1 = i + val tm = result(p.state).matrix.multiply(currentState.matrix) + + result.find { case (_, s) => s.matrix == tm } match { + case Some((_, s)) => + currentPointState.state = s.id + case _ => + currentPointState.state = nextStateNum + val newState = State(nextStateNum, tm, Seq()) + result.put(nextStateNum, newState) + list.addLast(newState) + nextStateNum += 1 + } + } + } + + new StateList(n, result.toMap) + } + } +} + +/** + * Represents a compact state map. This is used in the mapping between n-points and d-keys. + * [[array]] is treated as a Map(Int -> Map(Int -> (Int, Int))) + * + * Each values in the array will be a combination of two things, a point and the index of the + * next state, in the most- and least- significant bits, respectively. 
+ * state -> coord -> [point + nextState] + */ +private[sql] class HilbertCompactStateList(n: Int, array: Array[Long]) { + private val maxNumN = 1 << n + private val mask = maxNumN - 1 + private val intMask = (1L << HilbertIndex.SIZE_OF_INT) - 1 + + // point and nextState + @inline def transform(nPoint: Int, state: Int): (Int, Int) = { + val value = array(state * maxNumN + nPoint) + ( + (value >>> HilbertIndex.SIZE_OF_INT).toInt, + (value & intMask).toInt + ) + } + + // These while loops are to minimize overhead. + // This method exists only for testing + private[expressions] def translateDKeyToNPoint(key: Long, k: Int): Array[Int] = { + val result = new Array[Int](n) + var currentState = 0 + var i = 0 + while (i < k) { + val h = (key >> ((k - 1 - i) * n)) & mask + + val (z, nextState) = transform(h.toInt, currentState) + + var j = 0 + while (j < n) { + val v = (z >> (n - 1 - j)) & 1 + result(j) = (result(j) << 1) | v + j += 1 + } + + currentState = nextState + i += 1 + } + result + } + + // These while loops are to minimize overhead. + // This method exists only for testing + private[expressions] def translateDKeyArrayToNPoint(key: Array[Byte], k: Int): Array[Int] = { + val result = new Array[Int](n) + val initialOffset = (key.length * 8) - (k * n) + var currentState = 0 + var i = 0 + while (i < k) { + val offset = initialOffset + (i * n) + val h = getBits(key, offset, n) + + val (z, nextState) = transform(h, currentState) + + var j = 0 + while (j < n) { + val v = (z >> (n - 1 - j)) & 1 + result(j) = (result(j) << 1) | v + j += 1 + } + + currentState = nextState + i += 1 + } + result + } + + /** + * Translate an n-dimensional point into it's corresponding position on the n-dimensional + * hilbert curve. + * @param point An n-dimensional point. (assumed to have n elements) + * @param k The number of meaningful bits in each value of the point. + */ + def translateNPointToDKey(point: Array[Int], k: Int): Long = { + var result = 0L + var currentState = 0 + var i = 0 + while (i < k) { + var z = 0 + var j = 0 + while (j < n) { + z = (z << 1) | ((point(j) >> (k - 1 - i)) & 1) + j += 1 + } + val (h, nextState) = transform(z, currentState) + result = (result << n) | h + currentState = nextState + i += 1 + } + result + } + + /** + * Translate an n-dimensional point into it's corresponding position on the n-dimensional + * hilbert curve. Returns the resulting integer as an array of bytes. + * @param point An n-dimensional point. (assumed to have n elements) + * @param k The number of meaningful bits in each value of the point. + */ + def translateNPointToDKeyArray(point: Array[Int], k: Int): Array[Byte] = { + val numBits = k * n + val numBytes = (numBits + 7) / 8 + val result = new Array[Byte](numBytes) + val initialOffset = (numBytes * 8) - numBits + var currentState = 0 + var i = 0 + while (i < k) { + var z = 0 + var j = 0 + while (j < n) { + z = (z << 1) | ((point(j) >> (k - 1 - i)) & 1) + j += 1 + } + val (h, nextState) = transform(z, currentState) + setBits(result, initialOffset + (i * n), h, n) + currentState = nextState + i += 1 + } + result + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/expressions/HilbertStates.java b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/HilbertStates.java new file mode 100644 index 00000000000..d7c69a7f7e4 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/HilbertStates.java @@ -0,0 +1,92 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.expressions; + +import org.apache.spark.SparkException; + +public class HilbertStates { + + /** + * Constructs a hilbert state for the given arity, [[n]]. + * This state list can be used to map n-points to their corresponding d-key value. + * + * @param n The number of bits in this space (we assert 2 <= n <= 9 for simplicity) + * @return The CompactStateList for mapping from n-point to hilbert distance key. + */ + private static HilbertCompactStateList constructHilbertState(int n) { + HilbertIndex.GeneratorTable generator = HilbertIndex.getStateGenerator(n); + return generator.generateStateList().getNPointToDKeyStateMap(); + } + + private HilbertStates() { } + + private static class HilbertIndex2 { + static final HilbertCompactStateList STATE_LIST = constructHilbertState(2); + } + + private static class HilbertIndex3 { + static final HilbertCompactStateList STATE_LIST = constructHilbertState(3); + } + + private static class HilbertIndex4 { + static final HilbertCompactStateList STATE_LIST = constructHilbertState(4); + } + + private static class HilbertIndex5 { + static final HilbertCompactStateList STATE_LIST = constructHilbertState(5); + } + + private static class HilbertIndex6 { + static final HilbertCompactStateList STATE_LIST = constructHilbertState(6); + } + + private static class HilbertIndex7 { + static final HilbertCompactStateList STATE_LIST = constructHilbertState(7); + } + + private static class HilbertIndex8 { + static final HilbertCompactStateList STATE_LIST = constructHilbertState(8); + } + + private static class HilbertIndex9 { + static final HilbertCompactStateList STATE_LIST = constructHilbertState(9); + } + + public static HilbertCompactStateList getStateList(int n) throws SparkException { + switch (n) { + case 2: + return HilbertIndex2.STATE_LIST; + case 3: + return HilbertIndex3.STATE_LIST; + case 4: + return HilbertIndex4.STATE_LIST; + case 5: + return HilbertIndex5.STATE_LIST; + case 6: + return HilbertIndex6.STATE_LIST; + case 7: + return HilbertIndex7.STATE_LIST; + case 8: + return HilbertIndex8.STATE_LIST; + case 9: + return HilbertIndex9.STATE_LIST; + default: + throw new SparkException(String.format("Cannot perform hilbert clustering on " + + "fewer than 2 or more than 9 dimensions; got %d dimensions", n)); + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/expressions/HilbertUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/HilbertUtils.scala new file mode 100644 index 00000000000..ebffce4b901 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/HilbertUtils.scala @@ -0,0 +1,165 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.expressions + +object HilbertUtils { + + /** + * Returns the column number that is set. We assume that a bit is set. + */ + @inline def getSetColumn(n: Int, i: Int): Int = { + n - 1 - Integer.numberOfTrailingZeros(i) + } + + @inline def circularLeftShift(n: Int, i: Int, shift: Int): Int = { + ((i << shift) | (i >>> (n - shift))) & ((1 << n) - 1) + } + + @inline def circularRightShift(n: Int, i: Int, shift: Int): Int = { + ((i >>> shift) | (i << (n - shift))) & ((1 << n) - 1) + } + + @inline + private[expressions] def getBits(key: Array[Byte], offset: Int, n: Int): Int = { + // [ ][ ][ ][ ][ ] + // <---offset---> [ n-bits ] <- this is the result + var result = 0 + + var remainingBits = n + var keyIndex = offset / 8 + // initial key offset + var keyOffset = offset - (keyIndex * 8) + while (remainingBits > 0) { + val bitsFromIdx = math.min(remainingBits, 8 - keyOffset) + val newInt = if (remainingBits >= 8) { + java.lang.Byte.toUnsignedInt(key(keyIndex)) + } else { + java.lang.Byte.toUnsignedInt(key(keyIndex)) >>> (8 - keyOffset - bitsFromIdx) + } + result = (result << bitsFromIdx) | (newInt & ((1 << bitsFromIdx) - 1)) + + remainingBits -= (8 - keyOffset) + keyOffset = 0 + keyIndex += 1 + } + + result + } + + @inline + private[expressions] def setBits( + key: Array[Byte], + offset: Int, + newBits: Int, + n: Int): Array[Byte] = { + // bits: [ meaningless bits ][ n meaningful bits ] + // + // [ ][ ][ ][ ][ ] + // <---offset---> [ n-bits ] + + // move meaningful bits to the far left + var bits = newBits << (32 - n) + var remainingBits = n + + // initial key index + var keyIndex = offset / 8 + // initial key offset + var keyOffset = offset - (keyIndex * 8) + while (remainingBits > 0) { + key(keyIndex) = (key(keyIndex) | (bits >>> (24 + keyOffset))).toByte + remainingBits -= (8 - keyOffset) + bits = bits << (8 - keyOffset) + keyOffset = 0 + keyIndex += 1 + } + key + } + + /** + * treats `key` as an Integer and adds 1 + */ + @inline def addOne(key: Array[Byte]): Array[Byte] = { + var idx = key.length - 1 + var overflow = true + while (overflow && idx >= 0) { + key(idx) = (key(idx) + 1.toByte).toByte + overflow = key(idx) == 0 + idx -= 1 + } + key + } + + def manhattanDist(p1: Array[Int], p2: Array[Int]): Int = { + assert(p1.length == p2.length) + p1.zip(p2).map { case (a, b) => math.abs(a - b) }.sum + } + + + /** + * This is not really a matrix, but a representation of one. Due to the constraints of this + * system the necessary matrices can be defined by two values: dY and X2. DY is the amount + * of right shifting of the identity matrix, and X2 is a bitmask for which column values are + * negative. The [[toString]] method is overridden to construct and print the matrix to aid + * in debugging. + * Instead of constructing the matrix directly we store and manipulate these values. 
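+ *
+ * Two small, hand-checked illustrations of this representation (not an exhaustive spec):
+ * {{{
+ *   HilbertMatrix.identity(3).transform(5) == 5         // identity leaves an n-point unchanged
+ *   HilbertMatrix(3, x2 = 0, dy = 1).transform(4) == 1  // x2 = 0, dy = 1 rotates 0b100 -> 0b001
+ * }}}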
+ */ + case class HilbertMatrix(n: Int, x2: Int, dy: Int) { + override def toString(): String = { + val sb = new StringBuilder() + + val base = 1 << (n - 1 - dy) + (0 until n).foreach { i => + sb.append('\n') + val row = circularRightShift(n, base, i) + (0 until n).foreach { j => + if (isColumnSet(row, j)) { + if (isColumnSet(x2, j)) { + sb.append('-') + } else { + sb.append(' ') + } + sb.append('1') + } else { + sb.append(" 0") + } + } + } + sb.append('\n') + sb.toString + } + + // columns count from the left: 0, 1, 2 ... , n + @inline def isColumnSet(i: Int, column: Int): Boolean = { + val mask = 1 << (n - 1 - column) + (i & mask) > 0 + } + + def transform(e: Int): Int = { + circularLeftShift(n, e ^ x2, dy) + } + + def multiply(other: HilbertMatrix): HilbertMatrix = { + HilbertMatrix(n, circularRightShift(n, x2, other.dy) ^ other.x2, (dy + other.dy) % n) + } + } + + object HilbertMatrix { + def identity(n: Int): HilbertMatrix = { + HilbertMatrix(n, 0, 0) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/expressions/InterleaveBits.scala b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/InterleaveBits.scala new file mode 100644 index 00000000000..3dc3980b16c --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/InterleaveBits.scala @@ -0,0 +1,399 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.expressions + +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper} +import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression} +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.types.{BinaryType, DataType, IntegerType} + + +/** + * Interleaves the bits of its input data in a round-robin fashion. + * + * If the input data is seen as a series of multidimensional points, this function computes the + * corresponding Z-values, in a way that's preserving data locality: input points that are close + * in the multidimensional space will be mapped to points that are close on the Z-order curve. + * + * The returned value is a byte array where the size of the array is 4 * num of input columns. + * + * @see https://en.wikipedia.org/wiki/Z-order_curve + * + * @note Only supports input expressions of type Int for now. 
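+ *
+ * As a hand-derived sanity check (not an authoritative test vector): interleaving the two int
+ * columns 0 and -1 (all bits set) alternates a 0-bit and a 1-bit at every position, with the
+ * first column contributing the more significant bit of each pair, so the result should be
+ * eight bytes that are all 0x55.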
+ */ +case class InterleaveBits(children: Seq[Expression]) + extends Expression with ExpectsInputTypes with SQLConfHelper + with CodegenFallback /* TODO: implement doGenCode() */ { + + private val n: Int = children.size + + override def inputTypes: Seq[DataType] = Seq.fill(n)(IntegerType) + + override def dataType: DataType = BinaryType + + override def nullable: Boolean = false + + /** Nulls in the input will be treated like this value */ + val nullValue: Int = 0 + + private val childrenArray: Array[Expression] = children.toArray + + private val fastInterleaveBitsEnabled = conf.getConf(DeltaSQLConf.FAST_INTERLEAVE_BITS_ENABLED) + + private val ints = new Array[Int](n) + + override def eval(input: InternalRow): Any = { + var i = 0 + while (i < n) { + val int = childrenArray(i).eval(input) match { + case null => nullValue + case int: Int => int + case any => throw new IllegalArgumentException( + s"${this.getClass.getSimpleName} expects only inputs of type Int, but got: " + + s"$any of type${any.getClass.getSimpleName}") + } + ints.update(i, int) + i += 1 + } + InterleaveBits.interleaveBits(ints, fastInterleaveBitsEnabled) + } + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): InterleaveBits = copy(children = newChildren) +} + +object InterleaveBits { + + private[expressions] def interleaveBits( + inputs: Array[Int], + fastInterleaveBitsEnabled: Boolean): Array[Byte] = { + if (fastInterleaveBitsEnabled) { + inputs.length match { + // The default algorithm has the complexity O(32 * n) (n is the number of input columns) + // The new algorithm has O(4 * 8) complexity when the number of Z-Order by columns is + // less than 9. It uses the algorithm described here + // http://graphics.stanford.edu/~seander/bithacks.html#InterleaveTableObvious + case 0 => Array.empty + case 1 => intToByte(inputs(0)) + case 2 => interleave2Ints(inputs(1), inputs(0)) + case 3 => interleave3Ints(inputs(2), inputs(1), inputs(0)) + case 4 => interleave4Ints(inputs(3), inputs(2), inputs(1), inputs(0)) + case 5 => interleave5Ints(inputs(4), inputs(3), inputs(2), inputs(1), inputs(0)) + case 6 => interleave6Ints(inputs(5), inputs(4), inputs(3), inputs(2), inputs(1), inputs(0)) + case 7 => interleave7Ints(inputs(6), inputs(5), inputs(4), inputs(3), inputs(2), inputs(1), + inputs(0)) + case 8 => interleave8Ints(inputs(7), inputs(6), inputs(5), inputs(4), inputs(3), inputs(2), + inputs(1), inputs(0)) + case _ => defaultInterleaveBits(inputs, inputs.length) + } + } else { + defaultInterleaveBits(inputs, inputs.length) + } + } + + private def defaultInterleaveBits(inputs: Array[Int], numCols: Int): Array[Byte] = { + val ret = new Array[Byte](numCols * 4) + var ret_idx: Int = 0 + var ret_bit: Int = 7 + var ret_byte: Byte = 0 + + var bit = 31 /* going from most to least significant bit */ + while (bit >= 0) { + var idx = 0 + while (idx < numCols) { + ret_byte = (ret_byte | (((inputs(idx) >> bit) & 1) << ret_bit)).toByte + ret_bit -= 1 + if (ret_bit == -1) { + // finished processing a byte + ret.update(ret_idx, ret_byte) + ret_byte = 0 + ret_idx += 1 + ret_bit = 7 + } + idx += 1 + } + bit -= 1 + } + assert(ret_idx == numCols * 4) + assert(ret_bit == 7) + ret + } + + private def interleave2Ints(i1: Int, i2: Int): Array[Byte] = { + val result = new Array[Byte](8) + var i = 0 + while (i < 4) { + val tmp1 = ((i1 >> (i * 8)) & 0xFF).toByte + val tmp2 = ((i2 >> (i * 8)) & 0xFF).toByte + + var z = 0 + var j = 0 + while (j < 8) { + val x_masked = tmp1 & (1 << j) + val y_masked = tmp2 & (1 << j) + z 
|= (x_masked << j) + z |= (y_masked << (j + 1)) + j = j + 1 + } + result((3 - i) * 2 + 1) = (z & 0xFF).toByte + result((3 - i) * 2) = ((z >> 8) & 0xFF).toByte + i = i + 1 + } + result + } + + private def intToByte(input: Int): Array[Byte] = { + val result = new Array[Byte](4) + var i = 0 + while (i <= 3) { + val offset = i * 8 + result(3 - i) = ((input >> offset) & 0xFF).toByte + i += 1 + } + result + } + + private def interleave3Ints(i1: Int, i2: Int, i3: Int): Array[Byte] = { + val result = new Array[Byte](12) + var i = 0 + while (i < 4) { + val tmp1 = ((i1 >> (i * 8)) & 0xFF).toByte + val tmp2 = ((i2 >> (i * 8)) & 0xFF).toByte + val tmp3 = ((i3 >> (i * 8)) & 0xFF).toByte + + var z = 0 + var j = 0 + while (j < 8) { + val r1_mask = tmp1 & (1 << j) + val r2_mask = tmp2 & (1 << j) + val r3_mask = tmp3 & (1 << j) + z |= (r1_mask << (2 * j)) | (r2_mask << (2 * j + 1)) | (r3_mask << (2 * j + 2)) + j = j + 1 + } + result((3 - i) * 3 + 2) = (z & 0xFF).toByte + result((3 - i) * 3 + 1) = ((z >> 8) & 0xFF).toByte + result((3 - i) * 3) = ((z >> 16) & 0xFF).toByte + i = i + 1 + } + result + } + + private def interleave4Ints(i1: Int, i2: Int, i3: Int, i4: Int): Array[Byte] = { + val result = new Array[Byte](16) + var i = 0 + while (i < 4) { + val tmp1 = ((i1 >> (i * 8)) & 0xFF).toByte + val tmp2 = ((i2 >> (i * 8)) & 0xFF).toByte + val tmp3 = ((i3 >> (i * 8)) & 0xFF).toByte + val tmp4 = ((i4 >> (i * 8)) & 0xFF).toByte + + var z = 0 + var j = 0 + while (j < 8) { + val r1_mask = tmp1 & (1 << j) + val r2_mask = tmp2 & (1 << j) + val r3_mask = tmp3 & (1 << j) + val r4_mask = tmp4 & (1 << j) + z |= (r1_mask << (3 * j)) | (r2_mask << (3 * j + 1)) | (r3_mask << (3 * j + 2)) | + (r4_mask << (3 * j + 3)) + j = j + 1 + } + result((3 - i) * 4 + 3) = (z & 0xFF).toByte + result((3 - i) * 4 + 2) = ((z >> 8) & 0xFF).toByte + result((3 - i) * 4 + 1) = ((z >> 16) & 0xFF).toByte + result((3 - i) * 4) = ((z >> 24) & 0xFF).toByte + i = i + 1 + } + result + } + + private def interleave5Ints( + i1: Int, + i2: Int, + i3: Int, + i4: Int, + i5: Int): Array[Byte] = { + val result = new Array[Byte](20) + var i = 0 + while (i < 4) { + val tmp1 = ((i1 >> (i * 8)) & 0xFF).toByte + val tmp2 = ((i2 >> (i * 8)) & 0xFF).toByte + val tmp3 = ((i3 >> (i * 8)) & 0xFF).toByte + val tmp4 = ((i4 >> (i * 8)) & 0xFF).toByte + val tmp5 = ((i5 >> (i * 8)) & 0xFF).toByte + + var z = 0L + var j = 0 + while (j < 8) { + val r1_mask = tmp1 & (1 << j).toLong + val r2_mask = tmp2 & (1 << j).toLong + val r3_mask = tmp3 & (1 << j).toLong + val r4_mask = tmp4 & (1 << j).toLong + val r5_mask = tmp5 & (1 << j).toLong + z |= (r1_mask << (4 * j)) | (r2_mask << (4 * j + 1)) | (r3_mask << (4 * j + 2)) | + (r4_mask << (4 * j + 3)) | (r5_mask << (4 * j + 4)) + j = j + 1 + } + result((3 - i) * 5 + 4) = (z & 0xFF).toByte + result((3 - i) * 5 + 3) = ((z >> 8) & 0xFF).toByte + result((3 - i) * 5 + 2) = ((z >> 16) & 0xFF).toByte + result((3 - i) * 5 + 1) = ((z >> 24) & 0xFF).toByte + result((3 - i) * 5) = ((z >> 32) & 0xFF).toByte + i = i + 1 + } + result + } + + private def interleave6Ints( + i1: Int, + i2: Int, + i3: Int, + i4: Int, + i5: Int, + i6: Int): Array[Byte] = { + val result = new Array[Byte](24) + var i = 0 + while (i < 4) { + val tmp1 = ((i1 >> (i * 8)) & 0xFF).toByte + val tmp2 = ((i2 >> (i * 8)) & 0xFF).toByte + val tmp3 = ((i3 >> (i * 8)) & 0xFF).toByte + val tmp4 = ((i4 >> (i * 8)) & 0xFF).toByte + val tmp5 = ((i5 >> (i * 8)) & 0xFF).toByte + val tmp6 = ((i6 >> (i * 8)) & 0xFF).toByte + + var z = 0L + var j = 0 + while (j < 8) { + val r1_mask = tmp1 & 
(1 << j).toLong + val r2_mask = tmp2 & (1 << j).toLong + val r3_mask = tmp3 & (1 << j).toLong + val r4_mask = tmp4 & (1 << j).toLong + val r5_mask = tmp5 & (1 << j).toLong + val r6_mask = tmp6 & (1 << j).toLong + z |= (r1_mask << (5 * j)) | (r2_mask << (5 * j + 1)) | (r3_mask << (5 * j + 2)) | + (r4_mask << (5 * j + 3)) | (r5_mask << (5 * j + 4)) | (r6_mask << (5 * j + 5)) + j = j + 1 + } + result((3 - i) * 6 + 5) = (z & 0xFF).toByte + result((3 - i) * 6 + 4) = ((z >> 8) & 0xFF).toByte + result((3 - i) * 6 + 3) = ((z >> 16) & 0xFF).toByte + result((3 - i) * 6 + 2) = ((z >> 24) & 0xFF).toByte + result((3 - i) * 6 + 1) = ((z >> 32) & 0xFF).toByte + result((3 - i) * 6) = ((z >> 40) & 0xFF).toByte + i = i + 1 + } + result + } + + private def interleave7Ints( + i1: Int, + i2: Int, + i3: Int, + i4: Int, + i5: Int, + i6: Int, + i7: Int): Array[Byte] = { + val result = new Array[Byte](28) + var i = 0 + while (i < 4) { + val tmp1 = ((i1 >> (i * 8)) & 0xFF).toByte + val tmp2 = ((i2 >> (i * 8)) & 0xFF).toByte + val tmp3 = ((i3 >> (i * 8)) & 0xFF).toByte + val tmp4 = ((i4 >> (i * 8)) & 0xFF).toByte + val tmp5 = ((i5 >> (i * 8)) & 0xFF).toByte + val tmp6 = ((i6 >> (i * 8)) & 0xFF).toByte + val tmp7 = ((i7 >> (i * 8)) & 0xFF).toByte + + var z = 0L + var j = 0 + while (j < 8) { + val r1_mask = tmp1 & (1 << j).toLong + val r2_mask = tmp2 & (1 << j).toLong + val r3_mask = tmp3 & (1 << j).toLong + val r4_mask = tmp4 & (1 << j).toLong + val r5_mask = tmp5 & (1 << j).toLong + val r6_mask = tmp6 & (1 << j).toLong + val r7_mask = tmp7 & (1 << j).toLong + z |= (r1_mask << (6 * j)) | (r2_mask << (6 * j + 1)) | (r3_mask << (6 * j + 2)) | + (r4_mask << (6 * j + 3)) | (r5_mask << (6 * j + 4)) | (r6_mask << (6 * j + 5)) | + (r7_mask << (6 * j + 6)) + j = j + 1 + } + result((3 - i) * 7 + 6) = (z & 0xFF).toByte + result((3 - i) * 7 + 5) = ((z >> 8) & 0xFF).toByte + result((3 - i) * 7 + 4) = ((z >> 16) & 0xFF).toByte + result((3 - i) * 7 + 3) = ((z >> 24) & 0xFF).toByte + result((3 - i) * 7 + 2) = ((z >> 32) & 0xFF).toByte + result((3 - i) * 7 + 1) = ((z >> 40) & 0xFF).toByte + result((3 - i) * 7) = ((z >> 48) & 0xFF).toByte + i = i + 1 + } + result + } + + private def interleave8Ints( + i1: Int, + i2: Int, + i3: Int, + i4: Int, + i5: Int, + i6: Int, + i7: Int, + i8: Int): Array[Byte] = { + val result = new Array[Byte](32) + var i = 0 + while (i < 4) { + val tmp1 = ((i1 >> (i * 8)) & 0xFF).toByte + val tmp2 = ((i2 >> (i * 8)) & 0xFF).toByte + val tmp3 = ((i3 >> (i * 8)) & 0xFF).toByte + val tmp4 = ((i4 >> (i * 8)) & 0xFF).toByte + val tmp5 = ((i5 >> (i * 8)) & 0xFF).toByte + val tmp6 = ((i6 >> (i * 8)) & 0xFF).toByte + val tmp7 = ((i7 >> (i * 8)) & 0xFF).toByte + val tmp8 = ((i8 >> (i * 8)) & 0xFF).toByte + + var z = 0L + var j = 0 + while (j < 8) { + val r1_mask = tmp1 & (1 << j).toLong + val r2_mask = tmp2 & (1 << j).toLong + val r3_mask = tmp3 & (1 << j).toLong + val r4_mask = tmp4 & (1 << j).toLong + val r5_mask = tmp5 & (1 << j).toLong + val r6_mask = tmp6 & (1 << j).toLong + val r7_mask = tmp7 & (1 << j).toLong + val r8_mask = tmp8 & (1 << j).toLong + z |= (r1_mask << (7 * j)) | (r2_mask << (7 * j + 1)) | (r3_mask << (7 * j + 2)) | + (r4_mask << (7 * j + 3)) | (r5_mask << (7 * j + 4)) | (r6_mask << (7 * j + 5)) | + (r7_mask << (7 * j + 6)) | (r8_mask << (7 * j + 7)) + j = j + 1 + } + result((3 - i) * 8 + 7) = (z & 0xFF).toByte + result((3 - i) * 8 + 6) = ((z >> 8) & 0xFF).toByte + result((3 - i) * 8 + 5) = ((z >> 16) & 0xFF).toByte + result((3 - i) * 8 + 4) = ((z >> 24) & 0xFF).toByte + result((3 - i) * 8 + 3) = 
((z >> 32) & 0xFF).toByte + result((3 - i) * 8 + 2) = ((z >> 40) & 0xFF).toByte + result((3 - i) * 8 + 1) = ((z >> 48) & 0xFF).toByte + result((3 - i) * 8) = ((z >> 56) & 0xFF).toByte + i = i + 1 + } + result + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/expressions/JoinedProjection.scala b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/JoinedProjection.scala new file mode 100644 index 00000000000..787a404ed4a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/JoinedProjection.scala @@ -0,0 +1,95 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.expressions + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, BoundReference, Expression, GetStructField} +import org.apache.spark.sql.catalyst.types.DataTypeUtils +import org.apache.spark.sql.types.StructType + +/** + * Helper class for generating a joined projection. + * + * + * This class is used to instantiate a "Joined Row" - a wrapper that makes two rows appear to be a + * single concatenated row, by using nested access. It is primarily used during statistics + * collection to update a buffer of per-column aggregates (i.e. the left-hand side row) with stats + * from the latest row processed (i.e. the right-hand side row). + * + * Implementation Note: If we instead stored `leftRow` and `rightRow` we would have to perform size + * checks on `leftRow` during every access, which is slow. + */ +object JoinedProjection { + /** + * Bind attributes for a joined projection. This resulting project list expects an input row + * that has two nested struct fields, the struct at position 0 must be the left hand row of the + * join, and the struct at position 1 must be the right hand row of the join. 
+ * + * The following shows example shows how this can be used for updating an aggregation buffer: + * {{{ + * val buffer = new GenericInternalRow() + * + * val update = GenerateMutableProjection.generate( + * expressions = JoinedProjection( + * leftAttributes = bufferAttrs, + * rightAttributes = dataCols, + * projectList = aggregates.flatMap(_.updateExpressions)), + * inputSchema = Nil, + * useSubexprElimination = true + * ).target(buffer) + * + * val joinedRow = new GenericInternalRow(2) + * joinedRow.update(0, input) + * + * def updateBuffer(input: InternalRow): Unit = { + * joinedRow.update(1, input) + * update(joinedRow) + * } + * }}} + */ + def bind( + leftAttributes: Seq[Attribute], + rightAttributes: Seq[Attribute], + projectList: Seq[Expression], + leftCanBeNull: Boolean = false, + rightCanBeNull: Boolean = false): Seq[Expression] = { + val mapping = AttributeMap( + createMapping(0, leftCanBeNull, leftAttributes) + ++ createMapping(1, rightCanBeNull, rightAttributes)) + projectList.map { expr => + expr.transformUp { + case a: Attribute => mapping(a) + } + } + } + + /** + * Helper method to create a nested struct field with efficient value extraction. + */ + private def createMapping( + index: Int, + nullable: Boolean, + attributes: Seq[Attribute]): Seq[(Attribute, Expression)] = { + val ref = BoundReference( + index, + DataTypeUtils.fromAttributes(attributes), + nullable) + attributes.zipWithIndex.map { + case (a, ordinal) => a -> GetStructField(ref, ordinal, Option(a.name)) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/expressions/RangePartitionId.scala b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/RangePartitionId.scala new file mode 100644 index 00000000000..ace2c53a348 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/expressions/RangePartitionId.scala @@ -0,0 +1,90 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.expressions + +import org.apache.spark.Partitioner +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.expressions.{Expression, GenericInternalRow, RowOrdering, UnaryExpression, Unevaluable} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.types._ + + +/** + * Unevaluable placeholder expression to be rewritten by the optimizer into [[PartitionerExpr]] + * + * This is just a convenient way to introduce the former, without the need to manually construct the + * [[RangePartitioner]] beforehand, which requires an RDD to be sampled in order to determine range + * partition boundaries. The optimizer rule will take care of all that. 
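+ *
+ * A hypothetical usage sketch (the column name and partition count are placeholders):
+ * {{{
+ *   // Tag each row with one of 100 range buckets of `value`; the rewrite rule replaces the
+ *   // placeholder with a [[PartitionerExpr]] backed by a sampled RangePartitioner.
+ *   df.withColumn("bucket", new Column(RangePartitionId(col("value").expr, numPartitions = 100)))
+ * }}}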
+ * + * @see [[org.apache.spark.sql.delta.optimizer.RangeRepartitionIdRewrite]] + */ +case class RangePartitionId(child: Expression, numPartitions: Int) + extends UnaryExpression with Unevaluable { + + require(numPartitions > 0, "expected the number partitions to be greater than zero") + + override def checkInputDataTypes(): TypeCheckResult = { + if (RowOrdering.isOrderable(child.dataType)) { + TypeCheckResult.TypeCheckSuccess + } else { + TypeCheckResult.TypeCheckFailure(s"cannot sort data type ${child.dataType.simpleString}") + } + } + + override def dataType: DataType = IntegerType + + override def nullable: Boolean = false + + override protected def withNewChildInternal(newChild: Expression): RangePartitionId = + copy(child = newChild) +} + +/** + * Thin wrapper around [[Partitioner]] instances that are used in Shuffle operations. + * TODO: If needed elsewhere, consider moving it into its own file. + */ +case class PartitionerExpr(child: Expression, partitioner: Partitioner) + extends UnaryExpression { + + override def dataType: DataType = IntegerType + + override def nullable: Boolean = false + + private lazy val row = new GenericInternalRow(Array[Any](null)) + + override def eval(input: InternalRow): Any = { + val value: Any = child.eval(input) + row.update(0, value) + partitioner.getPartition(row) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val partitionerReference = ctx.addReferenceObj("partitioner", partitioner) + val rowReference = ctx.addReferenceObj("row", row) + + nullSafeCodeGen(ctx, ev, input => + s"""$rowReference.update(0, $input); + |${ev.value} = $partitionerReference.getPartition($rowReference); + """.stripMargin) + } + + override protected def withNewChildInternal(newChild: Expression): PartitionerExpr = + copy(child = newChild) +} + + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/files/CdcAddFileIndex.scala b/spark/src/main/scala/org/apache/spark/sql/delta/files/CdcAddFileIndex.scala new file mode 100644 index 00000000000..2cdea19d4bf --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/files/CdcAddFileIndex.scala @@ -0,0 +1,82 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.files + +import java.text.SimpleDateFormat + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.commands.cdc.CDCReader._ +import org.apache.spark.sql.delta.implicits._ +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.types.StructType + +/** + * A [[TahoeFileIndex]] for scanning a sequence of added files as CDC. Similar to + * [[TahoeBatchFileIndex]], with a bit of special handling to attach the log version + * and CDC type on a per-file basis. 
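+ *
+ * Conceptually, `matchingFiles` re-exposes each file with the CDC metadata attached as extra
+ * partition values, e.g. for an insert committed as version 12 (the version and timestamp shown
+ * are illustrative):
+ * {{{
+ *   f.copy(partitionValues = f.partitionValues +
+ *     (CDC_COMMIT_VERSION -> "12") + (CDC_COMMIT_TIMESTAMP -> ts) +
+ *     (CDC_TYPE_COLUMN_NAME -> CDC_TYPE_INSERT))
+ * }}}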
+ * @param spark The Spark session. + * @param filesByVersion Grouped FileActions, one per table version. + * @param deltaLog The delta log instance. + * @param path The table's data path. + * @param snapshot The snapshot where we read CDC from. + * @param rowIndexFilters Map from URI-encoded file path to a row index filter type. + */ +class CdcAddFileIndex( + spark: SparkSession, + filesByVersion: Seq[CDCDataSpec[AddFile]], + deltaLog: DeltaLog, + path: Path, + snapshot: SnapshotDescriptor, + override val rowIndexFilters: Option[Map[String, RowIndexFilterType]] = None + ) extends TahoeBatchFileIndex( + spark, "cdcRead", filesByVersion.flatMap(_.actions), deltaLog, path, snapshot) { + + override def matchingFiles( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): Seq[AddFile] = { + val addFiles = filesByVersion.flatMap { + case CDCDataSpec(version, ts, files, ci) => + files.map { f => + // We add the metadata as faked partition columns in order to attach it on a per-file + // basis. + val tsOpt = Option(ts) + .map(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS Z").format(_)).orNull + val newPartitionVals = f.partitionValues + + (CDC_COMMIT_VERSION -> version.toString) + + (CDC_COMMIT_TIMESTAMP -> tsOpt) + + (CDC_TYPE_COLUMN_NAME -> CDC_TYPE_INSERT) + f.copy(partitionValues = newPartitionVals) + } + } + DeltaLog.filterFileList(partitionSchema, addFiles.toDF(spark), partitionFilters) + .as[AddFile] + .collect() + } + + override def inputFiles: Array[String] = { + filesByVersion.flatMap(_.actions).map(f => absolutePath(f.path).toString).toArray + } + + override val partitionSchema: StructType = + CDCReader.cdcReadSchema(snapshot.metadata.partitionSchema) + +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/files/DelayedCommitProtocol.scala b/spark/src/main/scala/org/apache/spark/sql/delta/files/DelayedCommitProtocol.scala new file mode 100644 index 00000000000..d25728aaccc --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/files/DelayedCommitProtocol.scala @@ -0,0 +1,243 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.files + +// scalastyle:off import.ordering.noEmptyLine +import java.net.URI +import java.util.UUID + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.delta.DeltaErrors +import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile, FileAction} +import org.apache.spark.sql.delta.commands.cdc.CDCReader.{CDC_LOCATION, CDC_PARTITION_COL} +import org.apache.spark.sql.delta.util.{DateFormatter, PartitionUtils, TimestampFormatter, Utils => DeltaUtils} +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext} + +import org.apache.spark.internal.Logging +import org.apache.spark.internal.io.FileCommitProtocol +import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage +import org.apache.spark.sql.catalyst.expressions.Cast +import org.apache.spark.sql.types.StringType + +/** + * Writes out the files to `path` and returns a list of them in `addedStatuses`. Includes + * special handling for partitioning on [[CDC_PARTITION_COL]] for + * compatibility between enabled and disabled CDC; partitions with a value of false in this + * column produce no corresponding partitioning directory. + * @param path The base path files will be written + * @param randomPrefixLength The length of random subdir name under 'path' that files been written + * @param subdir The immediate subdir under path; If randomPrefixLength and subdir both exist, file + * path will be path/subdir/[rand str of randomPrefixLength]/file + */ +class DelayedCommitProtocol( + jobId: String, + path: String, + randomPrefixLength: Option[Int], + subdir: Option[String]) + extends FileCommitProtocol with Serializable with Logging { + // Track the list of files added by a task, only used on the executors. + @transient protected var addedFiles: ArrayBuffer[(Map[String, String], String)] = _ + + // Track the change files added, only used on the driver. Files are sorted between this buffer + // and addedStatuses based on the value of the [[CDC_TYPE_COLUMN_NAME]] partition column - a + // file goes to addedStatuses if the value is CDC_TYPE_NOT_CDC and changeFiles otherwise. + @transient val changeFiles = new ArrayBuffer[AddCDCFile] + + // Track the overall files added, only used on the driver. + // + // In rare cases, some of these AddFiles can be empty (i.e. contain no logical records). + // If the caller wishes to have only non-empty AddFiles, they must collect stats and perform + // the filter themselves. See TransactionalWrite::writeFiles. This filter will be best-effort, + // since there's no guarantee the stats will exist. + @transient val addedStatuses = new ArrayBuffer[AddFile] + + val timestampPartitionPattern = "yyyy-MM-dd HH:mm:ss[.S]" + + // Constants for CDC partition manipulation. Used only in newTaskTempFile(), but we define them + // here to avoid building a new redundant regex for every file. + protected val cdcPartitionFalse = s"${CDC_PARTITION_COL}=false" + protected val cdcPartitionTrue = s"${CDC_PARTITION_COL}=true" + protected val cdcPartitionTrueRegex = cdcPartitionTrue.r + + override def setupJob(jobContext: JobContext): Unit = { + + } + + /** + * Commits a job after the writes succeed. Must be called on the driver. Partitions the written + * files into [[AddFile]]s and [[AddCDCFile]]s as these metadata actions are treated differently + * by [[TransactionalWrite]] (i.e. 
AddFile's may have additional statistics injected) + */ + override def commitJob(jobContext: JobContext, taskCommits: Seq[TaskCommitMessage]): Unit = { + val (addFiles, changeFiles) = taskCommits.flatMap(_.obj.asInstanceOf[Seq[_]]) + .partition { + case _: AddFile => true + case _: AddCDCFile => false + case other => + throw DeltaErrors.unrecognizedFileAction(s"$other", s"${other.getClass}") + } + + // we cannot add type information above because of type erasure + addedStatuses ++= addFiles.map(_.asInstanceOf[AddFile]) + this.changeFiles ++= changeFiles.map(_.asInstanceOf[AddCDCFile]).toArray[AddCDCFile] + } + + override def abortJob(jobContext: JobContext): Unit = { + // TODO: Best effort cleanup + } + + override def setupTask(taskContext: TaskAttemptContext): Unit = { + addedFiles = new ArrayBuffer[(Map[String, String], String)] + } + + protected def getFileName( + taskContext: TaskAttemptContext, + ext: String, + partitionValues: Map[String, String]): String = { + // The file name looks like part-r-00000-2dd664f9-d2c4-4ffe-878f-c6c70c1fb0cb_00003.gz.parquet + // Note that %05d does not truncate the split number, so if we have more than 100000 tasks, + // the file name is fine and won't overflow. + val split = taskContext.getTaskAttemptID.getTaskID.getId + val uuid = UUID.randomUUID.toString + // CDC files (CDC_PARTITION_COL = true) are named with "cdc-..." instead of "part-...". + if (partitionValues.get(CDC_PARTITION_COL).contains("true")) { + f"cdc-$split%05d-$uuid$ext" + } else { + f"part-$split%05d-$uuid$ext" + } + } + + protected def parsePartitions(dir: String): Map[String, String] = { + // TODO: timezones? + // TODO: enable validatePartitionColumns? + val dateFormatter = DateFormatter() + val timestampFormatter = + TimestampFormatter(timestampPartitionPattern, java.util.TimeZone.getDefault) + val parsedPartition = + PartitionUtils + .parsePartition( + new Path(dir), + typeInference = false, + Set.empty, + Map.empty, + validatePartitionColumns = false, + java.util.TimeZone.getDefault, + dateFormatter, + timestampFormatter) + ._1 + .get + parsedPartition + .columnNames + .zip( + parsedPartition + .literals + .map(l => Cast(l, StringType).eval()) + .map(Option(_).map(_.toString).orNull)) + .toMap + } + + /** + * Notifies the commit protocol to add a new file, and gets back the full path that should be + * used. + * + * Includes special logic for CDC files and paths. Specifically, if the directory `dir` contains + * the CDC partition `__is_cdc=true` then + * - the file name begins with `cdc-` instead of `part-` + * - the directory has the `__is_cdc=true` partition removed and is placed in the `_changed_data` + * folder + */ + override def newTaskTempFile( + taskContext: TaskAttemptContext, dir: Option[String], ext: String): String = { + val partitionValues = dir.map(parsePartitions).getOrElse(Map.empty[String, String]) + val filename = getFileName(taskContext, ext, partitionValues) + val relativePath = randomPrefixLength.map { prefixLength => + DeltaUtils.getRandomPrefix(prefixLength) // Generate a random prefix as a first choice + }.orElse { + dir // or else write into the partition directory if it is partitioned + }.map { subDir => + // Do some surgery on the paths we write out to eliminate the CDC_PARTITION_COL. Non-CDC + // data is written to the base location, while CDC data is written to a special folder + // _change_data. 
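+ // For illustration (partition names are hypothetical), the rewrites below behave roughly as:
+ //   "__is_cdc=false"        -> <path>/part-...                      (CDC partition dropped)
+ //   "__is_cdc=true/p=1"     -> <path>/_change_data/p=1/cdc-...
+ //   "__is_cdc=false/p=1"    -> <path>/p=1/part-...
+ //   "p=1" (no CDC column)   -> <path>/p=1/part-...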
+ // The code here gets a bit complicated to accommodate two corner cases: an empty subdir + // can't be passed to new Path() at all, and a single-level subdir won't have a trailing + // slash. + if (subDir == cdcPartitionFalse) { + new Path(filename) + } else if (subDir.startsWith(cdcPartitionTrue)) { + val cleanedSubDir = cdcPartitionTrueRegex.replaceFirstIn(subDir, CDC_LOCATION) + new Path(cleanedSubDir, filename) + } else if (subDir.startsWith(cdcPartitionFalse)) { + // We need to remove the trailing slash in addition to the directory - otherwise + // it'll be interpreted as an absolute path and fail. + val cleanedSubDir = subDir.stripPrefix(cdcPartitionFalse + "/") + new Path(cleanedSubDir, filename) + } else { + new Path(subDir, filename) + } + }.getOrElse(new Path(filename)) // or directly write out to the output path + + val relativePathWithSubdir = subdir.map(new Path(_, relativePath)).getOrElse(relativePath) + addedFiles.append((partitionValues, relativePathWithSubdir.toUri.toString)) + new Path(path, relativePathWithSubdir).toString + } + + override def newTaskTempFileAbsPath( + taskContext: TaskAttemptContext, absoluteDir: String, ext: String): String = { + throw DeltaErrors.unsupportedAbsPathAddFile(s"$this") + } + + protected def buildActionFromAddedFile( + f: (Map[String, String], String), + stat: FileStatus, + taskContext: TaskAttemptContext): FileAction = { + // The partitioning in the Delta log action will be read back as part of the data, so our + // virtual CDC_PARTITION_COL needs to be stripped out. + val partitioning = f._1.filter { case (k, v) => k != CDC_PARTITION_COL } + f._1.get(CDC_PARTITION_COL) match { + case Some("true") => + val partitioning = f._1.filter { case (k, v) => k != CDC_PARTITION_COL } + AddCDCFile(f._2, partitioning, stat.getLen) + case _ => + val addFile = AddFile(f._2, partitioning, stat.getLen, stat.getModificationTime, true) + addFile + } + } + + override def commitTask(taskContext: TaskAttemptContext): TaskCommitMessage = { + if (addedFiles.nonEmpty) { + val fs = new Path(path, addedFiles.head._2).getFileSystem(taskContext.getConfiguration) + val statuses: Seq[FileAction] = addedFiles.map { f => + // scalastyle:off pathfromuri + val filePath = new Path(path, new Path(new URI(f._2))) + // scalastyle:on pathfromuri + val stat = fs.getFileStatus(filePath) + + buildActionFromAddedFile(f, stat, taskContext) + }.toSeq + + new TaskCommitMessage(statuses) + } else { + new TaskCommitMessage(Nil) + } + } + + override def abortTask(taskContext: TaskAttemptContext): Unit = { + // TODO: we can also try delete the addedFiles as a best-effort cleanup. + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaFileFormatWriter.scala b/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaFileFormatWriter.scala new file mode 100644 index 00000000000..2a73b8b852a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaFileFormatWriter.scala @@ -0,0 +1,479 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.files + +import java.util.{Date, UUID} + +import org.apache.spark.sql.delta.DeltaOptions +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileAlreadyExistsException, Path} +import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl + +import org.apache.spark._ +import org.apache.spark.internal.Logging +import org.apache.spark.internal.io.{FileCommitProtocol, SparkHadoopWriterUtils} +import org.apache.spark.shuffle.FetchFailedException +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.catalog.BucketSpec +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReferences +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.connector.write.WriterCommitMessage +import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.execution.{ProjectExec, SortExec, SparkPlan, SQLExecution, UnsafeExternalRowSorter} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.execution.datasources.FileFormatWriter._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.{SerializableConfiguration, Utils} + +/** + * A helper object for writing FileFormat data out to a location. + * Logic is copied from FileFormatWriter from Spark 3.5 with added functionality to write partition + * values to data files. Specifically L123-126, L132, and L140 where it adds option + * WRITE_PARTITION_COLUMNS + */ +object DeltaFileFormatWriter extends Logging { + + /** + * A variable used in tests to check whether the output ordering of the query matches the + * required ordering of the write command. + */ + private var outputOrderingMatched: Boolean = false + + /** + * A variable used in tests to check the final executed plan. + */ + private var executedPlan: Option[SparkPlan] = None + + // scalastyle:off argcount + /** + * Basic work flow of this command is: + * 1. Driver side setup, including output committer initialization and data source specific + * preparation work for the write job to be issued. + * 2. Issues a write job consists of one or more executor side tasks, each of which writes all + * rows within an RDD partition. + * 3. If no exception is thrown in a task, commits that task, otherwise aborts that task; If any + * exception is thrown during task commitment, also aborts that task. + * 4. If all tasks are committed, commit the job, otherwise aborts the job; If any exception is + * thrown during job commitment, also aborts the job. + * 5. If the job is successfully committed, perform post-commit operations such as + * processing statistics. + * @return The set of all partition paths that were updated during this write job. 
+ */ + def write( + sparkSession: SparkSession, + plan: SparkPlan, + fileFormat: FileFormat, + committer: FileCommitProtocol, + outputSpec: OutputSpec, + hadoopConf: Configuration, + partitionColumns: Seq[Attribute], + bucketSpec: Option[BucketSpec], + statsTrackers: Seq[WriteJobStatsTracker], + options: Map[String, String], + numStaticPartitionCols: Int = 0): Set[String] = { + require(partitionColumns.size >= numStaticPartitionCols) + + val job = Job.getInstance(hadoopConf) + job.setOutputKeyClass(classOf[Void]) + job.setOutputValueClass(classOf[InternalRow]) + FileOutputFormat.setOutputPath(job, new Path(outputSpec.outputPath)) + + val partitionSet = AttributeSet(partitionColumns) + // cleanup the internal metadata information of + // the file source metadata attribute if any before write out + val finalOutputSpec = outputSpec.copy( + outputColumns = outputSpec.outputColumns + .map(FileSourceMetadataAttribute.cleanupFileSourceMetadataInformation) + ) + val dataColumns = finalOutputSpec.outputColumns.filterNot(partitionSet.contains) + + val writerBucketSpec = V1WritesUtils.getWriterBucketSpec(bucketSpec, dataColumns, options) + val sortColumns = V1WritesUtils.getBucketSortColumns(bucketSpec, dataColumns) + + val caseInsensitiveOptions = CaseInsensitiveMap(options) + + val dataSchema = dataColumns.toStructType + DataSourceUtils.verifySchema(fileFormat, dataSchema) + DataSourceUtils.checkFieldNames(fileFormat, dataSchema) + // Note: prepareWrite has side effect. It sets "job". + + val outputDataColumns = + if (caseInsensitiveOptions.get(DeltaOptions.WRITE_PARTITION_COLUMNS).contains("true")) { + dataColumns ++ partitionColumns + } else dataColumns + + val outputWriterFactory = + fileFormat.prepareWrite( + sparkSession, + job, + caseInsensitiveOptions, + outputDataColumns.toStructType + ) + + val description = new WriteJobDescription( + uuid = UUID.randomUUID.toString, + serializableHadoopConf = new SerializableConfiguration(job.getConfiguration), + outputWriterFactory = outputWriterFactory, + allColumns = finalOutputSpec.outputColumns, + dataColumns = outputDataColumns, + partitionColumns = partitionColumns, + bucketSpec = writerBucketSpec, + path = finalOutputSpec.outputPath, + customPartitionLocations = finalOutputSpec.customPartitionLocations, + maxRecordsPerFile = caseInsensitiveOptions + .get("maxRecordsPerFile") + .map(_.toLong) + .getOrElse(sparkSession.sessionState.conf.maxRecordsPerFile), + timeZoneId = caseInsensitiveOptions + .get(DateTimeUtils.TIMEZONE_OPTION) + .getOrElse(sparkSession.sessionState.conf.sessionLocalTimeZone), + statsTrackers = statsTrackers + ) + + // We should first sort by dynamic partition columns, then bucket id, and finally sorting + // columns. 
+ val requiredOrdering = partitionColumns.drop(numStaticPartitionCols) ++ + writerBucketSpec.map(_.bucketIdExpression) ++ sortColumns + val writeFilesOpt = V1WritesUtils.getWriteFilesOpt(plan) + + // SPARK-40588: when planned writing is disabled and AQE is enabled, + // plan contains an AdaptiveSparkPlanExec, which does not know + // its final plan's ordering, so we have to materialize that plan first + // it is fine to use plan further down as the final plan is cached in that plan + def materializeAdaptiveSparkPlan(plan: SparkPlan): SparkPlan = plan match { + case a: AdaptiveSparkPlanExec => a.finalPhysicalPlan + case p: SparkPlan => p.withNewChildren(p.children.map(materializeAdaptiveSparkPlan)) + } + + // the sort order doesn't matter + val actualOrdering = writeFilesOpt + .map(_.child) + .getOrElse(materializeAdaptiveSparkPlan(plan)) + .outputOrdering + val orderingMatched = V1WritesUtils.isOrderingMatched(requiredOrdering, actualOrdering) + + SQLExecution.checkSQLExecutionId(sparkSession) + + // propagate the description UUID into the jobs, so that committers + // get an ID guaranteed to be unique. + job.getConfiguration.set("spark.sql.sources.writeJobUUID", description.uuid) + + // When `PLANNED_WRITE_ENABLED` is true, the optimizer rule V1Writes will add logical sort + // operator based on the required ordering of the V1 write command. So the output + // ordering of the physical plan should always match the required ordering. Here + // we set the variable to verify this behavior in tests. + // There are two cases where FileFormatWriter still needs to add physical sort: + // 1) When the planned write config is disabled. + // 2) When the concurrent writers are enabled (in this case the required ordering of a + // V1 write command will be empty). 
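// Condensed restatement of the two cases above as a pure predicate (a hypothetical helper,
// shown only to summarize when this writer still inserts its own SortExec): an up-front sort
// is executed only when the child's output ordering does not already satisfy the required
// ordering and the concurrent-writer path is not taken.
def needsUpfrontSort(orderingMatched: Boolean, concurrentWritersEnabled: Boolean): Boolean =
  !orderingMatched && !concurrentWritersEnabled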
+ if (Utils.isTesting) outputOrderingMatched = orderingMatched + + if (writeFilesOpt.isDefined) { + // build `WriteFilesSpec` for `WriteFiles` + val concurrentOutputWriterSpecFunc = (plan: SparkPlan) => { + val sortPlan = createSortPlan(plan, requiredOrdering, outputSpec) + createConcurrentOutputWriterSpec(sparkSession, sortPlan, sortColumns) + } + val writeSpec = WriteFilesSpec( + description = description, + committer = committer, + concurrentOutputWriterSpecFunc = concurrentOutputWriterSpecFunc + ) + executeWrite(sparkSession, plan, writeSpec, job) + } else { + executeWrite( + sparkSession, + plan, + job, + description, + committer, + outputSpec, + requiredOrdering, + partitionColumns, + sortColumns, + orderingMatched + ) + } + } + // scalastyle:on argcount + + private def executeWrite( + sparkSession: SparkSession, + plan: SparkPlan, + job: Job, + description: WriteJobDescription, + committer: FileCommitProtocol, + outputSpec: OutputSpec, + requiredOrdering: Seq[Expression], + partitionColumns: Seq[Attribute], + sortColumns: Seq[Attribute], + orderingMatched: Boolean): Set[String] = { + val projectList = V1WritesUtils.convertEmptyToNull(plan.output, partitionColumns) + val empty2NullPlan = if (projectList.nonEmpty) ProjectExec(projectList, plan) else plan + + writeAndCommit(job, description, committer) { + val (planToExecute, concurrentOutputWriterSpec) = if (orderingMatched) { + (empty2NullPlan, None) + } else { + val sortPlan = createSortPlan(empty2NullPlan, requiredOrdering, outputSpec) + val concurrentOutputWriterSpec = + createConcurrentOutputWriterSpec(sparkSession, sortPlan, sortColumns) + if (concurrentOutputWriterSpec.isDefined) { + (empty2NullPlan, concurrentOutputWriterSpec) + } else { + (sortPlan, concurrentOutputWriterSpec) + } + } + + // In testing, this is the only way to get hold of the actually executed plan written to file + if (Utils.isTesting) executedPlan = Some(planToExecute) + + val rdd = planToExecute.execute() + + // SPARK-23271 If we are attempting to write a zero partition rdd, create a dummy single + // partition rdd to make sure we at least set up one write task to write the metadata. + val rddWithNonEmptyPartitions = if (rdd.partitions.length == 0) { + sparkSession.sparkContext.parallelize(Array.empty[InternalRow], 1) + } else { + rdd + } + + val jobTrackerID = SparkHadoopWriterUtils.createJobTrackerID(new Date()) + val ret = new Array[WriteTaskResult](rddWithNonEmptyPartitions.partitions.length) + sparkSession.sparkContext.runJob( + rddWithNonEmptyPartitions, + (taskContext: TaskContext, iter: Iterator[InternalRow]) => { + executeTask( + description = description, + jobTrackerID = jobTrackerID, + sparkStageId = taskContext.stageId(), + sparkPartitionId = taskContext.partitionId(), + sparkAttemptNumber = taskContext.taskAttemptId().toInt & Integer.MAX_VALUE, + committer, + iterator = iter, + concurrentOutputWriterSpec = concurrentOutputWriterSpec + ) + }, + rddWithNonEmptyPartitions.partitions.indices, + (index, res: WriteTaskResult) => { + committer.onTaskCommit(res.commitMsg) + ret(index) = res + } + ) + ret + } + } + + private def writeAndCommit( + job: Job, + description: WriteJobDescription, + committer: FileCommitProtocol)(f: => Array[WriteTaskResult]): Set[String] = { + // This call shouldn't be put into the `try` block below because it only initializes and + // prepares the job, any exception thrown from here shouldn't cause abortJob() to be called. 
+ committer.setupJob(job) + try { + val ret = f + val commitMsgs = ret.map(_.commitMsg) + + logInfo(s"Start to commit write Job ${description.uuid}.") + val (_, duration) = Utils.timeTakenMs { committer.commitJob(job, commitMsgs) } + logInfo(s"Write Job ${description.uuid} committed. Elapsed time: $duration ms.") + + processStats(description.statsTrackers, ret.map(_.summary.stats), duration) + logInfo(s"Finished processing stats for write job ${description.uuid}.") + + // return a set of all the partition paths that were updated during this job + ret.map(_.summary.updatedPartitions).reduceOption(_ ++ _).getOrElse(Set.empty) + } catch { + case cause: Throwable => + logError(s"Aborting job ${description.uuid}.", cause) + committer.abortJob(job) + throw cause + } + } + + /** + * Write files using [[SparkPlan.executeWrite]] + */ + private def executeWrite( + session: SparkSession, + planForWrites: SparkPlan, + writeFilesSpec: WriteFilesSpec, + job: Job): Set[String] = { + val committer = writeFilesSpec.committer + val description = writeFilesSpec.description + + // In testing, this is the only way to get hold of the actually executed plan written to file + if (Utils.isTesting) executedPlan = Some(planForWrites) + + writeAndCommit(job, description, committer) { + val rdd = planForWrites.executeWrite(writeFilesSpec) + val ret = new Array[WriteTaskResult](rdd.partitions.length) + session.sparkContext.runJob( + rdd, + (context: TaskContext, iter: Iterator[WriterCommitMessage]) => { + assert(iter.hasNext) + val commitMessage = iter.next() + assert(!iter.hasNext) + commitMessage + }, + rdd.partitions.indices, + (index, res: WriterCommitMessage) => { + assert(res.isInstanceOf[WriteTaskResult]) + val writeTaskResult = res.asInstanceOf[WriteTaskResult] + committer.onTaskCommit(writeTaskResult.commitMsg) + ret(index) = writeTaskResult + } + ) + ret + } + } + + private def createSortPlan( + plan: SparkPlan, + requiredOrdering: Seq[Expression], + outputSpec: OutputSpec): SortExec = { + // SPARK-21165: the `requiredOrdering` is based on the attributes from analyzed plan, and + // the physical plan may have different attribute ids due to optimizer removing some + // aliases. Here we bind the expression ahead to avoid potential attribute ids mismatch. + val orderingExpr = + bindReferences(requiredOrdering.map(SortOrder(_, Ascending)), outputSpec.outputColumns) + SortExec(orderingExpr, global = false, child = plan) + } + + private def createConcurrentOutputWriterSpec( + sparkSession: SparkSession, + sortPlan: SortExec, + sortColumns: Seq[Attribute]): Option[ConcurrentOutputWriterSpec] = { + val maxWriters = sparkSession.sessionState.conf.maxConcurrentOutputFileWriters + val concurrentWritersEnabled = maxWriters > 0 && sortColumns.isEmpty + if (concurrentWritersEnabled) { + Some(ConcurrentOutputWriterSpec(maxWriters, () => sortPlan.createSorter())) + } else { + None + } + } + + /** Writes data out in a single Spark task. 
*/ + private def executeTask( + description: WriteJobDescription, + jobTrackerID: String, + sparkStageId: Int, + sparkPartitionId: Int, + sparkAttemptNumber: Int, + committer: FileCommitProtocol, + iterator: Iterator[InternalRow], + concurrentOutputWriterSpec: Option[ConcurrentOutputWriterSpec]): WriteTaskResult = { + + val jobId = SparkHadoopWriterUtils.createJobID(jobTrackerID, sparkStageId) + val taskId = new TaskID(jobId, TaskType.MAP, sparkPartitionId) + val taskAttemptId = new TaskAttemptID(taskId, sparkAttemptNumber) + + // Set up the attempt context required to use in the output committer. + val taskAttemptContext: TaskAttemptContext = { + // Set up the configuration object + val hadoopConf = description.serializableHadoopConf.value + hadoopConf.set("mapreduce.job.id", jobId.toString) + hadoopConf.set("mapreduce.task.id", taskAttemptId.getTaskID.toString) + hadoopConf.set("mapreduce.task.attempt.id", taskAttemptId.toString) + hadoopConf.setBoolean("mapreduce.task.ismap", true) + hadoopConf.setInt("mapreduce.task.partition", 0) + + new TaskAttemptContextImpl(hadoopConf, taskAttemptId) + } + + committer.setupTask(taskAttemptContext) + + val dataWriter = + if (sparkPartitionId != 0 && !iterator.hasNext) { + // In case of empty job, leave first partition to save meta for file format like parquet. + new EmptyDirectoryDataWriter(description, taskAttemptContext, committer) + } else if (description.partitionColumns.isEmpty && description.bucketSpec.isEmpty) { + new SingleDirectoryDataWriter(description, taskAttemptContext, committer) + } else { + concurrentOutputWriterSpec match { + case Some(spec) => + new DynamicPartitionDataConcurrentWriter( + description, + taskAttemptContext, + committer, + spec + ) + case _ => + new DynamicPartitionDataSingleWriter(description, taskAttemptContext, committer) + } + } + + try { + Utils.tryWithSafeFinallyAndFailureCallbacks(block = { + // Execute the task to write rows out and commit the task. + dataWriter.writeWithIterator(iterator) + dataWriter.commit() + })(catchBlock = { + // If there is an error, abort the task + dataWriter.abort() + logError(s"Job $jobId aborted.") + }, finallyBlock = { + dataWriter.close() + }) + } catch { + case e: FetchFailedException => + throw e + case f: FileAlreadyExistsException if SQLConf.get.fastFailFileFormatOutput => + // If any output file to write already exists, it does not make sense to re-run this task. + // We throw the exception and let Executor throw ExceptionFailure to abort the job. + throw new TaskOutputFileAlreadyExistException(f) + case t: Throwable => + throw QueryExecutionErrors.taskFailedWhileWritingRowsError(description.path, t) + } + } + + /** + * For every registered [[WriteJobStatsTracker]], call `processStats()` on it, passing it + * the corresponding [[WriteTaskStats]] from all executors. + */ + private def processStats( + statsTrackers: Seq[WriteJobStatsTracker], + statsPerTask: Seq[Seq[WriteTaskStats]], + jobCommitDuration: Long): Unit = { + + val numStatsTrackers = statsTrackers.length + assert( + statsPerTask.forall(_.length == numStatsTrackers), + s"""Every WriteTask should have produced one `WriteTaskStats` object for every tracker. + |There are $numStatsTrackers statsTrackers, but some task returned + |${statsPerTask.find(_.length != numStatsTrackers).get.length} results instead. 
+ """.stripMargin + ) + + val statsPerTracker = if (statsPerTask.nonEmpty) { + statsPerTask.transpose + } else { + statsTrackers.map(_ => Seq.empty) + } + + statsTrackers.zip(statsPerTracker).foreach { + case (statsTracker, stats) => statsTracker.processStats(stats, jobCommitDuration) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaSourceSnapshot.scala b/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaSourceSnapshot.scala new file mode 100644 index 00000000000..316bd88a460 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaSourceSnapshot.scala @@ -0,0 +1,98 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.files + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.{DeltaLog, DeltaTableUtils, Snapshot} +import org.apache.spark.sql.delta.actions.SingleAction +import org.apache.spark.sql.delta.sources.IndexedFile +import org.apache.spark.sql.delta.stats.DataSkippingReader +import org.apache.spark.sql.delta.util.StateCache + +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.functions._ + +/** + * Converts a `Snapshot` into the initial set of files read when starting a new streaming query. + * The list of files that represent the table at the time the query starts are selected by: + * - Adding `version` and `index` to each file to enable splitting of the initial state into + * multiple batches. + * - Filtering files that don't match partition predicates, while preserving the aforementioned + * indexing. 
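// The stats handoff in processStats above is essentially a transpose: every task returns one
// WriteTaskStats per tracker, and every tracker then receives the column of stats that belongs
// to it. A minimal illustration (e.g. in a Scala REPL), with strings standing in for
// WriteTaskStats:
val statsPerTask = Seq(Seq("task0-basic", "task0-delta"), Seq("task1-basic", "task1-delta"))
val statsPerTracker = statsPerTask.transpose
// statsPerTracker == Seq(Seq("task0-basic", "task1-basic"), Seq("task0-delta", "task1-delta"))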
+ */ +class DeltaSourceSnapshot( + val spark: SparkSession, + val snapshot: Snapshot, + val filters: Seq[Expression]) + extends StateCache { + + protected val version = snapshot.version + protected val path = snapshot.path + + protected lazy val (partitionFilters, dataFilters) = { + val partitionCols = snapshot.metadata.partitionColumns + val (part, data) = filters.partition { e => + DeltaTableUtils.isPredicatePartitionColumnsOnly(e, partitionCols, spark) + } + logInfo(s"Classified filters: partition: $part, data: $data") + (part, data) + } + + private[delta] def filteredFiles: Dataset[IndexedFile] = { + import spark.implicits.rddToDatasetHolder + import org.apache.spark.sql.delta.implicits._ + + val initialFiles = snapshot.allFiles + // This allows us to control the number of partitions created from the sort instead of + // using the shufflePartitions setting + .repartitionByRange(snapshot.getNumPartitions, col("modificationTime"), col("path")) + .sort("modificationTime", "path") + .rdd.zipWithIndex() + .toDF("add", "index") + // Stats aren't used for streaming reads right now, so decrease + // the size of the files by nulling out the stats if they exist + .withColumn("add", col("add").withField("stats", DataSkippingReader.nullStringLiteral)) + .withColumn("remove", SingleAction.nullLitForRemoveFile) + .withColumn("cdc", SingleAction.nullLitForAddCDCFile) + .withColumn("version", lit(version)) + .withColumn("isLast", lit(false)) + .withColumn("shouldSkip", lit(false)) + + DeltaLog.filterFileList( + snapshot.metadata.partitionSchema, + initialFiles, + partitionFilters, + Seq("add")).as[IndexedFile] + } + + private lazy val cachedState = { + cacheDS(filteredFiles, s"Delta Source Snapshot #$version - ${snapshot.redactedPath}") + } + + def iterator(): Iterator[IndexedFile] = { + cachedState.getDS.toLocalIterator().asScala + } + + def close(unpersistSnapshot: Boolean): Unit = { + uncache() + if (unpersistSnapshot) { + snapshot.uncache() + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/files/SQLMetricsReporting.scala b/spark/src/main/scala/org/apache/spark/sql/delta/files/SQLMetricsReporting.scala new file mode 100644 index 00000000000..1e79beba8a7 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/files/SQLMetricsReporting.scala @@ -0,0 +1,57 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.files + +import org.apache.spark.sql.delta.DeltaOperations.Operation +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.metric.SQLMetric + +/** + * This trait is used to register SQL metrics for a Delta Operation. 
+ * Registering will allow the metrics to be instrumented via the CommitInfo and is accessible via + * DescribeHistory + */ +trait SQLMetricsReporting { + + // Map of SQL Metrics + private var operationSQLMetrics = Map[String, SQLMetric]() + + /** + * Register SQL metrics for an operation by appending the supplied metrics map to the + * operationSQLMetrics map. + */ + def registerSQLMetrics(spark: SparkSession, metrics: Map[String, SQLMetric]): Unit = { + if (spark.conf.get(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED)) { + operationSQLMetrics = operationSQLMetrics ++ metrics + } + } + + /** + * Get the metrics for an operation based on collected SQL Metrics and filtering out + * the ones based on the metric parameters for that operation. + */ + def getMetricsForOperation(operation: Operation): Map[String, String] = { + operation.transformMetrics(operationSQLMetrics) + } + + /** Returns the metric with `name` registered for the given transaction if it exists. */ + def getMetric(name: String): Option[SQLMetric] = { + operationSQLMetrics.get(name) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/files/TahoeChangeFileIndex.scala b/spark/src/main/scala/org/apache/spark/sql/delta/files/TahoeChangeFileIndex.scala new file mode 100644 index 00000000000..044e5ce5023 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/files/TahoeChangeFileIndex.scala @@ -0,0 +1,72 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.files + +import org.apache.spark.sql.delta.{DeltaLog, Snapshot, SnapshotDescriptor} +import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile} +import org.apache.spark.sql.delta.commands.cdc.CDCReader.{CDC_COMMIT_TIMESTAMP, CDC_COMMIT_VERSION, CDCDataSpec} +import org.apache.spark.sql.delta.implicits._ +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.types.{LongType, StructType, TimestampType} + +/** + * A [[TahoeFileIndex]] for scanning a sequence of CDC files. Similar to [[TahoeBatchFileIndex]], + * the equivalent for reading [[AddFile]] actions. + */ +class TahoeChangeFileIndex( + spark: SparkSession, + val filesByVersion: Seq[CDCDataSpec[AddCDCFile]], + deltaLog: DeltaLog, + path: Path, + snapshot: SnapshotDescriptor) + extends TahoeFileIndexWithSnapshotDescriptor(spark, deltaLog, path, snapshot) { + + override def matchingFiles( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): Seq[AddFile] = { + // Make some fake AddFiles to satisfy the interface. + val addFiles = filesByVersion.flatMap { + case CDCDataSpec(version, ts, files, ci) => + files.map { f => + // We add the metadata as faked partition columns in order to attach it on a per-file + // basis. 
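// A hypothetical usage sketch for the SQLMetricsReporting trait above: an operation creates
// its SQLMetrics, registers them, and can read them back later. The metric name below is an
// example only, and registration is a no-op unless DELTA_HISTORY_METRICS_ENABLED is set.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.delta.files.SQLMetricsReporting
import org.apache.spark.sql.execution.metric.SQLMetrics

object MetricsReportingSketch extends SQLMetricsReporting {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("metrics-sketch").getOrCreate()
    val numFiles = SQLMetrics.createMetric(spark.sparkContext, "number of files written")
    numFiles.add(3)
    registerSQLMetrics(spark, Map("numFiles" -> numFiles))
    // Returns Some(metric) only if history metrics were enabled at registration time.
    println(getMetric("numFiles").map(_.value))
    spark.stop()
  }
}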
+ val newPartitionVals = f.partitionValues + + (CDC_COMMIT_VERSION -> version.toString) + + (CDC_COMMIT_TIMESTAMP -> Option(ts).map(_.toString).orNull) + AddFile(f.path, newPartitionVals, f.size, 0, dataChange = false, tags = f.tags) + } + } + DeltaLog.filterFileList(partitionSchema, addFiles.toDF(spark), partitionFilters) + .as[AddFile] + .collect() + } + + override def inputFiles: Array[String] = { + filesByVersion.flatMap(_.actions).map(f => absolutePath(f.path).toString).toArray + } + + override val partitionSchema: StructType = super.partitionSchema + .add(CDC_COMMIT_VERSION, LongType) + .add(CDC_COMMIT_TIMESTAMP, TimestampType) + + override def refresh(): Unit = {} + + override val sizeInBytes: Long = filesByVersion.flatMap(_.actions).map(_.size).sum +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/files/TahoeFileIndex.scala b/spark/src/main/scala/org/apache/spark/sql/delta/files/TahoeFileIndex.scala new file mode 100644 index 00000000000..130e78ababd --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/files/TahoeFileIndex.scala @@ -0,0 +1,309 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.files + +// scalastyle:off import.ordering.noEmptyLine +import java.net.URI +import java.util.Objects + +import org.apache.spark.sql.delta.RowIndexFilterType +import org.apache.spark.sql.delta.{DeltaColumnMapping, DeltaErrors, DeltaLog, NoMapping, Snapshot, SnapshotDescriptor} +import org.apache.spark.sql.delta.actions.{AddFile, Metadata, Protocol} +import org.apache.spark.sql.delta.implicits._ +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.hadoop.fs.FileStatus +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, GenericInternalRow, Literal} +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.types.StructType + +/** + * A [[FileIndex]] that generates the list of files managed by the Tahoe protocol. + */ +abstract class TahoeFileIndex( + val spark: SparkSession, + override val deltaLog: DeltaLog, + val path: Path) + extends FileIndex + with SupportsRowIndexFilters + with SnapshotDescriptor { + + override def rootPaths: Seq[Path] = path :: Nil + + /** + * Returns all matching/valid files by the given `partitionFilters` and `dataFilters`. + * Implementations may avoid evaluating data filters when doing so would be expensive, but + * *must* evaluate the partition filters; wrong results will be produced if AddFile entries + * which don't match the partition filters are returned. 
+ */ + def matchingFiles( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): Seq[AddFile] + + override def listFiles( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): Seq[PartitionDirectory] = { + val partitionValuesToFiles = listAddFiles(partitionFilters, dataFilters) + makePartitionDirectories(partitionValuesToFiles.toSeq) + } + + + private def listAddFiles( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): Map[Map[String, String], Seq[AddFile]] = { + matchingFiles(partitionFilters, dataFilters).groupBy(_.partitionValues) + } + + private def makePartitionDirectories( + partitionValuesToFiles: Seq[(Map[String, String], Seq[AddFile])]): Seq[PartitionDirectory] = { + val timeZone = spark.sessionState.conf.sessionLocalTimeZone + partitionValuesToFiles.map { + case (partitionValues, files) => + val partitionValuesRow = getPartitionValuesRow(partitionValues) + + val fileStatuses = files.map { f => + new FileStatus( + /* length */ f.size, + /* isDir */ false, + /* blockReplication */ 0, + /* blockSize */ 1, + /* modificationTime */ f.modificationTime, + absolutePath(f.path)) + }.toArray + + PartitionDirectory(partitionValuesRow, fileStatuses) + } + } + + protected def getPartitionValuesRow(partitionValues: Map[String, String]): GenericInternalRow = { + val timeZone = spark.sessionState.conf.sessionLocalTimeZone + val partitionRowValues = partitionSchema.map { p => + val colName = DeltaColumnMapping.getPhysicalName(p) + val partValue = Literal(partitionValues.get(colName).orNull) + Cast(partValue, p.dataType, Option(timeZone), ansiEnabled = false).eval() + }.toArray + new GenericInternalRow(partitionRowValues) + } + + override def partitionSchema: StructType = metadata.partitionSchema + + protected def absolutePath(child: String): Path = { + // scalastyle:off pathfromuri + val p = new Path(new URI(child)) + // scalastyle:on pathfromuri + if (p.isAbsolute) { + p + } else { + new Path(path, p) + } + } + + override def toString: String = { + // the rightmost 100 characters of the path + val truncatedPath = truncateRight(path.toString, len = 100) + s"Delta[version=$version, $truncatedPath]" + } + + /** + * Gets the rightmost {@code len} characters of a String. + * + * @return the trimmed and formatted string. + */ + private def truncateRight(input: String, len: Int): String = { + if (input.length > len) { + "... " + input.takeRight(len) + } else { + input + } + } + + /** + * Returns the path of the base directory of the given file path (i.e. its parent directory with + * all the partition directories stripped off). + */ + def getBasePath(filePath: Path): Option[Path] = Some(path) + +} + +/** A [[TahoeFileIndex]] that works with a specific [[SnapshotDescriptor]]. */ +abstract class TahoeFileIndexWithSnapshotDescriptor( + spark: SparkSession, + deltaLog: DeltaLog, + path: Path, + snapshot: SnapshotDescriptor) extends TahoeFileIndex(spark, deltaLog, path) { + + override def version: Long = snapshot.version + override def metadata: Metadata = snapshot.metadata + override def protocol: Protocol = snapshot.protocol + + + protected[delta] def numOfFilesIfKnown: Option[Long] = snapshot.numOfFilesIfKnown + protected[delta] def sizeInBytesIfKnown: Option[Long] = snapshot.sizeInBytesIfKnown +} + + +/** + * A [[TahoeFileIndex]] that generates the list of files from DeltaLog with given partition filters. 
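// A small self-contained sketch of the relative-vs-absolute resolution performed by
// absolutePath above. The table root used here is a hypothetical example path; file paths
// recorded in the Delta log are URIs that may be relative (resolved against the table root)
// or absolute (used as-is).
object AbsolutePathSketch {
  import java.net.URI
  import org.apache.hadoop.fs.Path

  val tableRoot = new Path("s3://bucket/table")

  def resolve(child: String): Path = {
    val p = new Path(new URI(child))
    if (p.isAbsolute) p else new Path(tableRoot, p)
  }

  def main(args: Array[String]): Unit = {
    println(resolve("part-00000-uuid.snappy.parquet"))        // s3://bucket/table/part-00000-uuid.snappy.parquet
    println(resolve("s3://other-bucket/shared/file.parquet")) // s3://other-bucket/shared/file.parquet
  }
}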
+ * + * NOTE: This is NOT a [[TahoeFileIndexWithSnapshotDescriptor]] because we only use + * [[snapshotAtAnalysis]] for actual data skipping if this is a time travel query. + */ +case class TahoeLogFileIndex( + override val spark: SparkSession, + override val deltaLog: DeltaLog, + override val path: Path, + snapshotAtAnalysis: Snapshot, + partitionFilters: Seq[Expression] = Nil, + isTimeTravelQuery: Boolean = false) + extends TahoeFileIndex(spark, deltaLog, path) { + + + // WARNING: Stability of this method is _NOT_ guaranteed! + override def version: Long = { + if (isTimeTravelQuery) snapshotAtAnalysis.version else deltaLog.unsafeVolatileSnapshot.version + } + + // WARNING: These methods are intentionally pinned to the analysis-time snapshot, which may differ + // from the one returned by [[getSnapshot]] that we will eventually scan. + override def metadata: Metadata = snapshotAtAnalysis.metadata + override def protocol: Protocol = snapshotAtAnalysis.protocol + + private def checkSchemaOnRead: Boolean = { + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_SCHEMA_ON_READ_CHECK_ENABLED) + } + + protected def getSnapshotToScan: Snapshot = { + if (isTimeTravelQuery) { + snapshotAtAnalysis + } else { + deltaLog.update(stalenessAcceptable = true) + } + } + + /** Provides the version that's being used as part of the scan if this is a time travel query. */ + def versionToUse: Option[Long] = if (isTimeTravelQuery) Some(snapshotAtAnalysis.version) else None + + def getSnapshot: Snapshot = { + val snapshotToScan = getSnapshotToScan + // Always check read compatibility with column mapping tables + if (checkSchemaOnRead) { + // Ensure that the schema hasn't changed in an incompatible manner since analysis time: + // 1. Check logical schema incompatibility + // 2. Check column mapping read compatibility. The above check is not sufficient + // when the schema's logical names are not changing but the underlying physical name has + // changed. In this case, the data files cannot be read using the old schema any more. 
+ val snapshotSchema = snapshotToScan.metadata.schema + if (!SchemaUtils.isReadCompatible(snapshotAtAnalysis.schema, snapshotSchema) || + !DeltaColumnMapping.hasNoColumnMappingSchemaChanges( + snapshotToScan.metadata, snapshotAtAnalysis.metadata)) { + throw DeltaErrors.schemaChangedSinceAnalysis(snapshotAtAnalysis.schema, snapshotSchema) + } + } + + // disallow reading table with empty schema, which we support creating now + if (snapshotToScan.schema.isEmpty) { + // print the catalog identifier or delta.`/path/to/table` + var message = TableIdentifier(deltaLog.dataPath.toString, Some("delta")).quotedString + throw DeltaErrors.readTableWithoutSchemaException(message) + } + + snapshotToScan + } + + override def matchingFiles( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): Seq[AddFile] = { + getSnapshot.filesForScan(this.partitionFilters ++ partitionFilters ++ dataFilters).files + } + + override def inputFiles: Array[String] = { + getSnapshot + .filesForScan(partitionFilters).files + .map(f => absolutePath(f.path).toString) + .toArray + } + + override def refresh(): Unit = {} + override def sizeInBytes: Long = deltaLog.unsafeVolatileSnapshot.sizeInBytes + + override def equals(that: Any): Boolean = that match { + case t: TahoeLogFileIndex => + t.path == path && t.deltaLog.isSameLogAs(deltaLog) && + t.versionToUse == versionToUse && t.partitionFilters == partitionFilters + case _ => false + } + + override def hashCode: scala.Int = { + Objects.hashCode(path, deltaLog.compositeId, versionToUse, partitionFilters) + } + + protected[delta] def numOfFilesIfKnown: Option[Long] = + deltaLog.unsafeVolatileSnapshot.numOfFilesIfKnown + + protected[delta] def sizeInBytesIfKnown: Option[Long] = + deltaLog.unsafeVolatileSnapshot.sizeInBytesIfKnown +} + +object TahoeLogFileIndex { + def apply(spark: SparkSession, deltaLog: DeltaLog): TahoeLogFileIndex = + TahoeLogFileIndex(spark, deltaLog, deltaLog.dataPath, deltaLog.unsafeVolatileSnapshot) +} + +/** + * A [[TahoeFileIndex]] that generates the list of files from a given list of files + * that are within a version range of DeltaLog. + */ +class TahoeBatchFileIndex( + spark: SparkSession, + val actionType: String, + val addFiles: Seq[AddFile], + deltaLog: DeltaLog, + path: Path, + val snapshot: SnapshotDescriptor, + val partitionFiltersGenerated: Boolean = false) + extends TahoeFileIndexWithSnapshotDescriptor(spark, deltaLog, path, snapshot) { + + override def matchingFiles( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): Seq[AddFile] = { + DeltaLog.filterFileList(partitionSchema, addFiles.toDF(spark), partitionFilters) + .as[AddFile] + .collect() + } + + override def inputFiles: Array[String] = { + addFiles.map(a => absolutePath(a.path).toString).toArray + } + + override def refresh(): Unit = {} + override lazy val sizeInBytes: Long = addFiles.map(_.size).sum +} + +trait SupportsRowIndexFilters { + /** + * If we know a-priori which exact rows we want to read (e.g., from a previous scan) + * find the per-file filter here, which must be passed down to the appropriate reader. + * + * @return a mapping from file names to the row index filter for that file. 
+ */ + def rowIndexFilters: Option[Map[String, RowIndexFilterType]] = None +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/files/TahoeRemoveFileIndex.scala b/spark/src/main/scala/org/apache/spark/sql/delta/files/TahoeRemoveFileIndex.scala new file mode 100644 index 00000000000..3eca5e43a76 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/files/TahoeRemoveFileIndex.scala @@ -0,0 +1,94 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.files + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.{AddFile, RemoveFile} +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.commands.cdc.CDCReader._ +import org.apache.spark.sql.delta.implicits._ +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.types.StructType + +/** + * A [[TahoeFileIndex]] for scanning a sequence of removed files as CDC. Similar to + * [[TahoeBatchFileIndex]], the equivalent for reading [[AddFile]] actions. + * @param spark The Spark session. + * @param filesByVersion Grouped FileActions, one per table version. + * @param deltaLog The delta log instance. + * @param path The table's data path. + * @param snapshot The snapshot where we read CDC from. + * @param rowIndexFilters Map from URI-encoded file path to a row index filter type. + */ +class TahoeRemoveFileIndex( + spark: SparkSession, + val filesByVersion: Seq[CDCDataSpec[RemoveFile]], + deltaLog: DeltaLog, + path: Path, + snapshot: SnapshotDescriptor, + override val rowIndexFilters: Option[Map[String, RowIndexFilterType]] = None + ) extends TahoeFileIndexWithSnapshotDescriptor(spark, deltaLog, path, snapshot) { + + override def matchingFiles( + partitionFilters: Seq[Expression], + dataFilters: Seq[Expression]): Seq[AddFile] = { + // Make some fake AddFiles to satisfy the interface. + val addFiles = filesByVersion.flatMap { + case CDCDataSpec(version, ts, files, ci) => + files.map { r => + if (!r.extendedFileMetadata.getOrElse(false)) { + // This shouldn't happen in user queries - the CDC flag was added at the same time as + // extended metadata, so all removes in a table with CDC enabled should have it. (The + // only exception is FSCK removes, which we screen out separately because they have + // dataChange set to false.) + throw DeltaErrors.removeFileCDCMissingExtendedMetadata(r.toString) + } + // We add the metadata as faked partition columns in order to attach it on a per-file + // basis. 
+ val newPartitionVals = r.partitionValues + + (CDC_COMMIT_VERSION -> version.toString) + + (CDC_COMMIT_TIMESTAMP -> Option(ts).map(_.toString).orNull) + + (CDC_TYPE_COLUMN_NAME -> CDC_TYPE_DELETE_STRING) + AddFile( + path = r.path, + partitionValues = newPartitionVals, + size = r.size.getOrElse(0L), + modificationTime = 0, + dataChange = r.dataChange, + tags = r.tags, + deletionVector = r.deletionVector + ) + } + } + DeltaLog.filterFileList(partitionSchema, addFiles.toDF(spark), partitionFilters) + .as[AddFile] + .collect() + } + + override def inputFiles: Array[String] = { + filesByVersion.flatMap(_.actions).map(f => absolutePath(f.path).toString).toArray + } + + override def partitionSchema: StructType = CDCReader.cdcReadSchema(super.partitionSchema) + + override def refresh(): Unit = {} + + override val sizeInBytes: Long = filesByVersion.flatMap(_.actions).map(_.size.getOrElse(0L)).sum +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/files/TransactionalWrite.scala b/spark/src/main/scala/org/apache/spark/sql/delta/files/TransactionalWrite.scala new file mode 100644 index 00000000000..09a9bf5764c --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/files/TransactionalWrite.scala @@ -0,0 +1,509 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.files + +import scala.collection.mutable.ListBuffer + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.constraints.{Constraint, Constraints, DeltaInvariantCheckerExec} +import org.apache.spark.sql.delta.hooks.AutoCompact +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.perf.DeltaOptimizedWriterExec +import org.apache.spark.sql.delta.schema._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.sources.DeltaSQLConf.DELTA_COLLECT_STATS_USING_TABLE_SCHEMA +import org.apache.spark.sql.delta.stats.{ + DeltaJobStatisticsTracker, + StatisticsCollection +} +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes +import org.apache.spark.sql.connector.catalog._ +import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, FileFormatWriter, WriteJobStatsTracker} +import org.apache.spark.sql.functions.{col, to_json} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{StringType, StructField, StructType} +import org.apache.spark.util.SerializableConfiguration + +/** + * Adds the ability to write files out as part of a transaction. 
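// Both TahoeChangeFileIndex and TahoeRemoveFileIndex above rely on the same trick: commit
// metadata (and, for removes, the change type) is attached as extra "partition" values on a
// per-file basis. A minimal illustration follows; the literal column names "_commit_version",
// "_commit_timestamp", "_change_type" and the value "delete" are assumptions here, since only
// the constant names appear in these files.
object CdcMetadataSketch {
  def main(args: Array[String]): Unit = {
    val filePartitionValues = Map("year" -> "2024")
    val version = 7L
    val ts = java.sql.Timestamp.valueOf("2024-01-01 00:00:00")
    val augmented = filePartitionValues +
      ("_commit_version" -> version.toString) +
      ("_commit_timestamp" -> Option(ts).map(_.toString).orNull) +
      ("_change_type" -> "delete")
    // Every row read from this (removed) file now surfaces as a `delete` change carrying the
    // version and timestamp of the commit that removed it.
    println(augmented)
  }
}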
Checks + * are performed to ensure that the data being written matches either the + * current metadata or the new metadata being set by this transaction. + */ +trait TransactionalWrite extends DeltaLogging { self: OptimisticTransactionImpl => + + def deltaLog: DeltaLog + + def protocol: Protocol + + protected def snapshot: Snapshot + + protected def metadata: Metadata + + protected var hasWritten = false + + private[delta] val deltaDataSubdir = + if (spark.sessionState.conf.getConf(DeltaSQLConf.WRITE_DATA_FILES_TO_SUBDIR)) { + Some("data") + } else None + + protected def getCommitter(outputPath: Path): DelayedCommitProtocol = + new DelayedCommitProtocol("delta", outputPath.toString, None, deltaDataSubdir) + + /** Makes the output attributes nullable, so that we don't write unreadable parquet files. */ + protected def makeOutputNullable(output: Seq[Attribute]): Seq[Attribute] = { + output.map { + case ref: AttributeReference => + val nullableDataType = SchemaUtils.typeAsNullable(ref.dataType) + ref.copy(dataType = nullableDataType, nullable = true)(ref.exprId, ref.qualifier) + case attr => attr.withNullability(true) + } + } + + /** Replace the output attributes with the physical mapping information. */ + protected def mapColumnAttributes( + output: Seq[Attribute], + mappingMode: DeltaColumnMappingMode): Seq[Attribute] = { + DeltaColumnMapping.createPhysicalAttributes(output, metadata.schema, mappingMode) + } + + /** + * Normalize the schema of the query, and return the QueryExecution to execute. If the table has + * generated columns and users provide these columns in the output, we will also return + * constraints that should be respected. If any constraints are returned, the caller should apply + * these constraints when writing data. + * + * Note: The output attributes of the QueryExecution may not match the attributes we return as the + * output schema. This is because streaming queries create `IncrementalExecution`, which cannot be + * further modified. We can however have the Parquet writer use the physical plan from + * `IncrementalExecution` and the output schema provided through the attributes. + */ + protected def normalizeData( + deltaLog: DeltaLog, + options: Option[DeltaOptions], + data: Dataset[_]): (QueryExecution, Seq[Attribute], Seq[Constraint], Set[String]) = { + val normalizedData = SchemaUtils.normalizeColumnNames(metadata.schema, data) + val nullAsDefault = options.isDefined && + options.get.options.contains(ColumnWithDefaultExprUtils.USE_NULL_AS_DEFAULT_DELTA_OPTION) + val enforcesDefaultExprs = ColumnWithDefaultExprUtils.tableHasDefaultExpr( + protocol, metadata, nullAsDefault) + val (dataWithDefaultExprs, generatedColumnConstraints, trackHighWaterMarks) = + if (enforcesDefaultExprs) { + ColumnWithDefaultExprUtils.addDefaultExprsOrReturnConstraints( + deltaLog, + protocol, + // We need the original query execution if this is a streaming query, because + // `normalizedData` may add a new projection and change its type. + data.queryExecution, + metadata.schema, + normalizedData, + nullAsDefault) + } else { + (normalizedData, Nil, Set[String]()) + } + val cleanedData = SchemaUtils.dropNullTypeColumns(dataWithDefaultExprs) + val queryExecution = if (cleanedData.schema != dataWithDefaultExprs.schema) { + // This must be batch execution as DeltaSink doesn't accept NullType in micro batch DataFrame. 
+ // For batch executions, we need to use the latest DataFrame query execution + cleanedData.queryExecution + } else if (enforcesDefaultExprs) { + dataWithDefaultExprs.queryExecution + } else { + assert( + normalizedData == dataWithDefaultExprs, + "should not change data when there is no generate column") + // Ideally, we should use `normalizedData`. But it may use `QueryExecution` rather than + // `IncrementalExecution`. So we use the input `data` and leverage the `nullableOutput` + // below to fix the column names. + data.queryExecution + } + val nullableOutput = makeOutputNullable(cleanedData.queryExecution.analyzed.output) + val columnMapping = metadata.columnMappingMode + // Check partition column errors + checkPartitionColumns( + metadata.partitionSchema, nullableOutput, nullableOutput.length < data.schema.size + ) + // Rewrite column physical names if using a mapping mode + val mappedOutput = if (columnMapping == NoMapping) nullableOutput else { + mapColumnAttributes(nullableOutput, columnMapping) + } + (queryExecution, mappedOutput, generatedColumnConstraints, trackHighWaterMarks) + } + + protected def checkPartitionColumns( + partitionSchema: StructType, + output: Seq[Attribute], + colsDropped: Boolean): Unit = { + val partitionColumns: Seq[Attribute] = partitionSchema.map { col => + // schema is already normalized, therefore we can do an equality check + output.find(f => f.name == col.name).getOrElse( + throw DeltaErrors.partitionColumnNotFoundException(col.name, output) + ) + } + if (partitionColumns.nonEmpty && partitionColumns.length == output.length) { + throw DeltaErrors.nonPartitionColumnAbsentException(colsDropped) + } + } + + protected def getPartitioningColumns( + partitionSchema: StructType, + output: Seq[Attribute]): Seq[Attribute] = { + val partitionColumns: Seq[Attribute] = partitionSchema.map { col => + // schema is already normalized, therefore we can do an equality check + // we have already checked for missing columns, so the fields must exist + output.find(f => f.name == col.name).get + } + partitionColumns + } + + /** + * If there is any string partition column and there are constraints defined, add a projection to + * convert empty string to null for that column. The empty strings will be converted to null + * eventually even without this convert, but we want to do this earlier before check constraints + * so that empty strings are correctly rejected. Note that this should not cause the downstream + * logic in `FileFormatWriter` to add duplicate conversions because the logic there checks the + * partition column using the original plan's output. When the plan is modified with additional + * projections, the partition column check won't match and will not add more conversion. + * + * @param plan The original SparkPlan. + * @param partCols The partition columns. + * @param constraints The defined constraints. + * @return A SparkPlan potentially modified with an additional projection on top of `plan` + */ + protected def convertEmptyToNullIfNeeded( + plan: SparkPlan, + partCols: Seq[Attribute], + constraints: Seq[Constraint]): SparkPlan = { + if (!spark.conf.get(DeltaSQLConf.CONVERT_EMPTY_TO_NULL_FOR_STRING_PARTITION_COL)) { + return plan + } + // No need to convert if there are no constraints. The empty strings will be converted later by + // FileFormatWriter and FileFormatDataWriter. Note that we might still do unnecessary convert + // here as the constraints might not be related to the string partition columns. 
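// Pure-Scala restatement of why the early conversion above matters: string partition values
// that are empty end up stored as null, so constraints should evaluate null as well.
// `emptyToNull` is a stand-in for Spark's Empty2Null expression and the column names are
// hypothetical.
object EmptyToNullSketch {
  def emptyToNull(s: String): String = if (s != null && s.isEmpty) null else s

  def main(args: Array[String]): Unit = {
    val row = Map("event_date" -> "", "id" -> "42") // event_date: string partition column
    val partitionCols = Set("event_date")
    val converted = row.map { case (k, v) => k -> (if (partitionCols(k)) emptyToNull(v) else v) }
    println(converted) // Map(event_date -> null, id -> 42)
  }
}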
A precise + // check will need to walk the constraints to see if such columns are really involved. It + // doesn't seem to worth the effort. + if (constraints.isEmpty) return plan + + val partSet = AttributeSet(partCols) + var needConvert = false + val projectList: Seq[NamedExpression] = plan.output.map { + case p if partSet.contains(p) && p.dataType == StringType => + needConvert = true + Alias(org.apache.spark.sql.catalyst.expressions.Empty2Null(p), p.name)() + case attr => attr + } + if (needConvert) ProjectExec(projectList, plan) else plan + } + + def writeFiles( + data: Dataset[_], + additionalConstraints: Seq[Constraint]): Seq[FileAction] = { + writeFiles(data, None, additionalConstraints) + } + + def writeFiles( + data: Dataset[_], + writeOptions: Option[DeltaOptions]): Seq[FileAction] = { + writeFiles(data, writeOptions, Nil) + } + + def writeFiles(data: Dataset[_]): Seq[FileAction] = { + writeFiles(data, Nil) + } + + def writeFiles( + data: Dataset[_], + deltaOptions: Option[DeltaOptions], + additionalConstraints: Seq[Constraint]): Seq[FileAction] = { + writeFiles(data, deltaOptions, isOptimize = false, additionalConstraints) + } + + /** + * Returns a tuple of (data, partition schema). For CDC writes, a `__is_cdc` column is added to + * the data and `__is_cdc=true/false` is added to the front of the partition schema. + */ + protected def performCDCPartition(inputData: Dataset[_]): (DataFrame, StructType) = { + // If this is a CDC write, we need to generate the CDC_PARTITION_COL in order to properly + // dispatch rows between the main table and CDC event records. This is a virtual partition + // and will be stripped out later in [[DelayedCommitProtocolEdge]]. + // Note that the ordering of the partition schema is relevant - CDC_PARTITION_COL must + // come first in order to ensure CDC data lands in the right place. + if (CDCReader.isCDCEnabledOnTable(metadata, spark) && + inputData.schema.fieldNames.contains(CDCReader.CDC_TYPE_COLUMN_NAME)) { + val augmentedData = inputData.withColumn( + CDCReader.CDC_PARTITION_COL, col(CDCReader.CDC_TYPE_COLUMN_NAME).isNotNull) + val partitionSchema = StructType( + StructField(CDCReader.CDC_PARTITION_COL, StringType) +: metadata.physicalPartitionSchema) + (augmentedData, partitionSchema) + } else { + (inputData.toDF(), metadata.physicalPartitionSchema) + } + } + + /** + * Return a tuple of (outputStatsCollectionSchema, statsCollectionSchema). + * outputStatsCollectionSchema is the data source schema from DataFrame used for stats collection. + * It contains the columns in the DataFrame output, excluding the partition columns. + * tableStatsCollectionSchema is the schema to collect stats for. It contains the columns in the + * table schema, excluding the partition columns. 
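// A toy, local-session illustration of the CDC dispatch performed by performCDCPartition
// above: rows carrying a change type go to the __is_cdc=true virtual partition, everything
// else to __is_cdc=false. The literal column name "_change_type" for CDC_TYPE_COLUMN_NAME is
// an assumption here.
object CdcPartitionSketch {
  import org.apache.spark.sql.SparkSession
  import org.apache.spark.sql.functions.col

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("cdc-partition-sketch").getOrCreate()
    import spark.implicits._
    val df = Seq(("a", null.asInstanceOf[String]), ("b", "insert")).toDF("value", "_change_type")
    // "a" gets __is_cdc = false (main table data), "b" gets __is_cdc = true (change data).
    df.withColumn("__is_cdc", col("_change_type").isNotNull).show()
    spark.stop()
  }
}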
+ * Note: We only collect NULL_COUNT stats (as the number of rows) for the columns in + * statsCollectionSchema but missing in outputStatsCollectionSchema + */ + protected def getStatsSchema( + dataFrameOutput: Seq[Attribute], + partitionSchema: StructType): (Seq[Attribute], Seq[Attribute]) = { + val partitionColNames = partitionSchema.map(_.name).toSet + + // The outputStatsCollectionSchema comes from DataFrame output + // schema should be normalized, therefore we can do an equality check + val outputStatsCollectionSchema = dataFrameOutput + .filterNot(c => partitionColNames.contains(c.name)) + + // The tableStatsCollectionSchema comes from table schema + val statsTableSchema = toAttributes(metadata.schema) + val mappedStatsTableSchema = if (metadata.columnMappingMode == NoMapping) { + statsTableSchema + } else { + mapColumnAttributes(statsTableSchema, metadata.columnMappingMode) + } + + // It's important to first do the column mapping and then drop the partition columns + val tableStatsCollectionSchema = mappedStatsTableSchema + .filterNot(c => partitionColNames.contains(c.name)) + + (outputStatsCollectionSchema, tableStatsCollectionSchema) + } + + protected def getStatsColExpr( + statsDataSchema: Seq[Attribute], + statsCollection: StatisticsCollection): Expression = { + Dataset.ofRows(spark, LocalRelation(statsDataSchema)) + .select(to_json(statsCollection.statsCollector)) + .queryExecution.analyzed.expressions.head + } + + + /** Return the pair of optional stats tracker and stats collection class */ + protected def getOptionalStatsTrackerAndStatsCollection( + output: Seq[Attribute], + outputPath: Path, + partitionSchema: StructType, data: DataFrame): ( + Option[DeltaJobStatisticsTracker], + Option[StatisticsCollection]) = { + // check whether we should collect Delta stats + val collectStats = + (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_COLLECT_STATS) + ) + + if (collectStats) { + val (outputStatsCollectionSchema, tableStatsCollectionSchema) = + getStatsSchema(output, partitionSchema) + + val statsCollection = new StatisticsCollection { + override val columnMappingMode: DeltaColumnMappingMode = metadata.columnMappingMode + override def tableSchema: StructType = metadata.schema + override def outputTableStatsSchema: StructType = { + // If collecting stats uses the table schema, then we pass in tableStatsCollectionSchema; + // otherwise, pass in outputStatsCollectionSchema to collect stats using the DataFrame + // schema. + if (spark.sessionState.conf.getConf(DELTA_COLLECT_STATS_USING_TABLE_SCHEMA)) { + tableStatsCollectionSchema.toStructType + } else { + outputStatsCollectionSchema.toStructType + } + } + override def outputAttributeSchema: StructType = outputStatsCollectionSchema.toStructType + override val spark: SparkSession = data.sparkSession + override val statsColumnSpec = StatisticsCollection.configuredDeltaStatsColumnSpec(metadata) + override val protocol: Protocol = newProtocol.getOrElse(snapshot.protocol) + } + val statsColExpr = getStatsColExpr(outputStatsCollectionSchema, statsCollection) + + (Some(new DeltaJobStatisticsTracker(deltaLog.newDeltaHadoopConf(), + outputPath, + outputStatsCollectionSchema, + statsColExpr + )), + Some(statsCollection)) + } else { + (None, None) + } + } + + + /** + * Writes out the dataframe after performing schema validation. Returns a list of + * actions to append these files to the reservoir. + * + * @param inputData Data to write out. + * @param writeOptions Options to decide how to write out the data. 
+ * @param isOptimize Whether the operation writing this is Optimize or not. + * @param additionalConstraints Additional constraints on the write. + */ + def writeFiles( + inputData: Dataset[_], + writeOptions: Option[DeltaOptions], + isOptimize: Boolean, + additionalConstraints: Seq[Constraint]): Seq[FileAction] = { + hasWritten = true + + val spark = inputData.sparkSession + val (data, partitionSchema) = performCDCPartition(inputData) + val outputPath = deltaLog.dataPath + + val (queryExecution, output, generatedColumnConstraints, _) = + normalizeData(deltaLog, writeOptions, data) + val partitioningColumns = getPartitioningColumns(partitionSchema, output) + + val committer = getCommitter(outputPath) + + // If Statistics Collection is enabled, then create a stats tracker that will be injected during + // the FileFormatWriter.write call below and will collect per-file stats using + // StatisticsCollection + val (optionalStatsTracker, _) = getOptionalStatsTrackerAndStatsCollection(output, outputPath, + partitionSchema, data) + + + val constraints = + Constraints.getAll(metadata, spark) ++ generatedColumnConstraints ++ additionalConstraints + + SQLExecution.withNewExecutionId(queryExecution, Option("deltaTransactionalWrite")) { + val outputSpec = FileFormatWriter.OutputSpec( + outputPath.toString, + Map.empty, + output) + + val empty2NullPlan = convertEmptyToNullIfNeeded(queryExecution.executedPlan, + partitioningColumns, constraints) + val checkInvariants = DeltaInvariantCheckerExec(empty2NullPlan, constraints) + // No need to plan optimized write if the write command is OPTIMIZE, which aims to produce + // evenly-balanced data files already. + val physicalPlan = if (!isOptimize && + shouldOptimizeWrite(writeOptions, spark.sessionState.conf)) { + DeltaOptimizedWriterExec(checkInvariants, metadata.partitionColumns, deltaLog) + } else { + checkInvariants + } + + val statsTrackers: ListBuffer[WriteJobStatsTracker] = ListBuffer() + + if (spark.conf.get(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED)) { + val basicWriteJobStatsTracker = new BasicWriteJobStatsTracker( + new SerializableConfiguration(deltaLog.newDeltaHadoopConf()), + BasicWriteJobStatsTracker.metrics) + registerSQLMetrics(spark, basicWriteJobStatsTracker.driverSideMetrics) + statsTrackers.append(basicWriteJobStatsTracker) + } + + // Iceberg spec requires partition columns in data files + val writePartitionColumns = IcebergCompat.isAnyEnabled(metadata) + // Retain only a minimal selection of Spark writer options to avoid any potential + // compatibility issues + val options = (writeOptions match { + case None => Map.empty[String, String] + case Some(writeOptions) => + writeOptions.options.filterKeys { key => + key.equalsIgnoreCase(DeltaOptions.MAX_RECORDS_PER_FILE) || + key.equalsIgnoreCase(DeltaOptions.COMPRESSION) + }.toMap + }) + (DeltaOptions.WRITE_PARTITION_COLUMNS -> writePartitionColumns.toString) + + try { + DeltaFileFormatWriter.write( + sparkSession = spark, + plan = physicalPlan, + fileFormat = deltaLog.fileFormat(protocol, metadata), // TODO support changing formats. 
+ committer = committer, + outputSpec = outputSpec, + // scalastyle:off deltahadoopconfiguration + hadoopConf = + spark.sessionState.newHadoopConfWithOptions(metadata.configuration ++ deltaLog.options), + // scalastyle:on deltahadoopconfiguration + partitionColumns = partitioningColumns, + bucketSpec = None, + statsTrackers = optionalStatsTracker.toSeq + ++ statsTrackers, + options = options) + } catch { + case InnerInvariantViolationException(violationException) => + // Pull an InvariantViolationException up to the top level if it was the root cause. + throw violationException + } + } + + var resultFiles = + (if (optionalStatsTracker.isDefined) { + committer.addedStatuses.map { a => + a.copy(stats = optionalStatsTracker.map( + _.recordedStats(a.toPath.getName)).getOrElse(a.stats)) + } + } + else { + committer.addedStatuses + }) + .filter { + // In some cases, we can write out an empty `inputData`. Some examples of this (though, they + // may be fixed in the future) are the MERGE command when you delete with empty source, or + // empty target, or on disjoint tables. This is hard to catch before the write without + // collecting the DF ahead of time. Instead, we can return only the AddFiles that + // a) actually add rows, or + // b) don't have any stats so we don't know the number of rows at all + case a: AddFile => a.numLogicalRecords.forall(_ > 0) + case _ => true + } + + // add [[AddFile.Tags.ICEBERG_COMPAT_VERSION.name]] tags to addFiles + if (IcebergCompatV2.isEnabled(metadata)) { + resultFiles = resultFiles.map { addFile => + val tags = if (addFile.tags != null) addFile.tags else Map.empty[String, String] + addFile.copy(tags = tags + (AddFile.Tags.ICEBERG_COMPAT_VERSION.name -> "2")) + } + } + + + if (resultFiles.nonEmpty && !isOptimize) registerPostCommitHook(AutoCompact) + + resultFiles.toSeq ++ committer.changeFiles + } + + /** + * Optimized writes can be enabled/disabled through the following order: + * - Through DataFrameWriter options + * - Through SQL configuration + * - Through the table parameter + */ + private def shouldOptimizeWrite( + writeOptions: Option[DeltaOptions], sessionConf: SQLConf): Boolean = { + writeOptions.flatMap(_.optimizeWrite) + .getOrElse(TransactionalWrite.shouldOptimizeWrite(metadata, sessionConf)) + } +} + +object TransactionalWrite { + def shouldOptimizeWrite(metadata: Metadata, sessionConf: SQLConf): Boolean = { + sessionConf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_ENABLED) + .orElse(DeltaConfigs.OPTIMIZE_WRITE.fromMetaData(metadata)) + .getOrElse(false) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/hooks/AutoCompact.scala b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/AutoCompact.scala new file mode 100644 index 00000000000..9cd1ed5f179 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/AutoCompact.scala @@ -0,0 +1,254 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.hooks + +import org.apache.spark.sql.delta.skipping.clustering.ClusteredTableUtils +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.commands.{DeltaOptimizeContext, OptimizeExecutor} +import org.apache.spark.sql.delta.commands.optimize._ +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.AutoCompactPartitionStats + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.internal.SQLConf + +/** + * A trait for post commit hook which compacts files in a Delta table. This hook acts as a cheaper + * version of the OPTIMIZE command, by attempting to compact small files together into fewer bigger + * files. + * + * Auto Compact chooses files to compact greedily by looking at partition directories which + * have the largest number of files that are under a certain size threshold and launches a bounded + * number of optimize tasks based on the capacity of the cluster. + */ +trait AutoCompactBase extends PostCommitHook with DeltaLogging { + + override val name: String = "Auto Compact" + + private[delta] val OP_TYPE = "delta.commit.hooks.autoOptimize" + + /** + * This method returns the type of Auto Compaction to use on a delta table or returns None + * if Auto Compaction is disabled. + * Prioritization: + * 1. The highest priority is given to [[DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED]] config. + * 2. Then we check if the deprecated property `DeltaConfigs.AUTO_OPTIMIZE` is set. If yes, then + * we return [[AutoCompactType.Enabled]] type. + * 3. Then we check the table property [[DeltaConfigs.AUTO_COMPACT]]. + * 4. If none of 1/2/3 are set explicitly, then we return None + */ + def getAutoCompactType(conf: SQLConf, metadata: Metadata): Option[AutoCompactType] = { + // If user-facing conf is set to something, use that value. + val autoCompactTypeFromConf = + conf.getConf(DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED).map(AutoCompactType(_)) + if (autoCompactTypeFromConf.nonEmpty) return autoCompactTypeFromConf.get + + // If user-facing conf is not set, use what table property says. + val deprecatedFlag = DeltaConfigs.AUTO_OPTIMIZE.fromMetaData(metadata) + val autoCompactTypeFromPropertyOrDefaultValue = deprecatedFlag match { + case Some(true) => + Some(AutoCompactType.Enabled) + case _ => + // If the legacy property `DeltaConfigs.AUTO_OPTIMIZE` is false or not set, then check + // the new table property `DeltaConfigs.AUTO_COMPACT`. + val confValueFromTableProperty = DeltaConfigs.AUTO_COMPACT.fromMetaData(metadata) + confValueFromTableProperty match { + case Some(v) => + // Table property is set to something explicitly by user. + AutoCompactType(v) + case None => + AutoCompactType(AutoCompactType.DISABLED) // Default to disabled + } + } + autoCompactTypeFromPropertyOrDefaultValue + } + + private[hooks] def shouldSkipAutoCompact( + autoCompactTypeOpt: Option[AutoCompactType], + spark: SparkSession, + txn: OptimisticTransactionImpl): Boolean = { + // If auto compact type is empty, then skip compaction + if (autoCompactTypeOpt.isEmpty) return true + + // Skip Auto Compaction, if one of the following conditions is satisfied: + // -- Auto Compaction is not enabled. + // -- Transaction execution time is empty, which means the parent transaction is not committed. 
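A standalone sketch (not part of this patch; simplified types and illustrative names) of how the
precedence documented on getAutoCompactType above combines with the skip decision implemented here:
the session conf wins, then the deprecated AUTO_OPTIMIZE flag, then the AUTO_COMPACT table
property, and anything that resolves to disabled (or an unqualified transaction) skips the hook.

object AutoCompactDecisionSketch {
  // Session conf first; otherwise the deprecated AUTO_OPTIMIZE flag; otherwise the newer
  // AUTO_COMPACT table property. "false", or nothing set at all, means disabled.
  def resolve(
      sessionConf: Option[String],
      deprecatedAutoOptimize: Option[Boolean],
      tableProperty: Option[String]): Option[String] =
    sessionConf
      .orElse(deprecatedAutoOptimize.collect { case true => "true" })
      .orElse(tableProperty)
      .filter(_ != "false")

  // The hook is skipped when Auto Compact resolves to disabled or the txn is not qualified.
  def shouldSkip(resolved: Option[String], txnQualified: Boolean): Boolean =
    resolved.isEmpty || !txnQualified

  def main(args: Array[String]): Unit = {
    assert(shouldSkip(resolve(Some("false"), Some(true), Some("true")), txnQualified = true))
    assert(!shouldSkip(resolve(None, Some(true), None), txnQualified = true))
    assert(shouldSkip(resolve(None, None, Some("true")), txnQualified = false))
  }
}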
+ !AutoCompactUtils.isQualifiedForAutoCompact(spark, txn) + + } + + override def run( + spark: SparkSession, + txn: OptimisticTransactionImpl, + committedVersion: Long, + postCommitSnapshot: Snapshot, + actions: Seq[Action]): Unit = { + val conf = spark.sessionState.conf + val autoCompactTypeOpt = getAutoCompactType(conf, postCommitSnapshot.metadata) + // Skip Auto Compact if current transaction is not qualified or the table is not qualified + // based on the value of autoCompactTypeOpt. + if (shouldSkipAutoCompact(autoCompactTypeOpt, spark, txn)) return + compactIfNecessary( + spark, + txn, + postCommitSnapshot, + OP_TYPE, + maxDeletedRowsRatio = None) + } + + /** + * Compact the target table of write transaction `txn` only when there are sufficient amount of + * small size files. + */ + private[delta] def compactIfNecessary( + spark: SparkSession, + txn: OptimisticTransactionImpl, + postCommitSnapshot: Snapshot, + opType: String, + maxDeletedRowsRatio: Option[Double] + ): Seq[OptimizeMetrics] = { + val tableId = txn.deltaLog.tableId + val autoCompactRequest = AutoCompactUtils.prepareAutoCompactRequest( + spark, + txn, + postCommitSnapshot, + txn.partitionsAddedToOpt.map(_.toSet), + opType, + maxDeletedRowsRatio) + if (autoCompactRequest.shouldCompact) { + try { + val metrics = AutoCompact + .compact( + spark, + txn.deltaLog, + txn.catalogTable, + autoCompactRequest.targetPartitionsPredicate, + opType, + maxDeletedRowsRatio + ) + val partitionsStats = AutoCompactPartitionStats.instance(spark) + // Mark partitions as compacted before releasing them. + // Otherwise an already compacted partition might get picked up by a concurrent thread. + // But only marks it as compacted, if no exception was thrown by auto compaction so that the + // partitions stay eligible for subsequent auto compactions. + partitionsStats.markPartitionsAsCompacted( + tableId, + autoCompactRequest.allowedPartitions + ) + metrics + } catch { + case e: Throwable => + logError("Auto Compaction failed with: " + e.getMessage) + throw e + } finally { + if (AutoCompactUtils.reservePartitionEnabled(spark)) { + AutoCompactPartitionReserve.releasePartitions( + tableId, + autoCompactRequest.allowedPartitions + ) + } + } + } else { + Seq.empty[OptimizeMetrics] + } + } + + + /** + * Launch Auto Compaction jobs if there is sufficient capacity. + * @param spark The spark session of the parent transaction that triggers this Auto Compaction. + * @param deltaLog The delta log of the parent transaction. + * @return the optimize metrics of this compaction job. 
+ */ + private[delta] def compact( + spark: SparkSession, + deltaLog: DeltaLog, + catalogTable: Option[CatalogTable], + partitionPredicates: Seq[Expression] = Nil, + opType: String = OP_TYPE, + maxDeletedRowsRatio: Option[Double] = None) + : Seq[OptimizeMetrics] = recordDeltaOperation(deltaLog, opType) { + val maxFileSize = spark.conf.get(DeltaSQLConf.DELTA_AUTO_COMPACT_MAX_FILE_SIZE) + val minFileSizeOpt = Some(spark.conf.get(DeltaSQLConf.DELTA_AUTO_COMPACT_MIN_FILE_SIZE) + .getOrElse(maxFileSize / 2)) + val maxFileSizeOpt = Some(maxFileSize) + recordDeltaOperation(deltaLog, s"$opType.execute") { + val txn = deltaLog.startTransaction(catalogTable) + val optimizeContext = DeltaOptimizeContext( + isPurge = false, + minFileSizeOpt, + maxFileSizeOpt, + maxDeletedRowsRatio = maxDeletedRowsRatio + ) + val rows = new OptimizeExecutor(spark, txn, partitionPredicates, Seq(), true, optimizeContext) + .optimize() + val metrics = rows.map(_.getAs[OptimizeMetrics](1)) + recordDeltaEvent(deltaLog, s"$opType.execute.metrics", data = metrics.head) + metrics + } + } + +} + +/** + * Post commit hook for Auto Compaction. + */ +case object AutoCompact extends AutoCompactBase +/** + * A trait describing the type of Auto Compaction. + */ +sealed trait AutoCompactType { + val configValueStrings: Seq[String] +} + +object AutoCompactType { + + private[hooks] val DISABLED = "false" + + /** + * Enable auto compact. + * 1. MAX_FILE_SIZE is configurable and defaults to 128 MB unless overridden. + * 2. MIN_FILE_SIZE is configurable and defaults to MAX_FILE_SIZE / 2 unless overridden. + * Note: User can use DELTA_AUTO_COMPACT_MAX_FILE_SIZE to override this value. + */ + case object Enabled extends AutoCompactType { + override val configValueStrings = Seq( + "true" + ) + } + + + /** + * Converts the config value String (coming from [[DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED]] conf + * or [[DeltaConfigs.AUTO_COMPACT]] table property) and translates into the [[AutoCompactType]]. + */ + def apply(value: String): Option[AutoCompactType] = { + if (Enabled.configValueStrings.contains(value)) return Some(Enabled) + if (value == DISABLED) return None + throw DeltaErrors.invalidAutoCompactType(value) + } + + // All allowed values for [[DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED]] and + // [[DeltaConfigs.AUTO_COMPACT]]. + val ALLOWED_VALUES = + Enabled.configValueStrings ++ + Seq(DISABLED) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/hooks/AutoCompactUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/AutoCompactUtils.scala new file mode 100644 index 00000000000..c3ed3f84397 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/AutoCompactUtils.scala @@ -0,0 +1,399 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.hooks + +import scala.collection.mutable + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.{DeltaLog, OptimisticTransactionImpl, Snapshot} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.sources.DeltaSQLConf._ +import org.apache.spark.sql.delta.stats.AutoCompactPartitionStats + +import org.apache.spark.internal.config.ConfigEntry +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.{And, Cast, EqualNullSafe, Expression, Literal, Or} +import org.apache.spark.sql.functions.collect_list + +/** + * The request class that contains all information needed for Auto Compaction. + * @param shouldCompact True if Auto Compact should start. + * @param optimizeContext The context that control execution of optimize command. + * @param targetPartitionsPredicate The predicate of the target partitions of this Auto Compact + * request. + */ +case class AutoCompactRequest( + shouldCompact: Boolean, + allowedPartitions: AutoCompactUtils.PartitionKeySet, + targetPartitionsPredicate: Seq[Expression] = Nil) { +} + +object AutoCompactRequest { + /** Return a default AutoCompactRequest object that doesn't trigger Auto Compact. */ + def noopRequest: AutoCompactRequest = + AutoCompactRequest( + shouldCompact = false, + allowedPartitions = Set.empty + ) +} + +object AutoCompactUtils extends DeltaLogging { + type PartitionKey = Map[String, String] + type PartitionKeySet = Set[PartitionKey] + + val STATUS_NAME = { + "status" + } + + /** Create partition predicate from a partition key. */ + private def createPartitionPredicate( + postCommitSnapshot: Snapshot, + partitions: PartitionKeySet): Seq[Expression] = { + val schema = postCommitSnapshot.metadata.physicalPartitionSchema + val partitionBranches = partitions.filterNot(_.isEmpty).map { partition => + partition.toSeq + .map { case (key, value) => + val field = schema(key) + EqualNullSafe(UnresolvedAttribute.quoted(key), Cast(Literal(value), field.dataType)) + } + .reduceLeft[Expression]((l, r) => And(l, r)) + } + if (partitionBranches.size > 1) { + Seq(partitionBranches.reduceLeft[Expression]((l, r) => Or(l, r))) + } else if (partitionBranches.size == 1) { + partitionBranches.toList + } else { + Seq.empty + } + } + + /** True if Auto Compaction only runs on modified partitions. */ + def isModifiedPartitionsOnlyAutoCompactEnabled(spark: SparkSession): Boolean = + spark.sessionState.conf.getConf(DELTA_AUTO_COMPACT_MODIFIED_PARTITIONS_ONLY_ENABLED) + + def reservePartitionEnabled(spark: SparkSession): Boolean = + spark.sessionState.conf.getConf(DELTA_AUTO_COMPACT_RESERVE_PARTITIONS_ENABLED) + + /** + * Get the minimum number of files to trigger Auto Compact. + */ + def minNumFilesForAutoCompact(spark: SparkSession): Int = { + spark.sessionState.conf.getConf(DELTA_AUTO_COMPACT_MIN_NUM_FILES) + } + + + /** + * Try to reserve partitions inside `partitionsAddedToOpt` for Auto Compaction. + * @return (shouldCompact, finalPartitions) The value of needCompaction is True if Auto + * Compaction needs to run. `finalPartitions` is the set of target partitions that were + * reserved for compaction. If finalPartitions is empty, then all partitions need to be + * considered. 
+ */ + private def reserveTablePartitions( + spark: SparkSession, + deltaLog: DeltaLog, + postCommitSnapshot: Snapshot, + partitionsAddedToOpt: Option[PartitionKeySet], + opType: String, + maxDeletedRowsRatio: Option[Double]): (Boolean, PartitionKeySet) = { + import AutoCompactPartitionReserve._ + if (partitionsAddedToOpt.isEmpty) { + recordDeltaEvent(deltaLog, opType, data = Map(STATUS_NAME -> "skipEmptyIngestion")) + // If partitionsAddedToOpt is empty, then just skip compact since it means there is no file + // added in parent transaction and we do not want to hook AC on empty commits. + return (false, Set.empty[PartitionKey]) + } + + // Reserve partitions as following: + // 1) First check if any partitions are free, i.e. no concurrent auto-compact thread is running. + // 2) From free partitions check if any are eligible based on the number of small files. + // 3) From free partitions check if any are eligible based on the deletion vectors. + // 4) Try and reserve the union of the two lists. + // All concurrent accesses to partitions reservation and partition stats are managed by the + // [[AutoCompactPartitionReserve]] and [[AutoCompactPartitionStats]] singletons. + val shouldReservePartitions = + isModifiedPartitionsOnlyAutoCompactEnabled(spark) && reservePartitionEnabled(spark) + val freePartitions = + if (shouldReservePartitions) { + filterFreePartitions(deltaLog.tableId, partitionsAddedToOpt.get) + } else { + partitionsAddedToOpt.get + } + + // Early abort if all partitions are reserved. + if (freePartitions.isEmpty) { + recordDeltaEvent(deltaLog, opType, + data = Map(STATUS_NAME -> "skipAllPartitionsAlreadyReserved")) + return (false, Set.empty[PartitionKey]) + } + + // Check min number of files criteria. + val ChosenPartitionsResult(shouldCompactBasedOnNumFiles, + chosenPartitionsBasedOnNumFiles, minNumFilesLogMsg) = + choosePartitionsBasedOnMinNumSmallFiles( + spark, + deltaLog, + postCommitSnapshot, + freePartitions + ) + if (shouldCompactBasedOnNumFiles && chosenPartitionsBasedOnNumFiles.isEmpty) { + // Run on all partitions, no need to check other criteria. + // Note: this outcome of [choosePartitionsBasedOnMinNumSmallFiles] + // is also only possible if partitions reservation is turned off, + // so we do not need to reserve partitions. + recordDeltaEvent(deltaLog, opType, data = Map(STATUS_NAME -> "runOnAllPartitions")) + return (shouldCompactBasedOnNumFiles, chosenPartitionsBasedOnNumFiles) + } + + // Check files with DVs criteria. + val (shouldCompactBasedOnDVs, chosenPartitionsBasedOnDVs) = + choosePartitionsBasedOnDVs(freePartitions, postCommitSnapshot, maxDeletedRowsRatio) + + var finalPartitions = chosenPartitionsBasedOnNumFiles ++ chosenPartitionsBasedOnDVs + if (isModifiedPartitionsOnlyAutoCompactEnabled(spark)) { + val maxNumPartitions = spark.conf.get(DELTA_AUTO_COMPACT_MAX_NUM_MODIFIED_PARTITIONS) + finalPartitions = if (finalPartitions.size > maxNumPartitions) { + // Choose maxNumPartitions at random. + scala.util.Random.shuffle(finalPartitions.toIndexedSeq).take(maxNumPartitions).toSet + } else { + finalPartitions + } + } + + val numChosenPartitions = finalPartitions.size + if (shouldReservePartitions) { + finalPartitions = tryReservePartitions(deltaLog.tableId, finalPartitions) + } + // Abort if all chosen partitions were reserved by a concurrent thread. 
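+    // (For example, with purely hypothetical numbers: the commit touched 12 partitions, 5 were
+    // free, 3 qualified on the small-file count and 1 on deletion vectors; the union of 4 was
+    // capped at DELTA_AUTO_COMPACT_MAX_NUM_MODIFIED_PARTITIONS and then reserved. If a concurrent
+    // thread grabbed all 4 in the meantime, numChosenPartitions is 4 but finalPartitions is now
+    // empty, so we return a no-op result here.)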
+ if (numChosenPartitions > 0 && finalPartitions.isEmpty) { + recordDeltaEvent(deltaLog, opType, + data = Map(STATUS_NAME -> "skipAllPartitionsAlreadyReserved")) + return (false, Set.empty[PartitionKey]) + } + + val shouldCompact = shouldCompactBasedOnNumFiles || shouldCompactBasedOnDVs + val statusLogMessage = + if (!shouldCompact) { + "skip" + minNumFilesLogMsg + } else if (shouldCompactBasedOnNumFiles && !shouldCompactBasedOnDVs) { + "run" + minNumFilesLogMsg + } else if (shouldCompactBasedOnNumFiles && shouldCompactBasedOnDVs) { + "run" + minNumFilesLogMsg + "AndPartitionsWithDVs" + } else if (!shouldCompactBasedOnNumFiles && shouldCompactBasedOnDVs) { + "runOnPartitionsWithDVs" + } + val logData = scala.collection.mutable.Map(STATUS_NAME -> statusLogMessage) + if (finalPartitions.nonEmpty) { + logData += ("partitions" -> finalPartitions.size.toString) + } + recordDeltaEvent(deltaLog, opType, data = logData) + + (shouldCompactBasedOnNumFiles || shouldCompactBasedOnDVs, finalPartitions) + } + + private case class ChosenPartitionsResult( + shouldRunAC: Boolean, + chosenPartitions: PartitionKeySet, + logMessage: String) + + private def choosePartitionsBasedOnMinNumSmallFiles( + spark: SparkSession, + deltaLog: DeltaLog, + postCommitSnapshot: Snapshot, + freePartitionsAddedTo: PartitionKeySet + ) = { + def getConf[T](entry: ConfigEntry[T]): T = spark.sessionState.conf.getConf(entry) + + val minNumFiles = minNumFilesForAutoCompact(spark) + val partitionEarlySkippingEnabled = + getConf(DELTA_AUTO_COMPACT_EARLY_SKIP_PARTITION_TABLE_ENABLED) + val tablePartitionStats = AutoCompactPartitionStats.instance(spark) + if (isModifiedPartitionsOnlyAutoCompactEnabled(spark)) { + // If modified partition only Auto Compact is enabled, pick the partitions that have more + // number of files than minNumFiles. + // If table partition early skipping feature is enabled, use the current minimum number of + // files threshold; otherwise, use 0 to indicate that any partition is qualified. + val minNumFilesPerPartition = if (partitionEarlySkippingEnabled) minNumFiles else 0L + val pickedPartitions = tablePartitionStats.filterPartitionsWithSmallFiles( + deltaLog.tableId, + freePartitionsAddedTo, + minNumFilesPerPartition) + if (pickedPartitions.isEmpty) { + ChosenPartitionsResult(shouldRunAC = false, + chosenPartitions = pickedPartitions, + logMessage = "InsufficientFilesInModifiedPartitions") + } else { + ChosenPartitionsResult(shouldRunAC = true, + chosenPartitions = pickedPartitions, + logMessage = "OnModifiedPartitions") + } + } else if (partitionEarlySkippingEnabled) { + // If only early skipping is enabled, then check whether there is any partition with more + // files than minNumFiles. + val maxNumFiles = tablePartitionStats.maxNumFilesInTable(deltaLog.tableId) + val shouldCompact = maxNumFiles >= minNumFiles + if (shouldCompact) { + ChosenPartitionsResult(shouldRunAC = true, + chosenPartitions = Set.empty[PartitionKey], + logMessage = "OnAllPartitions") + } else { + ChosenPartitionsResult(shouldRunAC = false, + chosenPartitions = Set.empty[PartitionKey], + logMessage = "InsufficientInAllPartitions") + } + } else { + // If both are disabled, then Auto Compaction should search all partitions of the target + // table. 
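+      // In summary:
+      //   modified-partitions-only | early skipping | Auto Compact scope
+      //   enabled                  | enabled        | modified partitions with >= minNumFiles small files
+      //   enabled                  | disabled       | every modified partition passed in
+      //   disabled                 | enabled        | whole table, if any partition reaches minNumFiles
+      //   disabled                 | disabled       | whole table, unconditionally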
+ ChosenPartitionsResult(shouldRunAC = true, + chosenPartitions = Set.empty[PartitionKey], + logMessage = "OnAllPartitions") + } + } + + private def choosePartitionsBasedOnDVs( + freePartitionsAddedTo: PartitionKeySet, + postCommitSnapshot: Snapshot, + maxDeletedRowsRatio: Option[Double]) = { + var partitionsWithDVs = if (maxDeletedRowsRatio.nonEmpty) { + postCommitSnapshot.allFiles + .where("deletionVector IS NOT NULL") + .where( + s""" + |(deletionVector.cardinality / stats:`numRecords`) > ${maxDeletedRowsRatio.get} + |""".stripMargin) + // Cast map to string so we can group by it. + // The string representation might not be deterministic. + // Still, there is only a limited number of representations we could get for a given map, + // Which should sufficiently reduce the data collected on the driver. + // We then make sure the partitions are distinct on the driver. + .selectExpr("CAST(partitionValues AS STRING) as partitionValuesStr", "partitionValues") + .groupBy("partitionValuesStr") + .agg(collect_list("partitionValues").as("partitionValues")) + .selectExpr("partitionValues[0] as partitionValues") + .collect() + .map(_.getAs[Map[String, String]]("partitionValues")).toSet + } else { + Set.empty[PartitionKey] + } + partitionsWithDVs = partitionsWithDVs.intersect(freePartitionsAddedTo) + (partitionsWithDVs.nonEmpty, partitionsWithDVs) + } + + /** + * Prepare an [[AutoCompactRequest]] object based on the statistics of partitions inside + * `partitionsAddedToOpt`. + * + * @param partitionsAddedToOpt The partitions that contain AddFile objects created by parent + * transaction. + * @param maxDeletedRowsRatio If set, signals to Auto Compaction to rewrite files with + * DVs with maxDeletedRowsRatio above this threshold. + */ + def prepareAutoCompactRequest( + spark: SparkSession, + txn: OptimisticTransactionImpl, + postCommitSnapshot: Snapshot, + partitionsAddedToOpt: Option[PartitionKeySet], + opType: String, + maxDeletedRowsRatio: Option[Double]): AutoCompactRequest = { + val (needAutoCompact, reservedPartitions) = reserveTablePartitions( + spark, + txn.deltaLog, + postCommitSnapshot, + partitionsAddedToOpt, + opType, + maxDeletedRowsRatio) + AutoCompactRequest( + needAutoCompact, + reservedPartitions, + createPartitionPredicate(postCommitSnapshot, reservedPartitions)) + } + + /** + * True if this transaction is qualified for Auto Compaction. + * - When current transaction is not blind append, it is safe to enable Auto Compaction when + * DELTA_AUTO_COMPACT_MODIFIED_PARTITIONS_ONLY_ENABLED is true, or it's an un-partitioned table, + * because then we cannot introduce _additional_ conflicts with concurrent write transactions. + */ + def isQualifiedForAutoCompact( + spark: SparkSession, + txn: OptimisticTransactionImpl): Boolean = { + // If txnExecutionTimeMs is empty, there is no transaction commit. + if (txn.txnExecutionTimeMs.isEmpty) return false + // If modified partitions only mode is not enabled, return true to avoid subsequent checking. + if (!isModifiedPartitionsOnlyAutoCompactEnabled(spark)) return true + + val nonBlindAppendAutoCompactEnabled = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_AUTO_COMPACT_NON_BLIND_APPEND_ENABLED) + !(nonBlindAppendAutoCompactEnabled && txn.isBlindAppend) + } + +} + +/** + * Thread-safe singleton to keep track of partitions reserved for auto-compaction. 
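+ *
+ * Example usage (simplified; `tableId` and `touchedPartitions` are illustrative placeholders):
+ * {{{
+ *   val free = AutoCompactPartitionReserve.filterFreePartitions(tableId, touchedPartitions)
+ *   val reserved = AutoCompactPartitionReserve.tryReservePartitions(tableId, free)
+ *   try {
+ *     // compact the reserved partitions
+ *   } finally {
+ *     AutoCompactPartitionReserve.releasePartitions(tableId, reserved)
+ *   }
+ * }}}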
+ */ +object AutoCompactPartitionReserve { + + import org.apache.spark.sql.delta.hooks.AutoCompactUtils.PartitionKey + + // Key is table id and the value the set of currently reserved partition hashes. + private val reservedTablesPartitions = new mutable.LinkedHashMap[String, Set[Int]] + + /** + * @return Partitions from targetPartitions that are not reserved. + */ + def filterFreePartitions(tableId: String, targetPartitions: Set[PartitionKey]) + : Set[PartitionKey] = synchronized { + val reservedPartitionKeys = reservedTablesPartitions.getOrElse(tableId, Set.empty) + targetPartitions.filter(partition => !reservedPartitionKeys.contains(partition.##)) + } + + /** + * Try to reserve partitions from [[targetPartitions]] which are not yet reserved. + * @return partitions from targetPartitions which were not previously reserved. + */ + def tryReservePartitions(tableId: String, targetPartitions: Set[PartitionKey]) + : Set[PartitionKey] = synchronized { + val allReservedPartitions = reservedTablesPartitions.getOrElse(tableId, Set.empty) + val unReservedPartitionsFromTarget = targetPartitions + .filter(targetPartition => !allReservedPartitions.contains(targetPartition.##)) + val newAllReservedPartitions = allReservedPartitions ++ unReservedPartitionsFromTarget.map(_.##) + reservedTablesPartitions.update(tableId, newAllReservedPartitions) + unReservedPartitionsFromTarget + } + + + /** + * Releases the reserved table partitions to allow other threads to reserve them. + * @param tableId The identity of the target table of Auto Compaction. + * @param reservedPartitions The set of partitions, which were reserved and which need releasing. + */ + def releasePartitions( + tableId: String, + reservedPartitions: Set[PartitionKey]): Unit = synchronized { + val allReservedPartitions = reservedTablesPartitions.getOrElse(tableId, Set.empty) + val newPartitions = allReservedPartitions -- reservedPartitions.map(_.##) + reservedTablesPartitions.update(tableId, newPartitions) + } + + /** This is test only code to reset the state of table partition reservations. */ + private[delta] def resetTestOnly(): Unit = synchronized { + reservedTablesPartitions.clear() + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/hooks/CheckpointHook.scala b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/CheckpointHook.scala new file mode 100644 index 00000000000..fe8f3676196 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/CheckpointHook.scala @@ -0,0 +1,44 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.hooks + +import org.apache.spark.sql.delta.{CheckpointInstance, OptimisticTransactionImpl, Snapshot} +import org.apache.spark.sql.delta.actions.Action + +import org.apache.spark.sql.SparkSession + +/** Write a new checkpoint at the version committed by the txn if required. 
*/ +object CheckpointHook extends PostCommitHook { + override val name: String = "Post commit checkpoint trigger" + + override def run( + spark: SparkSession, + txn: OptimisticTransactionImpl, + committedVersion: Long, + postCommitSnapshot: Snapshot, + committedActions: Seq[Action]): Unit = { + if (!txn.needsCheckpoint) return + + txn.deltaLog.ensureLogDirectoryExist() + + // Since the postCommitSnapshot isn't guaranteed to match committedVersion, we have to + // explicitly checkpoint the snapshot at the committedVersion. + val cp = postCommitSnapshot.checkpointProvider + txn.deltaLog.checkpoint(txn.deltaLog.getSnapshotAt(committedVersion, cp) + ) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/hooks/GenerateSymlinkManifest.scala b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/GenerateSymlinkManifest.scala new file mode 100644 index 00000000000..b8236b28366 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/GenerateSymlinkManifest.scala @@ -0,0 +1,421 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.hooks + +// scalastyle:off import.ordering.noEmptyLine +import java.net.URI + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.commands.DeletionVectorUtils.isTableDVFree +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.storage.LogStore +import org.apache.spark.sql.delta.util.DeltaFileOperations +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkEnv +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, Expression, Literal, ScalaUDF} +import org.apache.spark.sql.execution.datasources.InMemoryFileIndex +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.types.StringType +import org.apache.spark.util.SerializableConfiguration + +/** + * Post commit hook to generate hive-style manifests for Delta table. This is useful for + * compatibility with Presto / Athena. 
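+ *
+ * Manifests are written under `<tablePath>/_symlink_format_manifest/`, one `manifest` text file
+ * per partition directory; each file lists the absolute paths of the data files that currently
+ * belong to that partition. For example, for a table partitioned by col1 and col2:
+ * {{{
+ *   <tablePath>/_symlink_format_manifest/col1=0/col2=0/manifest
+ *   <tablePath>/_symlink_format_manifest/col1=1/col2=1/manifest
+ * }}}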
+ */ +object GenerateSymlinkManifest extends GenerateSymlinkManifestImpl + +// A separate singleton to avoid creating encoders from scratch every time +object GenerateSymlinkManifestUtils extends DeltaLogging { + private[hooks] lazy val mapEncoder = try { + ExpressionEncoder[Map[String, String]]() + } catch { + case e: Throwable => + logError(e.getMessage, e) + throw e + } +} + +trait GenerateSymlinkManifestImpl extends PostCommitHook with DeltaLogging with Serializable { + val CONFIG_NAME_ROOT = "compatibility.symlinkFormatManifest" + + val MANIFEST_LOCATION = "_symlink_format_manifest" + + val OP_TYPE_ROOT = "delta.compatibility.symlinkFormatManifest" + val FULL_MANIFEST_OP_TYPE = s"$OP_TYPE_ROOT.full" + val INCREMENTAL_MANIFEST_OP_TYPE = s"$OP_TYPE_ROOT.incremental" + + override val name: String = "Generate Symlink Format Manifest" + + override def run( + spark: SparkSession, + txn: OptimisticTransactionImpl, + committedVersion: Long, + postCommitSnapshot: Snapshot, + committedActions: Seq[Action]): Unit = { + generateIncrementalManifest( + spark, txn.deltaLog, txn.snapshot, postCommitSnapshot, committedActions) + } + + override def handleError(error: Throwable, version: Long): Unit = { + error match { + case e: ColumnMappingUnsupportedException => throw e + case e: DeltaCommandUnsupportedWithDeletionVectorsException => throw e + case _ => + throw DeltaErrors.postCommitHookFailedException(this, version, name, error) + } + } + + /** + * Generate manifest files incrementally, that is, only for the table partitions touched by the + * given actions. + */ + protected def generateIncrementalManifest( + spark: SparkSession, + deltaLog: DeltaLog, + txnReadSnapshot: Snapshot, + currentSnapshot: Snapshot, + actions: Seq[Action]): Unit = recordManifestGeneration(deltaLog, full = false) { + + import org.apache.spark.sql.delta.implicits._ + + checkColumnMappingMode(currentSnapshot.metadata) + + val partitionCols = currentSnapshot.metadata.partitionColumns + val manifestRootDirPath = new Path(deltaLog.dataPath, MANIFEST_LOCATION) + val hadoopConf = new SerializableConfiguration(deltaLog.newDeltaHadoopConf()) + val fs = deltaLog.dataPath.getFileSystem(hadoopConf.value) + if (!fs.exists(manifestRootDirPath)) { + generateFullManifest(spark, deltaLog) + return + } + + // Find all the manifest partitions that need to updated or deleted + val (allFilesInUpdatedPartitions, nowEmptyPartitions) = if (partitionCols.nonEmpty) { + // Get the partitions where files were added + val partitionsOfAddedFiles = actions.collect { case a: AddFile => a.partitionValues }.toSet + + // Get the partitions where files were deleted + val removedFileNames = + spark.createDataset(actions.collect { case r: RemoveFile => r.path }).toDF("path") + val partitionValuesOfRemovedFiles = + txnReadSnapshot.allFiles.join(removedFileNames, "path").select("partitionValues").persist() + try { + val partitionsOfRemovedFiles = partitionValuesOfRemovedFiles + .as[Map[String, String]](GenerateSymlinkManifestUtils.mapEncoder).collect().toSet + + // Get the files present in the updated partitions + val partitionsUpdated: Set[Map[String, String]] = + partitionsOfAddedFiles ++ partitionsOfRemovedFiles + val filesInUpdatedPartitions = currentSnapshot.allFiles.filter { a => + partitionsUpdated.contains(a.partitionValues) + } + + // Find the current partitions + val currentPartitionRelativeDirs = + withRelativePartitionDir(spark, partitionCols, currentSnapshot.allFiles) + .select("relativePartitionDir").distinct() + + // Find the partitions that became 
empty and delete their manifests + val partitionRelativeDirsOfRemovedFiles = + withRelativePartitionDir(spark, partitionCols, partitionValuesOfRemovedFiles) + .select("relativePartitionDir").distinct() + + val partitionsThatBecameEmpty = + partitionRelativeDirsOfRemovedFiles.join( + currentPartitionRelativeDirs, Seq("relativePartitionDir"), "leftanti") + .as[String].collect() + + (filesInUpdatedPartitions, partitionsThatBecameEmpty) + } finally { + partitionValuesOfRemovedFiles.unpersist() + } + } else { + (currentSnapshot.allFiles, Array.empty[String]) + } + + val manifestFilePartitionsWritten = writeManifestFiles( + deltaLog.dataPath, + manifestRootDirPath.toString, + allFilesInUpdatedPartitions, + partitionCols, + hadoopConf) + + if (nowEmptyPartitions.nonEmpty) { + deleteManifestFiles(manifestRootDirPath.toString, nowEmptyPartitions, hadoopConf) + } + + // Post stats + val stats = SymlinkManifestStats( + filesWritten = manifestFilePartitionsWritten.size, + filesDeleted = nowEmptyPartitions.length, + partitioned = partitionCols.nonEmpty) + recordDeltaEvent(deltaLog, s"$INCREMENTAL_MANIFEST_OP_TYPE.stats", data = stats) + } + + /** + * Generate manifest files for all the partitions in the table. Note, this will ensure that + * that stale and unnecessary files will be vacuumed. + */ + def generateFullManifest( + spark: SparkSession, + deltaLog: DeltaLog): Unit = { + val snapshot = deltaLog.update(stalenessAcceptable = false) + assertTableIsDVFree(spark, snapshot) + generateFullManifestWithSnapshot(spark, deltaLog, snapshot) + } + + // Separated out to allow overriding with a specific snapshot. + protected def generateFullManifestWithSnapshot( + spark: SparkSession, + deltaLog: DeltaLog, + snapshot: Snapshot): Unit = recordManifestGeneration(deltaLog, full = true) { + val partitionCols = snapshot.metadata.partitionColumns + val manifestRootDirPath = new Path(deltaLog.dataPath, MANIFEST_LOCATION).toString + val hadoopConf = new SerializableConfiguration(deltaLog.newDeltaHadoopConf()) + + checkColumnMappingMode(snapshot.metadata) + + // Update manifest files of the current partitions + val newManifestPartitionRelativePaths = writeManifestFiles( + deltaLog.dataPath, + manifestRootDirPath, + snapshot.allFiles, + partitionCols, + hadoopConf) + + // Get the existing manifest files as relative partition paths, that is, + // [ "col1=0/col2=0", "col1=1/col2=1", "col1=2/col2=2" ] + val fs = deltaLog.dataPath.getFileSystem(hadoopConf.value) + val existingManifestPartitionRelativePaths = { + val manifestRootDirAbsPath = fs.makeQualified(new Path(manifestRootDirPath)) + if (fs.exists(manifestRootDirAbsPath)) { + val index = new InMemoryFileIndex( + spark, + Seq(manifestRootDirAbsPath), + deltaLog.options, + None) + val prefixToStrip = manifestRootDirAbsPath.toUri.getPath + index.inputFiles.map { p => + // Remove root directory "rootDir" path from the manifest file paths like + // "rootDir/col1=0/col2=0/manifest" to get the relative partition dir "col1=0/col2=0". + // Note: It important to compare only the "path" in the URI and not the user info in it. + // In s3a://access-key:secret-key@host/path, the access-key and secret-key may change + // unknowingly to `\` and `%` encoding between the root dir and file names generated + // by listing. 
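+          // For example (hypothetical paths): for a listed manifest file
+          //   s3a://ak:sk@bucket/warehouse/tbl/_symlink_format_manifest/col1=0/col2=0/manifest
+          // with prefixToStrip = "/warehouse/tbl/_symlink_format_manifest", new URI(p).getPath
+          // keeps only the path component, the two stripPrefix calls below leave
+          // "col1=0/col2=0/manifest", and getParent then yields "col1=0/col2=0".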
+ val relativeManifestFilePath = + new URI(p).getPath.stripPrefix(prefixToStrip).stripPrefix(Path.SEPARATOR) + new Path(relativeManifestFilePath).getParent.toString // returns "col1=0/col2=0" + }.filterNot(_.trim.isEmpty).toSet + } else Set.empty[String] + } + // paths returned from inputFiles are URI encoded so we need to convert them back to string. + // So that they can compared with newManifestPartitionRelativePaths in the next step. + + // Delete manifest files for partitions that are not in current and so weren't overwritten + val manifestFilePartitionsToDelete = + existingManifestPartitionRelativePaths.diff(newManifestPartitionRelativePaths) + deleteManifestFiles(manifestRootDirPath, manifestFilePartitionsToDelete, hadoopConf) + + // Post stats + val stats = SymlinkManifestStats( + filesWritten = newManifestPartitionRelativePaths.size, + filesDeleted = manifestFilePartitionsToDelete.size, + partitioned = partitionCols.nonEmpty) + recordDeltaEvent(deltaLog, s"$FULL_MANIFEST_OP_TYPE.stats", data = stats) + } + + protected def assertTableIsDVFree(spark: SparkSession, snapshot: Snapshot): Unit = { + if (!isTableDVFree(snapshot)) { + throw DeltaErrors.generateNotSupportedWithDeletionVectors() + } + } + + /** + * Write the manifest files and return the partition relative paths of the manifests written. + * + * @param deltaLogDataPath path of the table data (e.g., tablePath which has _delta_log in it) + * @param manifestRootDirPath root directory of the manifest files (e.g., tablePath/_manifest/) + * @param fileNamesForManifest relative paths or file names of data files for being written into + * the manifest (e.g., partition=1/xyz.parquet) + * @param partitionCols Table partition columns + * @param hadoopConf Hadoop configuration to use + * @return Set of partition relative paths of the written manifest files (e.g., part1=1/part2=2) + */ + private def writeManifestFiles( + deltaLogDataPath: Path, + manifestRootDirPath: String, + fileNamesForManifest: Dataset[AddFile], + partitionCols: Seq[String], + hadoopConf: SerializableConfiguration): Set[String] = { + + val spark = fileNamesForManifest.sparkSession + import org.apache.spark.sql.delta.implicits._ + + val tableAbsPathForManifest = LogStore(spark) + .resolvePathOnPhysicalStorage(deltaLogDataPath, hadoopConf.value).toString + + /** Write the data file relative paths to manifestDirAbsPath/manifest as absolute paths */ + def writeSingleManifestFile( + manifestDirAbsPath: String, + dataFileRelativePaths: Iterator[String]): Unit = { + + val manifestFilePath = new Path(manifestDirAbsPath, "manifest") + val fs = manifestFilePath.getFileSystem(hadoopConf.value) + fs.mkdirs(manifestFilePath.getParent()) + + val manifestContent = dataFileRelativePaths.map { relativePath => + DeltaFileOperations.absolutePath(tableAbsPathForManifest, relativePath).toString + } + val logStore = LogStore(SparkEnv.get.conf, hadoopConf.value) + logStore.write(manifestFilePath, manifestContent, overwrite = true, hadoopConf.value) + } + + val newManifestPartitionRelativePaths = + if (fileNamesForManifest.isEmpty && partitionCols.isEmpty) { + writeSingleManifestFile(manifestRootDirPath, Iterator()) + Set.empty[String] + } else { + withRelativePartitionDir(spark, partitionCols, fileNamesForManifest) + .select("relativePartitionDir", "path").as[(String, String)] + .groupByKey(_._1).mapGroups { + (relativePartitionDir: String, relativeDataFilePath: Iterator[(String, String)]) => + val manifestPartitionDirAbsPath = { + if (relativePartitionDir == null || 
relativePartitionDir.isEmpty) manifestRootDirPath + else new Path(manifestRootDirPath, relativePartitionDir).toString + } + writeSingleManifestFile(manifestPartitionDirAbsPath, relativeDataFilePath.map(_._2)) + relativePartitionDir + }.collect().toSet + } + + logInfo(s"Generated manifest partitions for $deltaLogDataPath " + + s"[${newManifestPartitionRelativePaths.size}]:\n\t" + + newManifestPartitionRelativePaths.mkString("\n\t")) + + newManifestPartitionRelativePaths + } + + /** + * Delete manifest files in the given paths. + * + * @param manifestRootDirPath root directory of the manifest files (e.g., tablePath/_manifest/) + * @param partitionRelativePathsToDelete partitions to delete manifest files from + * (e.g., part1=1/part2=2/) + * @param hadoopConf Hadoop configuration to use + */ + private def deleteManifestFiles( + manifestRootDirPath: String, + partitionRelativePathsToDelete: Iterable[String], + hadoopConf: SerializableConfiguration): Unit = { + + val fs = new Path(manifestRootDirPath).getFileSystem(hadoopConf.value) + partitionRelativePathsToDelete.foreach { path => + val absPathToDelete = new Path(manifestRootDirPath, path) + fs.delete(absPathToDelete, true) + } + + logInfo(s"Deleted manifest partitions [${partitionRelativePathsToDelete.size}]:\n\t" + + partitionRelativePathsToDelete.mkString("\n\t")) + } + + /** + * Append a column `relativePartitionDir` to the given Dataset which has `partitionValues` as + * one of the columns. `partitionValues` is a map-type column that contains values of the + * given `partitionCols`. + */ + private def withRelativePartitionDir( + spark: SparkSession, + partitionCols: Seq[String], + datasetWithPartitionValues: Dataset[_]) = { + + require(datasetWithPartitionValues.schema.fieldNames.contains("partitionValues")) + val colNamePrefix = "_col_" + + // Flatten out nested partition value columns while renaming them, so that the new columns do + // not conflict with existing columns in DF `pathsWithPartitionValues. + val colToRenamedCols = partitionCols.map { column => column -> s"$colNamePrefix$column" } + + val df = colToRenamedCols.foldLeft(datasetWithPartitionValues.toDF()) { + case(currentDs, (column, renamedColumn)) => + currentDs.withColumn(renamedColumn, col(s"partitionValues.`$column`")) + } + + // Mapping between original column names to use for generating partition path and + // attributes referring to corresponding columns added to DF `pathsWithPartitionValues`. + val colNameToAttribs = + colToRenamedCols.map { case (col, renamed) => col -> UnresolvedAttribute.quoted(renamed) } + + // Build an expression that can generate the path fragment col1=value/col2=value/ from the + // partition columns. Note: The session time zone maybe different from the time zone that was + // used to write the partition structure of the actual data files. This may lead to + // inconsistencies between the partition structure of metadata files and data files. + val relativePartitionDirExpression = generatePartitionPathExpression( + colNameToAttribs, + spark.sessionState.conf.sessionLocalTimeZone) + + df.withColumn("relativePartitionDir", new Column(relativePartitionDirExpression)) + .drop(colToRenamedCols.map(_._2): _*) + } + + /** Expression that given partition columns builds a path string like: col1=val/col2=val/... 
*/ + protected def generatePartitionPathExpression( + partitionColNameToAttrib: Seq[(String, Attribute)], + timeZoneId: String): Expression = Concat( + + partitionColNameToAttrib.zipWithIndex.flatMap { case ((colName, col), i) => + val partitionName = ScalaUDF( + ExternalCatalogUtils.getPartitionPathString _, + StringType, + Seq(Literal(colName), Cast(col, StringType, Option(timeZoneId)))) + if (i == 0) Seq(partitionName) else Seq(Literal(Path.SEPARATOR), partitionName) + } + ) + + + private def recordManifestGeneration(deltaLog: DeltaLog, full: Boolean)(thunk: => Unit): Unit = { + val (opType, manifestType) = + if (full) FULL_MANIFEST_OP_TYPE -> "full" + else INCREMENTAL_MANIFEST_OP_TYPE -> "incremental" + recordDeltaOperation(deltaLog, opType) { + withStatusCode("DELTA", s"Updating $manifestType Hive manifest for the Delta table") { + thunk + } + } + } + + /** + * Generating manifests, when column mapping used is not supported, + * because external systems will not be able to read Delta tables that leverage + * column mapping correctly. + */ + private def checkColumnMappingMode(metadata: Metadata): Unit = { + if (metadata.columnMappingMode != NoMapping) { + throw DeltaErrors.generateManifestWithColumnMappingNotSupported + } + } + + case class SymlinkManifestStats( + filesWritten: Int, + filesDeleted: Int, + partitioned: Boolean) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/hooks/IcebergConverterHook.scala b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/IcebergConverterHook.scala new file mode 100644 index 00000000000..a2dbc5409e5 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/IcebergConverterHook.scala @@ -0,0 +1,55 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.hooks + +import org.apache.spark.sql.delta.{OptimisticTransactionImpl, Snapshot, UniversalFormat} +import org.apache.spark.sql.delta.actions.{Action, Metadata} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf.DELTA_UNIFORM_ICEBERG_SYNC_CONVERT_ENABLED + +import org.apache.spark.sql.SparkSession + +/** Write a new Iceberg metadata file at the version committed by the txn, if required. */ +object IcebergConverterHook extends PostCommitHook with DeltaLogging { + override val name: String = "Post-commit Iceberg metadata conversion" + + val ASYNC_ICEBERG_CONVERTER_THREAD_NAME = "async-iceberg-converter" + + override def run( + spark: SparkSession, + txn: OptimisticTransactionImpl, + committedVersion: Long, + postCommitSnapshot: Snapshot, + committedActions: Seq[Action]): Unit = { + // Only convert to Iceberg if the snapshot matches the version committed. + // This is to skip converting the same actions multiple times - they'll be written out + // by another commit anyways. 
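+    // (For example: this transaction commits version 12, but a racing writer produces version 13
+    // before the post-commit snapshot is computed; postCommitSnapshot.version is then 13, not 12,
+    // so we return here and let the hook run for version 13 perform the conversion.)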
+ if (committedVersion != postCommitSnapshot.version || + !UniversalFormat.icebergEnabled(postCommitSnapshot.metadata)) { + return + } + + + val converter = postCommitSnapshot.deltaLog.icebergConverter + if (spark.sessionState.conf.getConf(DELTA_UNIFORM_ICEBERG_SYNC_CONVERT_ENABLED) || + !UniversalFormat.icebergEnabled(txn.snapshot.metadata)) { // UniForm was not enabled + converter.convertSnapshot(postCommitSnapshot, txn) + } else { + converter.enqueueSnapshotForConversion(postCommitSnapshot, txn) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/hooks/PostCommitHook.scala b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/PostCommitHook.scala new file mode 100644 index 00000000000..36252f24da6 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/PostCommitHook.scala @@ -0,0 +1,55 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.hooks + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.Action + +import org.apache.spark.sql.SparkSession + +/** + * A hook which can be executed after a transaction. These hooks are registered to a + * [[OptimisticTransaction]], and are executed after a *successful* commit takes place. + */ +trait PostCommitHook { + + /** A user friendly name for the hook for error reporting purposes. */ + val name: String + + /** + * Executes the hook. + * @param txn The txn that made the commit, after which this PostCommitHook was run + * @param committedVersion The version that was committed by the txn + * @param postCommitSnapshot the snapshot of the table after the txn successfully committed. + * NOTE: This may not match the committedVersion, if racing + * commits were written while the snapshot was computed. + * @param committedActions the actions that were committed in the txn. *May* be empty + * if the list of actions was too large. + */ + def run( + spark: SparkSession, + txn: OptimisticTransactionImpl, + committedVersion: Long, + postCommitSnapshot: Snapshot, + committedActions: Seq[Action]): Unit + + /** + * Handle any error caused while running the hook. By default, all errors are ignored as + * default policy should be to not let post-commit hooks to cause failures in the operation. + */ + def handleError(error: Throwable, version: Long): Unit = {} +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/hooks/UpdateCatalog.scala b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/UpdateCatalog.scala new file mode 100644 index 00000000000..d4e5c6162d7 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/hooks/UpdateCatalog.scala @@ -0,0 +1,370 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.hooks + +import java.nio.charset.Charset +import java.util.concurrent.atomic.AtomicInteger + +import scala.collection.JavaConverters._ +import scala.concurrent.{ExecutionContext, Future, TimeoutException} +import scala.util.Try +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.{DeltaConfigs, DeltaTableIdentifier, OptimisticTransactionImpl, Snapshot} +import org.apache.spark.sql.delta.actions.{Action, Metadata} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.threads.DeltaThreadPool +import org.apache.commons.lang3.exception.ExceptionUtils + +import org.apache.spark.internal.config.ConfigEntry +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.util.ThreadUtils + +/** + * Factory object to create an UpdateCatalog post commit hook. This should always be used + * instead of directly creating a specific hook. + */ +object UpdateCatalogFactory { + def getUpdateCatalogHook(table: CatalogTable, spark: SparkSession): UpdateCatalogBase = { + UpdateCatalog(table) + } +} + +/** + * Base trait for post commit hooks that want to update the catalog with the + * latest table schema and properties. + */ +trait UpdateCatalogBase extends PostCommitHook with DeltaLogging { + + protected val table: CatalogTable + + override def run( + spark: SparkSession, + txn: OptimisticTransactionImpl, + committedVersion: Long, + postCommitSnapshot: Snapshot, + actions: Seq[Action]): Unit = { + // There's a potential race condition here, where a newer commit has already triggered + // this to run. That's fine. + executeOnWrite(spark, postCommitSnapshot) + } + + /** + * Used to manually execute an UpdateCatalog hook during a write. + */ + def executeOnWrite( + spark: SparkSession, + snapshot: Snapshot + ): Unit + + + /** + * Update the schema in the catalog based on the provided snapshot. + */ + def updateSchema(spark: SparkSession, snapshot: Snapshot): Unit + + /** + * Update the properties in the catalog based on the provided snapshot. + */ + protected def updateProperties(spark: SparkSession, snapshot: Snapshot): Unit + + /** + * Checks if the table schema has changed in the Snapshot with respect to what's stored in + * the catalog. + */ + protected def schemaHasChanged(snapshot: Snapshot, spark: SparkSession): Boolean + + /** + * Checks if the table properties have changed in the Snapshot with respect to what's stored in + * the catalog. + * + * Visible for testing. 
+ */ + protected[sql] def propertiesHaveChanged( + properties: Map[String, String], + metadata: Metadata, + spark: SparkSession): Boolean + + protected def shouldRun( + spark: SparkSession, + snapshot: Snapshot + ): Boolean = { + if (!spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_UPDATE_CATALOG_ENABLED)) { + return false + } + // Do not execute for path based tables, because they don't exist in the MetaStore + if (isPathBasedDeltaTable(table, spark)) return false + // Only execute if this is a Delta table + if (snapshot.version < 0) return false + true + } + + private def isPathBasedDeltaTable(table: CatalogTable, spark: SparkSession): Boolean = { + return DeltaTableIdentifier.isDeltaPath(spark, table.identifier) + } + + + /** Update the entry in the Catalog to reflect the latest schema and table properties. */ + protected def execute( + spark: SparkSession, + snapshot: Snapshot): Unit = { + recordDeltaOperation(snapshot.deltaLog, "delta.catalog.update") { + val properties = snapshot.getProperties.toMap + val v = table.properties.get(DeltaConfigs.METASTORE_LAST_UPDATE_VERSION) + .flatMap(v => Try(v.toLong).toOption) + .getOrElse(-1L) + val lastCommitTimestamp = table.properties.get(DeltaConfigs.METASTORE_LAST_COMMIT_TIMESTAMP) + .flatMap(v => Try(v.toLong).toOption) + .getOrElse(-1L) + // If the metastore entry is at an older version and not the timestamp of that version, e.g. + // a table can be rm -rf'd and get the same version number with a different timestamp + if (v <= snapshot.version || lastCommitTimestamp < snapshot.timestamp) { + try { + val loggingData = Map( + "identifier" -> table.identifier, + "snapshotVersion" -> snapshot.version, + "snapshotTimestamp" -> snapshot.timestamp, + "catalogVersion" -> v, + "catalogTimestamp" -> lastCommitTimestamp + ) + if (schemaHasChanged(snapshot, spark)) { + updateSchema(spark, snapshot) + recordDeltaEvent( + snapshot.deltaLog, + "delta.catalog.update.schema", + data = loggingData + ) + } else if (propertiesHaveChanged(properties, snapshot.metadata, spark)) { + updateProperties(spark, snapshot) + recordDeltaEvent( + snapshot.deltaLog, + "delta.catalog.update.properties", + data = loggingData + ) + } + } catch { + case NonFatal(e) => + recordDeltaEvent( + snapshot.deltaLog, + "delta.catalog.update.error", + data = Map( + "exceptionMsg" -> ExceptionUtils.getMessage(e), + "stackTrace" -> ExceptionUtils.getStackTrace(e)) + ) + logWarning(s"Failed to update the catalog for ${table.identifier} with the latest " + + s"table information.", e) + } + } + } + } +} + +/** + * A post-commit hook that allows us to cache the most recent schema and table properties of a Delta + * table in an External Catalog. In addition to the schema and table properties, we also store the + * last commit timestamp and version for which we updated the catalog. This prevents us from + * updating the MetaStore with potentially stale information. + */ +case class UpdateCatalog(table: CatalogTable) extends UpdateCatalogBase { + + override val name: String = "Update Catalog" + + override def executeOnWrite( + spark: SparkSession, + snapshot: Snapshot + ): Unit = { + executeAsync(spark, snapshot) + } + + + override protected def schemaHasChanged(snapshot: Snapshot, spark: SparkSession): Boolean = { + // We need to check whether the schema in the catalog matches the current schema. 
If a + // field in the schema is very long, we cannot store the schema in the catalog, therefore + // here we have to compare what's in the catalog with what we actually can store in the + // catalog + val schemaChanged = UpdateCatalog.truncateSchemaIfNecessary(snapshot.schema) != table.schema + // The table may have been dropped as we're just about to update the information. There is + // unfortunately no great way to avoid a race condition, but we do one last check here as + // updates may have been queued for some time. + schemaChanged && spark.sessionState.catalog.tableExists(table.identifier) + } + + /** + * Checks if the table properties have changed in the Snapshot with respect to what's stored in + * the catalog. We check to see if our table properties are a subset of what is in the MetaStore + * to avoid flip-flopping the information between older and newer versions of Delta. The + * assumption here is that newer Delta releases will only add newer table properties and not + * remove them. + */ + override protected[sql] def propertiesHaveChanged( + properties: Map[String, String], + metadata: Metadata, + spark: SparkSession): Boolean = { + val propertiesChanged = !properties.forall { case (k, v) => + table.properties.get(k) == Some(v) + } + // The table may have been dropped as we're just about to update the information. There is + // unfortunately no great way to avoid a race condition, but we do one last check here as + // updates may have been queued for some time. + propertiesChanged && spark.sessionState.catalog.tableExists(table.identifier) + } + + override def updateSchema(spark: SparkSession, snapshot: Snapshot): Unit = { + UpdateCatalog.replaceTable(spark, snapshot, table) + } + + override protected def updateProperties(spark: SparkSession, snapshot: Snapshot): Unit = { + spark.sessionState.catalog.alterTable( + table.copy(properties = UpdateCatalog.updatedProperties(snapshot))) + } + + /** + * Update the entry in the Catalog to reflect the latest schema and table properties + * asynchronously. + */ + private def executeAsync( + spark: SparkSession, + snapshot: Snapshot): Unit = { + if (!shouldRun(spark, snapshot)) return + Future[Unit] { + UpdateCatalog.activeAsyncRequests.incrementAndGet() + execute(spark, snapshot) + }(UpdateCatalog.getOrCreateExecutionContext(spark.sessionState.conf)).onComplete { _ => + UpdateCatalog.activeAsyncRequests.decrementAndGet() + }(UpdateCatalog.getOrCreateExecutionContext(spark.sessionState.conf)) + } +} + +object UpdateCatalog { + private var tp: ExecutionContext = _ + + // This is the encoding of the database for the Hive MetaStore + private val latin1 = Charset.forName("ISO-8859-1") + + // Maximum number of characters that a catalog can store. + val MAX_CATALOG_TYPE_DDL_LENGTH = 4000 + val ERROR_KEY = "delta.catalogUpdateError" + val LONG_SCHEMA_ERROR: String = "The schema contains a very long nested field and cannot be " + + "stored in the catalog." + val HIVE_METASTORE_NAME = "hive_metastore" + + private def getOrCreateExecutionContext(conf: SQLConf): ExecutionContext = synchronized { + if (tp == null) { + tp = ExecutionContext.fromExecutorService(DeltaThreadPool.newDaemonCachedThreadPool( + "delta-catalog-update", + conf.getConf(DeltaSQLConf.DELTA_UPDATE_CATALOG_THREAD_POOL_SIZE) + ) + ) + } + tp + } + + /** Keeps track of active or queued async requests. */ + private val activeAsyncRequests = new AtomicInteger(0) + + /** + * Waits for all active and queued updates to finish until the given timeout. 
Will return true + * if all async threads have completed execution. Will return false if not. Exposed for tests. + */ + def awaitCompletion(timeoutMillis: Long): Boolean = { + try { + ThreadUtils.runInNewThread("UpdateCatalog-awaitCompletion") { + val startTime = System.currentTimeMillis() + while (activeAsyncRequests.get() > 0) { + Thread.sleep(100) + val currentTime = System.currentTimeMillis() + if (currentTime - startTime > timeoutMillis) { + throw new TimeoutException( + s"Timed out waiting for catalog updates to complete after $currentTime ms") + } + } + } + true + } catch { + case _: TimeoutException => + false + } + } + + /** Replace the table definition in the MetaStore. */ + private def replaceTable(spark: SparkSession, snapshot: Snapshot, table: CatalogTable): Unit = { + val catalog = spark.sessionState.catalog + val qualifiedIdentifier = + catalog.qualifyIdentifier(TableIdentifier(table.identifier.table, Some(table.database))) + val db = qualifiedIdentifier.database.get + val tblName = qualifiedIdentifier.table + val schema = truncateSchemaIfNecessary(snapshot.schema) + val additionalProperties = if (schema.isEmpty) { + Map(ERROR_KEY -> LONG_SCHEMA_ERROR) + } else { + Map.empty + } + + // We call the lower level API so that we can actually drop columns. We also assume that + // all columns are data columns so that we don't have to deal with partition columns + // having to be at the end of the schema, which Hive follows. + val catalogName = table.identifier.catalog.getOrElse( + spark.sessionState.catalogManager.currentCatalog.name()) + if ( + (catalogName == UpdateCatalog.HIVE_METASTORE_NAME + || catalogName == SESSION_CATALOG_NAME) && + catalog.externalCatalog.tableExists(db, tblName)) { + catalog.externalCatalog.alterTableDataSchema(db, tblName, schema) + } + + // We have to update the properties anyway with the latest version/timestamp information + catalog.alterTable(table.copy(properties = updatedProperties(snapshot) ++ additionalProperties)) + } + + /** Updates our properties map with the version and timestamp information of the snapshot. */ + def updatedProperties(snapshot: Snapshot): Map[String, String] = { + var newProperties = + snapshot.getProperties.toMap ++ Map( + DeltaConfigs.METASTORE_LAST_UPDATE_VERSION -> snapshot.version.toString, + DeltaConfigs.METASTORE_LAST_COMMIT_TIMESTAMP -> snapshot.timestamp.toString) + newProperties + } + + /** + * If a field in the schema has a very long string representation, then the schema will be + * truncated to an empty schema to avoid corruption. + * Also, if the schema contains non-latin encoding characters, the schema will be garbled. In + * this case we also truncate the schema. + */ + def truncateSchemaIfNecessary(schema: StructType): StructType = { + // Encoders are not threadsafe + val encoder = latin1.newEncoder() + def isColumnValid(f: StructField): Boolean = { + val typeString = f.dataType.catalogString + encoder.canEncode(f.name) && + typeString.length <= MAX_CATALOG_TYPE_DDL_LENGTH && + encoder.canEncode(typeString) + } + + if (schema.exists(f => !isColumnValid(f))) { + new StructType() + } else { + schema + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/implicits/RichSparkClasses.scala b/spark/src/main/scala/org/apache/spark/sql/delta/implicits/RichSparkClasses.scala new file mode 100644 index 00000000000..2c0dacb5d2b --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/implicits/RichSparkClasses.scala @@ -0,0 +1,120 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.implicits + +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.QueryPlan +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.{RuleId, UnknownRuleId} +import org.apache.spark.sql.catalyst.trees.{AlwaysProcess, TreePatternBits} +import org.apache.spark.sql.delta.util.DeltaEncoders +import org.apache.spark.sql.types.{ArrayType, MapType, StructField, StructType} + +trait RichSparkClasses { + + /** + * This implicit class is used to provide helpful methods used throughout the code that are not + * provided by Spark-Catalyst's StructType. + */ + implicit class RichStructType(structType: StructType) { + + /** + * Returns a field in this struct and its child structs, case insensitively. + * + * If includeCollections is true, this will return fields that are nested in maps and arrays. + * + * @param fieldNames The path to the field, in order from the root. For example, the column + * nested.a.b.c would be Seq("nested", "a", "b", "c"). + */ + def findNestedFieldIgnoreCase( + fieldNames: Seq[String], + includeCollections: Boolean = false): Option[StructField] = { + val fieldOption = fieldNames.headOption.flatMap { + fieldName => structType.find(_.name.equalsIgnoreCase(fieldName)) + } + fieldOption match { + case Some(field) => + (fieldNames.tail, field.dataType, includeCollections) match { + case (Seq(), _, _) => + Some(field) + + case (names, struct: StructType, _) => + struct.findNestedFieldIgnoreCase(names, includeCollections) + + case (_, _, false) => + None // types nested in maps and arrays are not used + + case (Seq("key"), MapType(keyType, _, _), true) => + // return the key type as a struct field to include nullability + Some(StructField("key", keyType, nullable = false)) + + case (Seq("key", names @ _*), MapType(struct: StructType, _, _), true) => + struct.findNestedFieldIgnoreCase(names, includeCollections) + + case (Seq("value"), MapType(_, valueType, isNullable), true) => + // return the value type as a struct field to include nullability + Some(StructField("value", valueType, nullable = isNullable)) + + case (Seq("value", names @ _*), MapType(_, struct: StructType, _), true) => + struct.findNestedFieldIgnoreCase(names, includeCollections) + + case (Seq("element"), ArrayType(elementType, isNullable), true) => + // return the element type as a struct field to include nullability + Some(StructField("element", elementType, nullable = isNullable)) + + case (Seq("element", names @ _*), ArrayType(struct: StructType, _), true) => + struct.findNestedFieldIgnoreCase(names, includeCollections) + + case _ => + None + } + case _ => + None + } + } + } + + /** + * This implicit class is used to provide helpful methods used throughout the code that are not + * provided by Spark-Catalyst's LogicalPlan. 
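+   *
+   * A minimal usage sketch (the guard and rewrite below are hypothetical):
+   * {{{
+   *   plan.transformAllExpressionsUp {
+   *     case expr if shouldRewrite(expr) => rewrite(expr)
+   *   }
+   * }}}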
+ */ + implicit class RichLogicalPlan(plan: LogicalPlan) { + /** + * Returns the result of running QueryPlan.transformExpressionsUpWithPruning on this node + * and all its children. + */ + def transformAllExpressionsUpWithPruning( + cond: TreePatternBits => Boolean, + ruleId: RuleId = UnknownRuleId)( + rule: PartialFunction[Expression, Expression] + ): LogicalPlan = { + plan.transformUpWithPruning(cond, ruleId) { + case q: QueryPlan[_] => + q.transformExpressionsUpWithPruning(cond, ruleId)(rule) + } + } + + /** + * Returns the result of running QueryPlan.transformExpressionsUp on this node + * and all its children. + */ + def transformAllExpressionsUp( + rule: PartialFunction[Expression, Expression]): LogicalPlan = { + transformAllExpressionsUpWithPruning(AlwaysProcess.fn, UnknownRuleId)(rule) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/implicits/package.scala b/spark/src/main/scala/org/apache/spark/sql/delta/implicits/package.scala new file mode 100644 index 00000000000..3548c7e766d --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/implicits/package.scala @@ -0,0 +1,49 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.implicits.RichSparkClasses +import org.apache.spark.sql.delta.util.DeltaEncoders + +import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} + +package object implicits extends DeltaEncoders with RichSparkClasses { + // Define a few implicit classes to provide the `toDF` method. These classes are not using generic + // types to avoid touching Scala reflection. + implicit class RichAddFileSeq(files: Seq[AddFile]) { + def toDF(spark: SparkSession): DataFrame = spark.implicits.localSeqToDatasetHolder(files).toDF() + + def toDS(spark: SparkSession): Dataset[AddFile] = + spark.implicits.localSeqToDatasetHolder(files).toDS() + } + + implicit class RichStringSeq(strings: Seq[String]) { + def toDF(spark: SparkSession): DataFrame = + spark.implicits.localSeqToDatasetHolder(strings).toDF() + + def toDF(spark: SparkSession, colNames: String*): DataFrame = + spark.implicits.localSeqToDatasetHolder(strings).toDF(colNames: _*) + } + + implicit class RichIntSeq(ints: Seq[Int]) { + def toDF(spark: SparkSession): DataFrame = spark.implicits.localSeqToDatasetHolder(ints).toDF() + + def toDF(spark: SparkSession, colNames: String*): DataFrame = + spark.implicits.localSeqToDatasetHolder(ints).toDF(colNames: _*) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/isolationLevels.scala b/spark/src/main/scala/org/apache/spark/sql/delta/isolationLevels.scala new file mode 100644 index 00000000000..cf2bce4a970 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/isolationLevels.scala @@ -0,0 +1,91 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +/** + * Trait that defines the level consistency guarantee is going to be provided by + * `OptimisticTransaction.commit()`. [[Serializable]] is the most + * strict level and [[SnapshotIsolation]] is the least strict one. + * + * @see [[IsolationLevel.allLevelsInDescOrder]] for all the levels in the descending order + * of strictness and [[IsolationLevel.DEFAULT]] for the default table isolation level. + */ +sealed trait IsolationLevel { + override def toString: String = this.getClass.getSimpleName.stripSuffix("$") +} + +/** + * This isolation level will ensure serializability between all read and write operations. + * Specifically, for write operations, this mode will ensure that the result of + * the table will be perfectly consistent with the visible history of operations, that is, + * as if all the operations were executed sequentially one by one. + */ +case object Serializable extends IsolationLevel + +/** + * This isolation level will ensure snapshot isolation consistency guarantee between write + * operations only. In other words, if only the write operations are considered, then + * there exists a serializable sequence between them that would produce the same result + * as seen in the table. However, if both read and write operations are considered, then + * there may not exist a serializable sequence that would explain all the observed reads. + * + * This provides a lower consistency guarantee than [[Serializable]] but a higher + * availability than that. For example, unlike [[Serializable]], this level allows an UPDATE + * operation to be committed even if there was a concurrent INSERT operation that has already + * added data that should have been read by the UPDATE. It will be as if the UPDATE was executed + * before the INSERT even if the former was committed after the latter. As a side effect, + * the visible history of operations may not be consistent with the + * result expected if these operations were executed sequentially one by one. + */ +case object WriteSerializable extends IsolationLevel + +/** + * This isolation level will ensure that all reads will see a consistent + * snapshot of the table and any transactional write will successfully commit only + * if the values updated by the transaction have not been changed externally since + * the snapshot was read by the transaction. + * + * This provides a lower consistency guarantee than [[WriteSerializable]] but a higher + * availability than that. For example, unlike [[WriteSerializable]], this level allows two + * concurrent UPDATE operations reading the same data to be committed successfully as long as + * they don't modify the same data. + * + * Note that for operations that do not modify data in the table, Snapshot isolation is same + * as Serializablity. Hence such operations can be safely committed with Snapshot isolation level. 
+ */ +case object SnapshotIsolation extends IsolationLevel + + +object IsolationLevel { + + val DEFAULT = WriteSerializable + + /** All possible isolation levels in descending order of guarantees provided */ + val allLevelsInDescOrder: Seq[IsolationLevel] = Seq( + Serializable, + WriteSerializable, + SnapshotIsolation) + + /** All the valid isolation levels that can be specified as the table isolation level */ + val validTableIsolationLevels = Set[IsolationLevel](Serializable, WriteSerializable) + + def fromString(s: String): IsolationLevel = { + allLevelsInDescOrder.find(_.toString.equalsIgnoreCase(s)).getOrElse { + throw DeltaErrors.invalidIsolationLevelException(s) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/metering/DeltaLogging.scala b/spark/src/main/scala/org/apache/spark/sql/delta/metering/DeltaLogging.scala new file mode 100644 index 00000000000..40b121768c1 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/metering/DeltaLogging.scala @@ -0,0 +1,162 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.metering + +import scala.util.Try +import scala.util.control.NonFatal + +// scalastyle:off import.ordering.noEmptyLine +import com.databricks.spark.util.{DatabricksLogging, OpType, TagDefinition} +import com.databricks.spark.util.MetricDefinitions.{EVENT_LOGGING_FAILURE, EVENT_TAHOE} +import com.databricks.spark.util.TagDefinitions.{ + TAG_OP_TYPE, + TAG_TAHOE_ID, + TAG_TAHOE_PATH +} +import org.apache.spark.sql.delta.DeltaLog +import org.apache.spark.sql.delta.actions.Metadata +import org.apache.spark.sql.delta.util.DeltaProgressReporter +import org.apache.spark.sql.delta.util.JsonUtils + +import org.apache.hadoop.fs.Path + + +/** + * Convenience wrappers for logging that include delta specific options and + * avoids the need to predeclare all operations. Metrics in Delta should respect the following + * conventions: + * - Tags should identify the context of the event (which shard, user, table, machine, etc). + * - All actions initiated by a user should be wrapped in a recordOperation so we can track usage + * latency and failures. If there is a significant (more than a few seconds) subaction like + * identifying candidate files, consider nested recordOperation. + * - Events should be used to return detailed statistics about usage. Generally these should be + * defined with a case class to ease analysis later. + * - Events can also be used to record that a particular codepath was hit (i.e. a checkpoint + * failure, a conflict, or a specific optimization). + * - Both events and operations should be named hierarchically to allow for analysis at different + * levels. For example, to look at the latency of all DDL operations we could scan for operations + * that match "delta.ddl.%". + * + * Underneath these functions use the standard usage log reporting defined in + * [[com.databricks.spark.util.DatabricksLogging]]. 
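+ *
+ * For example, a code path could be instrumented as follows (the opType names and the
+ * statistics map below are illustrative, not identifiers defined by Delta):
+ * {{{
+ *   recordDeltaOperation(deltaLog, "delta.ddl.example.addColumns") {
+ *     // ... perform the metadata update ...
+ *   }
+ *   recordDeltaEvent(deltaLog, "delta.ddl.example.addColumns.stats",
+ *     data = Map("numColumnsAdded" -> 2))
+ * }}}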
+ */ +trait DeltaLogging + extends DeltaProgressReporter + with DatabricksLogging { + + /** + * Used to record the occurrence of a single event or report detailed, operation specific + * statistics. + * + * @param path Used to log the path of the delta table when `deltaLog` is null. + */ + protected def recordDeltaEvent( + deltaLog: DeltaLog, + opType: String, + tags: Map[TagDefinition, String] = Map.empty, + data: AnyRef = null, + path: Option[Path] = None): Unit = { + try { + val json = if (data != null) JsonUtils.toJson(data) else "" + val tableTags = if (deltaLog != null) { + getCommonTags(deltaLog, Try(deltaLog.unsafeVolatileSnapshot.metadata.id).getOrElse(null)) + } else if (path.isDefined) { + Map(TAG_TAHOE_PATH -> path.get.toString) + } else { + Map.empty[TagDefinition, String] + } + recordProductEvent( + EVENT_TAHOE, + Map((TAG_OP_TYPE: TagDefinition) -> opType) ++ tableTags ++ tags, + blob = json) + } catch { + case NonFatal(e) => + recordEvent( + EVENT_LOGGING_FAILURE, + blob = JsonUtils.toJson( + Map("exception" -> e.getMessage, + "opType" -> opType, + "method" -> "recordDeltaEvent")) + ) + } + } + + /** + * Used to report the duration as well as the success or failure of an operation on a `tahoePath`. + */ + protected def recordDeltaOperationForTablePath[A]( + tablePath: String, + opType: String, + tags: Map[TagDefinition, String] = Map.empty)( + thunk: => A): A = { + recordDeltaOperationInternal(Map(TAG_TAHOE_PATH -> tablePath), opType, tags)(thunk) + } + + /** + * Used to report the duration as well as the success or failure of an operation on a `deltaLog`. + */ + protected def recordDeltaOperation[A]( + deltaLog: DeltaLog, + opType: String, + tags: Map[TagDefinition, String] = Map.empty)( + thunk: => A): A = { + val tableTags: Map[TagDefinition, String] = if (deltaLog != null) { + getCommonTags(deltaLog, Try(deltaLog.unsafeVolatileSnapshot.metadata.id).getOrElse(null)) + } else { + Map.empty + } + recordDeltaOperationInternal(tableTags, opType, tags)(thunk) + } + + private def recordDeltaOperationInternal[A]( + tableTags: Map[TagDefinition, String], + opType: String, + tags: Map[TagDefinition, String])(thunk: => A): A = { + recordOperation( + new OpType(opType, ""), + extraTags = tableTags ++ tags) { + recordFrameProfile("Delta", opType) { + thunk + } + } + } + + protected def recordFrameProfile[T](group: String, name: String)(thunk: => T): T = { + // future work to capture runtime information ... + thunk + } + + private def withDmqTag[T](thunk: => T): T = { + thunk + } + + // Extract common tags from the delta log and snapshot. + def getCommonTags(deltaLog: DeltaLog, tahoeId: String): Map[TagDefinition, String] = { + ( + Map( + TAG_TAHOE_ID -> tahoeId, + TAG_TAHOE_PATH -> Try(deltaLog.dataPath.toString).getOrElse(null) + ) + ) + } +} + +object DeltaLogging { + + // The opType for delta commit stats. + final val DELTA_COMMIT_STATS_OPTYPE = "delta.commit.stats" +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/metering/ScanReport.scala b/spark/src/main/scala/org/apache/spark/sql/delta/metering/ScanReport.scala new file mode 100644 index 00000000000..2707693feaf --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/metering/ScanReport.scala @@ -0,0 +1,107 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.metering + +import org.apache.spark.sql.delta.stats.DataSize +import com.fasterxml.jackson.databind.annotation.JsonDeserialize + +case class ScanReport( + tableId: String, + path: String, + scanType: String, + deltaDataSkippingType: String, + partitionFilters: Seq[String], + dataFilters: Seq[String], + unusedFilters: Seq[String], + size: Map[String, DataSize], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + metrics: Map[String, Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + versionScanned: Option[Long], + annotations: Map[String, Long], + usedPartitionColumns: Seq[String], + numUsedPartitionColumns: Long, + allPartitionColumns: Seq[String], + numAllPartitionColumns: Long, + // Number of output rows from parent filter node if it is available and has the same + // predicates as dataFilters. + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + parentFilterOutputRows: Option[Long]) + +object ScanReport { + // Several of the ScanReport fields are only relevant for certain types of delta scans. + // Provide an alternative constructor for callers that don't need to set those fields. + // scalastyle:off argcount + def apply( + tableId: String, + path: String, + scanType: String, + partitionFilters: Seq[String], + dataFilters: Seq[String], + unusedFilters: Seq[String], + size: Map[String, DataSize], + metrics: Map[String, Long], + versionScanned: Option[Long], + annotations: Map[String, Long], + parentFilterOutputRows: Option[Long] + ): ScanReport = { + // scalastyle:on + ScanReport( + tableId = tableId, + path = path, + scanType = scanType, + deltaDataSkippingType = "", + partitionFilters = partitionFilters, + dataFilters = dataFilters, + unusedFilters = unusedFilters, + size = size, + metrics = metrics, + versionScanned = versionScanned, + annotations = annotations, + usedPartitionColumns = Nil, + numUsedPartitionColumns = 0L, + allPartitionColumns = Nil, + numAllPartitionColumns = 0L, + parentFilterOutputRows = parentFilterOutputRows) + } + + // Similar as above, but without parentFilterOutputRows + def apply( + tableId: String, + path: String, + scanType: String, + partitionFilters: Seq[String], + dataFilters: Seq[String], + unusedFilters: Seq[String], + size: Map[String, DataSize], + metrics: Map[String, Long], + versionScanned: Option[Long], + annotations: Map[String, Long]): ScanReport = { + ScanReport( + tableId = tableId, + path = path, + scanType = scanType, + partitionFilters = partitionFilters, + dataFilters = dataFilters, + unusedFilters = unusedFilters, + size = size, + metrics = metrics, + versionScanned = versionScanned, + annotations = annotations, + parentFilterOutputRows = None) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/metric/IncrementMetric.scala b/spark/src/main/scala/org/apache/spark/sql/delta/metric/IncrementMetric.scala new file mode 100644 index 00000000000..1d3524098b8 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/metric/IncrementMetric.scala @@ -0,0 +1,77 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.metric + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, Nondeterministic, UnaryExpression} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.types.DataType + +/** + * IncrementMetric is used to count the number of rows passing through it. It can be used to + * wrap a child expression to count the number of rows. Its currently only accessible via the Scala + * DSL. + * + * For example, consider the following expression returning a string literal: + * If(SomeCondition, + * IncrementMetric(Literal("ValueIfTrue"), countTrueMetric), + * IncrementMetric(Literal("ValueIfFalse"), countFalseMetric)) + * + * The SQLMetric `countTrueMetric` would be incremented whenever the condition `SomeCondition` is + * true, and conversely `countFalseMetric` would be incremented whenever the condition is false. + * + * The expression does not really compute anything, and merely forwards the value computed by the + * child expression. + * + * It is marked as non deterministic to ensure that it retains strong affinity with the `child` + * expression, so as to accurately update the `metric`. + * + * It takes the following parameters: + * @param child is the actual expression to call. + * @param metric is the SQLMetric to increment. 
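+ *
+ * A minimal usage sketch (the metric name and input column are illustrative):
+ * {{{
+ *   import org.apache.spark.sql.functions.col
+ *   val rowsSeen = SQLMetrics.createMetric(spark.sparkContext, "number of rows seen")
+ *   df.select(new Column(IncrementMetric(col("value").expr, rowsSeen))).collect()
+ *   // rowsSeen has now been incremented once per input row
+ * }}}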
+ */ +@ExpressionDescription( + usage = "_FUNC_(expr, metric) - Returns `expr` as is, while incrementing metric.") +case class IncrementMetric(child: Expression, metric: SQLMetric) + extends UnaryExpression with Nondeterministic { + override def nullable: Boolean = child.nullable + + override def dataType: DataType = child.dataType + + override protected def initializeInternal(partitionIndex: Int): Unit = {} + + override def toString: String = child.toString + + override def prettyName: String = "increment_metric" + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + // codegen for children expressions + val eval = child.genCode(ctx) + val metricRef = ctx.addReferenceObj(metric.name.getOrElse("metric"), metric) + eval.copy(code = code"""$metricRef.add(1L);""" + eval.code) + } + + override def evalInternal(input: InternalRow): Any = { + metric.add(1L) + child.eval(input) + } + + override protected def withNewChildInternal(newChild: Expression): IncrementMetric = + copy(child = newChild) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/optimizablePartitionExpressions.scala b/spark/src/main/scala/org/apache/spark/sql/delta/optimizablePartitionExpressions.scala new file mode 100644 index 00000000000..ea726aa9335 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/optimizablePartitionExpressions.scala @@ -0,0 +1,681 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.OptimizablePartitionExpression._ + +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.{Cast, DateFormatClass, DayOfMonth, Expression, Hour, IsNull, Literal, Month, Or, Substring, TruncDate, TruncTimestamp, UnixTimestamp, Year} +import org.apache.spark.sql.catalyst.util.quoteIfNeeded +import org.apache.spark.sql.types.{DateType, StringType, TimestampType} + +/** + * Defines rules to convert a data filter to a partition filter for a special generation expression + * of a partition column. + * + * Note: + * - This may be shared cross multiple `SparkSession`s, implementations should not store any + * state (such as expressions) referring to a specific `SparkSession`. + * - Partition columns may have different behaviors than data columns. For example, writing an empty + * string to a partition column would become `null` (SPARK-24438). We need to pay attention to + * these slight behavior differences and make sure applying the auto generated partition filters + * would still return the same result as if they were not applied. + */ +sealed trait OptimizablePartitionExpression { + /** + * Assume we have a partition column `part`, and a data column `col`. Return a partition filter + * based on `part` for a data filter `col < lit`. 
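+   *
+   * For example (illustrative), with a generated partition column `part = CAST(col AS DATE)`,
+   * a data filter `col < '2021-06-01 12:00:00'` can be answered by the partition filter
+   * `part <= DATE'2021-06-01'` (plus a null-safety check); see [[DatePartitionExpr]].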
+ */ + def lessThan(lit: Literal): Option[Expression] = None + + /** + * Assume we have a partition column `part`, and a data column `col`. Return a partition filter + * based on `part` for a data filter `col <= lit`. + */ + def lessThanOrEqual(lit: Literal): Option[Expression] = None + + /** + * Assume we have a partition column `part`, and a data column `col`. Return a partition filter + * based on `part` for a data filter `col = lit`. + */ + def equalTo(lit: Literal): Option[Expression] = None + + /** + * Assume we have a partition column `part`, and a data column `col`. Return a partition filter + * based on `part` for a data filter `col > lit`. + */ + def greaterThan(lit: Literal): Option[Expression] = None + + /** + * Assume we have a partition column `part`, and a data column `col`. Return a partition filter + * based on `part` for a data filter `col >= lit`. + */ + def greaterThanOrEqual(lit: Literal): Option[Expression] = None + + /** + * Assume we have a partition column `part`, and a data column `col`. Return a partition filter + * based on `part` for a data filter `col IS NULL`. + */ + def isNull(): Option[Expression] = None +} + +object OptimizablePartitionExpression { + /** Provide a convenient method to convert a string to a column expression */ + implicit class ColumnExpression(val colName: String) extends AnyVal { + // This will always be a top level column so quote it if necessary + def toPartCol: Expression = new Column(quoteIfNeeded(colName)).expr + } +} + +/** The rules for the generation expression `CAST(col AS DATE)`. */ +case class DatePartitionExpr(partitionColumn: String) extends OptimizablePartitionExpression { + override def lessThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn "<" to "<=". + lessThanOrEqual(lit) + } + + override def lessThanOrEqual(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType => Some(partitionColumn.toPartCol <= Cast(lit, DateType)) + case DateType => Some(partitionColumn.toPartCol <= lit) + case _ => None + } + // to avoid any expression which yields null + expr.map(e => Or(e, IsNull(e))) + } + + override def equalTo(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType => Some(partitionColumn.toPartCol === Cast(lit, DateType)) + case DateType => Some(partitionColumn.toPartCol === lit) + case _ => None + } + // to avoid any expression which yields null + expr.map(e => Or(e, IsNull(e))) + } + + override def greaterThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn ">" to ">=". + greaterThanOrEqual(lit) + } + + override def greaterThanOrEqual(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType => Some(partitionColumn.toPartCol >= Cast(lit, DateType)) + case DateType => Some(partitionColumn.toPartCol >= lit) + case _ => None + } + // to avoid any expression which yields null + expr.map(e => Or(e, IsNull(e))) + } + + override def isNull(): Option[Expression] = Some(partitionColumn.toPartCol.isNull) +} + +/** + * The rules for the generation expression `YEAR(col)`. + * + * @param yearPart the year partition column name. + */ +case class YearPartitionExpr(yearPart: String) extends OptimizablePartitionExpression { + + override def lessThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn "<" to "<=". 
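+    // e.g. (illustrative) for a data filter col < '2021-03-15', matching rows can still live
+    // in the yearPart = 2021 partition, so the safe partition filter is yearPart <= 2021.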
+ lessThanOrEqual(lit) + } + + override def lessThanOrEqual(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType | DateType => Some(yearPart.toPartCol <= Year(lit)) + case _ => None + } + // to avoid any expression which yields null + expr.map(e => Or(e, IsNull(e))) + } + + override def equalTo(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType | DateType => Some(yearPart.toPartCol.expr === Year(lit)) + case _ => None + } + // to avoid any expression which yields null + expr.map(e => Or(e, IsNull(e))) + } + + override def greaterThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn ">" to ">=". + greaterThanOrEqual(lit) + } + + override def greaterThanOrEqual(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType | DateType => Some(yearPart.toPartCol >= Year(lit)) + case _ => None + } + // to avoid any expression which yields null + expr.map(e => Or(e, IsNull(e))) + } + + override def isNull(): Option[Expression] = Some(yearPart.toPartCol.isNull) +} + +/** + * This is a placeholder to catch `month(col)` so that we can merge [[YearPartitionExpr]] and + * [[MonthPartitionExpr]]to [[YearMonthDayPartitionExpr]]. + * + * @param monthPart the month partition column name. + */ +case class MonthPartitionExpr(monthPart: String) extends OptimizablePartitionExpression + +/** + * This is a placeholder to catch `day(col)` so that we can merge [[YearPartitionExpr]], + * [[MonthPartitionExpr]] and [[DayPartitionExpr]] to [[YearMonthDayPartitionExpr]]. + * + * @param dayPart the day partition column name. + */ +case class DayPartitionExpr(dayPart: String) extends OptimizablePartitionExpression + +/** + * This is a placeholder to catch `hour(col)` so that we can merge [[YearPartitionExpr]], + * [[MonthPartitionExpr]], [[DayPartitionExpr]] and [[HourPartitionExpr]] to + * [[YearMonthDayHourPartitionExpr]]. + */ +case class HourPartitionExpr(hourPart: String) extends OptimizablePartitionExpression + +/** + * Optimize the case that two partition columns uses YEAR and MONTH using the same column, such + * as `YEAR(eventTime)` and `MONTH(eventTime)`. + * + * @param yearPart the year partition column name + * @param monthPart the month partition column name + */ +case class YearMonthPartitionExpr( + yearPart: String, + monthPart: String) extends OptimizablePartitionExpression { + + override def lessThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn "<" to "<=". + lessThanOrEqual(lit) + } + + override def lessThanOrEqual(lit: Literal): Option[Expression] = { + lit.dataType match { + case TimestampType => + Some( + (yearPart.toPartCol < Year(lit)) || + (yearPart.toPartCol === Year(lit) && monthPart.toPartCol <= Month(lit)) + ) + case _ => None + } + } + + override def equalTo(lit: Literal): Option[Expression] = { + lit.dataType match { + case TimestampType => + Some( + yearPart.toPartCol === Year(lit) && monthPart.toPartCol === Month(lit) + ) + case _ => None + } + } + + override def greaterThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn ">" to ">=". 
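+    // e.g. (illustrative) for a data filter col > '2021-03-15 00:00:00', matching rows can
+    // still live in the (yearPart = 2021, monthPart = 3) partition, so the rewritten filter
+    // is yearPart > 2021 || (yearPart = 2021 && monthPart >= 3).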
+ greaterThanOrEqual(lit) + } + + override def greaterThanOrEqual(lit: Literal): Option[Expression] = { + lit.dataType match { + case TimestampType => + Some( + (yearPart.toPartCol > Year(lit)) || + (yearPart.toPartCol === Year(lit) && monthPart.toPartCol >= Month(lit)) + ) + case _ => None + } + } + + override def isNull(): Option[Expression] = { + // `yearPart` and `monthPart` are derived columns, so they must be `null` when the input column + // is `null`. + Some(yearPart.toPartCol.isNull && monthPart.toPartCol.isNull) + } +} + +/** + * Optimize the case that three partition columns uses YEAR, MONTH and DAY using the same column, + * such as `YEAR(eventTime)`, `MONTH(eventTime)` and `DAY(eventTime)`. + * + * @param yearPart the year partition column name + * @param monthPart the month partition column name + * @param dayPart the day partition column name + */ +case class YearMonthDayPartitionExpr( + yearPart: String, + monthPart: String, + dayPart: String) extends OptimizablePartitionExpression { + override def lessThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn "<" to "<=". + lessThanOrEqual(lit) + } + + override def lessThanOrEqual(lit: Literal): Option[Expression] = { + lit.dataType match { + case TimestampType => + Some( + (yearPart.toPartCol < Year(lit)) || + (yearPart.toPartCol === Year(lit) && monthPart.toPartCol < Month(lit)) || + ( + yearPart.toPartCol === Year(lit) && monthPart.toPartCol === Month(lit) && + dayPart.toPartCol <= DayOfMonth(lit) + ) + ) + case _ => None + } + } + + override def equalTo(lit: Literal): Option[Expression] = { + lit.dataType match { + case TimestampType => + Some( + yearPart.toPartCol === Year(lit) && monthPart.toPartCol === Month(lit) && + dayPart.toPartCol === DayOfMonth(lit)) + case _ => None + } + } + + override def greaterThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn ">" to ">=". + greaterThanOrEqual(lit) + } + + override def greaterThanOrEqual(lit: Literal): Option[Expression] = { + lit.dataType match { + case TimestampType => + Some( + (yearPart.toPartCol > Year(lit)) || + (yearPart.toPartCol === Year(lit) && monthPart.toPartCol > Month(lit)) || + ( + yearPart.toPartCol === Year(lit) && monthPart.toPartCol === Month(lit) && + dayPart.toPartCol >= DayOfMonth(lit) + ) + ) + case _ => None + } + } + + override def isNull(): Option[Expression] = { + // `yearPart`, `monthPart` and `dayPart` are derived columns, so they must be `null` when the + // input column is `null`. + Some(yearPart.toPartCol.isNull && monthPart.toPartCol.isNull && dayPart.toPartCol.isNull) + } +} + +/** + * Optimize the case that four partition columns uses YEAR, MONTH, DAY and HOUR using the same + * column, such as `YEAR(eventTime)`, `MONTH(eventTime)`, `DAY(eventTime)`, `HOUR(eventTime)`. + * + * @param yearPart the year partition column name + * @param monthPart the month partition column name + * @param dayPart the day partition column name + * @param hourPart the hour partition column name + */ +case class YearMonthDayHourPartitionExpr( + yearPart: String, + monthPart: String, + dayPart: String, + hourPart: String) extends OptimizablePartitionExpression { + override def lessThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn "<" to "<=". 
+ lessThanOrEqual(lit) + } + + override def lessThanOrEqual(lit: Literal): Option[Expression] = { + lit.dataType match { + case TimestampType => + Some( + (yearPart.toPartCol < Year(lit)) || + (yearPart.toPartCol === Year(lit) && monthPart.toPartCol < Month(lit)) || + ( + yearPart.toPartCol === Year(lit) && monthPart.toPartCol === Month(lit) && + dayPart.toPartCol < DayOfMonth(lit) + ) || + ( + yearPart.toPartCol === Year(lit) && monthPart.toPartCol === Month(lit) && + dayPart.toPartCol === DayOfMonth(lit) && hourPart.toPartCol <= Hour(lit) + ) + ) + case _ => None + } + } + + override def equalTo(lit: Literal): Option[Expression] = { + lit.dataType match { + case TimestampType => + Some( + yearPart.toPartCol === Year(lit) && monthPart.toPartCol === Month(lit) && + dayPart.toPartCol === DayOfMonth(lit) && hourPart.toPartCol === Hour(lit)) + case _ => None + } + } + + override def greaterThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn ">" to ">=". + greaterThanOrEqual(lit) + } + + override def greaterThanOrEqual(lit: Literal): Option[Expression] = { + lit.dataType match { + case TimestampType => + Some( + (yearPart.toPartCol > Year(lit)) || + (yearPart.toPartCol === Year(lit) && monthPart.toPartCol > Month(lit)) || + ( + yearPart.toPartCol === Year(lit) && monthPart.toPartCol === Month(lit) && + dayPart.toPartCol > DayOfMonth(lit) + ) || + ( + yearPart.toPartCol === Year(lit) && monthPart.toPartCol === Month(lit) && + dayPart.toPartCol === DayOfMonth(lit) && hourPart.toPartCol >= Hour(lit) + ) + ) + case _ => None + } + } + + override def isNull(): Option[Expression] = { + // `yearPart`, `monthPart`, `dayPart` and `hourPart` are derived columns, so they must be `null` + // when the input column is `null`. + Some(yearPart.toPartCol.isNull && monthPart.toPartCol.isNull && + dayPart.toPartCol.isNull && hourPart.toPartCol.isNull) + } +} + +/** + * The rules for the generation expression `SUBSTRING(col, pos, len)`. Note: + * - Writing an empty string to a partition column would become `null` (SPARK-24438) so generated + * partition filters always pick up the `null` partition for safety. + * - When `pos` is 0, we also support optimizations for comparison operators. When `pos` is not 0, + * we only support optimizations for EqualTo. + * + * @param partitionColumn the partition column name using SUBSTRING in its generation expression. + * @param substringPos the `pos` parameter of SUBSTRING in the generation expression. + * @param substringLen the `len` parameter of SUBSTRING in the generation expression. + */ +case class SubstringPartitionExpr( + partitionColumn: String, + substringPos: Int, + substringLen: Int) extends OptimizablePartitionExpression { + + override def lessThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn "<" to "<=". + lessThanOrEqual(lit) + } + + override def lessThanOrEqual(lit: Literal): Option[Expression] = { + // Both `pos == 0` and `pos == 1` start from the first char. See UTF8String.substringSQL. 
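+    // e.g. (illustrative) for part = SUBSTRING(col, 1, 2), a data filter col <= 'abc' can be
+    // rewritten as part IS NULL OR part <= 'ab'.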
+ if (substringPos == 0 || substringPos == 1) { + lit.dataType match { + case StringType => + Some( + partitionColumn.toPartCol.isNull || + partitionColumn.toPartCol <= Substring(lit, substringPos, substringLen)) + case _ => None + } + } else { + None + } + } + + override def equalTo(lit: Literal): Option[Expression] = { + lit.dataType match { + case StringType => + Some( + partitionColumn.toPartCol.isNull || + partitionColumn.toPartCol === Substring(lit, substringPos, substringLen)) + case _ => None + } + } + + override def greaterThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn ">" to ">=". + greaterThanOrEqual(lit) + } + + override def greaterThanOrEqual(lit: Literal): Option[Expression] = { + // Both `pos == 0` and `pos == 1` start from the first char. See UTF8String.substringSQL. + if (substringPos == 0 || substringPos == 1) { + lit.dataType match { + case StringType => + Some( + partitionColumn.toPartCol.isNull || + partitionColumn.toPartCol >= Substring(lit, substringPos, substringLen)) + case _ => None + } + } else { + None + } + } + + override def isNull(): Option[Expression] = Some(partitionColumn.toPartCol.isNull) +} + +/** + * The rules for the generation expression `DATE_FORMAT(col, format)`, such as: + * DATE_FORMAT(timestamp, 'yyyy-MM'), DATE_FORMAT(timestamp, 'yyyy-MM-dd-HH') + * + * @param partitionColumn the partition column name using DATE_FORMAT in its generation expression. + * @param format the `format` parameter of DATE_FORMAT in the generation expression. + * + * unix_timestamp('12345-12', 'yyyy-MM') | unix_timestamp('+12345-12', 'yyyy-MM') + * EXCEPTION fail | 327432240000 + * CORRECTED null | 327432240000 + * LEGACY 327432240000 | null + */ +case class DateFormatPartitionExpr( + partitionColumn: String, format: String) extends OptimizablePartitionExpression { + + private val partitionColumnUnixTimestamp = UnixTimestamp(partitionColumn.toPartCol, format) + + private def litUnixTimestamp(lit: Literal): UnixTimestamp = + UnixTimestamp(DateFormatClass(lit, format), format) + + override def lessThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn "<" to "<=". + // timestamp + date are truncated to yyyy-MM + // timestamp are truncated to yyyy-MM-dd-HH + lessThanOrEqual(lit) + } + + override def lessThanOrEqual(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType | DateType => + Some(partitionColumnUnixTimestamp <= litUnixTimestamp(lit)) + case _ => None + } + // when write and read timeParserPolicy-s are different, UnixTimestamp will yield null + // thus e would be null if either of two operands is null, we should not drop the data + expr.map(e => Or(e, IsNull(e))) + } + + override def equalTo(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType | DateType => + Some(partitionColumnUnixTimestamp === litUnixTimestamp(lit)) + case _ => None + } + // when write and read timeParserPolicy-s are different, UnixTimestamp will yield null + // thus e would be null if either of two operands is null, we should not drop the data + expr.map(e => Or(e, IsNull(e))) + } + + override def greaterThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn ">" to ">=". 
+ // timestamp + date are truncated to yyyy-MM + // timestamp are truncated to yyyy-MM-dd-HH + greaterThanOrEqual(lit) + } + + override def greaterThanOrEqual(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType | DateType => + Some(partitionColumnUnixTimestamp >= litUnixTimestamp(lit)) + case _ => None + } + // when write and read timeParserPolicy-s are different, UnixTimestamp will yield null + // thus e would be null if either of two operands is null, we should not drop the data + expr.map(e => Or(e, IsNull(e))) + } + + override def isNull(): Option[Expression] = { + Some(partitionColumn.toPartCol.isNull) + } +} + +/** The rules for the generation expression `date_trunc(field, col)`. */ +case class TimestampTruncPartitionExpr(format: String, partitionColumn: String) + extends OptimizablePartitionExpression { + override def lessThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn "<" to "<=". + lessThanOrEqual(lit) + } + + override def lessThanOrEqual(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType => Some(partitionColumn.toPartCol <= TruncTimestamp(format, lit)) + case DateType => Some( + partitionColumn.toPartCol <= TruncTimestamp(format, Cast(lit, TimestampType))) + case _ => None + } + // to avoid any expression which yields null + expr.map(e => Or(e, IsNull(e))) + } + + override def equalTo(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType => Some(partitionColumn.toPartCol === TruncTimestamp(format, lit)) + case DateType => Some( + partitionColumn.toPartCol === TruncTimestamp(format, Cast(lit, TimestampType))) + case _ => None + } + // to avoid any expression which yields null + expr.map(e => Or(e, IsNull(e))) + } + + override def greaterThan(lit: Literal): Option[Expression] = { + // As the partition column has truncated information, we need to turn ">" to ">=". + greaterThanOrEqual(lit) + } + + override def greaterThanOrEqual(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType => Some(partitionColumn.toPartCol >= TruncTimestamp(format, lit)) + case DateType => Some( + partitionColumn.toPartCol >= TruncTimestamp(format, Cast(lit, TimestampType))) + case _ => None + } + // to avoid any expression which yields null + expr.map(e => Or(e, IsNull(e))) + } + + override def isNull(): Option[Expression] = Some(partitionColumn.toPartCol.isNull) +} + +/** + * The rules for the generation of identity expressions, used for partitioning on a nested column. + * Note: + * - Writing an empty string to a partition column would become `null` (SPARK-24438) so generated + * partition filters always pick up the `null` partition for safety. + * + * @param partitionColumn the partition column name used in the generation expression. 
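+ *
+ * For example (illustrative), with a generated column `part = nested.a`, a data filter
+ * `nested.a = 5` can be rewritten as the partition filter `part IS NULL OR part = 5`.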
+ */ +case class IdentityPartitionExpr(partitionColumn: String) + extends OptimizablePartitionExpression { + + override def lessThan(lit: Literal): Option[Expression] = { + Some(partitionColumn.toPartCol.isNull || partitionColumn.toPartCol < lit) + } + + override def lessThanOrEqual(lit: Literal): Option[Expression] = { + Some(partitionColumn.toPartCol.isNull || partitionColumn.toPartCol <= lit) + } + + override def equalTo(lit: Literal): Option[Expression] = { + Some(partitionColumn.toPartCol.isNull || partitionColumn.toPartCol === lit) + } + + override def greaterThan(lit: Literal): Option[Expression] = { + Some(partitionColumn.toPartCol.isNull || partitionColumn.toPartCol > lit) + } + + override def greaterThanOrEqual(lit: Literal): Option[Expression] = { + Some(partitionColumn.toPartCol.isNull || partitionColumn.toPartCol >= lit) + } + + override def isNull(): Option[Expression] = Some(partitionColumn.toPartCol.isNull) +} + +/** + * The rules for generation expression that use the function trunc(col, format) such as + * trunc(timestamp, 'year'), trunc(date, 'week') and trunc(timestampStr, 'hour') + * @param partitionColumn partition column using trunc function in the generation expression + * @param format the format that specifies the unit of truncation applied to the partitionColumn + */ +case class TruncDatePartitionExpr(partitionColumn: String, format: String) + extends OptimizablePartitionExpression { + + override def lessThan(lit: Literal): Option[Expression] = { + lessThanOrEqual(lit) + } + + override def lessThanOrEqual(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType | DateType | StringType => + Some(partitionColumn.toPartCol <= TruncDate(lit, Literal(format))) + case _ => None + } + expr.map(e => Or(e, IsNull(e))) + } + + override def equalTo(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType | DateType | StringType => + Some(partitionColumn.toPartCol === TruncDate(lit, Literal(format))) + case _ => None + } + expr.map(e => Or(e, IsNull(e))) + } + + override def greaterThan(lit: Literal): Option[Expression] = { + greaterThanOrEqual(lit) + } + + override def greaterThanOrEqual(lit: Literal): Option[Expression] = { + val expr = lit.dataType match { + case TimestampType | DateType | StringType => + Some(partitionColumn.toPartCol >= TruncDate(lit, Literal(format))) + case _ => None + } + expr.map(e => Or(e, IsNull(e))) + } + + override def isNull(): Option[Expression] = { + Some(partitionColumn.toPartCol.isNull) + } + +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/optimizer/RangePartitionIdRewrite.scala b/spark/src/main/scala/org/apache/spark/sql/delta/optimizer/RangePartitionIdRewrite.scala new file mode 100644 index 00000000000..3abc6253a11 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/optimizer/RangePartitionIdRewrite.scala @@ -0,0 +1,110 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.optimizer + +import org.apache.spark.sql.delta.expressions.{PartitionerExpr, RangePartitionId} + +import org.apache.spark.{RangePartitioner, SparkContext} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, IsNotNull, SortOrder} +import org.apache.spark.sql.catalyst.expressions.codegen.LazilyGeneratedOrdering +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, UnaryNode} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} +import org.apache.spark.util.MutablePair + + +/** + * Rewrites all [[RangePartitionId]] into [[PartitionerExpr]] by running sampling jobs + * on the child RDD in order to determine the range boundaries. + */ +case class RangePartitionIdRewrite(session: SparkSession) + extends Rule[LogicalPlan] { + import RangePartitionIdRewrite._ + + private def sampleSizeHint: Int = conf.rangeExchangeSampleSizePerPartition + + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { + case node: UnaryNode => node.transformExpressionsUp { + case RangePartitionId(expr, n) => + val aliasedExpr = Alias(expr, "__RPI_child_col__")() + val exprAttr = aliasedExpr.toAttribute + + val planForSampling = Filter(IsNotNull(exprAttr), Project(Seq(aliasedExpr), node.child)) + val qeForSampling = new QueryExecution(session, planForSampling) + + val desc = s"RangePartitionId($expr, $n) sampling" + val jobGroupId = session.sparkContext.getLocalProperty(SparkContext.SPARK_JOB_GROUP_ID) + withCallSite(session.sparkContext, desc) { + SQLExecution.withNewExecutionId(qeForSampling) { + withJobGroup(session.sparkContext, jobGroupId, desc) { + // The code below is inspired from ShuffleExchangeExec.prepareShuffleDependency() + + // Internally, RangePartitioner runs a job on the RDD that samples keys to compute + // partition bounds. To get accurate samples, we need to copy the mutable keys. + val rddForSampling = qeForSampling.toRdd.mapPartitionsInternal { iter => + val mutablePair = new MutablePair[InternalRow, Null]() + iter.map(row => mutablePair.update(row.copy(), null)) + } + + val sortOrder = SortOrder(exprAttr, Ascending) + implicit val ordering = new LazilyGeneratedOrdering(Seq(sortOrder), Seq(exprAttr)) + val partitioner = new RangePartitioner(n, rddForSampling, true, sampleSizeHint) + + PartitionerExpr(expr, partitioner) + } + } + } + } + } +} + +object RangePartitionIdRewrite { + /** + * Executes the equivalent [[SparkContext.setJobGroup()]] call, runs the given `body`, + * then restores the original jobGroup. + */ + private def withJobGroup[T]( + sparkContext: SparkContext, + groupId: String, + description: String) + (body: => T): T = { + val oldJobDesc = sparkContext.getLocalProperty("spark.job.description") + val oldGroupId = sparkContext.getLocalProperty("spark.jobGroup.id") + val oldJobInterrupt = sparkContext.getLocalProperty("spark.job.interruptOnCancel") + sparkContext.setJobGroup(groupId, description, interruptOnCancel = true) + try body finally { + sparkContext.setJobGroup( + oldGroupId, oldJobDesc, Option(oldJobInterrupt).map(_.toBoolean).getOrElse(false)) + } + } + + /** + * Executes the equivalent setCallSite() call, runs the given `body`, + * then restores the original call site. 
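+ *
+ * A sketch of the intended usage (the description string is illustrative):
+ * {{{
+ *   withCallSite(session.sparkContext, "RangePartitionId(expr, n) sampling") {
+ *     // run the sampling job; the previous call site is restored afterwards
+ *   }
+ * }}}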
+ */ + private def withCallSite[T](sparkContext: SparkContext, shortCallSite: String)(body: => T): T = { + val oldCallSiteShortForm = sparkContext.getLocalProperty("callSite.short") + val oldCallSiteLongForm = sparkContext.getLocalProperty("callSite.long") + sparkContext.setCallSite(shortCallSite) + try body finally { + sparkContext.setLocalProperty("callSite.short", oldCallSiteShortForm) + sparkContext.setLocalProperty("callSite.long", oldCallSiteLongForm) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/perf/DeltaOptimizedWriterExec.scala b/spark/src/main/scala/org/apache/spark/sql/delta/perf/DeltaOptimizedWriterExec.scala new file mode 100644 index 00000000000..3d816a1c5bc --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/perf/DeltaOptimizedWriterExec.scala @@ -0,0 +1,323 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.perf + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer +import scala.concurrent.duration.Duration + +import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.BinPackingUtils + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark._ +import org.apache.spark.internal.config +import org.apache.spark.internal.config.ConfigEntry +import org.apache.spark.network.util.ByteUnit +import org.apache.spark.rdd.RDD +import org.apache.spark.shuffle._ +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning +import org.apache.spark.sql.execution.{ShuffledRowRDD, SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics, SQLShuffleReadMetricsReporter, SQLShuffleWriteMetricsReporter} +import org.apache.spark.storage._ +import org.apache.spark.util.ThreadUtils + + +/** + * An execution node which shuffles data to a target output of `DELTA_OPTIMIZE_WRITE_SHUFFLE_BLOCKS` + * blocks, hash partitioned on the table partition columns. We group all blocks by their + * reducer_id's and bin-pack into `DELTA_OPTIMIZE_WRITE_BIN_SIZE` bins. Then we launch a Spark task + * per bin to write out a single file for each bin. + * + * @param child The execution plan + * @param partitionColumns The partition columns of the table. Used for hash partitioning the write + * @param deltaLog The DeltaLog for the table. 
Used for logging only + */ +case class DeltaOptimizedWriterExec( + child: SparkPlan, + partitionColumns: Seq[String], + @transient deltaLog: DeltaLog + ) extends UnaryExecNode with DeltaLogging { + + override def output: Seq[Attribute] = child.output + + private lazy val writeMetrics = + SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) + private lazy val readMetrics = + SQLShuffleReadMetricsReporter.createShuffleReadMetrics(sparkContext) + override lazy val metrics: Map[String, SQLMetric] = Map( + "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size") + ) ++ readMetrics ++ writeMetrics + + private lazy val childNumPartitions = child.execute().getNumPartitions + + private lazy val numPartitions: Int = { + val targetShuffleBlocks = getConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_SHUFFLE_BLOCKS) + math.min( + math.max(targetShuffleBlocks / childNumPartitions, 1), + getConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_MAX_SHUFFLE_PARTITIONS)) + } + + @transient private var cachedShuffleRDD: ShuffledRowRDD = _ + + @transient private lazy val mapTracker = SparkEnv.get.mapOutputTracker + + /** Creates a ShuffledRowRDD for facilitating the shuffle in the map side. */ + private def getShuffleRDD: ShuffledRowRDD = { + if (cachedShuffleRDD == null) { + val resolver = org.apache.spark.sql.catalyst.analysis.caseInsensitiveResolution + val saltedPartitioning = HashPartitioning( + partitionColumns.map(p => output.find(o => resolver(p, o.name)).getOrElse( + throw DeltaErrors.failedFindPartitionColumnInOutputPlan(p))), + numPartitions) + + val shuffledRDD = + ShuffleExchangeExec(saltedPartitioning, child).execute().asInstanceOf[ShuffledRowRDD] + + cachedShuffleRDD = shuffledRDD + } + cachedShuffleRDD + } + + private def computeBins(): Array[List[(BlockManagerId, ArrayBuffer[(BlockId, Long, Int)])]] = { + // Get all shuffle information + val shuffleStats = getShuffleStats() + + // Group by blockId instead of block manager + val blockInfo = shuffleStats.flatMap { case (bmId, blocks) => + blocks.map { case (blockId, size, index) => + (blockId, (bmId, size, index)) + } + }.toMap + + val maxBinSize = + ByteUnit.BYTE.convertFrom(getConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_BIN_SIZE), ByteUnit.MiB) + + val bins = shuffleStats.toSeq.flatMap(_._2).groupBy(_._1.asInstanceOf[ShuffleBlockId].reduceId) + .flatMap { case (_, blocks) => + BinPackingUtils.binPackBySize[(BlockId, Long, Int), BlockId]( + blocks, + _._2, // size + _._1, // blockId + maxBinSize) + } + + bins + .map { bin => + var binSize = 0L + val blockLocations = + new mutable.HashMap[BlockManagerId, ArrayBuffer[(BlockId, Long, Int)]]() + for (blockId <- bin) { + val (bmId, size, index) = blockInfo(blockId) + binSize += size + val blocksAtBM = blockLocations.getOrElseUpdate( + bmId, new ArrayBuffer[(BlockId, Long, Int)]()) + blocksAtBM.append((blockId, size, index)) + } + (binSize, blockLocations.toList) + } + .toArray + .sortBy(_._1)(Ordering[Long].reverse) // submit largest blocks first + .map(_._2) + } + + /** Performs the shuffle before the write, so that we can bin-pack output data. 
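+ * The returned stats are grouped per block manager; an illustrative (made-up) entry:
+ * {{{
+ *   (BlockManagerId("exec-1", "host-a", 7337),
+ *     Seq((ShuffleBlockId(0, 0L, 3), 1048576L, 0)))
+ * }}}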
*/ + private def getShuffleStats(): Array[(BlockManagerId, collection.Seq[(BlockId, Long, Int)])] = { + val dep = getShuffleRDD.dependency + // Gets the shuffle output stats + def getStats() = mapTracker.getMapSizesByExecutorId( + dep.shuffleId, 0, Int.MaxValue, 0, numPartitions).toArray + + // Executes the shuffle map stage in case we are missing output stats + def awaitShuffleMapStage(): Unit = { + assert(dep != null, "Shuffle dependency should not be null") + // hack to materialize the shuffle files in a fault tolerant way + ThreadUtils.awaitResult(sparkContext.submitMapStage(dep), Duration.Inf) + } + + try { + val res = getStats() + if (res.isEmpty) awaitShuffleMapStage() + getStats() + } catch { + case e: FetchFailedException => + logWarning("Failed to fetch shuffle blocks for the optimized writer. Retrying", e) + awaitShuffleMapStage() + getStats() + } + } + + override def doExecute(): RDD[InternalRow] = { + // Single partitioned tasks can simply be written + if (childNumPartitions <= 1) return child.execute() + + val shuffledRDD = getShuffleRDD + + val partitions = computeBins() + + recordDeltaEvent(deltaLog, + "delta.optimizeWrite.planned", + data = Map( + "originalPartitions" -> childNumPartitions, + "outputPartitions" -> partitions.length, + "shufflePartitions" -> numPartitions, + "numShuffleBlocks" -> getConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_SHUFFLE_BLOCKS), + "binSize" -> getConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_BIN_SIZE), + "maxShufflePartitions" -> + getConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_MAX_SHUFFLE_PARTITIONS) + ) + ) + + new DeltaOptimizedWriterRDD( + sparkContext, + shuffledRDD.dependency, + readMetrics, + new OptimizedWriterBlocks(partitions)) + } + + private def getConf[T](entry: ConfigEntry[T]): T = { + conf.getConf(entry) + } + + override protected def withNewChildInternal(newChild: SparkPlan): DeltaOptimizedWriterExec = + copy(child = newChild) +} + +/** + * A wrapper class to make the blocks non-serializable. If we serialize the blocks and send them to + * the executors, it may cause memory problems. + * NOTE!!!: By wrapping the Array in a non-serializable class we enforce that the field needs to + * be transient, and gives us extra security against a developer making a mistake. + */ +class OptimizedWriterBlocks( + val bins: Array[List[(BlockManagerId, ArrayBuffer[(BlockId, Long, Int)])]]) + +/** + * A specialized implementation similar to `ShuffledRowRDD`, where a partition reads a prepared + * set of shuffle blocks. 
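+ *
+ * Each RDD partition corresponds to one bin produced by [[DeltaOptimizedWriterExec]], so
+ * the number of write tasks (and therefore output files) equals the number of bins.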
+ */ +private class DeltaOptimizedWriterRDD( + @transient sparkContext: SparkContext, + var dep: ShuffleDependency[Int, _, InternalRow], + metrics: Map[String, SQLMetric], + @transient blocks: OptimizedWriterBlocks) + extends RDD[InternalRow](sparkContext, Seq(dep)) with DeltaLogging { + + override def getPartitions: Array[Partition] = Array.tabulate(blocks.bins.length) { i => + ShuffleBlockRDDPartition(i, blocks.bins(i)) + } + + override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { + val tempMetrics = context.taskMetrics().createTempShuffleReadMetrics() + val sqlMetricsReporter = new SQLShuffleReadMetricsReporter(tempMetrics, metrics) + + val blocks = if (context.stageAttemptNumber() > 0) { + // We lost shuffle blocks, so we need to now get new manager addresses + val executorTracker = SparkEnv.get.mapOutputTracker + val oldBlockLocations = split.asInstanceOf[ShuffleBlockRDDPartition].blocks + + // assumes we bin-pack by reducerId + val reducerId = oldBlockLocations.head._2.head._1.asInstanceOf[ShuffleBlockId].reduceId + // Get block addresses + val newLocations = executorTracker.getMapSizesByExecutorId(dep.shuffleId, reducerId) + .flatMap { case (bmId, newBlocks) => + newBlocks.map { blockInfo => + (blockInfo._3, (bmId, blockInfo)) + } + }.toMap + + val blockLocations = new mutable.HashMap[BlockManagerId, ArrayBuffer[(BlockId, Long, Int)]]() + oldBlockLocations.foreach { case (_, oldBlocks) => + oldBlocks.foreach { oldBlock => + val (bmId, blockInfo) = newLocations(oldBlock._3) + val blocksAtBM = blockLocations.getOrElseUpdate(bmId, + new ArrayBuffer[(BlockId, Long, Int)]()) + blocksAtBM.append(blockInfo) + } + } + + blockLocations.iterator + } else { + split.asInstanceOf[ShuffleBlockRDDPartition].blocks.iterator + } + + val reader = new OptimizedWriterShuffleReader( + dep, + context, + blocks, + sqlMetricsReporter) + reader.read().map(_._2) + } + + override def clearDependencies(): Unit = { + super.clearDependencies() + dep = null + } +} + +/** The list of blocks that need to be read by a partition of the ShuffleBlockRDD. */ +private case class ShuffleBlockRDDPartition( + index: Int, + blocks: List[(BlockManagerId, ArrayBuffer[(BlockId, Long, Int)])]) extends Partition + +/** A simplified implementation of the `BlockStoreShuffleReader` for reading shuffle blocks. 
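+ * Unlike the full reader it performs no aggregation, sorting or spilling; it only fetches
+ * the given blocks and deserializes them into records.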
*/ +private class OptimizedWriterShuffleReader( + dep: ShuffleDependency[Int, _, InternalRow], + context: TaskContext, + blocks: Iterator[(BlockManagerId, ArrayBuffer[(BlockId, Long, Int)])], + readMetrics: ShuffleReadMetricsReporter) extends ShuffleReader[Int, InternalRow] { + + /** Read the combined key-values for this reduce task */ + override def read(): Iterator[Product2[Int, InternalRow]] = { + val wrappedStreams = new ShuffleBlockFetcherIterator( + context, + SparkEnv.get.blockManager.blockStoreClient, + SparkEnv.get.blockManager, + SparkEnv.get.mapOutputTracker, + blocks, + SparkEnv.get.serializerManager.wrapStream, + // Note: we use getSizeAsMb when no suffix is provided for backwards compatibility + SparkEnv.get.conf.getSizeAsMb("spark.reducer.maxSizeInFlight", "48m") * 1024 * 1024, + SparkEnv.get.conf.getInt("spark.reducer.maxReqsInFlight", Int.MaxValue), + SparkEnv.get.conf.get(config.REDUCER_MAX_BLOCKS_IN_FLIGHT_PER_ADDRESS), + SparkEnv.get.conf.get(config.MAX_REMOTE_BLOCK_SIZE_FETCH_TO_MEM), + SparkEnv.get.conf.get(config.SHUFFLE_MAX_ATTEMPTS_ON_NETTY_OOM), + SparkEnv.get.conf.getBoolean("spark.shuffle.detectCorrupt", true), + SparkEnv.get.conf.getBoolean("spark.shuffle.detectCorrupt.useExtraMemory", false), + SparkEnv.get.conf.getBoolean("spark.shuffle.checksum.enabled", true), + SparkEnv.get.conf.get("spark.shuffle.checksum.algorithm", "ADLER32"), + readMetrics, + false) + + val serializerInstance = dep.serializer.newInstance() + + // Create a key/value iterator for each stream + val recordIter = wrappedStreams.flatMap { case (_, wrappedStream) => + // Note: the asKeyValueIterator below wraps a key/value iterator inside of a + // NextIterator. The NextIterator makes sure that close() is called on the + // underlying InputStream when all records have been read. + serializerInstance.deserializeStream(wrappedStream).asKeyValueIterator + }.asInstanceOf[Iterator[Product2[Int, InternalRow]]] + + new InterruptibleIterator[Product2[Int, InternalRow]](context, recordIter) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala b/spark/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala new file mode 100644 index 00000000000..f14c0c425ac --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala @@ -0,0 +1,374 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.perf + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate._ +import org.apache.spark.sql.catalyst.planning.PhysicalOperation +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.delta.{DeltaColumnMapping, DeltaTable, Snapshot} +import org.apache.spark.sql.delta.commands.DeletionVectorUtils.isTableDVFree +import org.apache.spark.sql.delta.files.TahoeLogFileIndex +import org.apache.spark.sql.delta.stats.DeltaScanGenerator +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types._ + +import java.sql.Date +import java.util.Locale + +/** Optimize COUNT, MIN and MAX expressions on Delta tables. + * This optimization is only applied when the following conditions are met: + * - The MIN/MAX columns are not nested and data type is supported by the optimization (ByteType, + * ShortType, IntegerType, LongType, FloatType, DoubleType, DateType). + * - All AddFiles in the Delta Log must have stats on columns used in MIN/MAX expressions, + * or the columns must be partitioned, in the latter case it uses partitionValues, a required field. + * - Table has no deletion vectors, or query has no MIN/MAX expressions. + * - COUNT has no DISTINCT. + * - Query has no filters. + * - Query has no GROUP BY. + * Example of valid query: SELECT COUNT(*), MIN(id), MAX(partition_col) FROM MyDeltaTable + */ +trait OptimizeMetadataOnlyDeltaQuery extends Logging { + def optimizeQueryWithMetadata(plan: LogicalPlan): LogicalPlan = { + plan.transformUpWithSubqueries { + case agg@MetadataOptimizableAggregate(tahoeLogFileIndex) => + createLocalRelationPlan(agg, tahoeLogFileIndex) + } + } + + protected def getDeltaScanGenerator(index: TahoeLogFileIndex): DeltaScanGenerator + + private def createLocalRelationPlan( + plan: Aggregate, + tahoeLogFileIndex: TahoeLogFileIndex): LogicalPlan = { + + val aggColumnsNames = Set(extractMinMaxFieldNames(plan).map(_.toLowerCase(Locale.ROOT)) : _*) + val (rowCount, columnStats) = extractCountMinMaxFromDeltaLog(tahoeLogFileIndex, aggColumnsNames) + + def checkStatsExists(attrRef: AttributeReference): Boolean = { + columnStats.contains(attrRef.name) && + // Avoid StructType, it is not supported by this optimization. + // Sanity check only. If reference is nested column it would be GetStructType + // instead of AttributeReference. 
+ attrRef.references.size == 1 && attrRef.references.head.dataType != StructType + } + + def convertValueIfRequired(attrRef: AttributeReference, value: Any): Any = { + if (attrRef.dataType == DateType && value != null) { + DateTimeUtils.fromJavaDate(value.asInstanceOf[Date]) + } else { + value + } + } + + val rewrittenAggregationValues = plan.aggregateExpressions.collect { + case Alias(AggregateExpression( + Count(Seq(Literal(1, _))), Complete, false, None, _), _) if rowCount.isDefined => + rowCount.get + case Alias(tps@ToPrettyString(AggregateExpression( + Count(Seq(Literal(1, _))), Complete, false, None, _), _), _) if rowCount.isDefined => + tps.copy(child = Literal(rowCount.get)).eval() + case Alias(AggregateExpression( + Min(minReference: AttributeReference), Complete, false, None, _), _) + if checkStatsExists(minReference) => + convertValueIfRequired(minReference, columnStats(minReference.name).min) + case Alias(tps@ToPrettyString(AggregateExpression( + Min(minReference: AttributeReference), Complete, false, None, _), _), _) + if checkStatsExists(minReference) => + val v = columnStats(minReference.name).min + tps.copy(child = Literal(v)).eval() + case Alias(AggregateExpression( + Max(maxReference: AttributeReference), Complete, false, None, _), _) + if checkStatsExists(maxReference) => + convertValueIfRequired(maxReference, columnStats(maxReference.name).max) + case Alias(tps@ToPrettyString(AggregateExpression( + Max(maxReference: AttributeReference), Complete, false, None, _), _), _) + if checkStatsExists(maxReference) => + val v = columnStats(maxReference.name).max + tps.copy(child = Literal(v)).eval() + } + + if (plan.aggregateExpressions.size == rewrittenAggregationValues.size) { + val r = LocalRelation( + plan.output, + Seq(InternalRow.fromSeq(rewrittenAggregationValues))) + r + } else { + logInfo(s"Query can't be optimized using metadata because stats are missing") + plan + } + } + + private def extractMinMaxFieldNames(plan: Aggregate): Seq[String] = { + plan.aggregateExpressions.collect { + case Alias(AggregateExpression( + Min(minReference: AttributeReference), _, _, _, _), _) => + minReference.name + case Alias(AggregateExpression( + Max(maxReference: AttributeReference), _, _, _, _), _) => + maxReference.name + case Alias(ToPrettyString(AggregateExpression( + Min(minReference: AttributeReference), _, _, _, _), _), _) => + minReference.name + case Alias(ToPrettyString(AggregateExpression( + Max(maxReference: AttributeReference), _, _, _, _), _), _) => + maxReference.name + } + } + + /** + * Min and max values from Delta Log stats or partitionValues. 
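+ *
+ * For example, an integer column whose files report a minimum of 1 and a maximum of 42
+ * (hypothetical values) is represented as:
+ * {{{
+ *   DeltaColumnStat(min = 1, max = 42)
+ * }}}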
+ */ + case class DeltaColumnStat(min: Any, max: Any) + + private def extractCountMinMaxFromStats( + deltaScanGenerator: DeltaScanGenerator, + lowerCaseColumnNames: Set[String]): (Option[Long], Map[String, DeltaColumnStat]) = { + val snapshot = deltaScanGenerator.snapshotToScan + + // Count - account for deleted rows according to deletion vectors + val dvCardinality = coalesce(col("deletionVector.cardinality"), lit(0)) + val numLogicalRecords = (col("stats.numRecords") - dvCardinality).as("numLogicalRecords") + + val filesWithStatsForScan = deltaScanGenerator.filesWithStatsForScan(Nil) + // Validate all the files has stats + val filesStatsCount = filesWithStatsForScan.select( + sum(numLogicalRecords).as("numLogicalRecords"), + count(when(col("stats.numRecords").isNull, 1)).as("missingNumRecords"), + count(when(col("stats.numRecords") > 0, 1)).as("countNonEmptyFiles")).head + + // If any numRecords is null, we have incomplete stats; + val allRecordsHasStats = filesStatsCount.getAs[Long]("missingNumRecords") == 0 + if (!allRecordsHasStats) { + return (None, Map.empty) + } + // the sum agg is either null (for an empty table) or gives an accurate record count. + val numRecords = if (filesStatsCount.isNullAt(0)) 0 else filesStatsCount.getLong(0) + lazy val numFiles: Long = filesStatsCount.getAs[Long]("countNonEmptyFiles") + + val dataColumns = snapshot.statCollectionPhysicalSchema.filter(col => + lowerCaseColumnNames.contains(col.name.toLowerCase(Locale.ROOT))) + + // DELETE operations creates AddFile records with 0 rows, and no column stats. + // We can safely ignore it since there is no data. + lazy val files = filesWithStatsForScan.filter(col("stats.numRecords") > 0) + lazy val statsMinMaxNullColumns = files.select(col("stats.*")) + + val minColName = "minValues" + val maxColName = "maxValues" + val nullColName = "nullCount" + + if (dataColumns.isEmpty + || dataColumns.size != lowerCaseColumnNames.size + || !isTableDVFree(snapshot) // When DV enabled we can't rely on stats values easily + || numFiles == 0 + || !statsMinMaxNullColumns.columns.contains(minColName) + || !statsMinMaxNullColumns.columns.contains(maxColName) + || !statsMinMaxNullColumns.columns.contains(nullColName)) { + return (Some(numRecords), Map.empty) + } + + // dataColumns can contain columns without stats if dataSkippingNumIndexedCols + // has been increased + val columnsWithStats = files.select( + col(s"stats.$minColName.*"), + col(s"stats.$maxColName.*"), + col(s"stats.$nullColName.*")) + .columns.groupBy(identity).mapValues(_.size) + .filter(x => x._2 == 3) // 3: minValues, maxValues, nullCount + .map(x => x._1).toSet + + // Creates a tuple with physical name to avoid recalculating it multiple times + val dataColumnsWithStats = dataColumns.map(x => (x, DeltaColumnMapping.getPhysicalName(x))) + .filter(x => columnsWithStats.contains(x._2)) + + val columnsToQuery = dataColumnsWithStats.flatMap { columnAndPhysicalName => + val dataType = columnAndPhysicalName._1.dataType + val physicalName = columnAndPhysicalName._2 + + Seq(col(s"stats.$minColName.`$physicalName`").cast(dataType).as(s"min.$physicalName"), + col(s"stats.$maxColName.`$physicalName`").cast(dataType).as(s"max.$physicalName"), + col(s"stats.$nullColName.`$physicalName`").as(s"null_count.$physicalName")) + } ++ Seq(col(s"stats.numRecords").as(s"numRecords")) + + val minMaxExpr = dataColumnsWithStats.flatMap { columnAndPhysicalName => + val physicalName = columnAndPhysicalName._2 + + // To validate if the column has stats we do two validation: + // 1-) 
COUNT(null_count.columnName) should be equals to numFiles, + // since null_count is always non-null. + // 2-) The number of files with non-null min/max: + // a. count(min.columnName)|count(max.columnName) + + // the number of files where all rows are NULL: + // b. count of (ISNULL(min.columnName) and null_count.columnName == numRecords) + // should be equals to numFiles + Seq( + s"""case when $numFiles = count(`null_count.$physicalName`) + | AND $numFiles = (count(`min.$physicalName`) + sum(case when + | ISNULL(`min.$physicalName`) and `null_count.$physicalName` = numRecords + | then 1 else 0 end)) + | AND $numFiles = (count(`max.$physicalName`) + sum(case when + | ISNULL(`max.$physicalName`) AND `null_count.$physicalName` = numRecords + | then 1 else 0 end)) + | then TRUE else FALSE end as `complete_$physicalName`""".stripMargin, + s"min(`min.$physicalName`) as `min_$physicalName`", + s"max(`max.$physicalName`) as `max_$physicalName`") + } + + val statsResults = files.select(columnsToQuery: _*).selectExpr(minMaxExpr: _*).head + + (Some(numRecords), dataColumnsWithStats + .filter(x => statsResults.getAs[Boolean](s"complete_${x._2}")) + .map { columnAndPhysicalName => + val column = columnAndPhysicalName._1 + val physicalName = columnAndPhysicalName._2 + column.name -> + DeltaColumnStat( + statsResults.getAs(s"min_$physicalName"), + statsResults.getAs(s"max_$physicalName")) + }.toMap) + } + + private def extractMinMaxFromPartitionValue( + snapshot: Snapshot, + lowerCaseColumnNames: Set[String]): Map[String, DeltaColumnStat] = { + + val partitionedColumns = snapshot.metadata.partitionSchema + .filter(col => lowerCaseColumnNames.contains(col.name.toLowerCase(Locale.ROOT))) + .map(col => (col, DeltaColumnMapping.getPhysicalName(col))) + + if (partitionedColumns.isEmpty) { + Map.empty + } else { + val partitionedColumnsValues = partitionedColumns.map { partitionedColumn => + val physicalName = partitionedColumn._2 + col(s"partitionValues.`$physicalName`") + .cast(partitionedColumn._1.dataType).as(physicalName) + } + + val partitionedColumnsAgg = partitionedColumns.flatMap { partitionedColumn => + val physicalName = partitionedColumn._2 + + Seq(min(s"`$physicalName`").as(s"min_$physicalName"), + max(s"`$physicalName`").as(s"max_$physicalName")) + } + + val partitionedColumnsQuery = snapshot.allFiles + .select(partitionedColumnsValues: _*) + .agg(partitionedColumnsAgg.head, partitionedColumnsAgg.tail: _*) + .head() + + partitionedColumns.map { partitionedColumn => + val physicalName = partitionedColumn._2 + + partitionedColumn._1.name -> + DeltaColumnStat( + partitionedColumnsQuery.getAs(s"min_$physicalName"), + partitionedColumnsQuery.getAs(s"max_$physicalName")) + }.toMap + } + } + + /** + * Extract the Count, Min and Max values from Delta Log stats and partitionValues. + * The first field is the rows count in the table or `None` if we cannot calculate it from stats + * If the column is not partitioned, the values are extracted from stats when it exists. + * If the column is partitioned, the values are extracted from partitionValues. 
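+ *
+ * A hypothetical result for `SELECT COUNT(*), MIN(id), MAX(part) FROM tbl`, where `id` is
+ * a data column and `part` is a partition column:
+ * {{{
+ *   (Some(1000L), CaseInsensitiveMap(Map(
+ *     "id" -> DeltaColumnStat(1, 999),
+ *     "part" -> DeltaColumnStat("2023-01", "2023-12"))))
+ * }}}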
+ */ + private def extractCountMinMaxFromDeltaLog( + tahoeLogFileIndex: TahoeLogFileIndex, + lowerCaseColumnNames: Set[String]): + (Option[Long], CaseInsensitiveMap[DeltaColumnStat]) = { + val deltaScanGen = getDeltaScanGenerator(tahoeLogFileIndex) + + val partitionedValues = extractMinMaxFromPartitionValue( + deltaScanGen.snapshotToScan, + lowerCaseColumnNames) + + val partitionedColNames = partitionedValues.keySet.map(_.toLowerCase(Locale.ROOT)) + val dataColumnNames = lowerCaseColumnNames -- partitionedColNames + val (rowCount, columnStats) = extractCountMinMaxFromStats(deltaScanGen, dataColumnNames) + + (rowCount, CaseInsensitiveMap(columnStats ++ partitionedValues)) + } + + object MetadataOptimizableAggregate { + + /** Only data type that are stored in stats without any loss of precision are supported. */ + def isSupportedDataType(dataType: DataType): Boolean = { + // DecimalType is not supported because not all the values are correctly stored + // For example -99999999999999999999999999999999999999 in stats is -1e38 + (dataType.isInstanceOf[NumericType] && !dataType.isInstanceOf[DecimalType]) || + dataType.isInstanceOf[DateType] + } + + private def getAggFunctionOptimizable( + aggExpr: AggregateExpression): Option[DeclarativeAggregate] = { + + aggExpr match { + case AggregateExpression( + c@Count(Seq(Literal(1, _))), Complete, false, None, _) => + Some(c) + case AggregateExpression( + min@Min(minExpr), Complete, false, None, _) if isSupportedDataType(minExpr.dataType) => + Some(min) + case AggregateExpression( + max@Max(maxExpr), Complete, false, None, _) if isSupportedDataType(maxExpr.dataType) => + Some(max) + case _ => None + } + } + + private def isStatsOptimizable(aggExprs: Seq[Alias]): Boolean = aggExprs.forall { + case Alias(aggExpr: AggregateExpression, _) => getAggFunctionOptimizable(aggExpr).isDefined + case Alias(ToPrettyString(aggExpr: AggregateExpression, _), _) => + getAggFunctionOptimizable(aggExpr).isDefined + case _ => false + } + + private def fieldsAreAttributeReference(fields: Seq[NamedExpression]): Boolean = fields.forall { + // Fields should be AttributeReference to avoid getting the incorrect column name + // from stats when we create the Local Relation, example + // SELECT MAX(Column2) FROM (SELECT Column1 AS Column2 FROM TableName) + // the AggregateExpression contains a reference to Column2, instead of Column1 + case _: AttributeReference => true + case _ => false + } + + def unapply(plan: Aggregate): Option[TahoeLogFileIndex] = plan match { + case Aggregate( + Nil, // GROUP BY not supported + aggExprs: Seq[Alias @unchecked], // Underlying type is not checked because of type erasure. + // Alias type check is done in isStatsOptimizable. 
+ PhysicalOperation(fields, Nil, DeltaTable(fileIndex: TahoeLogFileIndex))) + if fileIndex.partitionFilters.isEmpty && + fieldsAreAttributeReference(fields) && + isStatsOptimizable(aggExprs) => Some(fileIndex) + case Aggregate( + Nil, + aggExprs: Seq[Alias @unchecked], + // When all columns are selected, there are no Project/PhysicalOperation + DeltaTable(fileIndex: TahoeLogFileIndex)) + if fileIndex.partitionFilters.isEmpty && + isStatsOptimizable(aggExprs) => Some(fileIndex) + case _ => None + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/schema/ImplicitMetadataOperation.scala b/spark/src/main/scala/org/apache/spark/sql/delta/schema/ImplicitMetadataOperation.scala new file mode 100644 index 00000000000..9b108f9715e --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/schema/ImplicitMetadataOperation.scala @@ -0,0 +1,208 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.schema + +import org.apache.spark.sql.delta.skipping.clustering.ClusteredTableUtils +import org.apache.spark.sql.delta.skipping.clustering.temp.ClusterBySpec +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.DomainMetadata +import org.apache.spark.sql.delta.actions.Metadata +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.util.PartitionUtils + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes +import org.apache.spark.sql.types.StructType + +/** + * A trait that writers into Delta can extend to update the schema and/or partitioning of the table. 
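+ *
+ * A minimal, hypothetical sketch of an extending writer:
+ * {{{
+ *   class MyDeltaWriter extends ImplicitMetadataOperation {
+ *     override protected val canMergeSchema: Boolean = true
+ *     override protected val canOverwriteSchema: Boolean = false
+ *     // before committing files, call:
+ *     // updateMetadata(spark, txn, schema, partitionColumns, configuration,
+ *     //                isOverwriteMode = false, rearrangeOnly = false)
+ *   }
+ * }}}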
+ */ +trait ImplicitMetadataOperation extends DeltaLogging { + + import ImplicitMetadataOperation._ + + protected val canMergeSchema: Boolean + protected val canOverwriteSchema: Boolean + + private def normalizePartitionColumns( + spark: SparkSession, + partitionCols: Seq[String], + schema: StructType): Seq[String] = { + partitionCols.map { columnName => + val colMatches = schema.filter(s => SchemaUtils.DELTA_COL_RESOLVER(s.name, columnName)) + if (colMatches.length > 1) { + throw DeltaErrors.ambiguousPartitionColumnException(columnName, colMatches) + } else if (colMatches.isEmpty) { + throw DeltaErrors.partitionColumnNotFoundException(columnName, toAttributes(schema)) + } + colMatches.head.name + } + } + + protected final def updateMetadata( + spark: SparkSession, + txn: OptimisticTransaction, + schema: StructType, + partitionColumns: Seq[String], + configuration: Map[String, String], + isOverwriteMode: Boolean, + rearrangeOnly: Boolean + ): Unit = { + // To support the new column mapping mode, we drop existing metadata on data schema + // so that all the column mapping related properties can be reinitialized in + // OptimisticTransaction.updateMetadata + val dataSchema = + DeltaColumnMapping.dropColumnMappingMetadata(schema.asNullable) + val mergedSchema = mergeSchema(txn, dataSchema, isOverwriteMode, canOverwriteSchema) + val normalizedPartitionCols = + normalizePartitionColumns(spark, partitionColumns, dataSchema) + // Merged schema will contain additional columns at the end + def isNewSchema: Boolean = txn.metadata.schema != mergedSchema + // We need to make sure that the partitioning order and naming is consistent + // if provided. Otherwise we follow existing partitioning + def isNewPartitioning: Boolean = normalizedPartitionCols.nonEmpty && + txn.metadata.partitionColumns != normalizedPartitionCols + def isPartitioningChanged: Boolean = txn.metadata.partitionColumns != normalizedPartitionCols + PartitionUtils.validatePartitionColumn( + mergedSchema, + normalizedPartitionCols, + // Delta is case insensitive regarding internal column naming + caseSensitive = false) + + if (!txn.deltaLog.tableExists) { + if (dataSchema.isEmpty) { + throw DeltaErrors.emptyDataException + } + recordDeltaEvent(txn.deltaLog, "delta.ddl.initializeSchema") + // If this is the first write, configure the metadata of the table. 
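+ // A `rearrangeOnly` write (i.e. one issued with dataChange = false) cannot be used to
+ // create a new table, hence the check below.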
+ if (rearrangeOnly) { + throw DeltaErrors.unexpectedDataChangeException("Create a Delta table") + } + val description = configuration.get("comment").orNull + val cleanedConfs = configuration.filterKeys(_ != "comment").toMap + txn.updateMetadata( + Metadata( + description = description, + schemaString = dataSchema.json, + partitionColumns = normalizedPartitionCols, + configuration = cleanedConfs + , + createdTime = Some(System.currentTimeMillis()))) + } else if (isOverwriteMode && canOverwriteSchema && (isNewSchema || isPartitioningChanged + )) { + // Can define new partitioning in overwrite mode + val newMetadata = txn.metadata.copy( + schemaString = dataSchema.json, + partitionColumns = normalizedPartitionCols + ) + recordDeltaEvent(txn.deltaLog, "delta.ddl.overwriteSchema") + if (rearrangeOnly) { + throw DeltaErrors.unexpectedDataChangeException("Overwrite the Delta table schema or " + + "change the partition schema") + } + txn.updateMetadataForTableOverwrite(newMetadata) + } else if (isNewSchema && canMergeSchema && !isNewPartitioning + ) { + logInfo(s"New merged schema: ${mergedSchema.treeString}") + recordDeltaEvent(txn.deltaLog, "delta.ddl.mergeSchema") + if (rearrangeOnly) { + throw DeltaErrors.unexpectedDataChangeException("Change the Delta table schema") + } + txn.updateMetadata(txn.metadata.copy(schemaString = mergedSchema.json + )) + } else if (isNewSchema || isNewPartitioning + ) { + recordDeltaEvent(txn.deltaLog, "delta.schemaValidation.failure") + val errorBuilder = new MetadataMismatchErrorBuilder + if (isNewSchema) { + errorBuilder.addSchemaMismatch(txn.metadata.schema, dataSchema, txn.metadata.id) + } + if (isNewPartitioning) { + errorBuilder.addPartitioningMismatch(txn.metadata.partitionColumns, normalizedPartitionCols) + } + if (isOverwriteMode) { + errorBuilder.addOverwriteBit() + } + errorBuilder.finalizeAndThrow(spark.sessionState.conf) + } + } + + /** + * Returns a sequence of new DomainMetadata if canUpdateMetadata is true and the operation is + * either create table or replace the whole table (not replaceWhere operation). This is because + * we only update Domain Metadata when creating or replacing table, and replace table for DDL + * and DataFrameWriterV2 are already handled in CreateDeltaTableCommand. In that case, + * canUpdateMetadata is false, so we don't update again. + * + * @param txn [[OptimisticTransaction]] being used to create or replace table. + * @param canUpdateMetadata true if the metadata is not updated yet. + * @param isReplacingTable true if the operation is replace table without replaceWhere option. + * @param clusterBySpecOpt optional ClusterBySpec containing user-specified clustering columns. + */ + protected final def getNewDomainMetadata( + txn: OptimisticTransaction, + canUpdateMetadata: Boolean, + isReplacingTable: Boolean, + clusterBySpecOpt: Option[ClusterBySpec] = None): Seq[DomainMetadata] = { + if (canUpdateMetadata && (!txn.deltaLog.tableExists || isReplacingTable)) { + val newDomainMetadata = Seq.empty[DomainMetadata] ++ + ClusteredTableUtils.getDomainMetadataOptional(clusterBySpecOpt, txn) + if (!txn.deltaLog.tableExists) { + newDomainMetadata + } else { + // Handle domain metadata for replacing a table. 
+ DomainMetadataUtils.handleDomainMetadataForReplaceTable( + txn.snapshot.domainMetadata, newDomainMetadata) + } + } else { + Seq.empty + } + } +} + +object ImplicitMetadataOperation { + + /** + * Merge schemas based on transaction state and delta options + * @param txn Target transaction + * @param dataSchema New data schema + * @param isOverwriteMode Whether we are overwriting + * @param canOverwriteSchema Whether we can overwrite + * @return Merged schema + */ + private[delta] def mergeSchema( + txn: OptimisticTransaction, + dataSchema: StructType, + isOverwriteMode: Boolean, + canOverwriteSchema: Boolean): StructType = { + if (isOverwriteMode && canOverwriteSchema) { + dataSchema + } else { + val fixedTypeColumns = + if (GeneratedColumn.satisfyGeneratedColumnProtocol(txn.protocol)) { + txn.metadata.fixedTypeColumns + } else { + Set.empty[String] + } + SchemaMergingUtils.mergeSchemas( + txn.metadata.schema, + dataSchema, + fixedTypeColumns = fixedTypeColumns) + } + } + +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/schema/InvariantViolationException.scala b/spark/src/main/scala/org/apache/spark/sql/delta/schema/InvariantViolationException.scala new file mode 100644 index 00000000000..b7cca557e18 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/schema/InvariantViolationException.scala @@ -0,0 +1,122 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.schema + +// scalastyle:off import.ordering.noEmptyLine +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.{DeltaThrowable, DeltaThrowableHelper} +import org.apache.spark.sql.delta.constraints.{CharVarcharConstraint, Constraints} +import org.apache.commons.lang3.exception.ExceptionUtils + +import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute + +/** Thrown when the given data doesn't match the rules defined on the table. */ +case class InvariantViolationException(message: String) extends RuntimeException(message) + +/** + * Match a [[SparkException]] and return the root cause Exception if it is a + * InvariantViolationException. 
+ */ +object InnerInvariantViolationException { + def unapply(t: Throwable): Option[InvariantViolationException] = t match { + case s: SparkException => + Option(ExceptionUtils.getRootCause(s)) match { + case Some(i: InvariantViolationException) => Some(i) + case _ => None + } + case _ => None + } +} + +object DeltaInvariantViolationException { + def getNotNullInvariantViolationException(colName: String): DeltaInvariantViolationException = { + new DeltaInvariantViolationException( + errorClass = "DELTA_NOT_NULL_CONSTRAINT_VIOLATED", + messageParameters = Array(colName) + ) + } + + def apply(constraint: Constraints.NotNull): DeltaInvariantViolationException = { + getNotNullInvariantViolationException(UnresolvedAttribute(constraint.column).name) + } + + def getCharVarcharLengthInvariantViolationException( + exprStr: String + ): DeltaInvariantViolationException = { + new DeltaInvariantViolationException( + errorClass = "DELTA_EXCEED_CHAR_VARCHAR_LIMIT", + messageParameters = Array(exprStr) + ) + } + + def getConstraintViolationWithValuesException( + constraintName: String, + sqlStr: String, + valueLines: String + ): DeltaInvariantViolationException = { + new DeltaInvariantViolationException( + errorClass = "DELTA_VIOLATE_CONSTRAINT_WITH_VALUES", + messageParameters = Array(constraintName, sqlStr, valueLines) + ) + } + + /** + * Build an exception to report the current row failed a CHECK constraint. + * + * @param constraint the constraint definition + * @param values a map of full column names to their evaluated values in the failed row + */ + def apply( + constraint: Constraints.Check, + values: Map[String, Any]): DeltaInvariantViolationException = { + if (constraint.name == CharVarcharConstraint.INVARIANT_NAME) { + return getCharVarcharLengthInvariantViolationException(constraint.expression.toString) + } + + // Sort by the column name to generate consistent error messages in Scala 2.12 and 2.13. + val valueLines = values.toSeq.sortBy(_._1).map { + case (column, value) => + s" - $column : $value" + }.mkString("\n") + + getConstraintViolationWithValuesException( + constraint.name, + constraint.expression.sql, + valueLines + ) + } + + /** + * Columns and values in parallel lists as a shim for Java codegen compatibility. + */ + def apply( + constraint: Constraints.Check, + columns: java.util.List[String], + values: java.util.List[Any]): DeltaInvariantViolationException = { + apply(constraint, columns.asScala.zip(values.asScala).toMap) + } +} + +class DeltaInvariantViolationException( + errorClass: String, + messageParameters: Array[String]) + extends InvariantViolationException( + DeltaThrowableHelper.getMessage(errorClass, messageParameters)) with DeltaThrowable { + override def getErrorClass: String = errorClass +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaMergingUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaMergingUtils.scala new file mode 100644 index 00000000000..6948d87d02c --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaMergingUtils.scala @@ -0,0 +1,374 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.schema + +import java.util.Locale + +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.DeltaAnalysisException + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCoercion, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.plans.logical.DeltaMergeInto +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.types.{ArrayType, ByteType, DataType, DecimalType, IntegerType, MapType, NullType, ShortType, StructField, StructType} + +/** + * Utils to merge table schema with data schema. + * This is split from SchemaUtils, because finalSchema is introduced into DeltaMergeInto, + * and resolving the final schema is now part of [[DeltaMergeInto.resolveReferencesAndSchema]]. + */ +object SchemaMergingUtils { + + val DELTA_COL_RESOLVER: (String, String) => Boolean = + org.apache.spark.sql.catalyst.analysis.caseInsensitiveResolution + + /** + * Returns pairs of (full column name path, field) in this schema as a list. For example, a schema + * like: + * | - a + * | | - 1 + * | | - 2 + * | - b + * | - c + * | | - `foo.bar` + * | | - 3 + * will return [ + * ([a], ), ([a, 1], ), ([a, 2], ), ([b], ), + * ([c], ), ([c, foo.bar], ), ([c, foo.bar, 3], ) + * ] + */ + def explode(schema: StructType): Seq[(Seq[String], StructField)] = { + def recurseIntoComplexTypes(complexType: DataType): Seq[(Seq[String], StructField)] = { + complexType match { + case s: StructType => explode(s) + case a: ArrayType => recurseIntoComplexTypes(a.elementType) + .map { case (path, field) => (Seq("element") ++ path, field) } + case m: MapType => + recurseIntoComplexTypes(m.keyType) + .map { case (path, field) => (Seq("key") ++ path, field) } ++ + recurseIntoComplexTypes(m.valueType) + .map { case (path, field) => (Seq("value") ++ path, field) } + case _ => Nil + } + } + + schema.flatMap { + case f @ StructField(name, s: StructType, _, _) => + Seq((Seq(name), f)) ++ + explode(s).map { case (path, field) => (Seq(name) ++ path, field) } + case f @ StructField(name, a: ArrayType, _, _) => + Seq((Seq(name), f)) ++ + recurseIntoComplexTypes(a).map { case (path, field) => (Seq(name) ++ path, field) } + case f @ StructField(name, m: MapType, _, _) => + Seq((Seq(name), f)) ++ + recurseIntoComplexTypes(m).map { case (path, field) => (Seq(name) ++ path, field) } + case f => (Seq(f.name), f) :: Nil + } + } + + /** + * Returns all column names in this schema as a flat list. For example, a schema like: + * | - a + * | | - 1 + * | | - 2 + * | - b + * | - c + * | | - nest + * | | - 3 + * will get flattened to: "a", "a.1", "a.2", "b", "c", "c.nest", "c.nest.3" + */ + def explodeNestedFieldNames(schema: StructType): Seq[String] = { + explode(schema).map { case (path, _) => path }.map(UnresolvedAttribute.apply(_).name) + } + + /** + * Checks if input column names have duplicate identifiers. This throws an exception if + * the duplication exists. 
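+ * Nested field names are exploded first, so duplicates inside structs are caught as well.
+ * An illustrative call (hypothetical schema) that throws `DELTA_DUPLICATE_COLUMNS_FOUND`
+ * because `id` and `ID` collide case-insensitively:
+ * {{{
+ *   checkColumnNameDuplication(StructType.fromDDL("id INT, ID STRING"), "in the data to save")
+ * }}}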
+ * + * @param schema the schema to check for duplicates + * @param colType column type name, used in an exception message + * @param caseSensitive Whether we should exception if two columns have casing conflicts. This + * should default to false for Delta. + */ + def checkColumnNameDuplication( + schema: StructType, + colType: String, + caseSensitive: Boolean = false): Unit = { + val columnNames = explodeNestedFieldNames(schema) + // scalastyle:off caselocale + val names = if (caseSensitive) { + columnNames + } else { + columnNames.map(_.toLowerCase) + } + // scalastyle:on caselocale + if (names.distinct.length != names.length) { + val duplicateColumns = names.groupBy(identity).collect { + case (x, ys) if ys.length > 1 => s"$x" + } + throw new DeltaAnalysisException( + errorClass = "DELTA_DUPLICATE_COLUMNS_FOUND", + messageParameters = Array(colType, duplicateColumns.mkString(", "))) + } + } + + /** + * Check whether we can write to the Delta table, which has `tableSchema`, using a query that has + * `dataSchema`. Our rules are that: + * - `dataSchema` may be missing columns or have additional columns + * - We don't trust the nullability in `dataSchema`. Assume fields are nullable. + * - We only allow nested StructType expansions. For all other complex types, we check for + * strict equality + * - `dataSchema` can't have duplicate column names. Columns that only differ by case are also + * not allowed. + * The following merging strategy is + * applied: + * - The name of the current field is used. + * - The data types are merged by calling this function. + * - We respect the current field's nullability. + * - The metadata is current field's metadata. + * + * Schema merging occurs in a case insensitive manner. Hence, column names that only differ + * by case are not accepted in the `dataSchema`. + * + * @param tableSchema The current schema of the table. + * @param dataSchema The schema of the new data being written. + * @param allowImplicitConversions Whether to allow Spark SQL implicit conversions. By default, + * we merge according to Parquet write compatibility - for + * example, an integer type data field will throw when merged to a + * string type table field, because int and string aren't stored + * the same way in Parquet files. With this flag enabled, the + * merge will succeed, because once we get to write time Spark SQL + * will support implicitly converting the int to a string. + * @param keepExistingType Whether to keep existing types instead of trying to merge types. + * @param fixedTypeColumns The set of columns whose type should not be changed in any case. + * @param caseSensitive Whether we should keep field mapping case-sensitively. + * This should default to false for Delta, which is case insensitive. + */ + def mergeSchemas( + tableSchema: StructType, + dataSchema: StructType, + allowImplicitConversions: Boolean = false, + keepExistingType: Boolean = false, + fixedTypeColumns: Set[String] = Set.empty, + caseSensitive: Boolean = false): StructType = { + checkColumnNameDuplication(dataSchema, "in the data to save", caseSensitive) + def merge( + current: DataType, + update: DataType, + fixedTypeColumnsSet: Set[String] = Set.empty): DataType = { + (current, update) match { + case (StructType(currentFields), StructType(updateFields)) => + // Merge existing fields. 
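+ // Fields present in both schemas are merged recursively below; fields that exist only in
+ // the table schema are retained, and fields that exist only in the data schema are
+ // appended at the end of the struct.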
+ val updateFieldMap = toFieldMap(updateFields, caseSensitive) + val updatedCurrentFields = currentFields.map { currentField => + updateFieldMap.get(currentField.name) match { + case Some(updateField) => + if (fixedTypeColumnsSet.contains(currentField.name.toLowerCase(Locale.ROOT)) && + !equalsIgnoreCaseAndCompatibleNullability( + currentField.dataType, updateField.dataType)) { + throw new DeltaAnalysisException( + errorClass = "DELTA_GENERATED_COLUMNS_DATA_TYPE_MISMATCH", + messageParameters = Array(currentField.name, currentField.dataType.sql, + updateField.dataType.sql) + ) + } + try { + StructField( + currentField.name, + merge(currentField.dataType, updateField.dataType), + currentField.nullable, + currentField.metadata) + } catch { + case NonFatal(e) => + throw new AnalysisException(s"Failed to merge fields '${currentField.name}' " + + s"and '${updateField.name}'. " + e.getMessage) + } + case None => + // Retain the old field. + currentField + } + } + + // Identify the newly added fields. + val nameToFieldMap = toFieldMap(currentFields, caseSensitive) + val newFields = updateFields.filterNot(f => nameToFieldMap.contains(f.name)) + + // Create the merged struct, the new fields are appended at the end of the struct. + StructType(updatedCurrentFields ++ newFields) + case (ArrayType(currentElementType, currentContainsNull), + ArrayType(updateElementType, _)) => + ArrayType( + merge(currentElementType, updateElementType), + currentContainsNull) + case (MapType(currentKeyType, currentElementType, currentContainsNull), + MapType(updateKeyType, updateElementType, _)) => + MapType( + merge(currentKeyType, updateKeyType), + merge(currentElementType, updateElementType), + currentContainsNull) + + // Simply keeps the existing type for primitive types + case (current, update) if keepExistingType => current + + // If implicit conversions are allowed, that means we can use any valid implicit cast to + // perform the merge. + case (current, update) + if allowImplicitConversions && typeForImplicitCast(update, current).isDefined => + typeForImplicitCast(update, current).get + + case (DecimalType.Fixed(leftPrecision, leftScale), + DecimalType.Fixed(rightPrecision, rightScale)) => + if ((leftPrecision == rightPrecision) && (leftScale == rightScale)) { + current + } else if ((leftPrecision != rightPrecision) && (leftScale != rightScale)) { + throw new DeltaAnalysisException( + errorClass = "DELTA_MERGE_INCOMPATIBLE_DECIMAL_TYPE", + messageParameters = Array( + s"precision $leftPrecision and $rightPrecision & scale $leftScale and $rightScale")) + } else if (leftPrecision != rightPrecision) { + throw new DeltaAnalysisException( + errorClass = "DELTA_MERGE_INCOMPATIBLE_DECIMAL_TYPE", + messageParameters = Array(s"precision $leftPrecision and $rightPrecision")) + } else { + throw new DeltaAnalysisException( + errorClass = "DELTA_MERGE_INCOMPATIBLE_DECIMAL_TYPE", + messageParameters = Array(s"scale $leftScale and $rightScale")) + } + case _ if current == update => + current + + // Parquet physically stores ByteType, ShortType and IntType as IntType, so when a parquet + // column is of one of these three types, you can read this column as any of these three + // types. Since Parquet doesn't complain, we should also allow upcasting among these + // three types when merging schemas. 
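+ // In each case below the wider of the two types wins, so existing values never lose range.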
+ case (ByteType, ShortType) => ShortType + case (ByteType, IntegerType) => IntegerType + + case (ShortType, ByteType) => ShortType + case (ShortType, IntegerType) => IntegerType + + case (IntegerType, ShortType) => IntegerType + case (IntegerType, ByteType) => IntegerType + + case (NullType, _) => + update + case (_, NullType) => + current + case _ => + throw new AnalysisException( + s"Failed to merge incompatible data types $current and $update") + } + } + merge(tableSchema, dataSchema, fixedTypeColumns.map(_.toLowerCase(Locale.ROOT))) + .asInstanceOf[StructType] + } + + /** + * Try to cast the source data type to the target type, returning the final type or None if + * there's no valid cast. + */ + private def typeForImplicitCast(sourceType: DataType, targetType: DataType): Option[DataType] = { + TypeCoercion.implicitCast(Literal.default(sourceType), targetType).map(_.dataType) + } + + def toFieldMap( + fields: Seq[StructField], + caseSensitive: Boolean = false): Map[String, StructField] = { + val fieldMap = fields.map(field => field.name -> field).toMap + if (caseSensitive) { + fieldMap + } else { + CaseInsensitiveMap(fieldMap) + } + } + + /** + * Transform (nested) columns in a schema. + * + * @param schema to transform. + * @param tf function to apply. + * @return the transformed schema. + */ + def transformColumns( + schema: StructType)( + tf: (Seq[String], StructField, Resolver) => StructField): StructType = { + def transform[E <: DataType](path: Seq[String], dt: E): E = { + val newDt = dt match { + case StructType(fields) => + StructType(fields.map { field => + val newField = tf(path, field, DELTA_COL_RESOLVER) + // maintain the old name as we recurse into the subfields + newField.copy(dataType = transform(path :+ field.name, newField.dataType)) + }) + case ArrayType(elementType, containsNull) => + ArrayType(transform(path :+ "element", elementType), containsNull) + case MapType(keyType, valueType, valueContainsNull) => + MapType( + transform(path :+ "key", keyType), + transform(path :+ "value", valueType), + valueContainsNull) + case other => other + } + newDt.asInstanceOf[E] + } + transform(Seq.empty, schema) + } + + /** + * + * Taken from DataType + * + * Compares two types, ignoring compatible nullability of ArrayType, MapType, StructType, and + * ignoring case sensitivity of field names in StructType. + * + * Compatible nullability is defined as follows: + * - If `from` and `to` are ArrayTypes, `from` has a compatible nullability with `to` + * if and only if `to.containsNull` is true, or both of `from.containsNull` and + * `to.containsNull` are false. + * - If `from` and `to` are MapTypes, `from` has a compatible nullability with `to` + * if and only if `to.valueContainsNull` is true, or both of `from.valueContainsNull` and + * `to.valueContainsNull` are false. + * - If `from` and `to` are StructTypes, `from` has a compatible nullability with `to` + * if and only if for all every pair of fields, `to.nullable` is true, or both + * of `fromField.nullable` and `toField.nullable` are false. 
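+ *
+ * For example (illustrative):
+ * {{{
+ *   equalsIgnoreCaseAndCompatibleNullability(
+ *     ArrayType(IntegerType, containsNull = false),
+ *     ArrayType(IntegerType, containsNull = true))   // true
+ *   equalsIgnoreCaseAndCompatibleNullability(
+ *     ArrayType(IntegerType, containsNull = true),
+ *     ArrayType(IntegerType, containsNull = false))  // false
+ * }}}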
+ */ + def equalsIgnoreCaseAndCompatibleNullability(from: DataType, to: DataType): Boolean = { + (from, to) match { + case (ArrayType(fromElement, fn), ArrayType(toElement, tn)) => + (tn || !fn) && equalsIgnoreCaseAndCompatibleNullability(fromElement, toElement) + + case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) => + (tn || !fn) && + equalsIgnoreCaseAndCompatibleNullability(fromKey, toKey) && + equalsIgnoreCaseAndCompatibleNullability(fromValue, toValue) + + case (StructType(fromFields), StructType(toFields)) => + fromFields.length == toFields.length && + fromFields.zip(toFields).forall { case (fromField, toField) => + fromField.name.equalsIgnoreCase(toField.name) && + (toField.nullable || !fromField.nullable) && + equalsIgnoreCaseAndCompatibleNullability(fromField.dataType, toField.dataType) + } + + case (fromDataType, toDataType) => fromDataType == toDataType + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala new file mode 100644 index 00000000000..8e2d1f1b92e --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala @@ -0,0 +1,1261 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.schema + +// scalastyle:off import.ordering.noEmptyLine +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.{DeltaAnalysisException, DeltaColumnMappingMode, DeltaErrors, DeltaLog, GeneratedColumn, NoMapping, TimestampNTZTableFeature} +import org.apache.spark.sql.delta.actions.Protocol +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaMergingUtils._ +import org.apache.spark.sql.delta.sources.DeltaSourceUtils.GENERATION_EXPRESSION_METADATA_KEY +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.functions.{col, struct} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +object SchemaUtils extends DeltaLogging { + // We use case insensitive resolution while writing into Delta + val DELTA_COL_RESOLVER: (String, String) => Boolean = + org.apache.spark.sql.catalyst.analysis.caseInsensitiveResolution + private val ARRAY_ELEMENT_INDEX = 0 + private val MAP_KEY_INDEX = 0 + private val MAP_VALUE_INDEX = 1 + + /** + * Finds `StructField`s that match a given check `f`. Returns the path to the column, and the + * field. 
+ * + * @param checkComplexTypes While `StructType` is also a complex type, since we're returning + * StructFields, we definitely recurse into StructTypes. This flag + * defines whether we should recurse into ArrayType and MapType. + */ + def filterRecursively( + schema: StructType, + checkComplexTypes: Boolean)(f: StructField => Boolean): Seq[(Seq[String], StructField)] = { + def recurseIntoComplexTypes( + complexType: DataType, + columnStack: Seq[String]): Seq[(Seq[String], StructField)] = complexType match { + case s: StructType => + s.fields.flatMap { sf => + val includeLevel = if (f(sf)) Seq((columnStack, sf)) else Nil + includeLevel ++ recurseIntoComplexTypes(sf.dataType, columnStack :+ sf.name) + } + case a: ArrayType if checkComplexTypes => + recurseIntoComplexTypes(a.elementType, columnStack :+ "element") + case m: MapType if checkComplexTypes => + recurseIntoComplexTypes(m.keyType, columnStack :+ "key") ++ + recurseIntoComplexTypes(m.valueType, columnStack :+ "value") + case _ => Nil + } + + recurseIntoComplexTypes(schema, Nil) + } + + /** Copied over from DataType for visibility reasons. */ + def typeExistsRecursively(dt: DataType)(f: DataType => Boolean): Boolean = dt match { + case s: StructType => + f(s) || s.fields.exists(field => typeExistsRecursively(field.dataType)(f)) + case a: ArrayType => + f(a) || typeExistsRecursively(a.elementType)(f) + case m: MapType => + f(m) || typeExistsRecursively(m.keyType)(f) || typeExistsRecursively(m.valueType)(f) + case other => + f(other) + } + + def findAnyTypeRecursively(dt: DataType)(f: DataType => Boolean): Option[DataType] = dt match { + case s: StructType => + Some(s).filter(f).orElse(s.fields + .find(field => findAnyTypeRecursively(field.dataType)(f).nonEmpty).map(_.dataType)) + case a: ArrayType => + Some(a).filter(f).orElse(findAnyTypeRecursively(a.elementType)(f)) + case m: MapType => + Some(m).filter(f).orElse(findAnyTypeRecursively(m.keyType)(f)) + .orElse(findAnyTypeRecursively(m.valueType)(f)) + case other => + Some(other).filter(f) + } + + /** Turns the data types to nullable in a recursive manner for nested columns. */ + def typeAsNullable(dt: DataType): DataType = dt match { + case s: StructType => s.asNullable + case a @ ArrayType(s: StructType, _) => a.copy(s.asNullable, containsNull = true) + case a: ArrayType => a.copy(containsNull = true) + case m @ MapType(s1: StructType, s2: StructType, _) => + m.copy(s1.asNullable, s2.asNullable, valueContainsNull = true) + case m @ MapType(s1: StructType, _, _) => + m.copy(keyType = s1.asNullable, valueContainsNull = true) + case m @ MapType(_, s2: StructType, _) => + m.copy(valueType = s2.asNullable, valueContainsNull = true) + case other => other + } + + /** + * Drops null types from the DataFrame if they exist. We don't have easy ways of generating types + * such as MapType and ArrayType, therefore if these types contain NullType in their elements, + * we will throw an AnalysisException. 
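+   *
+   * A minimal sketch of the intended behaviour (hypothetical DataFrame, not part of the original
+   * change):
+   * {{{
+   *   // df has schema <a INT, b VOID>; the result keeps only column `a`.
+   *   val cleaned = dropNullTypeColumns(df)
+   * }}}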
+ */ + def dropNullTypeColumns(df: DataFrame): DataFrame = { + val schema = df.schema + if (!typeExistsRecursively(schema)(_.isInstanceOf[NullType])) return df + def generateSelectExpr(sf: StructField, nameStack: Seq[String]): Column = sf.dataType match { + case st: StructType => + val nested = st.fields.flatMap { f => + if (f.dataType.isInstanceOf[NullType]) { + None + } else { + Some(generateSelectExpr(f, nameStack :+ sf.name)) + } + } + struct(nested: _*).alias(sf.name) + case a: ArrayType if typeExistsRecursively(a)(_.isInstanceOf[NullType]) => + val colName = UnresolvedAttribute.apply(nameStack :+ sf.name).name + throw new DeltaAnalysisException( + errorClass = "DELTA_COMPLEX_TYPE_COLUMN_CONTAINS_NULL_TYPE", + messageParameters = Array(colName, "ArrayType")) + case m: MapType if typeExistsRecursively(m)(_.isInstanceOf[NullType]) => + val colName = UnresolvedAttribute.apply(nameStack :+ sf.name).name + throw new DeltaAnalysisException( + errorClass = "DELTA_COMPLEX_TYPE_COLUMN_CONTAINS_NULL_TYPE", + messageParameters = Array(colName, "NullType")) + case _ => + val colName = UnresolvedAttribute.apply(nameStack :+ sf.name).name + col(colName).alias(sf.name) + } + + val selectExprs = schema.flatMap { f => + if (f.dataType.isInstanceOf[NullType]) None else Some(generateSelectExpr(f, Nil)) + } + df.select(selectExprs: _*) + } + + /** + * Drops null types from the schema if they exist. We do not recurse into Array and Map types, + * because we do not expect null types to exist in those columns, as Delta doesn't allow it during + * writes. + */ + def dropNullTypeColumns(schema: StructType): StructType = { + def recurseAndRemove(struct: StructType): Seq[StructField] = { + struct.flatMap { + case sf @ StructField(_, s: StructType, _, _) => + Some(sf.copy(dataType = StructType(recurseAndRemove(s)))) + case StructField(_, n: NullType, _, _) => None + case other => Some(other) + } + } + StructType(recurseAndRemove(schema)) + } + + /** + * Returns the name of the first column/field that has null type (void). + */ + def findNullTypeColumn(schema: StructType): Option[String] = { + // Helper method to recursively check nested structs. + def findNullTypeColumnRec(s: StructType, nameStack: Seq[String]): Option[String] = { + val nullFields = s.flatMap { + case StructField(name, n: NullType, _, _) => Some((nameStack :+ name).mkString(".")) + case StructField(name, s: StructType, _, _) => findNullTypeColumnRec(s, nameStack :+ name) + // Note that we don't recursively check Array and Map types because NullTypes are already + // not allowed (see 'dropNullTypeColumns'). + case _ => None + } + return nullFields.headOption + } + + if (typeExistsRecursively(schema)(_.isInstanceOf[NullType])) { + findNullTypeColumnRec(schema, Seq.empty) + } else { + None + } + } + + /** + * Rewrite the query field names according to the table schema. This method assumes that all + * schema validation checks have been made and this is the last operation before writing into + * Delta. + */ + def normalizeColumnNames(baseSchema: StructType, data: Dataset[_]): DataFrame = { + val dataSchema = data.schema + val dataFields = explodeNestedFieldNames(dataSchema).toSet + val tableFields = explodeNestedFieldNames(baseSchema).toSet + if (dataFields.subsetOf(tableFields)) { + data.toDF() + } else { + // Allow the same shortcut logic (as the above `if` stmt) if the only extra fields are CDC + // metadata fields. 
+ val nonCdcFields = dataFields.filterNot { f => + f == CDCReader.CDC_PARTITION_COL || f == CDCReader.CDC_TYPE_COLUMN_NAME + } + if (nonCdcFields.subsetOf(tableFields)) { + return data.toDF() + } + // Check that nested columns don't need renaming. We can't handle that right now + val topLevelDataFields = dataFields.map(UnresolvedAttribute.parseAttributeName(_).head) + if (topLevelDataFields.subsetOf(tableFields)) { + val columnsThatNeedRenaming = dataFields -- tableFields + throw DeltaErrors.nestedFieldsNeedRename(columnsThatNeedRenaming, baseSchema) + } + + val baseFields = toFieldMap(baseSchema) + val aliasExpressions = dataSchema.map { field => + val originalCase: String = baseFields.get(field.name) match { + case Some(original) => original.name + // This is a virtual partition column used for doing CDC writes. It's not actually + // in the table schema. + case None if field.name == CDCReader.CDC_TYPE_COLUMN_NAME || + field.name == CDCReader.CDC_PARTITION_COL => field.name + case None => + throw DeltaErrors.cannotResolveColumn(field.name, baseSchema) + } + if (originalCase != field.name) { + fieldToColumn(field).as(originalCase) + } else { + fieldToColumn(field) + } + } + data.select(aliasExpressions: _*) + } + } + + /** + * A helper function to check if partition columns are the same. + * This function only checks for partition column names. + * Please use with other schema check functions for detecting type change etc. + */ + def isPartitionCompatible( + newPartitionColumns: Seq[String] = Seq.empty, + oldPartitionColumns: Seq[String] = Seq.empty): Boolean = { + newPartitionColumns == oldPartitionColumns + } + + /** + * As the Delta snapshots update, the schema may change as well. This method defines whether the + * new schema of a Delta table can be used with a previously analyzed LogicalPlan. Our + * rules are to return false if: + * - Dropping any column that was present in the existing schema, if not allowMissingColumns + * - Any change of datatype + * - Change of partition columns. Although analyzed LogicalPlan is not changed, + * physical structure of data is changed and thus is considered not read compatible. + * - If `forbidTightenNullability` = true: + * - Forbids tightening the nullability (existing nullable=true -> read nullable=false) + * - Typically Used when the existing schema refers to the schema of written data, such as + * when a Delta streaming source reads a schema change (existingSchema) which + * has nullable=true, using the latest schema which has nullable=false, so we should not + * project nulls from the data into the non-nullable read schema. + * - Otherwise: + * - Forbids relaxing the nullability (existing nullable=false -> read nullable=true) + * - Typically Used when the read schema refers to the schema of written data, such as during + * Delta scan, the latest schema during execution (readSchema) has nullable=true but during + * analysis phase the schema (existingSchema) was nullable=false, so we should not project + * nulls from the later data onto a non-nullable schema analyzed in the past. 
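+   *
+   * Illustrative example (hypothetical schemas, not part of the original change):
+   * {{{
+   *   val existing = new StructType().add("a", IntegerType).add("b", StringType)
+   *   val read = new StructType().add("a", IntegerType)
+   *   isReadCompatible(existing, read)                              // false: column `b` was dropped
+   *   isReadCompatible(existing, read, allowMissingColumns = true)  // true
+   * }}}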
+ */ + def isReadCompatible( + existingSchema: StructType, + readSchema: StructType, + forbidTightenNullability: Boolean = false, + allowMissingColumns: Boolean = false, + newPartitionColumns: Seq[String] = Seq.empty, + oldPartitionColumns: Seq[String] = Seq.empty): Boolean = { + + def isNullabilityCompatible(existingNullable: Boolean, readNullable: Boolean): Boolean = { + if (forbidTightenNullability) { + readNullable || !existingNullable + } else { + existingNullable || !readNullable + } + } + + def isDatatypeReadCompatible(existing: DataType, newtype: DataType): Boolean = { + (existing, newtype) match { + case (e: StructType, n: StructType) => + isReadCompatible(e, n, forbidTightenNullability) + case (e: ArrayType, n: ArrayType) => + // if existing elements are non-nullable, so should be the new element + isNullabilityCompatible(e.containsNull, n.containsNull) && + isDatatypeReadCompatible(e.elementType, n.elementType) + case (e: MapType, n: MapType) => + // if existing value is non-nullable, so should be the new value + isNullabilityCompatible(e.valueContainsNull, n.valueContainsNull) && + isDatatypeReadCompatible(e.keyType, n.keyType) && + isDatatypeReadCompatible(e.valueType, n.valueType) + case (a, b) => a == b + } + } + + def isStructReadCompatible(existing: StructType, newtype: StructType): Boolean = { + val existingFields = toFieldMap(existing) + // scalastyle:off caselocale + val existingFieldNames = existing.fieldNames.map(_.toLowerCase).toSet + assert(existingFieldNames.size == existing.length, + "Delta tables don't allow field names that only differ by case") + val newFields = newtype.fieldNames.map(_.toLowerCase).toSet + assert(newFields.size == newtype.length, + "Delta tables don't allow field names that only differ by case") + // scalastyle:on caselocale + + if (!allowMissingColumns && + !(existingFieldNames.subsetOf(newFields) && + isPartitionCompatible(newPartitionColumns, oldPartitionColumns))) { + // Dropped a column that was present in the DataFrame schema + return false + } + newtype.forall { newField => + // new fields are fine, they just won't be returned + existingFields.get(newField.name).forall { existingField => + // we know the name matches modulo case - now verify exact match + (existingField.name == newField.name + // if existing value is non-nullable, so should be the new value + && isNullabilityCompatible(existingField.nullable, newField.nullable) + // and the type of the field must be compatible, too + && isDatatypeReadCompatible(existingField.dataType, newField.dataType)) + } + } + } + + isStructReadCompatible(existingSchema, readSchema) + } + + /** + * Compare an existing schema to a specified new schema and + * return a message describing the first difference found, if any: + * - different field name or datatype + * - different metadata + */ + def reportDifferences(existingSchema: StructType, specifiedSchema: StructType): Seq[String] = { + + def canOrNot(can: Boolean) = if (can) "can" else "can not" + def isOrNon(b: Boolean) = if (b) "" else "non-" + + def missingFieldsMessage(fields: Set[String]) : String = { + s"Specified schema is missing field(s): ${fields.mkString(", ")}" + } + def additionalFieldsMessage(fields: Set[String]) : String = { + s"Specified schema has additional field(s): ${fields.mkString(", ")}" + } + def fieldNullabilityMessage(field: String, specified: Boolean, existing: Boolean) : String = { + s"Field $field is ${isOrNon(specified)}nullable in specified " + + s"schema but ${isOrNon(existing)}nullable in existing schema." 
+ } + def arrayNullabilityMessage(field: String, specified: Boolean, existing: Boolean) : String = { + s"Array field $field ${canOrNot(specified)} contain null in specified schema " + + s"but ${canOrNot(existing)} in existing schema" + } + def valueNullabilityMessage(field: String, specified: Boolean, existing: Boolean) : String = { + s"Map field $field ${canOrNot(specified)} contain null values in specified schema " + + s"but ${canOrNot(existing)} in existing schema" + } + def removeGenerationExpressionMetadata(metadata: Metadata): Metadata = { + new MetadataBuilder() + .withMetadata(metadata) + .remove(GENERATION_EXPRESSION_METADATA_KEY) + .build() + } + def metadataDifferentMessage(field: String, specified: Metadata, existing: Metadata) + : String = { + val specifiedGenerationExpr = GeneratedColumn.getGenerationExpressionStr(specified) + val existingGenerationExpr = GeneratedColumn.getGenerationExpressionStr(existing) + var metadataDiffMessage = "" + if (specifiedGenerationExpr != existingGenerationExpr) { + metadataDiffMessage += + s"""Specified generation expression for field $field is different from existing schema: + |Specified: ${specifiedGenerationExpr.getOrElse("")} + |Existing: ${existingGenerationExpr.getOrElse("")}""".stripMargin + } + val specifiedMetadataWithoutGenerationExpr = removeGenerationExpressionMetadata(specified) + val existingMetadataWithoutGenerationExpr = removeGenerationExpressionMetadata(existing) + if (specifiedMetadataWithoutGenerationExpr != existingMetadataWithoutGenerationExpr) { + if (metadataDiffMessage.nonEmpty) metadataDiffMessage += "\n" + metadataDiffMessage += + s"""Specified metadata for field $field is different from existing schema: + |Specified: $specifiedMetadataWithoutGenerationExpr + |Existing: $existingMetadataWithoutGenerationExpr""".stripMargin + } + metadataDiffMessage + } + def typeDifferenceMessage(field: String, specified: DataType, existing: DataType) + : String = { + s"""Specified type for $field is different from existing schema: + |Specified: ${specified.typeName} + |Existing: ${existing.typeName}""".stripMargin + } + + // prefix represents the nested field(s) containing this schema + def structDifference(existing: StructType, specified: StructType, prefix: String) + : Seq[String] = { + + // 1. ensure set of fields is the same + val existingFieldNames = existing.fieldNames.toSet + val specifiedFieldNames = specified.fieldNames.toSet + + val missingFields = existingFieldNames diff specifiedFieldNames + val missingFieldsDiffs = + if (missingFields.isEmpty) Nil + else Seq(missingFieldsMessage(missingFields.map(prefix + _))) + + val extraFields = specifiedFieldNames diff existingFieldNames + val extraFieldsDiffs = + if (extraFields.isEmpty) Nil + else Seq(additionalFieldsMessage(extraFields.map(prefix + _))) + + // 2. 
for each common field, ensure it has the same type and metadata + val existingFields = toFieldMap(existing) + val specifiedFields = toFieldMap(specified) + val fieldsDiffs = (existingFieldNames intersect specifiedFieldNames).flatMap( + (name: String) => fieldDifference(existingFields(name), specifiedFields(name), prefix)) + + missingFieldsDiffs ++ extraFieldsDiffs ++ fieldsDiffs + } + + def fieldDifference(existing: StructField, specified: StructField, prefix: String) + : Seq[String] = { + + val name = s"$prefix${existing.name}" + val nullabilityDiffs = + if (existing.nullable == specified.nullable) Nil + else Seq(fieldNullabilityMessage(s"$name", specified.nullable, existing.nullable)) + val metadataDiffs = + if (existing.metadata == specified.metadata) Nil + else Seq(metadataDifferentMessage(s"$name", specified.metadata, existing.metadata)) + val typeDiffs = + typeDifference(existing.dataType, specified.dataType, name) + + nullabilityDiffs ++ metadataDiffs ++ typeDiffs + } + + def typeDifference(existing: DataType, specified: DataType, field: String) + : Seq[String] = { + + (existing, specified) match { + case (e: StructType, s: StructType) => structDifference(e, s, s"$field.") + case (e: ArrayType, s: ArrayType) => arrayDifference(e, s, s"$field[]") + case (e: MapType, s: MapType) => mapDifference(e, s, s"$field") + case (e, s) if e != s => Seq(typeDifferenceMessage(field, s, e)) + case _ => Nil + } + } + + def arrayDifference(existing: ArrayType, specified: ArrayType, field: String): Seq[String] = { + + val elementDiffs = + typeDifference(existing.elementType, specified.elementType, field) + val nullabilityDiffs = + if (existing.containsNull == specified.containsNull) Nil + else Seq(arrayNullabilityMessage(field, specified.containsNull, existing.containsNull)) + + elementDiffs ++ nullabilityDiffs + } + + def mapDifference(existing: MapType, specified: MapType, field: String) : Seq[String] = { + + val keyDiffs = + typeDifference(existing.keyType, specified.keyType, s"$field[key]") + val valueDiffs = + typeDifference(existing.valueType, specified.valueType, s"$field[value]") + val nullabilityDiffs = + if (existing.valueContainsNull == specified.valueContainsNull) Nil + else Seq( + valueNullabilityMessage(field, specified.valueContainsNull, existing.valueContainsNull)) + + keyDiffs ++ valueDiffs ++ nullabilityDiffs + } + + structDifference( + existingSchema, + CharVarcharUtils.replaceCharVarcharWithStringInSchema(specifiedSchema), + "" + ) + } + + /** + * Copied verbatim from Apache Spark. + * + * Returns a field in this struct and its child structs, case insensitively. This is slightly less + * performant than the case sensitive version. + * + * If includeCollections is true, this will return fields that are nested in maps and arrays. + * + * @param fieldNames The path to the field, in order from the root. For example, the column + * nested.a.b.c would be Seq("nested", "a", "b", "c"). 
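+   *
+   * Illustrative example (hypothetical schema, not part of the original change):
+   * {{{
+   *   // schema: <outer STRUCT<Inner: INT>>
+   *   findNestedFieldIgnoreCase(schema, Seq("OUTER", "inner"))
+   *   // => Some(StructField("Inner", IntegerType, true))
+   * }}}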
+ */ + def findNestedFieldIgnoreCase( + schema: StructType, + fieldNames: Seq[String], + includeCollections: Boolean = false): Option[StructField] = { + + @scala.annotation.tailrec + def findRecursively( + dataType: DataType, + fieldNames: Seq[String], + includeCollections: Boolean): Option[StructField] = { + + (fieldNames, dataType, includeCollections) match { + case (Seq(fieldName, names @ _*), struct: StructType, _) => + val field = struct.find(_.name.equalsIgnoreCase(fieldName)) + if (names.isEmpty || field.isEmpty) { + field + } else { + findRecursively(field.get.dataType, names, includeCollections) + } + + case (_, _, false) => None // types nested in maps and arrays are not used + + case (Seq("key"), MapType(keyType, _, _), true) => + // return the key type as a struct field to include nullability + Some(StructField("key", keyType, nullable = false)) + + case (Seq("key", names @ _*), MapType(keyType, _, _), true) => + findRecursively(keyType, names, includeCollections) + + case (Seq("value"), MapType(_, valueType, isNullable), true) => + // return the value type as a struct field to include nullability + Some(StructField("value", valueType, nullable = isNullable)) + + case (Seq("value", names @ _*), MapType(_, valueType, _), true) => + findRecursively(valueType, names, includeCollections) + + case (Seq("element"), ArrayType(elementType, isNullable), true) => + // return the element type as a struct field to include nullability + Some(StructField("element", elementType, nullable = isNullable)) + + case (Seq("element", names @ _*), ArrayType(elementType, _), true) => + findRecursively(elementType, names, includeCollections) + + case _ => + None + } + } + + findRecursively(schema, fieldNames, includeCollections) + } + + /** + * Returns the path of the given column in `schema` as a list of ordinals (0-based), each value + * representing the position at the current nesting level starting from the root. + * + * For ArrayType: accessing the array's element adds a position 0 to the position list. + * e.g. accessing a.element.y would have the result -> Seq(..., positionOfA, 0, positionOfY) + * + * For MapType: accessing the map's key adds a position 0 to the position list. + * e.g. accessing m.key.y would have the result -> Seq(..., positionOfM, 0, positionOfY) + * + * For MapType: accessing the map's value adds a position 1 to the position list. + * e.g. accessing m.key.y would have the result -> Seq(..., positionOfM, 1, positionOfY) + * + * @param column The column to search for in the given struct. If the length of `column` is + * greater than 1, we expect to enter a nested field. + * @param schema The current struct we are looking at. + * @param resolver The resolver to find the column. 
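+   *
+   * Illustrative example (hypothetical schema, not part of the original change):
+   * {{{
+   *   // schema: <a INT, m MAP<STRING, STRUCT<y: INT>>>
+   *   findColumnPosition(Seq("m", "value", "y"), schema)  // => Seq(1, 1, 0)
+   * }}}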
+ */ + def findColumnPosition( + column: Seq[String], + schema: StructType, + resolver: Resolver = DELTA_COL_RESOLVER): Seq[Int] = { + def findRecursively( + searchPath: Seq[String], + currentType: DataType, + currentPath: Seq[String] = Nil): Seq[Int] = { + if (searchPath.isEmpty) return Nil + + val currentFieldName = searchPath.head + val currentPathWithNestedField = currentPath :+ currentFieldName + (currentType, currentFieldName) match { + case (struct: StructType, _) => + lazy val columnPath = UnresolvedAttribute(currentPathWithNestedField).name + val pos = struct.indexWhere(f => resolver(f.name, currentFieldName)) + if (pos == -1) { + throw DeltaErrors.columnNotInSchemaException(columnPath, schema) + } + val childPosition = findRecursively( + searchPath = searchPath.tail, + currentType = struct(pos).dataType, + currentPath = currentPathWithNestedField) + pos +: childPosition + + case (map: MapType, "key") => + val childPosition = findRecursively( + searchPath = searchPath.tail, + currentType = map.keyType, + currentPath = currentPathWithNestedField) + MAP_KEY_INDEX +: childPosition + + case (map: MapType, "value") => + val childPosition = findRecursively( + searchPath = searchPath.tail, + currentType = map.valueType, + currentPath = currentPathWithNestedField) + MAP_VALUE_INDEX +: childPosition + + case (_: MapType, _) => + throw DeltaErrors.foundMapTypeColumnException( + prettyFieldName(currentPath :+ "key"), + prettyFieldName(currentPath :+ "value")) + + case (array: ArrayType, "element") => + val childPosition = findRecursively( + searchPath = searchPath.tail, + currentType = array.elementType, + currentPath = currentPathWithNestedField) + ARRAY_ELEMENT_INDEX +: childPosition + + case (_: ArrayType, _) => + throw DeltaErrors.incorrectArrayAccessByName( + prettyFieldName(currentPath :+ "element"), + prettyFieldName(currentPath)) + case _ => + throw DeltaErrors.columnPathNotNested(currentFieldName, currentType, currentPath) + } + } + + try { + findRecursively(column, schema) + } catch { + case e: AnalysisException => + throw new AnalysisException(e.getMessage + s":\n${schema.treeString}") + } + } + + /** + * Returns the nested field at the given position in `parent`. See [[findColumnPosition]] for the + * representation used for `position`. + * @param parent The field used for the lookup. + * @param position A list of ordinals (0-based) representing the path to the nested field in + * `parent`. + */ + def getNestedFieldFromPosition(parent: StructField, position: Seq[Int]): StructField = { + if (position.isEmpty) return parent + + val fieldPos = position.head + parent.dataType match { + case struct: StructType if fieldPos >= 0 && fieldPos < struct.size => + getNestedFieldFromPosition(struct(fieldPos), position.tail) + case map: MapType if fieldPos == MAP_KEY_INDEX => + getNestedFieldFromPosition(StructField("key", map.keyType), position.tail) + case map: MapType if fieldPos == MAP_VALUE_INDEX => + getNestedFieldFromPosition(StructField("value", map.valueType), position.tail) + case array: ArrayType if fieldPos == ARRAY_ELEMENT_INDEX => + getNestedFieldFromPosition(StructField("element", array.elementType), position.tail) + case _: StructType | _: ArrayType | _: MapType => + throw new IllegalArgumentException( + s"Invalid child position $fieldPos in ${parent.dataType}") + case other => + throw new IllegalArgumentException(s"Invalid indexing into non-nested type $other") + } + } + + /** + * Returns the nested type at the given position in `schema`. 
See [[findColumnPosition]] for the
+   * representation used for `position`.
+   * @param parent The root schema used for the lookup.
+   * @param position A list of ordinals (0-based) representing the path to the nested field in
+   *                 `parent`.
+   */
+  def getNestedTypeFromPosition(schema: StructType, position: Seq[Int]): DataType =
+    getNestedFieldFromPosition(StructField("schema", schema), position).dataType
+
+  /**
+   * Pretty print the column path passed in.
+   */
+  def prettyFieldName(columnPath: Seq[String]): String = {
+    UnresolvedAttribute(columnPath).name
+  }
+
+  /**
+   * Add `column` to the specified `position` in `schema`.
+   * @param position A Seq of ordinals on where this column should go. It is a Seq to denote
+   *                 positions in nested columns (0-based). For example:
+   *
+   *                 tableSchema: <a:STRUCT<a1,a2,a3>, b,c:STRUCT<c1,c3>>
+   *                 column: c2
+   *                 position: Seq(2, 1)
+   *                 will return
+   *                 result: <a:STRUCT<a1,a2,a3>, b,c:STRUCT<c1,c2,c3>>
+   */
+  def addColumn(schema: StructType, column: StructField, position: Seq[Int]): StructType = {
+    def addColumnInChild(parent: DataType, column: StructField, position: Seq[Int]): DataType = {
+      require(position.nonEmpty, s"Don't know where to add the column $column")
+      parent match {
+        case struct: StructType =>
+          addColumn(struct, column, position)
+        case map: MapType if position.head == MAP_KEY_INDEX =>
+          map.copy(keyType = addColumnInChild(map.keyType, column, position.tail))
+        case map: MapType if position.head == MAP_VALUE_INDEX =>
+          map.copy(valueType = addColumnInChild(map.valueType, column, position.tail))
+        case array: ArrayType if position.head == ARRAY_ELEMENT_INDEX =>
+          array.copy(elementType = addColumnInChild(array.elementType, column, position.tail))
+        case _: ArrayType =>
+          throw DeltaErrors.incorrectArrayAccess()
+        case other =>
+          throw DeltaErrors.addColumnParentNotStructException(column, other)
+      }
+    }
+    // If the proposed new column includes a default value, return a specific "not supported" error.
+    // The rationale is that such operations require the data source scan operator to implement
+    // support for filling in the specified default value when the corresponding field is not
+    // present in storage. That is not implemented yet for Delta, so we return this error instead.
+    // The error message is descriptive and provides an easy workaround for the user.
+    if (column.metadata.contains("CURRENT_DEFAULT")) {
+      throw new DeltaAnalysisException(
+        errorClass = "WRONG_COLUMN_DEFAULTS_FOR_DELTA_ALTER_TABLE_ADD_COLUMN_NOT_SUPPORTED",
+        messageParameters = Array.empty)
+    }
+
+    require(position.nonEmpty, s"Don't know where to add the column $column")
+    val slicePosition = position.head
+    if (slicePosition < 0) {
+      throw DeltaErrors.addColumnAtIndexLessThanZeroException(
+        slicePosition.toString, column.toString)
+    }
+    val length = schema.length
+    if (slicePosition > length) {
+      throw DeltaErrors.indexLargerThanStruct(slicePosition, column, length)
+    }
+    if (slicePosition == length) {
+      if (position.length > 1) {
+        throw DeltaErrors.addColumnStructNotFoundException(slicePosition.toString)
+      }
+      return StructType(schema :+ column)
+    }
+    val (pre, post) = schema.splitAt(slicePosition)
+    if (position.length > 1) {
+      val field = post.head
+      if (!column.nullable && field.nullable) {
+        throw DeltaErrors.nullableParentWithNotNullNestedField
+      }
+      val mid = field.copy(dataType = addColumnInChild(field.dataType, column, position.tail))
+      StructType(pre ++ Seq(mid) ++ post.tail)
+    } else {
+      StructType(pre ++ Seq(column) ++ post)
+    }
+  }
+
+  /**
+   * Drop the column at the specified `position` in `schema` and return the updated schema along
+   * with the original (dropped) column.
+   * @param position A Seq of ordinals identifying the column to drop. It is a Seq to denote
+   *                 positions in nested columns (0-based). For example:
+   *
+   *                 tableSchema: <a:STRUCT<a1,a2,a3>, b,c:STRUCT<c1,c2,c3>>
+   *                 position: Seq(2, 1)
+   *                 will return
+   *                 result: <a:STRUCT<a1,a2,a3>, b,c:STRUCT<c1,c3>>
+   */
+  def dropColumn(schema: StructType, position: Seq[Int]): (StructType, StructField) = {
+    def dropColumnInChild(parent: DataType, position: Seq[Int]): (DataType, StructField) = {
+      require(position.nonEmpty, s"Don't know where to drop the column")
+      parent match {
+        case struct: StructType =>
+          dropColumn(struct, position)
+        case map: MapType if position.head == MAP_KEY_INDEX =>
+          val (newKeyType, droppedColumn) = dropColumnInChild(map.keyType, position.tail)
+          map.copy(keyType = newKeyType) -> droppedColumn
+        case map: MapType if position.head == MAP_VALUE_INDEX =>
+          val (newValueType, droppedColumn) = dropColumnInChild(map.valueType, position.tail)
+          map.copy(valueType = newValueType) -> droppedColumn
+        case array: ArrayType if position.head == ARRAY_ELEMENT_INDEX =>
+          val (newElementType, droppedColumn) = dropColumnInChild(array.elementType, position.tail)
+          array.copy(elementType = newElementType) -> droppedColumn
+        case _: ArrayType =>
+          throw DeltaErrors.incorrectArrayAccess()
+        case other =>
+          throw DeltaErrors.dropNestedColumnsFromNonStructTypeException(other)
+      }
+    }
+
+    require(position.nonEmpty, "Don't know where to drop the column")
+    val slicePosition = position.head
+    if (slicePosition < 0) {
+      throw DeltaErrors.dropColumnAtIndexLessThanZeroException(slicePosition)
+    }
+    val length = schema.length
+    if (slicePosition >= length) {
+      throw DeltaErrors.indexLargerOrEqualThanStruct(slicePosition, length)
+    }
+    val (pre, post) = schema.splitAt(slicePosition)
+    val field = post.head
+    if (position.length > 1) {
+      val (newType, droppedColumn) = dropColumnInChild(field.dataType, position.tail)
+      val mid = field.copy(dataType = newType)
+
+      StructType(pre ++ Seq(mid) ++ post.tail) -> droppedColumn
+    } else {
+      if (length == 1) {
+        throw new AnalysisException(
+          "Cannot drop column from a struct type with a single field: " + schema)
+      }
+      StructType(pre ++ post.tail) -> field
+    }
+  }
+
+  /**
+   * Check if the two data types can be changed.
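+   *
+   * Illustrative example (hypothetical schemas, not part of the original change):
+   * {{{
+   *   // Adding a new nullable column is fine:
+   *   canChangeDataType(
+   *     new StructType().add("a", IntegerType),
+   *     new StructType().add("a", IntegerType).add("b", StringType),
+   *     DELTA_COL_RESOLVER, NoMapping)                                          // None
+   *   // Changing a column's type is rejected:
+   *   canChangeDataType(IntegerType, StringType, DELTA_COL_RESOLVER, NoMapping) // Some(error)
+   * }}}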
+ * + * @param failOnAmbiguousChanges Throw an error if a StructField both has columns dropped and new + * columns added. These are ambiguous changes, because we don't + * know if a column needs to be renamed, dropped, or added. + * @return None if the data types can be changed, otherwise Some(err) containing the reason. + */ + def canChangeDataType( + from: DataType, + to: DataType, + resolver: Resolver, + columnMappingMode: DeltaColumnMappingMode, + columnPath: Seq[String] = Nil, + failOnAmbiguousChanges: Boolean = false): Option[String] = { + def verify(cond: Boolean, err: => String): Unit = { + if (!cond) { + throw DeltaErrors.cannotChangeDataType(err) + } + } + + def verifyNullability(fn: Boolean, tn: Boolean, columnPath: Seq[String]): Unit = { + verify(tn || !fn, s"tightening nullability of ${UnresolvedAttribute(columnPath).name}") + } + + def check(fromDt: DataType, toDt: DataType, columnPath: Seq[String]): Unit = { + (fromDt, toDt) match { + case (ArrayType(fromElement, fn), ArrayType(toElement, tn)) => + verifyNullability(fn, tn, columnPath) + check(fromElement, toElement, columnPath :+ "element") + + case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) => + verifyNullability(fn, tn, columnPath) + check(fromKey, toKey, columnPath :+ "key") + check(fromValue, toValue, columnPath :+ "value") + + case (f @ StructType(fromFields), t @ StructType(toFields)) => + val remainingFields = mutable.Set[StructField]() + remainingFields ++= fromFields + var addingColumns = false + toFields.foreach { toField => + fromFields.find(field => resolver(field.name, toField.name)) match { + case Some(fromField) => + remainingFields -= fromField + + val newPath = columnPath :+ fromField.name + verifyNullability(fromField.nullable, toField.nullable, newPath) + check(fromField.dataType, toField.dataType, newPath) + case None => + addingColumns = true + verify(toField.nullable, + "adding non-nullable column " + + UnresolvedAttribute(columnPath :+ toField.name).name) + } + } + val columnName = UnresolvedAttribute(columnPath).name + if (failOnAmbiguousChanges && remainingFields.nonEmpty && addingColumns) { + throw DeltaErrors.ambiguousDataTypeChange(columnName, f, t) + } + if (columnMappingMode == NoMapping) { + verify(remainingFields.isEmpty, + s"dropping column(s) [${remainingFields.map(_.name).mkString(", ")}]" + + (if (columnPath.nonEmpty) s" from $columnName" else "")) + } + + case (fromDataType, toDataType) => + verify(fromDataType == toDataType, + s"changing data type of ${UnresolvedAttribute(columnPath).name} " + + s"from $fromDataType to $toDataType") + } + } + + try { + check(from, to, columnPath) + None + } catch { + case e: AnalysisException => + Some(e.message) + } + } + + /** + * Copy the nested data type between two data types. 
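+   *
+   * Illustrative example (hypothetical types, not part of the original change):
+   * {{{
+   *   // The element type is taken from `to`, while the array's nullability is kept from `from`:
+   *   changeDataType(ArrayType(IntegerType, containsNull = false),
+   *     ArrayType(LongType, containsNull = true), DELTA_COL_RESOLVER)
+   *   // => ArrayType(LongType, containsNull = false)
+   * }}}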
+ */ + def changeDataType(from: DataType, to: DataType, resolver: Resolver): DataType = { + (from, to) match { + case (ArrayType(fromElement, fn), ArrayType(toElement, _)) => + ArrayType(changeDataType(fromElement, toElement, resolver), fn) + + case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, _)) => + MapType( + changeDataType(fromKey, toKey, resolver), + changeDataType(fromValue, toValue, resolver), + fn) + + case (StructType(fromFields), StructType(toFields)) => + StructType( + toFields.map { toField => + fromFields.find(field => resolver(field.name, toField.name)).map { fromField => + toField.getComment().map(fromField.withComment).getOrElse(fromField) + .copy( + dataType = changeDataType(fromField.dataType, toField.dataType, resolver), + nullable = toField.nullable) + }.getOrElse(toField) + } + ) + + case (_, toDataType) => toDataType + } + } + + /** + * Transform (nested) columns in a schema. Runs the transform function on all nested StructTypes + * + * If `colName` is defined, we also check if the struct to process contains the column name. + * + * @param schema to transform. + * @param colName Optional name to match for + * @param tf function to apply on the StructType. + * @return the transformed schema. + */ + def transformColumnsStructs( + schema: StructType, + colName: Option[String] = None)( + tf: (Seq[String], StructType, Resolver) => Seq[StructField]): StructType = { + def transform[E <: DataType](path: Seq[String], dt: E): E = { + val newDt = dt match { + case struct @ StructType(fields) => + val newFields = if (colName.isEmpty || fields.exists(f => colName.contains(f.name))) { + tf(path, struct, DELTA_COL_RESOLVER) + } else { + fields.toSeq + } + + StructType(newFields.map { field => + field.copy(dataType = transform(path :+ field.name, field.dataType)) + }) + case ArrayType(elementType, containsNull) => + ArrayType(transform(path :+ "element", elementType), containsNull) + case MapType(keyType, valueType, valueContainsNull) => + MapType( + transform(path :+ "key", keyType), + transform(path :+ "value", valueType), + valueContainsNull) + case other => other + } + newDt.asInstanceOf[E] + } + transform(Seq.empty, schema) + } + + /** + * Transform (nested) columns in a schema using the given path and parameter pairs. The transform + * function is only invoked when a field's path matches one of the input paths. + * + * @param schema to transform + * @param input paths and parameter pairs. The paths point to fields we want to transform. The + * parameters will be passed to the transform function for a matching field. + * @param tf function to apply per matched field. This function takes the field path, the field + * itself and the input names and payload pairs that matched the field name. It should + * return a new field. + * @tparam E the type of the payload used for transforming fields. + * @return the transformed schema. + */ + def transformColumns[E]( + schema: StructType, + input: Seq[(Seq[String], E)])( + tf: (Seq[String], StructField, Seq[(Seq[String], E)]) => StructField): StructType = { + // scalastyle:off caselocale + val inputLookup = input.groupBy(_._1.map(_.toLowerCase)) + SchemaMergingUtils.transformColumns(schema) { (path, field, resolver) => + // Find the parameters that match this field name. + val fullPath = path :+ field.name + val normalizedFullPath = fullPath.map(_.toLowerCase) + val matches = inputLookup.get(normalizedFullPath).toSeq.flatMap { + // Keep only the input name(s) that actually match the field name(s). 
Note + // that the Map guarantees that the zipped sequences have the same size. + _.filter(_._1.zip(fullPath).forall(resolver.tupled)) + } + if (matches.nonEmpty) { + tf(path, field, matches) + } else { + field + } + } + // scalastyle:on caselocale + } + + /** + * Check if the schema contains invalid char in the column names depending on the mode. + */ + def checkSchemaFieldNames(schema: StructType, columnMappingMode: DeltaColumnMappingMode): Unit = { + if (columnMappingMode != NoMapping) return + try { + checkFieldNames(SchemaMergingUtils.explodeNestedFieldNames(schema)) + } catch { + case NonFatal(e) => + throw DeltaErrors.foundInvalidCharsInColumnNames(e) + } + } + + /** + * Verifies that the column names are acceptable by Parquet and henceforth Delta. Parquet doesn't + * accept the characters ' ,;{}()\n\t='. We ensure that neither the data columns nor the partition + * columns have these characters. + */ + def checkFieldNames(names: Seq[String]): Unit = { + names.foreach { name => + // ,;{}()\n\t= and space are special characters in Delta schema + if (name.matches(".*[ ,;{}()\n\t=].*")) { + throw QueryCompilationErrors.invalidColumnNameAsPathError("delta", name) + } + } + // The method checkFieldNames doesn't have a valid regex to search for '\n'. That should be + // fixed in Apache Spark, and we can remove this additional check here. + names.find(_.contains("\n")).foreach(col => throw DeltaErrors.invalidColumnName(col)) + } + + /** + * Go through the schema to look for unenforceable NOT NULL constraints. By default we'll throw + * when they're encountered, but if this is suppressed through SQLConf they'll just be silently + * removed. + * + * Note that this should only be applied to schemas created from explicit user DDL - in other + * scenarios, the nullability information may be inaccurate and Delta should always coerce the + * nullability flag to true. 
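+   *
+   * Illustrative behaviour (hypothetical schema, not part of the original change): for a schema
+   * such as <a ARRAY<STRUCT<x: INT NOT NULL>>>, the nested NOT NULL constraint either triggers
+   * DeltaErrors.nestedNotNullConstraint or, if ALLOW_UNENFORCED_NOT_NULL_CONSTRAINTS is set, is
+   * silently relaxed to nullable:
+   * {{{
+   *   removeUnenforceableNotNullConstraints(schema, spark.sessionState.conf)
+   * }}}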
+ */ + def removeUnenforceableNotNullConstraints(schema: StructType, conf: SQLConf): StructType = { + val allowUnenforceableNotNulls = + conf.getConf(DeltaSQLConf.ALLOW_UNENFORCED_NOT_NULL_CONSTRAINTS) + + def checkField(path: Seq[String], f: StructField, r: Resolver): StructField = f match { + case StructField(name, ArrayType(elementType, containsNull), nullable, metadata) => + val nullableElementType = SchemaUtils.typeAsNullable(elementType) + if (elementType != nullableElementType && !allowUnenforceableNotNulls) { + throw DeltaErrors.nestedNotNullConstraint( + prettyFieldName(path :+ f.name), elementType, nestType = "element") + } + StructField( + name, ArrayType(nullableElementType, containsNull), nullable, metadata) + + case f @ StructField( + name, MapType(keyType, valueType, containsNull), nullable, metadata) => + val nullableKeyType = SchemaUtils.typeAsNullable(keyType) + val nullableValueType = SchemaUtils.typeAsNullable(valueType) + + if (keyType != nullableKeyType && !allowUnenforceableNotNulls) { + throw DeltaErrors.nestedNotNullConstraint( + prettyFieldName(path :+ f.name), keyType, nestType = "key") + } + if (valueType != nullableValueType && !allowUnenforceableNotNulls) { + throw DeltaErrors.nestedNotNullConstraint( + prettyFieldName(path :+ f.name), valueType, nestType = "value") + } + + StructField( + name, + MapType(nullableKeyType, nullableValueType, containsNull), + nullable, + metadata) + + case s: StructField => s + } + + SchemaMergingUtils.transformColumns(schema)(checkField) + } + + def fieldToColumn(field: StructField): Column = { + new Column(UnresolvedAttribute.quoted(field.name)) + } + + /** converting field name to column type with quoted back-ticks */ + def fieldNameToColumn(field: String): Column = { + col(quoteIdentifier(field)) + } + // Escapes back-ticks within the identifier name with double-back-ticks, and then quote the + // identifier with back-ticks. + def quoteIdentifier(part: String): String = s"`${part.replace("`", "``")}`" + + /** + * Will a column change, e.g., rename, need to be populated to the expression. This is true when + * the column to change itself or any of its descendent column is referenced by expression. + * For example: + * - a, length(a) -> true + * - b, (b.c + 1) -> true, because renaming b1 will need to change the expr to (b1.c + 1). + * - b.c, (cast b as string) -> false, because you can change b.c to b.c1 without affecting b. + */ + def containsDependentExpression( + spark: SparkSession, + columnToChange: Seq[String], + exprString: String, + resolver: Resolver): Boolean = { + val expression = spark.sessionState.sqlParser.parseExpression(exprString) + expression.foreach { + case refCol: UnresolvedAttribute => + // columnToChange is the referenced column or its prefix + val prefixMatched = columnToChange.size <= refCol.nameParts.size && + refCol.nameParts.zip(columnToChange).forall(pair => resolver(pair._1, pair._2)) + if (prefixMatched) return true + case _ => + } + false + } + + /** + * Find the unsupported data type in a table schema. Return all columns that are using unsupported + * data types. For example, + * `findUnsupportedDataType(struct<a: struct<b: unsupported_type>>)` will return + * `Some(unsupported_type, Some("a.b"))`. 
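+   *
+   * Illustrative example (hypothetical schema, not part of the original change):
+   * {{{
+   *   // For a schema <a STRUCT<b: INTERVAL YEAR TO MONTH>>:
+   *   findUnsupportedDataTypes(schema)
+   *   // => Seq(UnsupportedDataTypeInfo("a.b", YearMonthIntervalType()))
+   * }}}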
+ */ + def findUnsupportedDataTypes(schema: StructType): Seq[UnsupportedDataTypeInfo] = { + val unsupportedDataTypes = mutable.ArrayBuffer[UnsupportedDataTypeInfo]() + findUnsupportedDataTypesRecursively(unsupportedDataTypes, schema) + unsupportedDataTypes.toSeq + } + + /** + * Find TimestampNTZ columns in the table schema. + */ + def checkForTimestampNTZColumnsRecursively(schema: StructType): Boolean = { + SchemaUtils.typeExistsRecursively(schema)(_.isInstanceOf[TimestampNTZType]) + } + + /** + * Find the unsupported data types in a `DataType` recursively. Add the unsupported data types to + * the provided `unsupportedDataTypes` buffer. + * + * @param unsupportedDataTypes the buffer to store the found unsupport data types and the column + * paths. + * @param dataType the data type to search. + * @param columnPath the column path to access the given data type. The callder should make sure + * `columnPath` is not empty when `dataType` is not `StructType`. + */ + private def findUnsupportedDataTypesRecursively( + unsupportedDataTypes: mutable.ArrayBuffer[UnsupportedDataTypeInfo], + dataType: DataType, + columnPath: Seq[String] = Nil): Unit = dataType match { + case NullType => + case BooleanType => + case ByteType => + case ShortType => + case IntegerType => + case dt: YearMonthIntervalType => + assert(columnPath.nonEmpty, "'columnPath' must not be empty") + unsupportedDataTypes += UnsupportedDataTypeInfo(prettyFieldName(columnPath), dt) + case LongType => + case dt: DayTimeIntervalType => + assert(columnPath.nonEmpty, "'columnPath' must not be empty") + unsupportedDataTypes += UnsupportedDataTypeInfo(prettyFieldName(columnPath), dt) + case FloatType => + case DoubleType => + case StringType => + case DateType => + case TimestampType => + case TimestampNTZType => + case BinaryType => + case _: DecimalType => + case a: ArrayType => + assert(columnPath.nonEmpty, "'columnPath' must not be empty") + findUnsupportedDataTypesRecursively( + unsupportedDataTypes, + a.elementType, + columnPath.dropRight(1) :+ columnPath.last + "[]") + case m: MapType => + assert(columnPath.nonEmpty, "'columnPath' must not be empty") + findUnsupportedDataTypesRecursively( + unsupportedDataTypes, + m.keyType, + columnPath.dropRight(1) :+ columnPath.last + "[key]") + findUnsupportedDataTypesRecursively( + unsupportedDataTypes, + m.valueType, + columnPath.dropRight(1) :+ columnPath.last + "[value]") + case s: StructType => + s.fields.foreach { f => + findUnsupportedDataTypesRecursively( + unsupportedDataTypes, + f.dataType, + columnPath :+ f.name) + } + case udt: UserDefinedType[_] => + findUnsupportedDataTypesRecursively(unsupportedDataTypes, udt.sqlType, columnPath) + case dt: DataType => + assert(columnPath.nonEmpty, "'columnPath' must not be empty") + unsupportedDataTypes += UnsupportedDataTypeInfo(prettyFieldName(columnPath), dt) + } + + /** + * Find all the generated columns that depend on the given target column. 
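+   *
+   * Illustrative example (hypothetical table, not part of the original change): if column `gen`
+   * is defined as GENERATED ALWAYS AS (length(name)), then changes to `name` affect it:
+   * {{{
+   *   findDependentGeneratedColumns(spark, Seq("name"), protocol, schema)
+   *   // => Seq(StructField("gen", IntegerType, ...))
+   * }}}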
+ */ + def findDependentGeneratedColumns( + sparkSession: SparkSession, + targetColumn: Seq[String], + protocol: Protocol, + schema: StructType): Seq[StructField] = { + if (GeneratedColumn.satisfyGeneratedColumnProtocol(protocol) && + GeneratedColumn.hasGeneratedColumns(schema)) { + + val dependentGenCols = ArrayBuffer[StructField]() + SchemaMergingUtils.transformColumns(schema) { (_, field, _) => + GeneratedColumn.getGenerationExpressionStr(field.metadata).foreach { exprStr => + val needsToChangeExpr = SchemaUtils.containsDependentExpression( + sparkSession, targetColumn, exprStr, sparkSession.sessionState.conf.resolver) + if (needsToChangeExpr) dependentGenCols += field + } + field + } + dependentGenCols.toList + } else { + Seq.empty + } + } + + /** Recursively find all types not defined in Delta protocol but used in `dt` */ + def findUndefinedTypes(dt: DataType): Seq[DataType] = dt match { + // Types defined in Delta protocol + case NullType => Nil + case BooleanType => Nil + case ByteType | ShortType | IntegerType | LongType => Nil + case FloatType | DoubleType | _: DecimalType => Nil + case StringType | BinaryType => Nil + case DateType | TimestampType => Nil + // Recursively search complex data types + case s: StructType => s.fields.flatMap(f => findUndefinedTypes(f.dataType)) + case a: ArrayType => findUndefinedTypes(a.elementType) + case m: MapType => findUndefinedTypes(m.keyType) ++ findUndefinedTypes(m.valueType) + // Other types are not defined in Delta protocol + case undefinedType => Seq(undefinedType) + } + + /** Record all types not defined in Delta protocol but used in the `schema`. */ + def recordUndefinedTypes(deltaLog: DeltaLog, schema: StructType): Unit = { + try { + findUndefinedTypes(schema).map(_.getClass.getName).toSet.foreach { className: String => + recordDeltaEvent(deltaLog, "delta.undefined.type", data = Map("className" -> className)) + } + } catch { + case NonFatal(e) => + logWarning(s"Failed to log undefined types for table ${deltaLog.logPath}", e) + } + } +} + +/** + * The information of unsupported data type returned by [[SchemaUtils.findUnsupportedDataTypes]]. + * + * @param column the column path to access the column using an unsupported data type, such as `a.b`. + * @param dataType the unsupported data type. + */ +case class UnsupportedDataTypeInfo(column: String, dataType: DataType) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/skipping/MultiDimClustering.scala b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/MultiDimClustering.scala new file mode 100644 index 00000000000..46c3e0132bc --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/MultiDimClustering.scala @@ -0,0 +1,113 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.skipping + +import java.util.UUID + +import org.apache.spark.sql.delta.skipping.MultiDimClusteringFunctions._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging +import org.apache.spark.sql._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types._ + +/** Trait for changing the data layout using a multi-dimensional clustering algorithm */ +trait MultiDimClustering extends Logging { + /** Repartition the given `df` into `approxNumPartitions` based on the provided `colNames`. */ + def cluster( + df: DataFrame, + colNames: Seq[String], + approxNumPartitions: Int, + randomizationExpressionOpt: Option[Column] + ): DataFrame +} + +object MultiDimClustering { + /** + * Repartition the given dataframe `df` based on the given `curve` type into + * `approxNumPartitions` on the given `colNames`. + */ + def cluster( + df: DataFrame, + approxNumPartitions: Int, + colNames: Seq[String], + curve: String): DataFrame = { + assert(colNames.nonEmpty, "Cannot cluster by zero columns!") + val clusteringImpl = curve match { + case "hilbert" => HilbertClustering + case "zorder" => ZOrderClustering + case unknownCurve => + throw new SparkException(s"Unknown curve ($unknownCurve), unable to perform multi " + + "dimensional clustering.") + } + clusteringImpl.cluster(df, colNames, approxNumPartitions, randomizationExpressionOpt = None) + } +} + +/** Base class for space filling curve based clustering e.g. ZOrder */ +trait SpaceFillingCurveClustering extends MultiDimClustering { + + protected def getClusteringExpression(cols: Seq[Column], numRanges: Int): Column + + override def cluster( + df: DataFrame, + colNames: Seq[String], + approxNumPartitions: Int, + randomizationExpressionOpt: Option[Column]): DataFrame = { + val conf = df.sparkSession.sessionState.conf + val numRanges = conf.getConf(DeltaSQLConf.MDC_NUM_RANGE_IDS) + val addNoise = conf.getConf(DeltaSQLConf.MDC_ADD_NOISE) + + val cols = colNames.map(df(_)) + val mdcCol = getClusteringExpression(cols, numRanges) + val repartitionKeyColName = s"${UUID.randomUUID().toString}-rpKey1" + + var repartitionedDf = if (addNoise) { + val randByteColName = s"${UUID.randomUUID().toString}-rpKey2" + val randByteCol = randomizationExpressionOpt.getOrElse((rand() * 255 - 128).cast(ByteType)) + df.withColumn(repartitionKeyColName, mdcCol).withColumn(randByteColName, randByteCol) + .repartitionByRange(approxNumPartitions, col(repartitionKeyColName), col(randByteColName)) + .drop(randByteColName) + } else { + df.withColumn(repartitionKeyColName, mdcCol) + .repartitionByRange(approxNumPartitions, col(repartitionKeyColName)) + } + + repartitionedDf.drop(repartitionKeyColName) + } +} + +/** Implement Z-Order clustering */ +object ZOrderClustering extends SpaceFillingCurveClustering { + override protected[skipping] def getClusteringExpression( + cols: Seq[Column], numRanges: Int): Column = { + assert(cols.size >= 1, "Cannot do Z-Order clustering by zero columns!") + val rangeIdCols = cols.map(range_partition_id(_, numRanges)) + interleave_bits(rangeIdCols: _*).cast(StringType) + } +} + +object HilbertClustering extends SpaceFillingCurveClustering with Logging { + override protected def getClusteringExpression(cols: Seq[Column], numRanges: Int): Column = { + assert(cols.size > 1, "Cannot do Hilbert clustering by zero or one column!") + val rangeIdCols = cols.map(range_partition_id(_, numRanges)) + val numBits = 
Integer.numberOfTrailingZeros(Integer.highestOneBit(numRanges)) + 1 + hilbert_index(numBits, rangeIdCols: _*) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/skipping/MultiDimClusteringFunctions.scala b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/MultiDimClusteringFunctions.scala new file mode 100644 index 00000000000..68496dbdcae --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/MultiDimClusteringFunctions.scala @@ -0,0 +1,81 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.skipping + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.expressions.{HilbertByteArrayIndex, HilbertLongIndex, InterleaveBits, RangePartitionId} + +import org.apache.spark.SparkException +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} +import org.apache.spark.sql.types.StringType + +/** Functions for multi-dimensional clustering of the data */ +object MultiDimClusteringFunctions { + private def withExpr(expr: Expression): Column = new Column(expr) + + /** + * Conceptually range-partitions the domain of values of the given column into `numPartitions` + * partitions and computes the partition number that every value of that column corresponds to. + * One can think of this as an approximate rank() function. + * + * Ex. For a column with values (0, 1, 3, 15, 36, 99) and numPartitions = 3 returns + * partition range ids as (0, 0, 1, 1, 2, 2). + */ + def range_partition_id(col: Column, numPartitions: Int): Column = withExpr { + RangePartitionId(col.expr, numPartitions) + } + + /** + * Interleaves the bits of its input data in a round-robin fashion. + * + * If the input data is seen as a series of multidimensional points, this function computes the + * corresponding Z-values, in a way that's preserving data locality: input points that are close + * in the multidimensional space will be mapped to points that are close on the Z-order curve. + * + * The returned value is a byte array where the size of the array is 4 * num of input columns. + * + * @see https://en.wikipedia.org/wiki/Z-order_curve + * + * @note Only supports input expressions of type Int for now. + */ + def interleave_bits(cols: Column*): Column = withExpr { + InterleaveBits(cols.map(_.expr)) + } + + // scalastyle:off line.size.limit + /** + * Transforms the provided integer columns into their corresponding position in the hilbert + * curve for the given dimension. + * @see https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=bfd6d94c98627756989b0147a68b7ab1f881a0d6 + * @see https://en.wikipedia.org/wiki/Hilbert_curve + * @param numBits The number of bits to consider in each column. + * @param cols The integer columns to map to the curve. 
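+   *
+   * Illustrative usage (hypothetical DataFrame and columns, not part of the original change):
+   * {{{
+   *   // Interleave three range-partition ids into one Hilbert index using 10 bits per column:
+   *   df.withColumn("hilbertIdx", hilbert_index(10, col("p1"), col("p2"), col("p3")))
+   * }}}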
+ */ + // scalastyle:on line.size.limit + def hilbert_index(numBits: Int, cols: Column*): Column = withExpr { + if (cols.size > 9) { + throw new SparkException("Hilbert indexing can only be used on 9 or fewer columns.") + } + val hilbertBits = cols.length * numBits + if (hilbertBits < 64) { + HilbertLongIndex(numBits, cols.map(_.expr)) + } else { + Cast(HilbertByteArrayIndex(numBits, cols.map(_.expr)), StringType) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableUtils.scala new file mode 100644 index 00000000000..247a96a325c --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableUtils.scala @@ -0,0 +1,345 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.skipping.clustering + +import org.apache.spark.sql.delta.skipping.clustering.temp.ClusterBySpec +import org.apache.spark.sql.delta.{ClusteringTableFeature, DeltaColumnMappingMode, DeltaErrors, DeltaLog, OptimisticTransaction, Snapshot} +import org.apache.spark.sql.delta.actions.{DomainMetadata, Metadata, Protocol, TableFeatureProtocolUtils} +import org.apache.spark.sql.delta.clustering.ClusteringMetadataDomain +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.{DeltaStatistics, StatisticsCollection} +import org.apache.spark.sql.delta.util.{Utils => DeltaUtils} + +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{StructField, StructType} + +/** + * Clustered table utility functions. + */ +trait ClusteredTableUtilsBase extends DeltaLogging { + // Clustering columns property key. The column names are logical and separated by comma. + // This will be removed when we integrate with OSS Spark and use + // [[CatalogTable.PROP_CLUSTERING_COLUMNS]] directly. + val PROP_CLUSTERING_COLUMNS: String = "clusteringColumns" + + /** + * Returns whether the protocol version supports the Liquid table feature. + */ + def isSupported(protocol: Protocol): Boolean = protocol.isFeatureSupported(ClusteringTableFeature) + + /** The clustering implementation name for [[AddFile.clusteringProvider]] */ + def clusteringProvider: String = "liquid" + + /** + * Validate the clustering table preview is enabled. If not, throw an exception. + * This version is used when checking existing tables with updated metadata / protocol. 
+ */ + def validatePreviewEnabled(protocol: Protocol): Unit = { + if (isSupported(protocol) && + !SQLConf.get.getConf(DeltaSQLConf.DELTA_CLUSTERING_TABLE_PREVIEW_ENABLED) && + !DeltaUtils.isTesting) { + throw DeltaErrors.clusteringTablePreviewDisabledException() + } + } + + /** + * Validate the clustering table preview is enabled. If not, throw an exception. + * This version is used for `CREATE TABLE...` where the initial snapshot doesn't have + * updated metadata / protocol yet. + */ + def validatePreviewEnabled(maybeClusterBySpec: Option[ClusterBySpec]): Unit = { + maybeClusterBySpec.foreach { _ => + if (!SQLConf.get.getConf(DeltaSQLConf.DELTA_CLUSTERING_TABLE_PREVIEW_ENABLED) && + !DeltaUtils.isTesting) { + throw DeltaErrors.clusteringTablePreviewDisabledException() + } + } + } + + /** + * Returns an optional [[ClusterBySpec]] from the given CatalogTable. + */ + def getClusterBySpecOptional(table: CatalogTable): Option[ClusterBySpec] = { + table.properties.get(PROP_CLUSTERING_COLUMNS).map(ClusterBySpec.fromProperty) + } + + /** + * Extract clustering columns from ClusterBySpec. + * + * @param maybeClusterBySpec optional ClusterBySpec. If it's empty, will return the + * original properties. + * @return an optional pair with clustering columns. + */ + def getClusteringColumnsAsProperty( + maybeClusterBySpec: Option[ClusterBySpec]): Option[(String, String)] = { + maybeClusterBySpec.map(ClusterBySpec.toProperty) + } + + /** + * Returns table feature properties that's required to create a clustered table. + * + * @param existingProperties Table properties set by the user when creating a clustered table. + */ + def getTableFeatureProperties(existingProperties: Map[String, String]): Map[String, String] = { + val properties = collection.mutable.Map.empty[String, String] + properties += TableFeatureProtocolUtils.propertyKey(ClusteringTableFeature) -> + TableFeatureProtocolUtils.FEATURE_PROP_SUPPORTED + + properties.toMap + } + + /** + * Verify user didn't set clustering table feature in table properties. + * + * @param existingProperties Table properties set by the user when creating a clustered table. + */ + def validateExistingTableFeatureProperties(existingProperties: Map[String, String]): Unit = { + if (existingProperties.contains( + TableFeatureProtocolUtils.propertyKey(ClusteringTableFeature))) { + throw DeltaErrors.createTableSetClusteringTableFeatureException(ClusteringTableFeature.name) + } + } + + /** + * Validate the number of clustering columns doesn't exceed the limit. + * + * @param clusteringColumns clustering columns for the table. + * @param deltaLogOpt optional delta log. If present, will be used to record a delta event. + */ + def validateNumClusteringColumns( + clusteringColumns: Seq[Seq[String]], + deltaLogOpt: Option[DeltaLog] = None): Unit = { + val numColumnsLimit = + SQLConf.get.getConf(DeltaSQLConf.DELTA_NUM_CLUSTERING_COLUMNS_LIMIT) + val actualNumColumns = clusteringColumns.size + if (actualNumColumns > numColumnsLimit) { + deltaLogOpt.foreach { deltaLog => + recordDeltaEvent( + deltaLog, + opType = "delta.clusteredTable.invalidNumClusteringColumns", + data = Map( + "numCols" -> clusteringColumns.size, + "numColsLimit" -> numColumnsLimit)) + } + throw DeltaErrors.clusterByInvalidNumColumnsException(numColumnsLimit, actualNumColumns) + } + } + + /** + * Remove PROP_CLUSTERING_COLUMNS from metadata action. + * Clustering columns should only exist in: + * 1. CatalogTable.properties(PROP_CLUSTERING_COLUMNS) + * 2. Clustering metadata domain. 
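+ *
+ * Minimal illustration (editor's addition; the property values are hypothetical):
+ * {{{
+ *   removeClusteringColumnsProperty(
+ *     Map("clusteringColumns" -> """[["c1"]]""", "delta.appendOnly" -> "false"))
+ *   // => Map("delta.appendOnly" -> "false")
+ * }}}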
+ * @param configuration original configuration. + * @return new configuration without clustering columns property + */ + def removeClusteringColumnsProperty(configuration: Map[String, String]): Map[String, String] = { + configuration - PROP_CLUSTERING_COLUMNS + } + + /** + * Create an optional [[DomainMetadata]] action to store clustering columns. + */ + def getDomainMetadataOptional( + clusterBySpecOpt: Option[ClusterBySpec], + txn: OptimisticTransaction): Option[DomainMetadata] = { + clusterBySpecOpt.map { clusterBy => + ClusteredTableUtils.validateClusteringColumnsInStatsSchema( + txn.protocol, txn.metadata, clusterBy) + val clusteringColumns = + clusterBy.columnNames.map(_.toString).map(ClusteringColumn(txn.metadata.schema, _)) + createDomainMetadata(clusteringColumns) + } + } + + /** + * Create a [[DomainMetadata]] action to store clustering columns. + */ + def createDomainMetadata(clusteringColumns: Seq[ClusteringColumn]): DomainMetadata = { + ClusteringMetadataDomain.fromClusteringColumns(clusteringColumns).toDomainMetadata + } + + /** + * Create a [[ClusteringMetadataDomain]] with the given CatalogTable's clustering column property. + */ + def getDomainMetadataOptional( + table: CatalogTable, + txn: OptimisticTransaction): Option[DomainMetadata] = { + getDomainMetadataOptional(getClusterBySpecOptional(table), txn) + } + + /** + * Extract [[ClusteringColumn]]s from a given snapshot. Return None if the clustering domain + * metadata is missing. + */ + def getClusteringColumnsOptional(snapshot: Snapshot): Option[Seq[ClusteringColumn]] = { + ClusteringMetadataDomain + .fromSnapshot(snapshot) + .map(_.clusteringColumns.map(ClusteringColumn.apply)) + } + + /** + * Extract [[DomainMetadata]] for storing clustering columns from a given snapshot. + * It returns clustering domain metadata if exists. + * Return empty if the clustering domain metadata is missing. + */ + def getClusteringDomainMetadata(snapshot: Snapshot): Seq[DomainMetadata] = { + ClusteringMetadataDomain.fromSnapshot(snapshot).map(_.toDomainMetadata).toSeq + } + + /** + * Validate stats will be collected for all clustering columns. + */ + def validateClusteringColumnsInStatsSchema( + snapshot: Snapshot, + logicalClusteringColumns: Seq[String]): Unit = { + validateClusteringColumnsInStatsSchema( + snapshot, + logicalClusteringColumns.map { name => + ClusteringColumnInfo(snapshot.schema, ClusteringColumn(snapshot.schema, name)) + }) + } + + /** + * Returns true if stats will be collected for all clustering columns. + */ + def areClusteringColumnsInStatsSchema( + snapshot: Snapshot, + logicalClusteringColumns: Seq[String]): Boolean = { + getClusteringColumnsNotInStatsSchema( + snapshot, + logicalClusteringColumns.map { name => + ClusteringColumnInfo(snapshot.schema, ClusteringColumn(snapshot.schema, name)) + }).isEmpty + } + + /** + * Validate stats will be collected for all clustering columns. + * + * This version is used when [[Snapshot]] doesn't have latest stats column information such as + * `CREATE TABLE...` where the initial snapshot doesn't have updated metadata / protocol yet. 
+ */ + def validateClusteringColumnsInStatsSchema( + protocol: Protocol, + metadata: Metadata, + clusterBy: ClusterBySpec): Unit = { + validateClusteringColumnsInStatsSchema( + statisticsCollectionFromMetadata(protocol, metadata), + clusterBy.columnNames.map { column => + ClusteringColumnInfo(metadata.schema, ClusteringColumn(metadata.schema, column.toString)) + }) + } + + /** + * Build a [[StatisticsCollection]] with minimal requirements that can be used to find stats + * columns. + * + * We can not use [[Snapshot]] as in a normal case during table creation such as `CREATE TABLE` + * because the initial snapshot doesn't have the updated metadata / protocol to find latest stats + * columns. + */ + private def statisticsCollectionFromMetadata( + p: Protocol, + metadata: Metadata): StatisticsCollection = { + new StatisticsCollection { + override val tableSchema: StructType = metadata.schema + override val outputAttributeSchema: StructType = tableSchema + // [[outputTableStatsSchema]] is the candidate schema to find statistics columns. + override val outputTableStatsSchema: StructType = tableSchema + override val statsColumnSpec = StatisticsCollection.configuredDeltaStatsColumnSpec(metadata) + override val columnMappingMode: DeltaColumnMappingMode = metadata.columnMappingMode + override val protocol: Protocol = p + + override def spark: SparkSession = { + throw new Exception("Method not used in statisticsCollectionFromMetadata") + } + } + } + + /** + * Validate physical clustering columns can be found in the latest stats columns. + * + * @param statsCollection Provides latest stats columns. + * @param clusteringColumnInfos Clustering columns in physical names. + * + * A [[AnalysisException]] is thrown if the clustering column can not be found in the latest + * stats columns. The error message contains logical names only for better user experience. + */ + private def validateClusteringColumnsInStatsSchema( + statsCollection: StatisticsCollection, + clusteringColumnInfos: Seq[ClusteringColumnInfo]): Unit = { + val missingColumn = getClusteringColumnsNotInStatsSchema(statsCollection, clusteringColumnInfos) + if (missingColumn.nonEmpty) { + // Convert back to logical names. + throw DeltaErrors.clusteringColumnMissingStats( + missingColumn.mkString(", "), + statsCollection.statCollectionLogicalSchema.treeString) + } + } + + /** + * Validate that the given clusterBySpec matches the existing table's in the given snapshot. + * This is used for append mode and replaceWhere. + */ + def validateClusteringColumnsInSnapshot( + snapshot: Snapshot, + clusterBySpec: ClusterBySpec): Unit = { + // This uses physical column names to compare. + val providedClusteringColumns = + Some(clusterBySpec.columnNames.map(col => ClusteringColumn(snapshot.schema, col.toString))) + val existingClusteringColumns = ClusteredTableUtils.getClusteringColumnsOptional(snapshot) + if (providedClusteringColumns != existingClusteringColumns) { + throw DeltaErrors.clusteringColumnsMismatchException( + clusterBySpec.columnNames.map(_.toString).mkString(","), + existingClusteringColumns.map(_.map( + ClusteringColumnInfo(snapshot.schema, _).logicalName).mkString(",")).getOrElse("") + ) + } + } + + /** + * Returns empty if all physical clustering columns can be found in the latest stats columns. + * Otherwise, returns the logical names of the all clustering columns that are not found. 
+ * + * [[StatisticsCollection.statsSchema]] has converted field's name to physical name and also it + * filters out any columns that are NOT qualified as a stats data type + * through [[SkippingEligibleDataType]]. + * + * @param statsCollection Provides latest stats columns. + * @param clusteringColumnInfos Clustering columns in physical names. + */ + private def getClusteringColumnsNotInStatsSchema( + statsCollection: StatisticsCollection, + clusteringColumnInfos: Seq[ClusteringColumnInfo]): Seq[String] = { + clusteringColumnInfos.flatMap { info => + val path = DeltaStatistics.MIN +: info.physicalName + SchemaUtils.findNestedFieldIgnoreCase(statsCollection.statsSchema, path) match { + // Validate that the column exists in the stats schema and is not a struct + // in the stats schema (to catch CLUSTER BY an entire struct). + case None | Some(StructField(_, _: StructType, _, _)) => + Some(info.logicalName) + case _ => None + } + } + } +} + +object ClusteredTableUtils extends ClusteredTableUtilsBase diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteringColumn.scala b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteringColumn.scala new file mode 100644 index 00000000000..7faa3533311 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteringColumn.scala @@ -0,0 +1,95 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.skipping.clustering + +import org.apache.spark.sql.delta.{DeltaColumnMapping, Snapshot} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils + +import org.apache.spark.sql.connector.expressions.FieldReference +import org.apache.spark.sql.types.{DataType, StructType} + +/** + * A wrapper class that stores a clustering column's physical name parts. + */ +case class ClusteringColumn(physicalName: Seq[String]) + +object ClusteringColumn { + /** + * Note: `logicalName` must be validated to exist in the given `schema`. + */ + def apply(schema: StructType, logicalName: String): ClusteringColumn = { + val resolver = SchemaUtils.DELTA_COL_RESOLVER + // Note that we use AttributeNameParser instead of CatalystSqlParser to account for the case + // where the column name is a backquoted string with spaces. + val logicalNameParts = FieldReference(logicalName).fieldNames + val physicalNameParts = logicalNameParts.foldLeft[(DataType, Seq[String])]((schema, Nil)) { + (partial, namePart) => + val (currStructType, currPhysicalNameSeq) = partial + val field = + currStructType.asInstanceOf[StructType].find(field => resolver(field.name, namePart)).get + (field.dataType, currPhysicalNameSeq :+ DeltaColumnMapping.getPhysicalName(field)) + }._2 + ClusteringColumn(physicalNameParts) + } +} + +/** + * A wrapper class that stores a clustering column's physical name parts and data type. 
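+ *
+ * Illustrative round trip (editor's sketch; the column name is hypothetical):
+ * {{{
+ *   val info = ClusteringColumnInfo(snapshot.schema, ClusteringColumn(snapshot.schema, "ts"))
+ *   info.physicalName  // e.g. Seq("col-1a2b") under column mapping, Seq("ts") otherwise
+ *   info.logicalName   // "ts"
+ * }}}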
+ */ +case class ClusteringColumnInfo( + physicalName: Seq[String], dataType: DataType, schema: StructType) { + lazy val logicalName: String = { + val reversePhysicalNameParts = physicalName.reverse + val resolver = SchemaUtils.DELTA_COL_RESOLVER + val logicalNameParts = + reversePhysicalNameParts + .foldRight[(Seq[String], DataType)]((Nil, schema)) { + (namePart, state) => + val (logicalNameParts, parentRawDataType) = state + val parentDataType = parentRawDataType.asInstanceOf[StructType] + val nextField = + parentDataType + .find(field => resolver(DeltaColumnMapping.getPhysicalName(field), namePart)) + .get + (nextField.name +: logicalNameParts, nextField.dataType) + }._1.reverse + FieldReference(logicalNameParts).toString + } +} + +object ClusteringColumnInfo extends DeltaLogging { + def apply(schema: StructType, clusteringColumn: ClusteringColumn): ClusteringColumnInfo = + apply(schema, clusteringColumn.physicalName) + + def apply(schema: StructType, physicalName: Seq[String]): ClusteringColumnInfo = { + val resolver = SchemaUtils.DELTA_COL_RESOLVER + val dataType = physicalName.foldLeft[DataType](schema) { + (currStructType, namePart) => + currStructType.asInstanceOf[StructType].find { field => + resolver(DeltaColumnMapping.getPhysicalName(field), namePart) + }.get.dataType + } + ClusteringColumnInfo(physicalName, dataType, schema) + } + + def extractLogicalNames(snapshot: Snapshot): Seq[String] = { + ClusteredTableUtils.getClusteringColumnsOptional(snapshot).map { clusteringColumns => + clusteringColumns.map(ClusteringColumnInfo(snapshot.schema, _).logicalName) + }.getOrElse(Seq.empty) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/temp/ClusterBySpec.scala b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/temp/ClusterBySpec.scala new file mode 100644 index 00000000000..72510c514bb --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/temp/ClusterBySpec.scala @@ -0,0 +1,160 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.skipping.clustering.temp + +import scala.reflect.ClassTag + +import org.apache.spark.sql.delta.skipping.clustering.ClusteredTableUtils +import com.fasterxml.jackson.annotation.JsonInclude.Include +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule} +import org.antlr.v4.runtime.ParserRuleContext + +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.parser.{ParseException, ParserInterface, ParserUtils} +import org.apache.spark.sql.catalyst.plans.logical.{CreateTable, CreateTableAsSelect, LeafNode, LogicalPlan, ReplaceTable, ReplaceTableAsSelect} +import org.apache.spark.sql.connector.expressions.{BucketTransform, FieldReference, NamedReference, Transform} + +/** + * A container for clustering information. 
Copied from OSS Spark. + * + * This class will be removed when we integrate with OSS Spark's CLUSTER BY implementation. + * @see https://github.com/apache/spark/pull/42577 + * + * @param columnNames the names of the columns used for clustering. + */ +case class ClusterBySpec(columnNames: Seq[NamedReference]) { + override def toString: String = toJson + + def toJson: String = + ClusterBySpec.mapper.writeValueAsString(columnNames.map(_.fieldNames)) +} + +object ClusterBySpec { + private val mapper = { + val ret = new ObjectMapper() with ClassTagExtensions + ret.setSerializationInclusion(Include.NON_ABSENT) + ret.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + ret.registerModule(DefaultScalaModule) + ret + } + + // ClassTag is added to avoid the "same type after erasure" issue with the case class. + def apply[_: ClassTag](columnNames: Seq[Seq[String]]): ClusterBySpec = { + ClusterBySpec(columnNames.map(FieldReference(_))) + } + + // Convert from table property back to ClusterBySpec. + def fromProperty(columns: String): ClusterBySpec = { + ClusterBySpec(mapper.readValue[Seq[Seq[String]]](columns).map(FieldReference(_))) + } + + def toProperty(clusterBySpec: ClusterBySpec): (String, String) = { + ClusteredTableUtils.PROP_CLUSTERING_COLUMNS -> clusterBySpec.toJson + } +} + +/** + * A [[LogicalPlan]] representing a CLUSTER BY clause. + * + * This class will be removed when we integrate with OSS Spark's CLUSTER BY implementation. + * @see https://github.com/apache/spark/pull/42577 + * + * @param clusterBySpec: clusterBySpec which contains the clustering columns. + * @param startIndex: start index of CLUSTER BY clause. + * @param stopIndex: stop index of CLUSTER BY clause. + * @param parenStartIndex: start index of the left parenthesis in CLUSTER BY clause. + * @param parenStopIndex: stop index of the right parenthesis in CLUSTER BY clause. + * @param ctx: parser rule context of the CLUSTER BY clause. + */ +case class ClusterByPlan( + clusterBySpec: ClusterBySpec, + startIndex: Int, + stopIndex: Int, + parenStartIndex: Int, + parenStopIndex: Int, + ctx: ParserRuleContext) + extends LeafNode { + override def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = this + override def output: Seq[Attribute] = Seq.empty +} + +/** + * Parser utils for parsing a [[ClusterByPlan]] and converts it to table properties. + * + * This class will be removed when we integrate with OSS Spark's CLUSTER BY implementation. + * @see https://github.com/apache/spark/pull/42577 + * + * @param clusterByPlan: the ClusterByPlan to parse. + * @param delegate: delegate parser. + */ +case class ClusterByParserUtils(clusterByPlan: ClusterByPlan, delegate: ParserInterface) { + // Update partitioning to include clustering columns as transforms. + private def updatePartitioning(partitioning: Seq[Transform]): Seq[Transform] = { + // Validate no bucketing is specified. + if (partitioning.exists(t => t.isInstanceOf[BucketTransform])) { + ParserUtils.operationNotAllowed( + "Clustering and bucketing cannot both be specified. " + + "Please remove CLUSTERED BY INTO BUCKETS if you " + + "want to create a Delta table with clustering", + clusterByPlan.ctx) + } + Seq(ClusterByTransform(clusterByPlan.clusterBySpec.columnNames)) + } + + /** + * Parse the [[ClusterByPlan]] by replacing CLUSTER BY with PARTITIONED BY and + * leverage Spark SQL parser to perform the validation. After parsing, store the + * clustering columns in the logical plan's partitioning transforms. 
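+ *
+ * Rough illustration of the rewrite (editor's addition, not from the original patch):
+ * {{{
+ *   -- user SQL:
+ *   CREATE TABLE t (a INT, b INT) USING delta CLUSTER BY (a, b)
+ *   -- text handed to the delegate parser:
+ *   CREATE TABLE t (a INT, b INT) USING delta PARTITIONED BY (a, b)
+ *   -- the returned plan's partitioning is then replaced with a single
+ *   -- ClusterByTransform carrying the columns a and b.
+ * }}}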
+ * + * @param sqlText: original SQL text. + * @return the logical plan after parsing. + */ + def parsePlan(sqlText: String): LogicalPlan = { + val colText = + sqlText.substring(clusterByPlan.parenStartIndex, clusterByPlan.parenStopIndex + 1) + // Replace CLUSTER BY with PARTITIONED BY to let SparkSqlParser do the validation for us. + // This serves as a short-term workaround until Spark incorporates CREATE TABLE ... CLUSTER BY + // syntax. + val partitionedByText = "PARTITIONED BY " + colText + val newSqlText = + sqlText.substring(0, clusterByPlan.startIndex) + + partitionedByText + + sqlText.substring(clusterByPlan.stopIndex + 1) + try { + delegate.parsePlan(newSqlText) match { + case create: CreateTable => + create.copy(partitioning = updatePartitioning(create.partitioning)) + case ctas: CreateTableAsSelect => + ctas.copy(partitioning = updatePartitioning(ctas.partitioning)) + case replace: ReplaceTable => + replace.copy(partitioning = updatePartitioning(replace.partitioning)) + case rtas: ReplaceTableAsSelect => + rtas.copy(partitioning = updatePartitioning(rtas.partitioning)) + case plan => plan + } + } catch { + case e: ParseException if (e.errorClass.contains("DUPLICATE_CLAUSES")) => + // Since we replace CLUSTER BY with PARTITIONED BY, duplicated clauses means we + // encountered CLUSTER BY with PARTITIONED BY. + ParserUtils.operationNotAllowed( + "Clustering and partitioning cannot both be specified. " + + "Please remove PARTITIONED BY if you want to create a Delta table with clustering", + clusterByPlan.ctx) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/temp/ClusterByTransform.scala b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/temp/ClusterByTransform.scala new file mode 100644 index 00000000000..8d16b525667 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/skipping/clustering/temp/ClusterByTransform.scala @@ -0,0 +1,59 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.skipping.clustering.temp + +import org.apache.spark.sql.connector.expressions.{Expression, NamedReference, Transform} + +/** + * Minimal version of Spark's ClusterByTransform. We'll remove this when we integrate with OSS + * Spark's CLUSTER BY implementation. + * + * This class represents a transform for `ClusterBySpec`. This is used to bundle + * ClusterBySpec in CreateTable's partitioning transforms to pass it down to analyzer/delta. + */ +final case class ClusterByTransform( + columnNames: Seq[NamedReference]) extends Transform { + + override val name: String = "temp_cluster_by" + + override def arguments: Array[Expression] = columnNames.toArray + + override def toString: String = s"$name(${arguments.map(_.describe).mkString(", ")})" +} + +/** + * Convenience extractor for ClusterByTransform. 
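+ *
+ * A minimal match example (editor's sketch, assuming a `partitioning: Seq[Transform]` value):
+ * {{{
+ *   val clusteringCols = partitioning.collectFirst {
+ *     case ClusterByTransform(cols) => cols.map(_.fieldNames.mkString("."))
+ *   }
+ * }}}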
+ */ +object ClusterByTransform { + def unapply(transform: Transform): Option[Seq[NamedReference]] = + transform match { + case NamedTransform("temp_cluster_by", arguments) => + Some(arguments.map(_.asInstanceOf[NamedReference])) + case _ => + None + } +} + +/** + * Copied from OSS Spark. We'll remove this when we integrate with OSS Spark's CLUSTER BY. + * Convenience extractor for any Transform. + */ +private object NamedTransform { + def unapply(transform: Transform): Some[(String, Seq[Expression])] = { + Some((transform.name, transform.arguments)) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala new file mode 100644 index 00000000000..d523ad4f8ec --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala @@ -0,0 +1,473 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.sources + +import scala.collection.JavaConverters._ +import scala.collection.mutable +import scala.util.{Failure, Success, Try} + +// scalastyle:off import.ordering.noEmptyLine +import com.databricks.spark.util.DatabricksLogging +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands.WriteIntoDelta +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.util.PartitionUtils +import org.apache.hadoop.fs.Path +import org.json4s.{Formats, NoTypeHints} +import org.json4s.jackson.Serialization + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.{EqualTo, Expression, Literal} +import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.catalog.{Table, TableProvider} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.execution.streaming.{Sink, Source} +import org.apache.spark.sql.sources._ +import org.apache.spark.sql.streaming.OutputMode +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** A DataSource V1 for integrating Delta into Spark SQL batch and Streaming APIs. 
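+ *
+ * Typical entry points that route through this data source (editor's illustration; the
+ * paths are hypothetical):
+ * {{{
+ *   spark.read.format("delta").load("/data/events")                // batch read
+ *   df.write.format("delta").mode("append").save("/data/events")   // batch write
+ *   spark.readStream.format("delta").load("/data/events")          // streaming source
+ *   df.writeStream.format("delta")
+ *     .option("checkpointLocation", "/chk/events")
+ *     .start("/data/events")                                       // streaming sink
+ * }}}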
*/ +class DeltaDataSource + extends RelationProvider + with StreamSourceProvider + with StreamSinkProvider + with CreatableRelationProvider + with DataSourceRegister + with TableProvider + with DeltaLogging { + + def inferSchema: StructType = new StructType() // empty + + override def inferSchema(options: CaseInsensitiveStringMap): StructType = inferSchema + + override def getTable( + schema: StructType, + partitioning: Array[Transform], + properties: java.util.Map[String, String]): Table = { + val options = new CaseInsensitiveStringMap(properties) + val path = options.get("path") + if (path == null) throw DeltaErrors.pathNotSpecifiedException + DeltaTableV2(SparkSession.active, new Path(path), options = options.asScala.toMap) + } + + override def sourceSchema( + sqlContext: SQLContext, + schema: Option[StructType], + providerName: String, + parameters: Map[String, String]): (String, StructType) = { + if (schema.nonEmpty && schema.get.nonEmpty) { + throw DeltaErrors.specifySchemaAtReadTimeException + } + val path = parameters.getOrElse("path", { + throw DeltaErrors.pathNotSpecifiedException + }) + + val (_, maybeTimeTravel) = DeltaTableUtils.extractIfPathContainsTimeTravel( + sqlContext.sparkSession, path, Map.empty) + if (maybeTimeTravel.isDefined) throw DeltaErrors.timeTravelNotSupportedException + if (DeltaDataSource.getTimeTravelVersion(parameters).isDefined) { + throw DeltaErrors.timeTravelNotSupportedException + } + + val (_, snapshot) = DeltaLog.forTableWithSnapshot(sqlContext.sparkSession, new Path(path)) + // This is the analyzed schema for Delta streaming + val readSchema = { + // Check if we would like to merge consecutive schema changes, this would allow customers + // to write queries based on their latest changes instead of an arbitrary schema in the past. + val shouldMergeConsecutiveSchemas = sqlContext.sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_STREAMING_ENABLE_SCHEMA_TRACKING_MERGE_CONSECUTIVE_CHANGES + ) + // This method is invoked during the analysis phase and would determine the schema for the + // streaming dataframe. We only need to merge consecutive schema changes here because the + // process would create a new entry in the schema log such that when the schema log is + // looked up again in the execution phase, we would use the correct schema. 
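+      // Illustrative reader setup that opts into schema tracking (editor's note; the paths
+      // are hypothetical):
+      //   spark.readStream.format("delta")
+      //     .option("schemaTrackingLocation", "/chk/events/_schema_log")
+      //     .load("/data/events")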
+ DeltaDataSource.getMetadataTrackingLogForDeltaSource( + sqlContext.sparkSession, snapshot, parameters, + mergeConsecutiveSchemaChanges = shouldMergeConsecutiveSchemas) + .flatMap(_.getCurrentTrackedMetadata.map(_.dataSchema)) + .getOrElse(snapshot.schema) + } + + val schemaToUse = DeltaTableUtils.removeInternalMetadata(sqlContext.sparkSession, readSchema) + if (schemaToUse.isEmpty) { + throw DeltaErrors.schemaNotSetException + } + val options = new CaseInsensitiveStringMap(parameters.asJava) + if (CDCReader.isCDCRead(options)) { + (shortName(), CDCReader.cdcReadSchema(schemaToUse)) + } else { + (shortName(), schemaToUse) + } + } + + override def createSource( + sqlContext: SQLContext, + metadataPath: String, + schema: Option[StructType], + providerName: String, + parameters: Map[String, String]): Source = { + if (schema.nonEmpty && schema.get.nonEmpty) { + throw DeltaErrors.specifySchemaAtReadTimeException + } + val path = parameters.getOrElse("path", { + throw DeltaErrors.pathNotSpecifiedException + }) + val options = new DeltaOptions(parameters, sqlContext.sparkSession.sessionState.conf) + val (deltaLog, snapshot) = + DeltaLog.forTableWithSnapshot(sqlContext.sparkSession, new Path(path)) + val schemaTrackingLogOpt = + DeltaDataSource.getMetadataTrackingLogForDeltaSource( + sqlContext.sparkSession, snapshot, parameters, + // Pass in the metadata path opt so we can use it for validation + sourceMetadataPathOpt = Some(metadataPath)) + + val readSchema = schemaTrackingLogOpt + .flatMap(_.getCurrentTrackedMetadata.map(_.dataSchema)) + .getOrElse(snapshot.schema) + + if (readSchema.isEmpty) { + throw DeltaErrors.schemaNotSetException + } + DeltaSource( + sqlContext.sparkSession, + deltaLog, + options, + snapshot, + metadataPath, + schemaTrackingLogOpt + ) + } + + override def createSink( + sqlContext: SQLContext, + parameters: Map[String, String], + partitionColumns: Seq[String], + outputMode: OutputMode): Sink = { + val path = parameters.getOrElse("path", { + throw DeltaErrors.pathNotSpecifiedException + }) + if (outputMode != OutputMode.Append && outputMode != OutputMode.Complete) { + throw DeltaErrors.outputModeNotSupportedException(getClass.getName, outputMode.toString) + } + val deltaOptions = new DeltaOptions(parameters, sqlContext.sparkSession.sessionState.conf) + // NOTE: Spark API doesn't give access to the CatalogTable here, but DeltaAnalysis will pick + // that info out of the containing WriteToStream (if present), and update the sink there. 
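+    // Illustrative write that ends up in this sink (editor's sketch; the paths are
+    // hypothetical):
+    //   df.writeStream.format("delta")
+    //     .outputMode("append")
+    //     .option("checkpointLocation", "/chk/events")
+    //     .start("/data/events")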
+ new DeltaSink(sqlContext, new Path(path), partitionColumns, outputMode, deltaOptions) + } + + override def createRelation( + sqlContext: SQLContext, + mode: SaveMode, + parameters: Map[String, String], + data: DataFrame): BaseRelation = { + val path = parameters.getOrElse("path", { + throw DeltaErrors.pathNotSpecifiedException + }) + val partitionColumns = parameters.get(DeltaSourceUtils.PARTITIONING_COLUMNS_KEY) + .map(DeltaDataSource.decodePartitioningColumns) + .getOrElse(Nil) + + val deltaLog = DeltaLog.forTable(sqlContext.sparkSession, new Path(path), parameters) + WriteIntoDelta( + deltaLog = deltaLog, + mode = mode, + new DeltaOptions(parameters, sqlContext.sparkSession.sessionState.conf), + partitionColumns = partitionColumns, + configuration = DeltaConfigs.validateConfigurations( + parameters.filterKeys(_.startsWith("delta.")).toMap), + data = data, + // empty catalogTable is acceptable as the code path is only for path based writes + // (df.write.save("path")) which does not need to use/update catalog + catalogTableOpt = None + ).run(sqlContext.sparkSession) + + deltaLog.createRelation() + } + + override def createRelation( + sqlContext: SQLContext, + parameters: Map[String, String]): BaseRelation = { + recordFrameProfile("Delta", "DeltaDataSource.createRelation") { + val maybePath = parameters.getOrElse("path", { + throw DeltaErrors.pathNotSpecifiedException + }) + + // Log any invalid options that are being passed in + DeltaOptions.verifyOptions(CaseInsensitiveMap(parameters)) + + val timeTravelByParams = DeltaDataSource.getTimeTravelVersion(parameters) + var cdcOptions: mutable.Map[String, String] = mutable.Map.empty + val caseInsensitiveParams = new CaseInsensitiveStringMap(parameters.asJava) + if (CDCReader.isCDCRead(caseInsensitiveParams)) { + cdcOptions = mutable.Map[String, String](DeltaDataSource.CDC_ENABLED_KEY -> "true") + if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_START_VERSION_KEY)) { + cdcOptions(DeltaDataSource.CDC_START_VERSION_KEY) = caseInsensitiveParams.get( + DeltaDataSource.CDC_START_VERSION_KEY) + } + if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_START_TIMESTAMP_KEY)) { + cdcOptions(DeltaDataSource.CDC_START_TIMESTAMP_KEY) = caseInsensitiveParams.get( + DeltaDataSource.CDC_START_TIMESTAMP_KEY) + } + if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_END_VERSION_KEY)) { + cdcOptions(DeltaDataSource.CDC_END_VERSION_KEY) = caseInsensitiveParams.get( + DeltaDataSource.CDC_END_VERSION_KEY) + } + if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_END_TIMESTAMP_KEY)) { + cdcOptions(DeltaDataSource.CDC_END_TIMESTAMP_KEY) = caseInsensitiveParams.get( + DeltaDataSource.CDC_END_TIMESTAMP_KEY) + } + } + val dfOptions: Map[String, String] = + if (sqlContext.sparkSession.sessionState.conf.getConf( + DeltaSQLConf.LOAD_FILE_SYSTEM_CONFIGS_FROM_DATAFRAME_OPTIONS)) { + parameters ++ cdcOptions + } else { + cdcOptions.toMap + } + DeltaTableV2( + sqlContext.sparkSession, + new Path(maybePath), + timeTravelOpt = timeTravelByParams, + options = dfOptions + ).toBaseRelation + } + } + + override def shortName(): String = { + DeltaSourceUtils.ALT_NAME + } + +} + +object DeltaDataSource extends DatabricksLogging { + private implicit val formats: Formats = Serialization.formats(NoTypeHints) + + final val TIME_TRAVEL_SOURCE_KEY = "__time_travel_source__" + + /** + * The option key for time traveling using a timestamp. The timestamp should be a valid + * timestamp string which can be cast to a timestamp type. 
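+   *
+   * Illustrative time-travel reads (editor's addition; path and values are hypothetical):
+   * {{{
+   *   spark.read.format("delta").option("timestampAsOf", "2021-01-01").load("/data/events")
+   *   spark.read.format("delta").option("versionAsOf", 12).load("/data/events")
+   * }}}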
+ */ + final val TIME_TRAVEL_TIMESTAMP_KEY = "timestampAsOf" + + /** + * The option key for time traveling using a version of a table. This value should be + * castable to a long. + */ + final val TIME_TRAVEL_VERSION_KEY = "versionAsOf" + + final val CDC_START_VERSION_KEY = "startingVersion" + + final val CDC_START_TIMESTAMP_KEY = "startingTimestamp" + + final val CDC_END_VERSION_KEY = "endingVersion" + + final val CDC_END_TIMESTAMP_KEY = "endingTimestamp" + + final val CDC_ENABLED_KEY = "readChangeFeed" + + final val CDC_ENABLED_KEY_LEGACY = "readChangeData" + + def encodePartitioningColumns(columns: Seq[String]): String = { + Serialization.write(columns) + } + + def decodePartitioningColumns(str: String): Seq[String] = { + Serialization.read[Seq[String]](str) + } + + /** + * Extract the Delta path if `dataset` is created to load a Delta table. Otherwise returns `None`. + * Table UI in universe will call this. + */ + def extractDeltaPath(dataset: Dataset[_]): Option[String] = { + if (dataset.isStreaming) { + dataset.queryExecution.logical match { + case logical: org.apache.spark.sql.execution.streaming.StreamingRelation => + if (logical.dataSource.providingClass == classOf[DeltaDataSource]) { + CaseInsensitiveMap(logical.dataSource.options).get("path") + } else { + None + } + case _ => None + } + } else { + dataset.queryExecution.analyzed match { + case DeltaTable(tahoeFileIndex) => + Some(tahoeFileIndex.path.toString) + case SubqueryAlias(_, DeltaTable(tahoeFileIndex)) => + Some(tahoeFileIndex.path.toString) + case _ => None + } + } + } + + /** + * For Delta, we allow certain magic to be performed through the paths that are provided by users. + * Normally, a user specified path should point to the root of a Delta table. However, some users + * are used to providing specific partition values through the path, because of how expensive it + * was to perform partition discovery before. We treat these partition values as logical partition + * filters, if a table does not exist at the provided path. + * + * In addition, we allow users to provide time travel specifications through the path. This is + * provided after an `@` symbol after a path followed by a time specification in + * `yyyyMMddHHmmssSSS` format, or a version number preceded by a `v`. + * + * This method parses these specifications and returns these modifiers only if a path does not + * really exist at the provided path. We first parse out the time travel specification, and then + * the partition filters. For example, a path specified as: + * /some/path/partition=1@v1234 + * will be parsed into `/some/path` with filters `partition=1` and a time travel spec of version + * 1234. + * + * @return A tuple of the root path of the Delta table, partition filters, and time travel options + */ + def parsePathIdentifier( + spark: SparkSession, + userPath: String, + options: Map[String, String]): (Path, Seq[(String, String)], Option[DeltaTimeTravelSpec]) = { + // Handle time travel + val (path, timeTravelByPath) = + DeltaTableUtils.extractIfPathContainsTimeTravel(spark, userPath, options) + + val hadoopPath = new Path(path) + val rootPath = + DeltaTableUtils.findDeltaTableRoot(spark, hadoopPath, options).getOrElse(hadoopPath) + + val partitionFilters = if (rootPath != hadoopPath) { + logConsole( + """ + |WARNING: loading partitions directly with delta is not recommended. + |If you are trying to read a specific partition, use a where predicate. 
+ | + |CORRECT: spark.read.format("delta").load("/data").where("part=1") + |INCORRECT: spark.read.format("delta").load("/data/part=1") + """.stripMargin) + + val fragment = hadoopPath.toString.substring(rootPath.toString.length() + 1) + try { + PartitionUtils.parsePathFragmentAsSeq(fragment) + } catch { + case _: ArrayIndexOutOfBoundsException => + throw DeltaErrors.partitionPathParseException(fragment) + } + } else { + Nil + } + + (rootPath, partitionFilters, timeTravelByPath) + } + + /** + * Verifies that the provided partition filters are valid and returns the corresponding + * expressions. + */ + def verifyAndCreatePartitionFilters( + userPath: String, + snapshot: Snapshot, + partitionFilters: Seq[(String, String)]): Seq[Expression] = { + if (partitionFilters.nonEmpty) { + val metadata = snapshot.metadata + + val badColumns = partitionFilters.map(_._1).filterNot(metadata.partitionColumns.contains) + if (badColumns.nonEmpty) { + val fragment = partitionFilters.map(f => s"${f._1}=${f._2}").mkString("/") + throw DeltaErrors.partitionPathInvolvesNonPartitionColumnException(badColumns, fragment) + } + + val filters = partitionFilters.map { case (key, value) => + // Nested fields cannot be partitions, so we pass the key as a identifier + EqualTo(UnresolvedAttribute(Seq(key)), Literal(value)) + } + val files = DeltaLog.filterFileList( + metadata.partitionSchema, snapshot.allFiles.toDF(), filters) + if (files.count() == 0) { + throw DeltaErrors.pathNotExistsException(userPath) + } + filters + } else { + Nil + } + } + + /** Extracts whether users provided the option to time travel a relation. */ + def getTimeTravelVersion(parameters: Map[String, String]): Option[DeltaTimeTravelSpec] = { + val caseInsensitive = CaseInsensitiveMap[String](parameters) + val tsOpt = caseInsensitive.get(DeltaDataSource.TIME_TRAVEL_TIMESTAMP_KEY) + val versionOpt = caseInsensitive.get(DeltaDataSource.TIME_TRAVEL_VERSION_KEY) + val sourceOpt = caseInsensitive.get(DeltaDataSource.TIME_TRAVEL_SOURCE_KEY) + + if (tsOpt.isDefined && versionOpt.isDefined) { + throw DeltaErrors.provideOneOfInTimeTravel + } else if (tsOpt.isDefined) { + Some(DeltaTimeTravelSpec(Some(Literal(tsOpt.get)), None, sourceOpt.orElse(Some("dfReader")))) + } else if (versionOpt.isDefined) { + val version = Try(versionOpt.get.toLong) match { + case Success(v) => v + case Failure(t) => + throw DeltaErrors.timeTravelInvalidBeginValue(DeltaDataSource.TIME_TRAVEL_VERSION_KEY, t) + } + Some(DeltaTimeTravelSpec(None, Some(version), sourceOpt.orElse(Some("dfReader")))) + } else { + None + } + } + + /** + * Extract the schema tracking location from options. 
+ */ + def extractSchemaTrackingLocationConfig( + spark: SparkSession, parameters: Map[String, String]): Option[String] = { + val options = new CaseInsensitiveStringMap(parameters.asJava) + + Option(options.get(DeltaOptions.SCHEMA_TRACKING_LOCATION)) + .orElse(Option(options.get(DeltaOptions.SCHEMA_TRACKING_LOCATION_ALIAS))) + } + + /** + * Create a schema log for Delta streaming source if possible + */ + def getMetadataTrackingLogForDeltaSource( + spark: SparkSession, + sourceSnapshot: SnapshotDescriptor, + parameters: Map[String, String], + sourceMetadataPathOpt: Option[String] = None, + mergeConsecutiveSchemaChanges: Boolean = false): Option[DeltaSourceMetadataTrackingLog] = { + val options = new CaseInsensitiveStringMap(parameters.asJava) + + DeltaDataSource.extractSchemaTrackingLocationConfig(spark, parameters) + .map { schemaTrackingLocation => + if (!spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_STREAMING_ENABLE_SCHEMA_TRACKING)) { + throw new UnsupportedOperationException( + "Schema tracking location is not supported for Delta streaming source") + } + + DeltaSourceMetadataTrackingLog.create( + spark, schemaTrackingLocation, sourceSnapshot, + Option(options.get(DeltaOptions.STREAMING_SOURCE_TRACKING_ID)), + sourceMetadataPathOpt, + mergeConsecutiveSchemaChanges + ) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSQLConf.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSQLConf.scala new file mode 100644 index 00000000000..f3ae10bdbf4 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSQLConf.scala @@ -0,0 +1,1633 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.sources + +import java.util.Locale +import java.util.concurrent.TimeUnit + +import org.apache.spark.internal.config.ConfigBuilder +import org.apache.spark.network.util.ByteUnit +import org.apache.spark.sql.catalyst.FileSourceOptions +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.Utils + +/** + * [[SQLConf]] entries for Delta features. 
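+ *
+ * Keys declared via `buildConf` below are exposed under the `spark.databricks.delta.`
+ * prefix (editor's illustration using a conf defined in this file):
+ * {{{
+ *   // buildConf("snapshotPartitions") is surfaced to users as:
+ *   spark.conf.set("spark.databricks.delta.snapshotPartitions", "100")
+ * }}}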
+ */ +trait DeltaSQLConfBase { + val SQL_CONF_PREFIX = "spark.databricks.delta" + + def buildConf(key: String): ConfigBuilder = SQLConf.buildConf(s"$SQL_CONF_PREFIX.$key") + def buildStaticConf(key: String): ConfigBuilder = + SQLConf.buildStaticConf(s"spark.databricks.delta.$key") + + val RESOLVE_TIME_TRAVEL_ON_IDENTIFIER = + buildConf("timeTravel.resolveOnIdentifier.enabled") + .internal() + .doc("When true, we will try to resolve patterns as `@v123` in identifiers as time " + + "travel nodes.") + .booleanConf + .createWithDefault(true) + + val DELTA_COMMIT_LOCK_ENABLED = + buildConf("commitLock.enabled") + .internal() + .doc("Whether to lock a Delta table when doing a commit.") + .booleanConf + .createOptional + + val DELTA_COLLECT_STATS = + buildConf("stats.collect") + .internal() + .doc("When true, statistics are collected while writing files into a Delta table.") + .booleanConf + .createWithDefault(true) + + val DELTA_DML_METRICS_FROM_METADATA = + buildConf("dmlMetricsFromMetadata.enabled") + .internal() + .doc( + """ When enabled, metadata only Delete, ReplaceWhere and Truncate operations will report row + | level operation metrics by reading the file statistics for number of rows. + | """.stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_COLLECT_STATS_USING_TABLE_SCHEMA = + buildConf("stats.collect.using.tableSchema") + .internal() + .doc("When collecting stats while writing files into Delta table" + + s" (${DELTA_COLLECT_STATS.key} needs to be true), whether to use the table schema (true)" + + " or the DataFrame schema (false) as the stats collection schema.") + .booleanConf + .createWithDefault(true) + + val DELTA_USER_METADATA = + buildConf("commitInfo.userMetadata") + .doc("Arbitrary user-defined metadata to include in CommitInfo.") + .stringConf + .createOptional + + val DELTA_CONVERT_USE_METADATA_LOG = + buildConf("convert.useMetadataLog") + .doc( + """ When converting to a Parquet table that was created by Structured Streaming, whether + | to use the transaction log under `_spark_metadata` as the source of truth for files + | contained in the table. + """.stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_CONVERT_USE_CATALOG_PARTITIONS = + buildConf("convert.useCatalogPartitions") + .internal() + .doc( + """ When converting a catalog Parquet table, whether to use the partition information from + | the Metastore catalog and only commit files under the directories of active partitions. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_CONVERT_USE_CATALOG_SCHEMA = + buildConf("convert.useCatalogSchema") + .doc( + """ When converting to a catalog Parquet table, whether to use the catalog schema as the + | source of truth. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_CONVERT_PARTITION_VALUES_IGNORE_CAST_FAILURE = + buildConf("convert.partitionValues.ignoreCastFailure") + .doc( + """ When converting to Delta, ignore the failure when casting a partition value to + | the specified data type, in which case the partition column will be filled with null. + """.stripMargin) + .booleanConf + .createWithDefault(false) + + val DELTA_CONVERT_ICEBERG_USE_NATIVE_PARTITION_VALUES = + buildConf("convert.iceberg.useNativePartitionValues") + .doc( + """ When enabled, obtain the partition values from Iceberg table's metadata, instead + | of inferring from file paths. 
+ |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_SNAPSHOT_PARTITIONS = + buildConf("snapshotPartitions") + .internal() + .doc("Number of partitions to use when building a Delta Lake snapshot.") + .intConf + .checkValue(n => n > 0, "Delta snapshot partition number must be positive.") + .createOptional + + val DELTA_SNAPSHOT_LOADING_MAX_RETRIES = + buildConf("snapshotLoading.maxRetries") + .internal() + .doc("How many times to retry when failing to load a snapshot. Each retry will try to use " + + "a different checkpoint in order to skip potential corrupt checkpoints.") + .intConf + .checkValue(n => n >= 0, "must not be negative.") + .createWithDefault(2) + + val DELTA_SNAPSHOT_CACHE_STORAGE_LEVEL = + buildConf("snapshotCache.storageLevel") + .internal() + .doc("StorageLevel to use for caching the DeltaLog Snapshot. In general, this should not " + + "be used unless you are pretty sure that caching has a negative impact.") + .stringConf + .createWithDefault("MEMORY_AND_DISK_SER") + + val DELTA_PARTITION_COLUMN_CHECK_ENABLED = + buildConf("partitionColumnValidity.enabled") + .internal() + .doc("Whether to check whether the partition column names have valid names, just like " + + "the data columns.") + .booleanConf + .createWithDefault(true) + + val DELTA_COMMIT_VALIDATION_ENABLED = + buildConf("commitValidation.enabled") + .internal() + .doc("Whether to perform validation checks before commit or not.") + .booleanConf + .createWithDefault(true) + + val DELTA_SCHEMA_ON_READ_CHECK_ENABLED = + buildConf("checkLatestSchemaOnRead") + .doc("In Delta, we always try to give users the latest version of their data without " + + "having to call REFRESH TABLE or redefine their DataFrames when used in the context of " + + "streaming. There is a possibility that the schema of the latest version of the table " + + "may be incompatible with the schema at the time of DataFrame creation. This flag " + + "enables a check that ensures that users won't read corrupt data if the source schema " + + "changes in an incompatible way.") + .booleanConf + .createWithDefault(true) + + val DELTA_ALLOW_CREATE_EMPTY_SCHEMA_TABLE = + buildConf("createEmptySchemaTable.enabled") + .internal() + .doc( + s"""If enabled, creating a Delta table with an empty schema will be allowed through SQL API + |`CREATE TABLE table () USING delta ...`, or Delta table APIs. + |Creating a Delta table with empty schema table using dataframe operations or + |`CREATE OR REPLACE` syntax are not supported. + |The result Delta table can be updated using schema evolution operations such as + |`df.save()` with `mergeSchema = true`. + |Reading the empty schema table using DataframeReader or `SELECT` is not allowed. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val AUTO_COMPACT_ALLOWED_VALUES = Seq( + "false", + "true" + ) + + val DELTA_AUTO_COMPACT_ENABLED = + buildConf("autoCompact.enabled") + .doc(s"""Whether to compact files after writes made into Delta tables from this session. This + | conf can be set to "true" to enable Auto Compaction, OR "false" to disable Auto Compaction + | on all writes across all delta tables in this session. 
+ | """.stripMargin) + .stringConf + .transform(_.toLowerCase(Locale.ROOT)) + .checkValue(AUTO_COMPACT_ALLOWED_VALUES.contains(_), + """"spark.databricks.delta.autoCompact.enabled" must be one of: """ + + s"""${AUTO_COMPACT_ALLOWED_VALUES.mkString("(", ",", ")")}""") + .createOptional + + val DELTA_AUTO_COMPACT_RECORD_PARTITION_STATS_ENABLED = + buildConf("autoCompact.recordPartitionStats.enabled") + .internal() + .doc(s"""When enabled, each committed write delta transaction records the number of qualified + |files of each partition of the target table for Auto Compact in driver's + |memory.""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_AUTO_COMPACT_EARLY_SKIP_PARTITION_TABLE_ENABLED = + buildConf("autoCompact.earlySkipPartitionTable.enabled") + .internal() + .doc(s"""Auto Compaction will be skipped if there is no partition with + |sufficient number of small files.""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_AUTO_COMPACT_MAX_TABLE_PARTITION_STATS = + buildConf("autoCompact.maxTablePartitionStats") + .internal() + .doc( + s"""The maximum number of Auto Compaction partition statistics of each table. This controls + |the maximum number of partitions statistics each delta table can have. Increasing + |this value reduces the hash conflict and makes partitions statistics more accurate with + |the cost of more memory consumption. + |""".stripMargin) + .intConf + .checkValue(_ > 0, "The value of maxTablePartitionStats should be positive.") + .createWithDefault(16 * 1024) + + val DELTA_AUTO_COMPACT_PARTITION_STATS_SIZE = + buildConf("autoCompact.partitionStatsSize") + .internal() + .doc( + s"""The total number of partitions statistics entries can be kept in memory for all + |tables in each driver. If this threshold is reached, the partitions statistics of + |least recently accessed tables will be evicted out.""".stripMargin) + .intConf + .checkValue(_ > 0, "The value of partitionStatsSize should be positive.") + .createWithDefault(64 * 1024) + + val DELTA_AUTO_COMPACT_MAX_FILE_SIZE = + buildConf("autoCompact.maxFileSize") + .internal() + .doc(s"Target file size produced by auto compaction. The default value of this config" + + " is 128 MB.") + .longConf + .checkValue(_ >= 0, "maxFileSize has to be positive") + .createWithDefault(128 * 1024 * 1024) + + val DELTA_AUTO_COMPACT_MIN_NUM_FILES = + buildConf("autoCompact.minNumFiles") + .internal() + .doc("Number of small files that need to be in a directory before it can be optimized.") + .intConf + .checkValue(_ >= 0, "minNumFiles has to be positive") + .createWithDefault(50) + + val DELTA_AUTO_COMPACT_MIN_FILE_SIZE = + buildConf("autoCompact.minFileSize") + .internal() + .doc("Files which are smaller than this threshold (in bytes) will be grouped together and " + + "rewritten as larger files by the Auto Compaction. 
The default value of this config " + + s"is set to half of the config ${DELTA_AUTO_COMPACT_MAX_FILE_SIZE.key}") + .longConf + .checkValue(_ >= 0, "minFileSize has to be positive") + .createOptional + + val DELTA_AUTO_COMPACT_MODIFIED_PARTITIONS_ONLY_ENABLED = + buildConf("autoCompact.modifiedPartitionsOnly.enabled") + .internal() + .doc( + s"""When enabled, Auto Compaction only works on the modified partitions of the delta + |transaction that triggers compaction.""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_AUTO_COMPACT_NON_BLIND_APPEND_ENABLED = + buildConf("autoCompact.nonBlindAppend.enabled") + .internal() + .doc( + s"""When enabled, Auto Compaction is only triggered by non-blind-append write + |transaction.""".stripMargin) + .booleanConf + .createWithDefault(false) + + val DELTA_AUTO_COMPACT_MAX_NUM_MODIFIED_PARTITIONS = + buildConf("autoCompact.maxNumModifiedPartitions") + .internal() + .doc( + s"""The maximum number of partition can be selected for Auto Compaction when + | Auto Compaction runs on modified partition is enabled.""".stripMargin) + .intConf + .checkValue(_ > 0, "The value of maxNumModifiedPartitions should be positive.") + .createWithDefault(128) + + val DELTA_AUTO_COMPACT_RESERVE_PARTITIONS_ENABLED = + buildConf("autoCompact.reservePartitions.enabled") + .internal() + .doc( + s"""When enabled, each Auto Compact thread reserves its target partitions and skips the + |partitions that are under Auto Compaction by another thread + |concurrently.""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_IMPORT_BATCH_SIZE_STATS_COLLECTION = + buildConf("import.batchSize.statsCollection") + .internal() + .doc("The number of files per batch for stats collection during import.") + .intConf + .createWithDefault(50000) + + val DELTA_IMPORT_BATCH_SIZE_SCHEMA_INFERENCE = + buildConf("import.batchSize.schemaInference") + .internal() + .doc("The number of files per batch for schema inference during import.") + .intConf + .createWithDefault(1000000) + + val DELTA_SAMPLE_ESTIMATOR_ENABLED = + buildConf("sampling.enabled") + .internal() + .doc("Enable sample based estimation.") + .booleanConf + .createWithDefault(false) + + val DELTA_CONVERT_METADATA_CHECK_ENABLED = + buildConf("convert.metadataCheck.enabled") + .doc( + """ + |If enabled, during convert to delta, if there is a difference between the catalog table's + |properties and the Delta table's configuration, we should error. If disabled, merge + |the two configurations with the same semantics as update and merge. + """.stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_STATS_SKIPPING = + buildConf("stats.skipping") + .internal() + .doc("When true, statistics are used for skipping") + .booleanConf + .createWithDefault(true) + + val DELTA_LIMIT_PUSHDOWN_ENABLED = + buildConf("stats.limitPushdown.enabled") + .internal() + .doc("If true, use the limit clause and file statistics to prune files before " + + "they are collected to the driver. 
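
As a sketch of how these ConfigEntry values are consumed (the same pattern appears in DeltaSourceBase later in this patch), a registered entry can be read through the session's SQLConf; this assumes the `spark` session from the earlier sketch and uses Delta-internal classes, so it is illustrative rather than a stable API.

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.delta.sources.DeltaSQLConf

    // Read the effective value of an internal entry the way Delta's own code does.
    def statsSkippingEnabled(spark: SparkSession): Boolean =
      spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_STATS_SKIPPING)
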
") + .booleanConf + .createWithDefault(true) + + val DELTA_MAX_RETRY_COMMIT_ATTEMPTS = + buildConf("maxCommitAttempts") + .internal() + .doc("The maximum number of commit attempts we will try for a single commit before failing") + .intConf + .checkValue(_ >= 0, "maxCommitAttempts has to be positive") + .createWithDefault(10000000) + + val DELTA_PROTOCOL_DEFAULT_WRITER_VERSION = + buildConf("properties.defaults.minWriterVersion") + .doc("The default writer protocol version to create new tables with, unless a feature " + + "that requires a higher version for correctness is enabled.") + .intConf + .checkValues(Set(1, 2, 3, 4, 5, 7)) + .createWithDefault(2) + + val DELTA_PROTOCOL_DEFAULT_READER_VERSION = + buildConf("properties.defaults.minReaderVersion") + .doc("The default reader protocol version to create new tables with, unless a feature " + + "that requires a higher version for correctness is enabled.") + .intConf + .checkValues(Set(1, 2, 3)) + .createWithDefault(1) + + val DELTA_MAX_SNAPSHOT_LINEAGE_LENGTH = + buildConf("maxSnapshotLineageLength") + .internal() + .doc("The max lineage length of a Snapshot before Delta forces to build a Snapshot from " + + "scratch.") + .intConf + .checkValue(_ > 0, "maxSnapshotLineageLength must be positive.") + .createWithDefault(50) + + val DELTA_REPLACE_COLUMNS_SAFE = + buildConf("alter.replaceColumns.safe.enabled") + .internal() + .doc("Prevents an ALTER TABLE REPLACE COLUMNS method from dropping all columns, which " + + "leads to losing all data. It will only allow safe, unambiguous column changes.") + .booleanConf + .createWithDefault(true) + + val DELTA_HISTORY_PAR_SEARCH_THRESHOLD = + buildConf("history.maxKeysPerList") + .internal() + .doc("How many commits to list when performing a parallel search. Currently set to 1000, " + + "which is the maximum keys returned by S3 per list call. Azure can return 5000, " + + "therefore we choose 1000.") + .intConf + .createWithDefault(1000) + + val DELTA_HISTORY_METRICS_ENABLED = + buildConf("history.metricsEnabled") + .doc("Enables Metrics reporting in Describe History. CommitInfo will now record the " + + "Operation Metrics.") + .booleanConf + .createWithDefault(true) + + val DELTA_VACUUM_LOGGING_ENABLED = + buildConf("vacuum.logging.enabled") + .doc("Whether to log vacuum information into the Delta transaction log." + + " Users should only set this config to 'true' when the underlying file system safely" + + " supports concurrent writes.") + .booleanConf + .createOptional + + val DELTA_VACUUM_RETENTION_CHECK_ENABLED = + buildConf("retentionDurationCheck.enabled") + .doc("Adds a check preventing users from running vacuum with a very short retention " + + "period, which may end up corrupting the Delta Log.") + .booleanConf + .createWithDefault(true) + + val DELTA_VACUUM_PARALLEL_DELETE_ENABLED = + buildConf("vacuum.parallelDelete.enabled") + .doc("Enables parallelizing the deletion of files during a vacuum command. Enabling " + + "may result hitting rate limits on some storage backends. When enabled, parallelization " + + "is controlled 'spark.databricks.delta.vacuum.parallelDelete.parallelism'.") + .booleanConf + .createWithDefault(false) + + val DELTA_VACUUM_PARALLEL_DELETE_PARALLELISM = + buildConf("vacuum.parallelDelete.parallelism") + .doc("Sets the number of partitions to use for parallel deletes. 
If not set, defaults to " + + "spark.sql.shuffle.partitions.") + .intConf + .checkValue(_ > 0, "parallelDelete.parallelism must be positive") + .createOptional + + val DELTA_SCHEMA_AUTO_MIGRATE = + buildConf("schema.autoMerge.enabled") + .doc("If true, enables schema merging on appends and on overwrites.") + .booleanConf + .createWithDefault(false) + + val DELTA_SCHEMA_TYPE_CHECK = + buildConf("schema.typeCheck.enabled") + .doc( + """Enable the data type check when updating the table schema. Disabling this flag may + | allow users to create unsupported Delta tables and should only be used when trying to + | read/write legacy tables.""".stripMargin) + .internal() + .booleanConf + .createWithDefault(true) + + val DELTA_SCHEMA_REMOVE_SPARK_INTERNAL_METADATA = + buildConf("schema.removeSparkInternalMetadata") + .doc( + """Whether to remove leaked Spark's internal metadata from the table schema before returning + |to Spark. These internal metadata might be stored unintentionally in tables created by + |old Spark versions""".stripMargin) + .internal() + .booleanConf + .createWithDefault(true) + + val DELTA_UPDATE_CATALOG_ENABLED = + buildConf("catalog.update.enabled") + .internal() + .doc("When enabled, we will cache the schema of the Delta table and the table properties " + + "in the external catalog, e.g. the Hive MetaStore.") + .booleanConf + .createWithDefault(false) + + val DELTA_UPDATE_CATALOG_THREAD_POOL_SIZE = + buildStaticConf("catalog.update.threadPoolSize") + .internal() + .doc("The size of the thread pool for updating the external catalog.") + .intConf + .checkValue(_ > 0, "threadPoolSize must be positive") + .createWithDefault(20) + + val DELTA_ASSUMES_DROP_CONSTRAINT_IF_EXISTS = + buildConf("constraints.assumesDropIfExists.enabled") + .doc("""If true, DROP CONSTRAINT quietly drops nonexistent constraints even without + |IF EXISTS. + """) + .booleanConf + .createWithDefault(false) + + val DELTA_ASYNC_UPDATE_STALENESS_TIME_LIMIT = + buildConf("stalenessLimit") + .doc( + """Setting a non-zero time limit will allow you to query the last loaded state of the Delta + |table without blocking on a table update. You can use this configuration to reduce the + |latency on queries when up-to-date results are not a requirement. Table updates will be + |scheduled on a separate scheduler pool in a FIFO queue, and will share cluster resources + |fairly with your query. If a table hasn't updated past this time limit, we will block + |on a synchronous state update before running the query. 
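
A small sketch of schema auto-merge in action: with the session flag enabled, an append carrying a new column evolves the table schema instead of failing. The extra column and path are illustrative.

    import org.apache.spark.sql.functions.lit

    spark.conf.set("spark.databricks.delta.schema.autoMerge.enabled", "true")
    spark.range(10).toDF("id")
      .withColumn("extra", lit("x"))          // new column merged into the table schema
      .write.format("delta").mode("append").save("/tmp/delta/demo")
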
+ """.stripMargin) + .timeConf(TimeUnit.MILLISECONDS) + .checkValue(_ >= 0, "Staleness limit cannot be negative") + .createWithDefault(0L) // Don't let tables go stale + + val DELTA_ALTER_LOCATION_BYPASS_SCHEMA_CHECK = + buildConf("alterLocation.bypassSchemaCheck") + .doc("If true, Alter Table Set Location on Delta will go through even if the Delta table " + + "in the new location has a different schema from the original Delta table.") + .booleanConf + .createWithDefault(false) + + val DUMMY_FILE_MANAGER_NUM_OF_FILES = + buildConf("dummyFileManager.numOfFiles") + .internal() + .doc("How many dummy files to write in DummyFileManager") + .intConf + .checkValue(_ >= 0, "numOfFiles can not be negative.") + .createWithDefault(3) + + val DUMMY_FILE_MANAGER_PREFIX = + buildConf("dummyFileManager.prefix") + .internal() + .doc("The file prefix to use in DummyFileManager") + .stringConf + .createWithDefault(".s3-optimization-") + + val MERGE_INSERT_ONLY_ENABLED = + buildConf("merge.optimizeInsertOnlyMerge.enabled") + .internal() + .doc( + """ + |If enabled, merge without any matched clause (i.e., insert-only merge) will be optimized + |by avoiding rewriting old files and just inserting new files. + """.stripMargin) + .booleanConf + .createWithDefault(true) + + val MERGE_REPARTITION_BEFORE_WRITE = + buildConf("merge.repartitionBeforeWrite.enabled") + .internal() + .doc( + """ + |When enabled, merge will repartition the output by the table's partition columns before + |writing the files. + """.stripMargin) + .booleanConf + .createWithDefault(true) + + val MERGE_MATCHED_ONLY_ENABLED = + buildConf("merge.optimizeMatchedOnlyMerge.enabled") + .internal() + .doc( + """If enabled, merge without 'when not matched' clause will be optimized to use a + |right outer join instead of a full outer join. + """.stripMargin) + .booleanConf + .createWithDefault(true) + + val MERGE_SKIP_OSS_RESOLUTION_WITH_STAR = + buildConf("merge.skipOssResolutionWithStar") + .internal() + .doc( + """ + |If enabled, then any MERGE operation having UPDATE * / INSERT * will skip Apache + |Spark's resolution logic and use Delta's specific resolution logic. This is to avoid + |bug with star and temp views. See SC-72276 for details. + """.stripMargin) + .booleanConf + .createWithDefault(true) + + val MERGE_FAIL_IF_SOURCE_CHANGED = + buildConf("merge.failIfSourceChanged") + .internal() + .doc( + """ + |When enabled, MERGE will fail if it detects that the source dataframe was changed. + |This can be triggered as a result of modified input data or the use of nondeterministic + |query plans. The detection is best-effort. + """.stripMargin) + .booleanConf + .createWithDefault(false) + + final object MergeMaterializeSource { + // See value explanations in the doc below. + final val NONE = "none" + final val ALL = "all" + final val AUTO = "auto" + + final val list = Set(NONE, ALL, AUTO) + } + + val MERGE_MATERIALIZE_SOURCE = + buildConf("merge.materializeSource") + .internal() + .doc("When to materialize the source plan during MERGE execution. " + + "The value 'none' means source will never be materialized. " + + "The value 'all' means source will always be materialized. " + + "The value 'auto' means sources will not be materialized when they are certain to be " + + "deterministic." 
+ ) + .stringConf + .transform(_.toLowerCase(Locale.ROOT)) + .checkValues(MergeMaterializeSource.list) + .createWithDefault(MergeMaterializeSource.AUTO) + + val MERGE_FORCE_SOURCE_MATERIALIZATION_WITH_UNREADABLE_FILES = + buildConf("merge.forceSourceMaterializationWithUnreadableFilesConfig") + .internal() + .doc( + s""" + |When set to true, merge command will force source materialization if Spark configs + |${SQLConf.IGNORE_CORRUPT_FILES.key}, ${SQLConf.IGNORE_MISSING_FILES.key} or + |file source read options ${FileSourceOptions.IGNORE_CORRUPT_FILES} + |${FileSourceOptions.IGNORE_MISSING_FILES} are enabled on the source. + |This is done so to prevent irrecoverable data loss or unexpected results. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val MERGE_MATERIALIZE_SOURCE_RDD_STORAGE_LEVEL = + buildConf("merge.materializeSource.rddStorageLevel") + .internal() + .doc("What StorageLevel to use to persist the source RDD. Note: will always use disk.") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValue( v => + try { + StorageLevel.fromString(v).isInstanceOf[StorageLevel] + } catch { + case _: IllegalArgumentException => true + }, + """"spark.databricks.delta.merge.materializeSource.rddStorageLevel" """ + + "must be a valid StorageLevel") + .createWithDefault("DISK_ONLY") + + val MERGE_MATERIALIZE_SOURCE_RDD_STORAGE_LEVEL_RETRY = + buildConf("merge.materializeSource.rddStorageLevelRetry") + .internal() + .doc("What StorageLevel to use to persist the source RDD when MERGE is retried. " + + "Note: will always use disk.") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValue( v => + try { + StorageLevel.fromString(v).isInstanceOf[StorageLevel] + } catch { + case _: IllegalArgumentException => true + }, + """"spark.databricks.delta.merge.materializeSource.rddStorageLevelRetry" """ + + "must be a valid StorageLevel") + .createWithDefault("DISK_ONLY_2") + + val MERGE_MATERIALIZE_SOURCE_MAX_ATTEMPTS = + buildStaticConf("merge.materializeSource.maxAttempts") + .doc("How many times to try MERGE in case of lost RDD materialized source data") + .intConf + .createWithDefault(4) + + val MERGE_MATERIALIZE_SOURCE_EAGER = + buildConf("merge.materializeSource.eager") + .internal() + .doc("Materialize the source eagerly before Job 1") + .booleanConf + .createWithDefault(true) + + val DELTA_LAST_COMMIT_VERSION_IN_SESSION = + buildConf("lastCommitVersionInSession") + .doc("The version of the last commit made in the SparkSession for any table.") + .longConf + .checkValue(_ >= 0, "the version must be >= 0") + .createOptional + + val ALLOW_UNENFORCED_NOT_NULL_CONSTRAINTS = + buildConf("constraints.allowUnenforcedNotNull.enabled") + .internal() + .doc("If enabled, NOT NULL constraints within array and map types will be permitted in " + + "Delta table creation, even though Delta can't enforce them.") + .booleanConf + .createWithDefault(false) + + val CHECKPOINT_SCHEMA_WRITE_THRESHOLD_LENGTH = + buildConf("checkpointSchema.writeThresholdLength") + .internal() + .doc("Checkpoint schema larger than this threshold won't be written to the last checkpoint" + + " file") + .intConf + .createWithDefault(20000) + + val LAST_CHECKPOINT_CHECKSUM_ENABLED = + buildConf("lastCheckpoint.checksum.enabled") + .internal() + .doc("Controls whether to write the checksum while writing the LAST_CHECKPOINT file and" + + " whether to validate it while reading the LAST_CHECKPOINT file") + .booleanConf + .createWithDefault(true) + + val SUPPRESS_OPTIONAL_LAST_CHECKPOINT_FIELDS = + 
buildConf("lastCheckpoint.suppressOptionalFields") + .internal() + .doc("If set, the LAST_CHECKPOINT file will contain only version, size, and parts fields. " + + "For compatibility with broken third-party connectors that choke on unrecognized fields.") + .booleanConf + .createWithDefault(false) + + val DELTA_CHECKPOINT_PART_SIZE = + buildConf("checkpoint.partSize") + .internal() + .doc("The limit at which we will start parallelizing the checkpoint. We will attempt to " + + "write a maximum of this many actions per checkpoint file.") + .longConf + .checkValue(_ > 0, "partSize has to be positive") + .createOptional + + //////////////////////////////////// + // Checkpoint V2 Specific Configs + //////////////////////////////////// + + val CHECKPOINT_V2_DRIVER_THREADPOOL_PARALLELISM = + buildStaticConf("checkpointV2.threadpool.size") + .doc("The size of the threadpool for fetching CheckpointMetadata and SidecarFiles from a" + + " checkpoint.") + .internal() + .intConf + .createWithDefault(32) + + val CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT = + buildConf("checkpointV2.topLevelFileFormat") + .internal() + .doc( + """ + |The file format to use for the top level checkpoint file in V2 Checkpoints. + | This can be set to either json or parquet. The appropriate format will be + | picked automatically if this config is not specified. + |""".stripMargin) + .stringConf + .checkValues(Set("json", "parquet")) + .createOptional + + // This is temporary conf to make sure v2 checkpoints are not used by anyone other than devs as + // the feature is not fully ready. + val EXPOSE_CHECKPOINT_V2_TABLE_FEATURE_FOR_TESTING = + buildConf("checkpointV2.exposeTableFeatureForTesting") + .internal() + .doc( + """ + |This conf controls whether v2 checkpoints table feature is exposed or not. Note that + | v2 checkpoints are in development and this should config should be used only for + | testing/benchmarking. + |""".stripMargin) + .booleanConf + .createWithDefault(false) + + val LAST_CHECKPOINT_NON_FILE_ACTIONS_THRESHOLD = + buildConf("lastCheckpoint.nonFileActions.threshold") + .internal() + .doc(""" + |Threshold for total number of non file-actions to store in the last_checkpoint + | corresponding to the checkpoint v2. + |""".stripMargin) + .intConf + .createWithDefault(30) + + val LAST_CHECKPOINT_SIDECARS_THRESHOLD = + buildConf("lastCheckpoint.sidecars.threshold") + .internal() + .doc(""" + |Threshold for total number of sidecar files to store in the last_checkpoint + | corresponding to the checkpoint v2. + |""".stripMargin) + .intConf + .createWithDefault(30) + + val DELTA_WRITE_CHECKSUM_ENABLED = + buildConf("writeChecksumFile.enabled") + .doc("Whether the checksum file can be written.") + .booleanConf + .createWithDefault(true) + + val DELTA_CHECKPOINT_THROW_EXCEPTION_WHEN_FAILED = + buildConf("checkpoint.exceptionThrowing.enabled") + .internal() + .doc("Throw an error if checkpoint is failed. This flag is intentionally used for " + + "testing purpose to catch the checkpoint issues proactively. In production, we " + + "should not set this flag to be true because successful commit should return " + + "success to client regardless of the checkpoint result without throwing.") + .booleanConf + .createWithDefault(false) + + val DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME = + buildConf("resolveMergeUpdateStructsByName.enabled") + .internal() + .doc("Whether to resolve structs by name in UPDATE operations of UPDATE and MERGE INTO " + + "commands. 
If disabled, Delta will revert to the legacy behavior of resolving by position.") + .booleanConf + .createWithDefault(true) + + val DELTA_TIME_TRAVEL_STRICT_TIMESTAMP_PARSING = + buildConf("timeTravel.parsing.strict") + .internal() + .doc("Whether to require time travel timestamps to parse to a valid timestamp. If " + + "disabled, Delta will revert to the legacy behavior of treating invalid timestamps as " + + "equivalent to unix time 0 (1970-01-01 00:00:00).") + .booleanConf + .createWithDefault(true) + + val DELTA_STRICT_CHECK_DELTA_TABLE = + buildConf("isDeltaTable.strictCheck") + .internal() + .doc(""" + | When enabled, io.delta.tables.DeltaTable.isDeltaTable + | should return false when the _delta_log directory doesn't + | contain any transaction logs. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_LEGACY_STORE_WRITER_OPTIONS_AS_PROPS = + buildConf("legacy.storeOptionsAsProperties") + .internal() + .doc(""" + |Delta was unintentionally storing options provided by the DataFrameWriter in the + |saveAsTable method as table properties in the transaction log. This was unsupported + |behavior (it was a bug), and it has security implications (accidental storage of + |credentials). This flag prevents the storage of arbitrary options as table properties. + |Set this flag to true to continue setting non-delta prefixed table properties through + |table options. + |""".stripMargin) + .booleanConf + .createWithDefault(false) + + val DELTA_VACUUM_RELATIVIZE_IGNORE_ERROR = + buildConf("vacuum.relativize.ignoreError") + .internal() + .doc(""" + |When enabled, the error when trying to relativize an absolute path when + |vacuuming a delta table will be ignored. This usually happens when a table is + |shallow cloned across FileSystems, such as across buckets or across cloud storage + |systems. We do not recommend enabling this configuration in production or using it + |with production datasets. + |""".stripMargin) + .booleanConf + .createWithDefault(false) + val DELTA_LEGACY_ALLOW_AMBIGUOUS_PATHS = + buildConf("legacy.allowAmbiguousPathsInCreateTable") + .internal() + .doc(""" + |Delta was unintentionally allowing CREATE TABLE queries with both 'delta.`path`' + |and 'LOCATION path' clauses. In the new version, we will raise an error + |for this case. This flag is added to allow users to skip the check. When it's set to + |true and there are two paths in CREATE TABLE, the LOCATION path clause will be + |ignored like what the old version does.""".stripMargin) + .booleanConf + .createWithDefault(false) + + val REPLACEWHERE_DATACOLUMNS_ENABLED = + buildConf("replaceWhere.dataColumns.enabled") + .doc( + """ + |When enabled, replaceWhere on arbitrary expression and arbitrary columns is enabled. + |If disabled, it falls back to the old behavior + |to replace on partition columns only.""".stripMargin) + .booleanConf + .createWithDefault(true) + + val REPLACEWHERE_METRICS_ENABLED = + buildConf("replaceWhere.dataColumns.metrics.enabled") + .internal() + .doc( + """ + |When enabled, replaceWhere operations metrics on arbitrary expression and + |arbitrary columns is enabled. 
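
A sketch of replaceWhere on an arbitrary (non-partition) column, which the dataColumns flag above governs; every written row must satisfy the predicate when the constraint check is enabled. The predicate and path are illustrative.

    // Overwrite only the rows matching the predicate, leaving the rest of the table intact.
    spark.range(100).toDF("id")
      .write.format("delta")
      .mode("overwrite")
      .option("replaceWhere", "id < 100")
      .save("/tmp/delta/demo")
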
This will not report row level metrics for partitioned + |tables and tables with no stats.""".stripMargin) + .booleanConf + .createWithDefault(true) + val REPLACEWHERE_CONSTRAINT_CHECK_ENABLED = + buildConf("replaceWhere.constraintCheck.enabled") + .doc( + """ + |When enabled, replaceWhere on arbitrary expression and arbitrary columns will + |enforce the constraint check to replace the target table only when all the + |rows in the source dataframe match that constraint. + |If disabled, it will skip the constraint check and replace with all the rows + |from the new dataframe.""".stripMargin) + .booleanConf + .createWithDefault(true) + + val REPLACEWHERE_DATACOLUMNS_WITH_CDF_ENABLED = + buildConf("replaceWhere.dataColumnsWithCDF.enabled") + .internal() + .doc( + """ + |When enabled, replaceWhere on arbitrary expression and arbitrary columns will produce + |results for CDF. If disabled, it will fall back to the old behavior.""".stripMargin) + .booleanConf + .createWithDefault(true) + + val LOG_SIZE_IN_MEMORY_THRESHOLD = + buildConf("streaming.logSizeInMemoryThreshold") + .internal() + .doc( + """ + |The threshold of transaction log file size to read into the memory. When a file is larger + |than this, we will read the log file in multiple passes rather than loading it into + |the memory entirely.""".stripMargin) + .longConf + .createWithDefault(128L * 1024 * 1024) // 128MB + + val STREAMING_OFFSET_VALIDATION = + buildConf("streaming.offsetValidation.enabled") + .internal() + .doc("Whether to validate whether delta streaming source generates a smaller offset and " + + "moves backward.") + .booleanConf + .createWithDefault(true) + + val LOAD_FILE_SYSTEM_CONFIGS_FROM_DATAFRAME_OPTIONS = + buildConf("loadFileSystemConfigsFromDataFrameOptions") + .internal() + .doc( + """Whether to load file systems configs provided in DataFrameReader/Writer options when + |calling `DataFrameReader.load/DataFrameWriter.save` using a Delta table path. + |`DataFrameReader.table/DataFrameWriter.saveAsTable` doesn't support this.""".stripMargin) + .booleanConf + .createWithDefault(true) + + val CONVERT_EMPTY_TO_NULL_FOR_STRING_PARTITION_COL = + buildConf("convertEmptyToNullForStringPartitionCol") + .internal() + .doc( + """ + |If true, always convert empty string to null for string partition columns before + |constraint checks. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + /** + * The below confs have a special prefix `spark.databricks.io` because this is the conf value + * already used by Databricks' data skipping implementation. There's no benefit to making OSS + * users, some of whom are Databricks customers, have to keep track of two different conf + * values for the same data skipping parameter. + */ + val DATA_SKIPPING_STRING_PREFIX_LENGTH = + SQLConf.buildConf("spark.databricks.io.skipping.stringPrefixLength") + .internal() + .doc("For string columns, how long prefix to store in the data skipping index.") + .intConf + .createWithDefault(32) + + val MDC_NUM_RANGE_IDS = + SQLConf.buildConf("spark.databricks.io.skipping.mdc.rangeId.max") + .internal() + .doc("This controls the domain of rangeId values to be interleaved. 
The bigger, the better " + + "granularity, but at the expense of performance (more data gets sampled).") + .intConf + .checkValue(_ > 1, "'spark.databricks.io.skipping.mdc.rangeId.max' must be greater than 1") + .createWithDefault(1000) + + val MDC_ADD_NOISE = + SQLConf.buildConf("spark.databricks.io.skipping.mdc.addNoise") + .internal() + .doc("Whether or not a random byte should be added as a suffix to the interleaved bits " + + "when computing the Z-order values for MDC. This can help deal with skew, but may " + + "have a negative impact on overall min/max skipping effectiveness.") + .booleanConf + .createWithDefault(true) + + val DELTA_OPTIMIZE_ZORDER_COL_STAT_CHECK = + buildConf("optimize.zorder.checkStatsCollection.enabled") + .internal() + .doc(s"When enabled, we will check if the column we're actually collecting stats " + + "on the columns we are z-ordering on.") + .booleanConf + .createWithDefault(true) + + val FAST_INTERLEAVE_BITS_ENABLED = + buildConf("optimize.zorder.fastInterleaveBits.enabled") + .internal() + .doc("When true, a faster version of the bit interleaving algorithm is used.") + .booleanConf + .createWithDefault(false) + + val INTERNAL_UDF_OPTIMIZATION_ENABLED = + buildConf("internalUdfOptimization.enabled") + .internal() + .doc( + """If true, create udfs used by Delta internally from templates to reduce lock contention + |caused by Scala Reflection. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val GENERATED_COLUMN_PARTITION_FILTER_OPTIMIZATION_ENABLED = + buildConf("generatedColumn.partitionFilterOptimization.enabled") + .internal() + .doc( + "Whether to extract partition filters automatically from data filters for a partition" + + " generated column if possible") + .booleanConf + .createWithDefault(true) + + val GENERATED_COLUMN_ALLOW_NULLABLE = + buildConf("generatedColumn.allowNullableIngest.enabled") + .internal() + .doc("When enabled this will allow tables with generated columns enabled to be able " + + "to write data without providing values for a nullable column via DataFrame.write") + .booleanConf + .createWithDefault(true) + + val DELTA_CONVERT_ICEBERG_ENABLED = + buildConf("convert.iceberg.enabled") + .internal() + .doc("If enabled, Iceberg tables can be converted into a Delta table.") + .booleanConf + .createWithDefault(true) + + val DELTA_CONVERT_ICEBERG_PARTITION_EVOLUTION_ENABLED = + buildConf("convert.iceberg.partitionEvolution.enabled") + .doc("If enabled, support conversion of iceberg tables experienced partition evolution.") + .booleanConf + .createWithDefault(false) + + val DELTA_CONVERT_ICEBERG_UNSAFE_MOR_TABLE_ENABLE = + buildConf("convert.iceberg.unsafeConvertMorTable.enabled") + .doc("If enabled, iceberg merge-on-read tables can be unsafely converted by ignoring " + + "deletion files. This could cause data duplication and is strongly not recommended.") + .internal() + .booleanConf + .createWithDefault(false) + + final object NonDeterministicPredicateWidening { + final val OFF = "off" + final val LOGGING = "logging" + final val ON = "on" + + final val list = Set(OFF, LOGGING, ON) + } + + val DELTA_CONFLICT_DETECTION_WIDEN_NONDETERMINISTIC_PREDICATES = + buildConf("conflictDetection.partitionLevelConcurrency.widenNonDeterministicPredicates") + .doc("Whether to widen non-deterministic predicates during partition-level concurrency. " + + "Widening can lead to additional conflicts." + + "When the value is 'off', non-deterministic predicates are not widened during conflict " + + "resolution." 
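
The Z-order related flags above come into play when clustering data with OPTIMIZE; a minimal sketch, assuming the demo path from earlier and a column that has statistics collected (cf. the zorder stats check above).

    // Rewrite files so that rows are clustered by the given column's Z-order value.
    spark.sql("OPTIMIZE delta.`/tmp/delta/demo` ZORDER BY (id)")
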
+ + "The value 'logging' will log whether the widening of non-deterministic predicates lead " + + "to additional conflicts. The conflict resolution is still done without widening. " + + "When the value is 'on', non-deterministic predicates are widened during conflict " + + "resolution.") + .internal() + .stringConf + .transform(_.toLowerCase(Locale.ROOT)) + .checkValues(NonDeterministicPredicateWidening.list) + .createWithDefault(NonDeterministicPredicateWidening.ON) + + val DELTA_UNIFORM_ICEBERG_SYNC_CONVERT_ENABLED = + buildConf("uniform.iceberg.sync.convert.enabled") + .doc("If enabled, iceberg conversion will be done synchronously. " + + "This can cause slow down in Delta commits and should only be used " + + "for debugging or in test suites.") + .internal() + .booleanConf + .createWithDefault(false) + + val DELTA_OPTIMIZE_MIN_FILE_SIZE = + buildConf("optimize.minFileSize") + .internal() + .doc( + """Files which are smaller than this threshold (in bytes) will be grouped together + | and rewritten as larger files by the OPTIMIZE command. + |""".stripMargin) + .longConf + .checkValue(_ >= 0, "minFileSize has to be positive") + .createWithDefault(1024 * 1024 * 1024) + + val DELTA_OPTIMIZE_MAX_FILE_SIZE = + buildConf("optimize.maxFileSize") + .internal() + .doc("Target file size produced by the OPTIMIZE command.") + .longConf + .checkValue(_ >= 0, "maxFileSize has to be positive") + .createWithDefault(1024 * 1024 * 1024) + + val DELTA_OPTIMIZE_MAX_THREADS = + buildConf("optimize.maxThreads") + .internal() + .doc( + """ + |Maximum number of parallel jobs allowed in OPTIMIZE command. Increasing the maximum + | parallel jobs allows the OPTIMIZE command to run faster, but increases the job + | management on the Spark driver side. + |""".stripMargin) + .intConf + .checkValue(_ > 0, "'optimize.maxThreads' must be positive.") + .createWithDefault(15) + + val DELTA_OPTIMIZE_REPARTITION_ENABLED = + buildConf("optimize.repartition.enabled") + .internal() + .doc("Use repartition(1) instead of coalesce(1) to merge small files. " + + "coalesce(1) is executed with only one task, if there are many tiny files " + + "within a bin (e.g. 1000 files of 50MB), it cannot be optimized with more executors. " + + "repartition(1) incurs a shuffle stage, but the job can be distributed." + ) + .booleanConf + .createWithDefault(false) + + val DELTA_ALTER_TABLE_CHANGE_COLUMN_CHECK_EXPRESSIONS = + buildConf("alterTable.changeColumn.checkExpressions") + .internal() + .doc( + """ + |Given an ALTER TABLE command that changes columns, check if there are expressions used + | in Check Constraints and Generated Columns that reference this column and thus will + | be affected by this change. + | + |This is a safety switch - we should only turn this off when there is an issue with + |expression checking logic that prevents a valid column change from going through. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_ALTER_TABLE_DROP_COLUMN_ENABLED = + buildConf("alterTable.dropColumn.enabled") + .internal() + .doc( + """Whether to enable the drop column feature for Delta. + |This is a safety switch - we should only turn this off when there is an issue. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_CDF_ALLOW_OUT_OF_RANGE_TIMESTAMP = { + buildConf("changeDataFeed.timestampOutOfRange.enabled") + .doc( + """When enabled, Change Data Feed queries with starting and ending timestamps + | exceeding the newest delta commit timestamp will not error out. 
For starting timestamp + | out of range we will return an empty DataFrame, for ending timestamps out of range we + | will consider the latest Delta version as the ending version.""".stripMargin) + .booleanConf + .createWithDefault(false) + } + + val DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_COLUMN_MAPPING_SCHEMA_CHANGES = + buildConf("streaming.unsafeReadOnIncompatibleColumnMappingSchemaChanges.enabled") + .doc( + "Streaming read on Delta table with column mapping schema operations " + + "(e.g. rename or drop column) is currently blocked due to potential data loss and " + + "schema confusion. However, existing users may use this flag to force unblock " + + "if they'd like to take the risk.") + .internal() + .booleanConf + .createWithDefault(false) + + val DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_SCHEMA_CHANGES_DURING_STREAM_START = + buildConf("streaming.unsafeReadOnIncompatibleSchemaChangesDuringStreamStart.enabled") + .doc( + """A legacy config to disable schema read-compatibility check on the start version schema + |when starting a streaming query. The config is added to allow legacy problematic queries + |disabling the check to keep running if users accept the potential risks of incompatible + |schema reading.""".stripMargin) + .internal() + .booleanConf + .createWithDefault(false) + + val DELTA_STREAMING_UNSAFE_READ_ON_PARTITION_COLUMN_CHANGE = + buildConf("streaming.unsafeReadOnPartitionColumnChanges.enabled") + .doc( + "Streaming read on Delta table with partition column overwrite " + + "(e.g. changing partition column) is currently blocked due to potential data loss. " + + "However, existing users may use this flag to force unblock " + + "if they'd like to take the risk.") + .internal() + .booleanConf + .createWithDefault(false) + + val DELTA_STREAMING_ENABLE_SCHEMA_TRACKING = + buildConf("streaming.schemaTracking.enabled") + .doc( + """If enabled, Delta streaming source can support non-additive schema evolution for + |operations such as rename or drop column on column mapping enabled tables. + |""".stripMargin) + .internal() + .booleanConf + .createWithDefault(true) + + val DELTA_STREAMING_ENABLE_SCHEMA_TRACKING_MERGE_CONSECUTIVE_CHANGES = + buildConf("streaming.schemaTracking.mergeConsecutiveSchemaChanges.enabled") + .doc( + "When enabled, schema tracking in Delta streaming would consider multiple consecutive " + + "schema changes as one.") + .internal() + .booleanConf + .createWithDefault(true) + + val DELTA_STREAMING_ALLOW_SCHEMA_LOCATION_OUTSIDE_CHECKPOINT_LOCATION = + buildConf("streaming.allowSchemaLocationOutsideCheckpointLocation") + .doc( + "When enabled, Delta streaming can set a schema location outside of the " + + "query's checkpoint location. This is not recommended.") + .internal() + .booleanConf + .createWithDefault(false) + + val DELTA_STREAMING_SCHEMA_TRACKING_METADATA_PATH_CHECK_ENABLED = + buildConf("streaming.schemaTracking.metadataPathCheck.enabled") + .doc( + "When enabled, Delta streaming with schema tracking will ensure the schema log entry " + + "must match the source's unique checkpoint metadata location.") + .internal() + .booleanConf + .createWithDefault(true) + + val DELTA_STREAM_UNSAFE_READ_ON_NULLABILITY_CHANGE = + buildConf("streaming.unsafeReadOnNullabilityChange.enabled") + .doc( + """A legacy config to disable unsafe nullability check. 
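
An illustrative Change Data Feed read tied to the timestampOutOfRange flag above: with it enabled, a starting timestamp beyond the latest commit returns an empty result instead of erroring. The table name `events` is hypothetical and must have CDF enabled.

    spark.conf.set("spark.databricks.delta.changeDataFeed.timestampOutOfRange.enabled", "true")
    val changes = spark.read.format("delta")
      .option("readChangeFeed", "true")
      .option("startingTimestamp", "2030-01-01 00:00:00")  // deliberately in the future
      .table("events")
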
The config is added to allow legacy + |problematic queries disabling the check to keep running if users accept the potential + |risks of incompatible schema reading.""".stripMargin) + .internal() + .booleanConf + .createWithDefault(false) + + val DELTA_CDF_UNSAFE_BATCH_READ_ON_INCOMPATIBLE_SCHEMA_CHANGES = + buildConf("changeDataFeed.unsafeBatchReadOnIncompatibleSchemaChanges.enabled") + .doc( + "Reading change data in batch (e.g. using `table_changes()`) on Delta table with " + + "column mapping schema operations is currently blocked due to potential data loss and " + + "schema confusion. However, existing users may use this flag to force unblock " + + "if they'd like to take the risk.") + .internal() + .booleanConf + .createWithDefault(false) + + val DELTA_CDF_DEFAULT_SCHEMA_MODE_FOR_COLUMN_MAPPING_TABLE = + buildConf("changeDataFeed.defaultSchemaModeForColumnMappingTable") + .doc( + """Reading batch CDF on column mapping enabled table requires schema mode to be set to + |`endVersion` so the ending version's schema will be used. + |Set this to `latest` to use the schema of the latest available table version, + |or to `legacy` to fallback to the non column-mapping default behavior, in which + |the time travel option can be used to select the version of the schema.""".stripMargin) + .internal() + .stringConf + .createWithDefault("endVersion") + + val DELTA_CDF_ALLOW_TIME_TRAVEL_OPTIONS = + buildConf("changeDataFeed.allowTimeTravelOptionsForSchema") + .doc( + s"""If allowed, user can specify time-travel reader options such as + |'versionAsOf' or 'timestampAsOf' to specify the read schema while + |reading change data feed.""".stripMargin) + .internal() + .booleanConf + .createWithDefault(false) + + val DELTA_COLUMN_MAPPING_CHECK_MAX_COLUMN_ID = + buildConf("columnMapping.checkMaxColumnId") + .doc( + s"""If enabled, check if delta.columnMapping.maxColumnId is correctly assigned at each + |Delta transaction commit. + |""".stripMargin) + .internal() + .booleanConf + .createWithDefault(true) + + val DYNAMIC_PARTITION_OVERWRITE_ENABLED = + buildConf("dynamicPartitionOverwrite.enabled") + .doc("Whether to overwrite partitions dynamically when 'partitionOverwriteMode' is set to " + + "'dynamic' in either the SQL conf, or a DataFrameWriter option. When this is disabled " + + "'partitionOverwriteMode' will be ignored.") + .internal() + .booleanConf + .createWithDefault(true) + + val ALLOW_ARBITRARY_TABLE_PROPERTIES = + buildConf("allowArbitraryProperties.enabled") + .doc( + """Whether we allow arbitrary Delta table properties. When this is enabled, table properties + |with the prefix 'delta.' are not checked for validity. Table property validity is based + |on the current Delta version being used and feature support in that version. Arbitrary + |properties without the 'delta.' prefix are always allowed regardless of this config. + | + |Please use with caution. When enabled, there will be no warning when unsupported table + |properties for the Delta version being used are set, or when properties are set + |incorrectly (for example, misspelled).""".stripMargin + ) + .internal() + .booleanConf + .createWithDefault(false) + + val TABLE_BUILDER_FORCE_TABLEPROPERTY_LOWERCASE = + buildConf("deltaTableBuilder.forceTablePropertyLowerCase.enabled") + .internal() + .doc( + """Whether the keys of table properties should be set to lower case. 
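
A sketch of the dynamic partition overwrite path described above, using the DataFrameWriter option form; the partition column, values, and path are assumptions.

    import org.apache.spark.sql.functions.lit

    // Only partitions present in the incoming data are overwritten.
    spark.range(10).toDF("id")
      .withColumn("part", lit("a"))
      .write.format("delta")
      .partitionBy("part")
      .mode("overwrite")
      .option("partitionOverwriteMode", "dynamic")
      .save("/tmp/delta/partitioned")
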
+ | Turn on this flag if you want keys of table properties not starting with delta + | to be backward compatible when the table is created via DeltaTableBuilder + | Please note that if you set this to true, the lower case of the + | key will be used for non delta prefix table properties. + |""".stripMargin) + .booleanConf + .createWithDefault(false) + + val DELTA_REQUIRED_SPARK_CONFS_CHECK = + buildConf("requiredSparkConfsCheck.enabled") + .doc("Whether to verify SparkSession is initialized with required configurations.") + .internal() + .booleanConf + .createWithDefault(true) + + val RESTORE_TABLE_PROTOCOL_DOWNGRADE_ALLOWED = + buildConf("restore.protocolDowngradeAllowed") + .doc(""" + | Whether a table RESTORE or CLONE operation may downgrade the protocol of the table. + | Note that depending on the protocol and the enabled table features, downgrading the + | protocol may break snapshot reconstruction and make the table unreadable. Protocol + | downgrades may also make the history unreadable.""".stripMargin) + .booleanConf + .createWithDefault(false) + + val DELTA_CLONE_REPLACE_ENABLED = + buildConf("clone.replaceEnabled") + .internal() + .doc("If enabled, the table will be replaced when cloning over an existing Delta table.") + .booleanConf + .createWithDefault(true) + + val DELTA_OPTIMIZE_METADATA_QUERY_ENABLED = + buildConf("optimizeMetadataQuery.enabled") + .internal() + .doc("Whether we can use the metadata in the DeltaLog to" + + " optimize queries that can be run purely on metadata.") + .booleanConf + .createWithDefault(true) + + val DELTA_SKIP_RECORDING_EMPTY_COMMITS = + buildConf("skipRecordingEmptyCommits") + .internal() + .doc( + """ + | Whether to skip recording an empty commit in the Delta Log. This only works when table + | is using SnapshotIsolation or Serializable Isolation Mode. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val REPLACE_TABLE_PROTOCOL_DOWNGRADE_ALLOWED = + buildConf("replace.protocolDowngradeAllowed") + .internal() + .doc(""" + | Whether a REPLACE operation may downgrade the protocol of the table. + | Note that depending on the protocol and the enabled table features, downgrading the + | protocol may break snapshot reconstruction and make the table unreadable. Protocol + | downgrades may also make the history unreadable.""".stripMargin) + .booleanConf + .createWithDefault(false) + + ////////////////// + // Idempotent DML + ////////////////// + + val DELTA_IDEMPOTENT_DML_TXN_APP_ID = + buildConf("write.txnAppId") + .internal() + .doc(""" + |The application ID under which this write will be committed. + | If specified, spark.databricks.delta.write.txnVersion also needs to + | be set. + |""".stripMargin) + .stringConf + .createOptional + + val DELTA_IDEMPOTENT_DML_TXN_VERSION = + buildConf("write.txnVersion") + .internal() + .doc(""" + |The user-defined version under which this write will be committed. + | If specified, spark.databricks.delta.write.txnAppId also needs to + | be set. To ensure idempotency, txnVersions across different writes + | need to be monotonically increasing. + |""".stripMargin) + .longConf + .createOptional + + val DELTA_IDEMPOTENT_DML_AUTO_RESET_ENABLED = + buildConf("write.txnVersion.autoReset.enabled") + .internal() + .doc(""" + |If true, will automatically reset spark.databricks.delta.write.txnVersion + |after every write. This is false by default. 
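
An idempotent-write sketch using the txnAppId/txnVersion pair documented above: a given (appId, version) is committed at most once, and versions must increase monotonically across writes for the same appId. The appId, version, and path are illustrative.

    spark.conf.set("spark.databricks.delta.write.txnAppId", "nightly-loader")
    spark.conf.set("spark.databricks.delta.write.txnVersion", "42")
    // Re-running this exact write with the same (appId, version) would be skipped.
    spark.range(10).toDF("id").write.format("delta").mode("append").save("/tmp/delta/demo")
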
+ |""".stripMargin) + .booleanConf + .createWithDefault(false) + + val DELTA_OPTIMIZE_MAX_DELETED_ROWS_RATIO = + buildConf("optimize.maxDeletedRowsRatio") + .internal() + .doc("Files with a ratio of deleted rows to the total rows larger than this threshold " + + "will be rewritten by the OPTIMIZE command.") + .doubleConf + .checkValue(_ >= 0, "maxDeletedRowsRatio must be in range [0.0, 1.0]") + .checkValue(_ <= 1, "maxDeletedRowsRatio must be in range [0.0, 1.0]") + .createWithDefault(0.05d) + + val DELTA_TABLE_PROPERTY_CONSTRAINTS_CHECK_ENABLED = + buildConf("tablePropertyConstraintsCheck.enabled") + .internal() + .doc( + """Check that all table-properties satisfy validity constraints. + |Only change this for testing!""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTA_DUPLICATE_ACTION_CHECK_ENABLED = + buildConf("duplicateActionCheck.enabled") + .internal() + .doc(""" + |Verify only one action is specified for each file path in one commit. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELETE_USE_PERSISTENT_DELETION_VECTORS = + buildConf("delete.deletionVectors.persistent") + .internal() + .doc("Enable persistent Deletion Vectors in the Delete command.") + .booleanConf + .createWithDefault(true) + + val MERGE_USE_PERSISTENT_DELETION_VECTORS = + buildConf("merge.deletionVectors.persistent") + .internal() + .doc("Enable persistent Deletion Vectors in Merge command.") + .booleanConf + .createWithDefault(true) + + val UPDATE_USE_PERSISTENT_DELETION_VECTORS = + buildConf("update.deletionVectors.persistent") + .internal() + .doc("Enable persistent Deletion Vectors in the Update command.") + .booleanConf + .createWithDefault(true) + + val DELETION_VECTOR_PACKING_TARGET_SIZE = + buildConf("deletionVectors.packing.targetSize") + .internal() + .doc("Controls the target file deletion vector file size when packing multiple" + + "deletion vectors in a single file.") + .bytesConf(ByteUnit.BYTE) + .createWithDefault(2L * 1024L * 1024L) + + val TIGHT_BOUND_COLUMN_ON_FILE_INIT_DISABLED = + buildConf("deletionVectors.disableTightBoundOnFileCreationForDevOnly") + .internal() + .doc("""Controls whether we generate a tightBounds column in statistics on file creation. + |The tightBounds column annotates whether the statistics of the file are tight or wide. + |This flag is only used for testing purposes. + """.stripMargin) + .booleanConf + .createWithDefault(false) + + val WRITE_DATA_FILES_TO_SUBDIR = buildConf("write.dataFilesToSubdir") + .internal() + .doc("Delta will write all data files to subdir 'data/' under table dir if enabled") + .booleanConf + .createWithDefault(false) + + val DELETION_VECTORS_COMMIT_CHECK_ENABLED = + buildConf("deletionVectors.skipCommitCheck") + .internal() + .doc( + """Check the table-property and verify that deletion vectors may be added + |to this table. + |Only change this for testing!""".stripMargin) + .booleanConf + .createWithDefault(true) + + val REUSE_COLUMN_MAPPING_METADATA_DURING_OVERWRITE = + buildConf("columnMapping.reuseColumnMetadataDuringOverwrite") + .internal() + .doc( + """ + |If enabled, when a column mapping table is overwritten, the new schema will reuse as many + |old schema's column mapping metadata (field id and physical name) as possible. 
+ |This allows the analyzed schema from prior to the overwrite to be still read-compatible + |with the data post the overwrite, enabling better user experience when, for example, + |the column mapping table is being continuously scanned in a streaming query, the analyzed + |table schema will still be readable after the table is overwritten. + |""".stripMargin) + .booleanConf + .createWithDefault(true) + + val DELTALOG_MINOR_COMPACTION_USE_FOR_READS = + buildConf("deltaLog.minorCompaction.useForReads") + .doc("If true, minor compacted delta log files will be used for creating Snapshots") + .internal() + .booleanConf + .createWithDefault(true) + + val ICEBERG_MAX_COMMITS_TO_CONVERT = buildConf("iceberg.maxPendingCommits") + .doc(""" + |The maximum number of pending Delta commits to convert to Iceberg incrementally. + |If the table hasn't been converted to Iceberg in longer than this number of commits, + |we start from scratch, replacing the previously converted Iceberg table contents. + |""".stripMargin) + .intConf + .createWithDefault(100) + + val ICEBERG_MAX_ACTIONS_TO_CONVERT = buildConf("iceberg.maxPendingActions") + .doc(""" + |The maximum number of pending Delta actions to convert to Iceberg incrementally. + |If there are more than this number of outstanding actions, chunk them into separate + |Iceberg commits. + |""".stripMargin) + .intConf + .createWithDefault(100 * 1000) + + val UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG = + buildConf("updateAndMergeCastingFollowsAnsiEnabledFlag") + .internal() + .doc("""If false, casting behaviour in implicit casts in UPDATE and MERGE follows + |'spark.sql.storeAssignmentPolicy'. If true, these casts follow 'ansi.enabled'. + |""".stripMargin) + .booleanConf + .createWithDefault(false) + + val DELTA_USE_MULTI_THREADED_STATS_COLLECTION = + buildConf("collectStats.useMultiThreadedStatsCollection") + .internal() + .doc("Whether to use multi-threaded statistics collection. If false, statistics will be " + + "collected sequentially within each partition.") + .booleanConf + .createWithDefault(true) + + val DELTA_STATS_COLLECTION_NUM_FILES_PARTITION = + buildConf("collectStats.numFilesPerPartition") + .internal() + .doc("Controls the number of files that should be within a RDD partition " + + "during multi-threaded optimized statistics collection. A larger number will lead to " + + "less parallelism, but can reduce scheduling overhead.") + .intConf + .checkValue(v => v >= 1, "Must be at least 1.") + .createWithDefault(100) + + ///////////////////// + // Optimized Write + ///////////////////// + + val DELTA_OPTIMIZE_WRITE_ENABLED = + buildConf("optimizeWrite.enabled") + .doc("Whether to optimize writes made into Delta tables from this session.") + .booleanConf + .createOptional + + val DELTA_OPTIMIZE_WRITE_SHUFFLE_BLOCKS = + buildConf("optimizeWrite.numShuffleBlocks") + .internal() + .doc("Maximum number of shuffle blocks to target for the adaptive shuffle " + + "in optimized writes.") + .intConf + .createWithDefault(50000000) + + val DELTA_OPTIMIZE_WRITE_MAX_SHUFFLE_PARTITIONS = + buildConf("optimizeWrite.maxShufflePartitions") + .internal() + .doc("Max number of output buckets (reducers) that can be used by optimized writes. This " + + "can be thought of as: 'how many target partitions are we going to write to in our " + + "table in one write'. This should not be larger than " + + "spark.shuffle.minNumPartitionsToHighlyCompress. 
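
A hedged example for the Optimized Write block above: enable it for the session and, optionally, adjust the adaptive-shuffle bin size (interpreted in MiB per the bytesConf unit). The values and path are illustrative.

    spark.conf.set("spark.databricks.delta.optimizeWrite.enabled", "true")
    spark.conf.set("spark.databricks.delta.optimizeWrite.binSize", "256")  // assumed to mean 256 MiB
    spark.range(1000000).toDF("id").write.format("delta").mode("append").save("/tmp/delta/demo")
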
Otherwise, partition coalescing and " + + "skew split may not work due to incomplete stats from HighlyCompressedMapStatus") + .intConf + .createWithDefault(2000) + + val DELTA_OPTIMIZE_WRITE_BIN_SIZE = + buildConf("optimizeWrite.binSize") + .internal() + .doc("Bin size for the adaptive shuffle in optimized writes in megabytes.") + .bytesConf(ByteUnit.MiB) + .createWithDefault(512) + + ////////////////// + // Clustered Table + ////////////////// + + val DELTA_CLUSTERING_TABLE_PREVIEW_ENABLED = + buildConf("clusteredTable.enableClusteringTablePreview") + .internal() + .doc("Whether to enable the clustering table preview.") + .booleanConf + .createWithDefault(false) + + val DELTA_NUM_CLUSTERING_COLUMNS_LIMIT = + buildStaticConf("clusteredTable.numClusteringColumnsLimit") + .internal() + .doc("""The maximum number of clustering columns allowed for a clustered table. + """.stripMargin) + .intConf + .checkValue( + _ > 0, + "'clusteredTable.numClusteringColumnsLimit' must be positive." + ) + .createWithDefault(4) + + ////////////////// + // Delta Sharing + ////////////////// + + val DELTA_SHARING_ENABLE_DELTA_FORMAT_BATCH = + buildConf("spark.sql.delta.sharing.enableDeltaFormatBatch") + .doc("Enable delta format sharing in case of issues.") + .internal() + .booleanConf + .createWithDefault(true) +} + +object DeltaSQLConf extends DeltaSQLConfBase diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSink.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSink.scala new file mode 100644 index 00000000000..c105eb9e94d --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSink.scala @@ -0,0 +1,156 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.sources + +import java.util.concurrent.ConcurrentHashMap + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.DeltaOperations.StreamingUpdate +import org.apache.spark.sql.delta.actions.{FileAction, SetTransaction} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.{ImplicitMetadataOperation, SchemaUtils} +import org.apache.hadoop.fs.Path + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} +import org.apache.spark.sql.execution.metric.SQLMetrics.createMetric +import org.apache.spark.sql.execution.streaming.{Sink, StreamExecution} +import org.apache.spark.sql.streaming.OutputMode +import org.apache.spark.sql.types.NullType +import org.apache.spark.util.Utils + +/** + * A streaming sink that writes data into a Delta Table. 
+ */ +case class DeltaSink( + sqlContext: SQLContext, + path: Path, + partitionColumns: Seq[String], + outputMode: OutputMode, + options: DeltaOptions, + catalogTable: Option[CatalogTable] = None) + extends Sink + with ImplicitMetadataOperation + with DeltaLogging { + + private val deltaLog = DeltaLog.forTable(sqlContext.sparkSession, path) + + private val sqlConf = sqlContext.sparkSession.sessionState.conf + + // This have to be lazy because queryId is a thread local property that is not available + // when the Sink object is created. + lazy val queryId = sqlContext.sparkContext.getLocalProperty(StreamExecution.QUERY_ID_KEY) + + override protected val canOverwriteSchema: Boolean = + outputMode == OutputMode.Complete() && options.canOverwriteSchema + + override protected val canMergeSchema: Boolean = options.canMergeSchema + + case class PendingTxn(batchId: Long, + optimisticTransaction: OptimisticTransaction, + streamingUpdate: StreamingUpdate, + newFiles: Seq[FileAction], + deletedFiles: Seq[FileAction]) { + def commit(): Unit = { + val sc = sqlContext.sparkContext + val metrics = Map[String, SQLMetric]( + "numAddedFiles" -> createMetric(sc, "number of files added"), + "numRemovedFiles" -> createMetric(sc, "number of files removed") + ) + metrics("numRemovedFiles").set(deletedFiles.size) + metrics("numAddedFiles").set(newFiles.size) + optimisticTransaction.registerSQLMetrics(sqlContext.sparkSession, metrics) + val setTxn = SetTransaction(appId = queryId, version = batchId, + lastUpdated = Some(deltaLog.clock.getTimeMillis())) :: Nil + val (_, durationMs) = Utils.timeTakenMs { + optimisticTransaction + .commit(actions = setTxn ++ newFiles ++ deletedFiles + , op = streamingUpdate) + } + logInfo( + s"Committed transaction, batchId=${batchId}, duration=${durationMs} ms, " + + s"added ${newFiles.size} files, removed ${deletedFiles.size} files.") + val executionId = sc.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) + SQLMetrics.postDriverMetricUpdates(sc, executionId, metrics.values.toSeq) + } + } + + override def addBatch(batchId: Long, data: DataFrame): Unit = { + addBatchWithStatusImpl(batchId, data) + } + + + private def addBatchWithStatusImpl(batchId: Long, data: DataFrame): Boolean = { + val txn = deltaLog.startTransaction(catalogTable) + assert(queryId != null) + + if (SchemaUtils.typeExistsRecursively(data.schema)(_.isInstanceOf[NullType])) { + throw DeltaErrors.streamWriteNullTypeException + } + + + // If the batch reads the same Delta table as this sink is going to write to, then this + // write has dependencies. Then make sure that this commit set hasDependencies to true + // by injecting a read on the whole table. This needs to be done explicitly because + // MicroBatchExecution has already enforced all the data skipping (by forcing the generation + // of the executed plan) even before the transaction was started. + val selfScan = data.queryExecution.analyzed.collectFirst { + case DeltaTable(index) if index.deltaLog.isSameLogAs(txn.deltaLog) => true + }.nonEmpty + if (selfScan) { + txn.readWholeTable() + } + + // Streaming sinks can't blindly overwrite schema. 
See Schema Management design doc for details + updateMetadata(data.sparkSession, txn, data.schema, partitionColumns, Map.empty, + outputMode == OutputMode.Complete(), rearrangeOnly = false) + + val currentVersion = txn.txnVersion(queryId) + if (currentVersion >= batchId) { + logInfo(s"Skipping already complete epoch $batchId, in query $queryId") + return false + } + + val deletedFiles = outputMode match { + case o if o == OutputMode.Complete() => + DeltaLog.assertRemovable(txn.snapshot) + txn.filterFiles().map(_.remove) + case _ => Nil + } + val (newFiles, writeFilesTimeMs) = Utils.timeTakenMs{ + txn.writeFiles(data, Some(options)) + } + val totalSize = newFiles.map(_.getFileSize).sum + val totalLogicalRecords = newFiles.map(_.numLogicalRecords.getOrElse(0L)).sum + logInfo( + s"Wrote ${newFiles.size} files, with total size ${totalSize}, " + + s"${totalLogicalRecords} logical records, duration=${writeFilesTimeMs} ms.") + + val info = DeltaOperations.StreamingUpdate(outputMode, queryId, batchId, options.userMetadata + ) + val pendingTxn = PendingTxn(batchId, txn, info, newFiles, deletedFiles) + pendingTxn.commit() + return true + } + + + override def toString(): String = s"DeltaSink[$path]" +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSource.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSource.scala new file mode 100644 index 00000000000..a6bb5a1488d --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSource.scala @@ -0,0 +1,1428 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
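
For context, this sink is normally exercised through a streaming query writing to a Delta path; the SetTransaction(queryId, batchId) recorded in commit() above is what lets addBatch skip an epoch that was already committed. The rate source, checkpoint location, and output path below are illustrative.

    val query = spark.readStream.format("rate").load()
      .selectExpr("value as id")
      .writeStream
      .format("delta")
      .option("checkpointLocation", "/tmp/delta/_checkpoints/rate")
      .start("/tmp/delta/rate_sink")
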
+ */ + +package org.apache.spark.sql.delta.sources + +// scalastyle:off import.ordering.noEmptyLine +import java.io.FileNotFoundException +import java.sql.Timestamp + +import scala.util.{Failure, Success, Try} +import scala.util.matching.Regex + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.files.DeltaSourceSnapshot +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.storage.{ClosableIterator, SupportsRewinding} +import org.apache.spark.sql.delta.storage.ClosableIterator._ +import org.apache.spark.sql.delta.util.{DateTimeUtils, TimestampFormatter} +import org.apache.hadoop.fs.FileStatus + +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} +import org.apache.spark.sql.connector.read.streaming +import org.apache.spark.sql.connector.read.streaming.{ReadAllAvailable, ReadLimit, ReadMaxFiles, SupportsAdmissionControl, SupportsTriggerAvailableNow} +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.types.StructType + +/** + * A case class to help with `Dataset` operations regarding Offset indexing, representing AddFile + * actions in a Delta log. For proper offset tracking (SC-19523), there are also special sentinel + * values with negative index = [[DeltaSourceOffset.BASE_INDEX]] and add = null. + * + * This class is not designed to be persisted in offset logs or such. + * + * @param version The version of the Delta log containing this AddFile. + * @param index The index of this AddFile in the Delta log. + * @param add The AddFile. + * @param remove The RemoveFile if any. + * @param cdc the CDC File if any. + * @param isLast A flag to indicate whether this is the last AddFile in the version. This is used + * to resolve an off-by-one issue in the streaming offset interface; once we've read + * to the end of a log version file, we check this flag to advance immediately to the + * next one in the persisted offset. Without this special case we would re-read the + * already completed log file. + * @param shouldSkip A flag to indicate whether this IndexedFile should be skipped. Currently, we + * skip processing an IndexedFile on no-op merges to avoid producing redundant + * records. + */ +private[delta] case class IndexedFile( + version: Long, + index: Long, + add: AddFile, + remove: RemoveFile = null, + cdc: AddCDCFile = null, + shouldSkip: Boolean = false) { + + require(Option(add).size + Option(remove).size + Option(cdc).size <= 1, + "IndexedFile must have at most one of add, remove, or cdc") + + def getFileAction: FileAction = { + if (add != null) { + add + } else if (remove != null) { + remove + } else { + cdc + } + } + + def hasFileAction: Boolean = { + getFileAction != null + } + + def getFileSize: Long = { + if (add != null) { + add.size + } else if (remove != null) { + remove.size.getOrElse(0) + } else { + cdc.size + } + } +} + +/** + * Base trait for the Delta Source, that contains methods that deal with + * getting changes from the delta log. 
+ */ +trait DeltaSourceBase extends Source + with SupportsAdmissionControl + with SupportsTriggerAvailableNow + with DeltaLogging { self: DeltaSource => + + /** + * Flag that allows the user to force-enable unsafe streaming reads on a Delta table with + * column mapping enabled AND drop/rename actions. + */ + protected lazy val allowUnsafeStreamingReadOnColumnMappingSchemaChanges: Boolean = { + val unsafeFlagEnabled = spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_COLUMN_MAPPING_SCHEMA_CHANGES) + if (unsafeFlagEnabled) { + recordDeltaEvent( + deltaLog, + "delta.unsafe.streaming.readOnColumnMappingSchemaChanges" + ) + } + unsafeFlagEnabled + } + + protected lazy val allowUnsafeStreamingReadOnPartitionColumnChanges: Boolean = + spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_STREAMING_UNSAFE_READ_ON_PARTITION_COLUMN_CHANGE + ) + + /** + * Flag that allows the user to disable the read-compatibility check during stream start, which + * protects against a corner case that verifyStreamHygiene cannot detect. + * This is a bug fix but also a potential behavior change, so we add a flag to fall back. + */ + protected lazy val forceEnableStreamingReadOnReadIncompatibleSchemaChangesDuringStreamStart = + spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_SCHEMA_CHANGES_DURING_STREAM_START) + + /** + * Flag that allows the user to fall back to the legacy behavior, in which a nullable=false + * schema can be used to read nullable=true data; this is incorrect, but fixing it is a behavior + * change regardless. + */ + protected lazy val forceEnableUnsafeReadOnNullabilityChange = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_STREAM_UNSAFE_READ_ON_NULLABILITY_CHANGE) + + /** + * Whether we are streaming from a table with column mapping enabled. + */ + protected val isStreamingFromColumnMappingTable: Boolean = + snapshotAtSourceInit.metadata.columnMappingMode != NoMapping + + /** + * The persisted schema from the schema log that must be used to read data files in this Delta + * streaming source. + */ + protected val persistedMetadataAtSourceInit: Option[PersistedMetadata] = + metadataTrackingLog.flatMap(_.getCurrentTrackedMetadata) + + /** + * The read schema for this source during initialization, taking the schema log into account. 
+ */ + protected val readSchemaAtSourceInit: StructType = readSnapshotDescriptor.metadata.schema + + protected val readPartitionSchemaAtSourceInit: StructType = + readSnapshotDescriptor.metadata.partitionSchema + + protected val readProtocolAtSourceInit: Protocol = readSnapshotDescriptor.protocol + + protected val readConfigurationsAtSourceInit: Map[String, String] = + readSnapshotDescriptor.metadata.configuration + + /** + * Create a snapshot descriptor, customizing its metadata using metadata tracking if necessary + */ + protected lazy val readSnapshotDescriptor: SnapshotDescriptor = + persistedMetadataAtSourceInit.map { customMetadata => + // Construct a snapshot descriptor with custom schema inline + new SnapshotDescriptor { + val deltaLog: DeltaLog = snapshotAtSourceInit.deltaLog + val metadata: Metadata = + snapshotAtSourceInit.metadata.copy( + schemaString = customMetadata.dataSchemaJson, + partitionColumns = customMetadata.partitionSchema.fieldNames, + // Copy the configurations so the correct file format can be constructed + configuration = customMetadata.tableConfigurations + // Fallback for backward compat only, this should technically not be triggered + .getOrElse { + val config = snapshotAtSourceInit.metadata.configuration + logWarning(s"Using snapshot's table configuration: $config") + config + } + ) + val protocol: Protocol = customMetadata.protocol.getOrElse { + val protocol = snapshotAtSourceInit.protocol + logWarning(s"Using snapshot's protocol: $protocol") + protocol + } + // The following are not important in stream reading + val version: Long = customMetadata.deltaCommitVersion + val numOfFilesIfKnown = snapshotAtSourceInit.numOfFilesIfKnown + val sizeInBytesIfKnown = snapshotAtSourceInit.sizeInBytesIfKnown + } + }.getOrElse(snapshotAtSourceInit) + + /** + * A global flag to mark whether we have done a per-stream start check for column mapping + * schema changes (rename / drop). + */ + @volatile protected var hasCheckedReadIncompatibleSchemaChangesOnStreamStart: Boolean = false + + override val schema: StructType = { + val schemaWithoutCDC = DeltaTableUtils.removeInternalMetadata(spark, readSchemaAtSourceInit) + if (options.readChangeFeed) { + CDCReader.cdcReadSchema(schemaWithoutCDC) + } else { + schemaWithoutCDC + } + } + + // A dummy empty dataframe that can be returned at various point during streaming + protected val emptyDataFrame: DataFrame = spark.sqlContext.internalCreateDataFrame( + spark.sparkContext.emptyRDD[InternalRow], schema, isStreaming = true) + + /** + * When `AvailableNow` is used, this offset will be the upper bound where this run of the query + * will process up. We may run multiple micro batches, but the query will stop itself when it + * reaches this offset. + */ + protected var lastOffsetForTriggerAvailableNow: Option[DeltaSourceOffset] = None + + private var isLastOffsetForTriggerAvailableNowInitialized = false + + private var isTriggerAvailableNow = false + + override def prepareForTriggerAvailableNow(): Unit = { + logInfo("The streaming query reports to use Trigger.AvailableNow.") + isTriggerAvailableNow = true + } + + /** + * initialize the internal states for AvailableNow if this method is called first time after + * `prepareForTriggerAvailableNow`. 
+ */ + protected def initForTriggerAvailableNowIfNeeded( + startOffsetOpt: Option[DeltaSourceOffset]): Unit = { + if (isTriggerAvailableNow && !isLastOffsetForTriggerAvailableNowInitialized) { + isLastOffsetForTriggerAvailableNowInitialized = true + initLastOffsetForTriggerAvailableNow(startOffsetOpt) + } + } + + protected def initLastOffsetForTriggerAvailableNow( + startOffsetOpt: Option[DeltaSourceOffset]): Unit = { + val offset = latestOffsetInternal(startOffsetOpt, ReadLimit.allAvailable()) + lastOffsetForTriggerAvailableNow = offset + lastOffsetForTriggerAvailableNow.foreach { lastOffset => + logInfo(s"lastOffset for Trigger.AvailableNow has set to ${lastOffset.json}") + } + } + + /** An internal `latestOffsetInternal` to get the latest offset. */ + protected def latestOffsetInternal( + startOffset: Option[DeltaSourceOffset], limit: ReadLimit): Option[DeltaSourceOffset] + + protected def getFileChangesWithRateLimit( + fromVersion: Long, + fromIndex: Long, + isInitialSnapshot: Boolean, + limits: Option[AdmissionLimits] = Some(AdmissionLimits())): ClosableIterator[IndexedFile] = { + val iter = if (options.readChangeFeed) { + // In this CDC use case, we need to consider RemoveFile and AddCDCFiles when getting the + // offset. + + // This method is only used to get the offset so we need to return an iterator of IndexedFile. + getFileChangesForCDC(fromVersion, fromIndex, isInitialSnapshot, limits, None).flatMap(_._2) + .toClosable + } else { + val changes = getFileChanges(fromVersion, fromIndex, isInitialSnapshot) + + // Take each change until we've seen the configured number of addFiles. Some changes don't + // represent file additions; we retain them for offset tracking, but they don't count towards + // the maxFilesPerTrigger conf. + if (limits.isEmpty) { + changes + } else { + val admissionControl = limits.get + changes.withClose { it => it.takeWhile { admissionControl.admit(_) } + } + } + } + // Stop before any schema change barrier if detected. + stopIndexedFileIteratorAtSchemaChangeBarrier(iter) + } + + /** + * get the changes from startVersion, startIndex to the end + * @param startVersion - calculated starting version + * @param startIndex - calculated starting index + * @param isInitialSnapshot - whether the stream has to return the initial snapshot or not + * @param endOffset - Offset that signifies the end of the stream. + * @return + */ + protected def getFileChangesAndCreateDataFrame( + startVersion: Long, + startIndex: Long, + isInitialSnapshot: Boolean, + endOffset: DeltaSourceOffset): DataFrame = { + if (options.readChangeFeed) { + getCDCFileChangesAndCreateDataFrame(startVersion, startIndex, isInitialSnapshot, endOffset) + } else { + val fileActionsIter = getFileChanges( + startVersion, + startIndex, + isInitialSnapshot, + endOffset = Some(endOffset) + ) + try { + val filteredIndexedFiles = fileActionsIter.filter { indexedFile => + indexedFile.getFileAction != null && + excludeRegex.forall(_.findFirstIn(indexedFile.getFileAction.path).isEmpty) + } + + createDataFrame(filteredIndexedFiles) + } finally { + fileActionsIter.close() + } + } + } + + /** + * Given an iterator of file actions, create a DataFrame representing the files added to a table + * Only AddFile actions will be used to create the DataFrame. + * @param indexedFiles actions iterator from which to generate the DataFrame. 
+ */ + protected def createDataFrame(indexedFiles: Iterator[IndexedFile]): DataFrame = { + val addFiles = indexedFiles + .filter(_.getFileAction.isInstanceOf[AddFile]) + .toSeq + val hasDeletionVectors = + addFiles.exists(_.getFileAction.asInstanceOf[AddFile].deletionVector != null) + if (hasDeletionVectors) { + // Read AddFiles from different versions in different scans. + // This avoids an issue where we might read the same file with different deletion vectors in + // the same scan, which we cannot support as long we broadcast a map of DVs for lookup. + // This code can be removed once we can pass the DVs into the scan directly together with the + // AddFile/PartitionedFile entry. + addFiles + .groupBy(_.version) + .values + .map { addFilesList => + deltaLog.createDataFrame( + readSnapshotDescriptor, + addFilesList.map(_.getFileAction.asInstanceOf[AddFile]), + isStreaming = true) + } + .reduceOption(_ union _) + .getOrElse { + // If we filtered out all the values before the groupBy, just return an empty DataFrame. + deltaLog.createDataFrame( + readSnapshotDescriptor, + Seq.empty[AddFile], + isStreaming = true) + } + } else { + deltaLog.createDataFrame( + readSnapshotDescriptor, + addFiles.map(_.getFileAction.asInstanceOf[AddFile]), + isStreaming = true) + } + } + + /** + * Returns the offset that starts from a specific delta table version. This function is + * called when starting a new stream query. + * + * @param fromVersion The version of the delta table to calculate the offset from. + * @param isInitialSnapshot Whether the delta version is for the initial snapshot or not. + * @param limits Indicates how much data can be processed by a micro batch. + */ + protected def getStartingOffsetFromSpecificDeltaVersion( + fromVersion: Long, + isInitialSnapshot: Boolean, + limits: Option[AdmissionLimits]): Option[DeltaSourceOffset] = { + // Initialize schema tracking log if possible, no-op if already initialized + // This is one of the two places can initialize schema tracking. + // This case specifically handles when we have a fresh stream. + if (readyToInitializeMetadataTrackingEagerly) { + initializeMetadataTrackingAndExitStream(fromVersion) + } + + val changes = getFileChangesWithRateLimit( + fromVersion, + fromIndex = DeltaSourceOffset.BASE_INDEX, + isInitialSnapshot = isInitialSnapshot, + limits) + + val lastFileChange = DeltaSource.iteratorLast(changes) + + if (lastFileChange.isEmpty) { + None + } else { + // Block latestOffset() from generating an invalid offset by proactively verifying + // incompatible schema changes under column mapping. See more details in the method doc. + checkReadIncompatibleSchemaChangeOnStreamStartOnce(fromVersion) + buildOffsetFromIndexedFile(lastFileChange.get, fromVersion, isInitialSnapshot) + } + } + + /** + * Return the next offset when previous offset exists. 
+ */ + protected def getNextOffsetFromPreviousOffset( + previousOffset: DeltaSourceOffset, + limits: Option[AdmissionLimits]): Option[DeltaSourceOffset] = { + if (trackingMetadataChange) { + getNextOffsetFromPreviousOffsetIfPendingSchemaChange(previousOffset) match { + case None => + case updatedPreviousOffsetOpt => + // Stop generating new offset if there were pending schema changes + return updatedPreviousOffsetOpt + } + } + + val changes = getFileChangesWithRateLimit( + previousOffset.reservoirVersion, + previousOffset.index, + previousOffset.isInitialSnapshot, + limits) + + val lastFileChange = DeltaSource.iteratorLast(changes) + + if (lastFileChange.isEmpty) { + Some(previousOffset) + } else { + // Similarly, block latestOffset() from generating an invalid offset by proactively + // verifying incompatible schema changes under column mapping. See more details in the + // method scala doc. + checkReadIncompatibleSchemaChangeOnStreamStartOnce(previousOffset.reservoirVersion) + buildOffsetFromIndexedFile(lastFileChange.get, previousOffset.reservoirVersion, + previousOffset.isInitialSnapshot) + } + } + + /** + * Build the latest offset based on the last indexedFile. The function also checks if latest + * version is valid by comparing with previous version. + * @param indexedFile The last indexed file used to build offset from. + * @param version Previous offset reservoir version. + * @param isInitialSnapshot Whether previous offset is starting version or not. + */ + private def buildOffsetFromIndexedFile( + indexedFile: IndexedFile, + version: Long, + isInitialSnapshot: Boolean): Option[DeltaSourceOffset] = { + val (v, i) = (indexedFile.version, indexedFile.index) + assert(v >= version, + s"buildOffsetFromIndexedFile returns an invalid version: $v (expected: >= $version), " + + s"tableId: $tableId") + + // If the last file in previous batch is the end index of that version, automatically bump + // to next version to skip accessing that version file altogether. The END_INDEX should never + // be returned as an offset. + val offset = if (indexedFile.index == DeltaSourceOffset.END_INDEX) { + // isInitialSnapshot must be false here as we have bumped the version. + Some(DeltaSourceOffset( + tableId, + v + 1, + index = DeltaSourceOffset.BASE_INDEX, + isInitialSnapshot = false)) + } else { + // isInitialSnapshot will be true only if previous isInitialSnapshot is true and the next file + // is still at the same version (i.e v == version). + Some(DeltaSourceOffset( + tableId, v, i, + isInitialSnapshot = v == version && isInitialSnapshot + )) + } + offset + } + + /** + * Return the DataFrame between start and end offset. + */ + protected def createDataFrameBetweenOffsets( + startVersion: Long, + startIndex: Long, + isInitialSnapshot: Boolean, + startOffsetOption: Option[DeltaSourceOffset], + endOffset: DeltaSourceOffset): DataFrame = { + getFileChangesAndCreateDataFrame(startVersion, startIndex, isInitialSnapshot, endOffset) + } + + protected def cleanUpSnapshotResources(): Unit = { + if (initialState != null) { + initialState.close(unpersistSnapshot = initialStateVersion < snapshotAtSourceInit.version) + initialState = null + } + } + + /** + * Check read-incompatible schema changes during stream (re)start so we could fail fast. + * + * This only needs to be called ONCE in the life cycle of a stream, either at the very first + * latestOffset, or the very first getBatch to make sure we have detected an incompatible + * schema change. 
+ * Typically, the verifyStreamHygiene call may be good enough to detect these + * schema changes, but there are cases it cannot handle, e.g. consider this sequence: + * 1. User starts a new stream @ startingVersion 1 + * 2. latestOffset is called before getBatch() because there were no previous commits, so + * getBatch won't be called as a recovery mechanism. + * Suppose there's a single rename/drop/nullability change S while computing the next offset; S + * would look exactly the same as the latest schema, so verifyStreamHygiene would not work. + * 3. latestOffset would return this new offset across the schema boundary. + * + * If a schema log is already initialized, we don't have to run the initialization or the schema + * checks any more. + * + * @param batchStartVersion Start version we want to verify read compatibility against + * @param batchEndVersionOpt Optionally, if we are checking against an existing constructed batch + * during streaming initialization, we would also like to verify all + * schema changes in between as well before we can lazily initialize the + * schema log if needed. + */ + protected def checkReadIncompatibleSchemaChangeOnStreamStartOnce( + batchStartVersion: Long, + batchEndVersionOpt: Option[Long] = None): Unit = { + if (trackingMetadataChange) return + if (hasCheckedReadIncompatibleSchemaChangesOnStreamStart) return + + lazy val (startVersionSnapshotOpt, errOpt) = + Try(deltaLog.getSnapshotAt(batchStartVersion)) match { + case Success(snapshot) => (Some(snapshot), None) + case Failure(exception) => (None, Some(exception)) + } + + // Cannot perfectly verify column mapping schema changes if we cannot compute a start snapshot. + if (!allowUnsafeStreamingReadOnColumnMappingSchemaChanges && + isStreamingFromColumnMappingTable && errOpt.isDefined) { + throw DeltaErrors.failedToGetSnapshotDuringColumnMappingStreamingReadCheck(errOpt.get) + } + + // Perform the schema check if we need to, considering all escape flags. + if (!allowUnsafeStreamingReadOnColumnMappingSchemaChanges || + !forceEnableStreamingReadOnReadIncompatibleSchemaChangesDuringStreamStart) { + startVersionSnapshotOpt.foreach { snapshot => + checkReadIncompatibleSchemaChanges( + snapshot.metadata, + snapshot.version, + batchStartVersion, + batchEndVersionOpt, + validatedDuringStreamStart = true + ) + // If the end version is defined (i.e. we have a pending batch), let's also eagerly check all + // intermediate schema changes against the stream read schema to capture corner cases such + // as rename and rename back. + for { + endVersion <- batchEndVersionOpt + (version, metadata) <- collectMetadataActions(batchStartVersion, endVersion) + } { + checkReadIncompatibleSchemaChanges( + metadata, + version, + batchStartVersion, + Some(endVersion), + validatedDuringStreamStart = true) + } + } + } + + // Mark as checked + hasCheckedReadIncompatibleSchemaChangesOnStreamStart = true + } + + /** + * Narrow waist to verify a metadata action for read-incompatible schema changes, specifically: + * 1. Any column mapping related schema changes (renamed / dropped columns) + * 2. Standard read-compatibility checks, including: + * a) No missing columns + * b) No data type changes + * c) No read-incompatible nullability changes + * If the check fails, we throw an exception to exit the stream. + * If lazy log initialization is required, we also run a one-time scan to safely initialize the + * metadata tracking log upon any non-additive schema change failures. 
+ * @param metadata Metadata that contains a potential schema change + * @param version Version for the metadata action + * @param validatedDuringStreamStart Whether this check is being done during stream start. + */ + protected def checkReadIncompatibleSchemaChanges( + metadata: Metadata, + version: Long, + batchStartVersion: Long, + batchEndVersionOpt: Option[Long] = None, + validatedDuringStreamStart: Boolean = false): Unit = { + log.info(s"checking read incompatibility with schema at version $version, " + + s"inside batch[$batchStartVersion, ${batchEndVersionOpt.getOrElse("latest")}]") + + val (newMetadata, oldMetadata) = if (version < snapshotAtSourceInit.version) { + (snapshotAtSourceInit.metadata, metadata) + } else { + (metadata, snapshotAtSourceInit.metadata) + } + + // Column mapping schema changes + if (!allowUnsafeStreamingReadOnColumnMappingSchemaChanges) { + assert(!trackingMetadataChange, "should not check schema change while tracking it") + + if (!DeltaColumnMapping.hasNoColumnMappingSchemaChanges(newMetadata, oldMetadata, + allowUnsafeStreamingReadOnPartitionColumnChanges)) { + throw DeltaErrors.blockStreamingReadsWithIncompatibleColumnMappingSchemaChanges( + spark, + oldMetadata.schema, + newMetadata.schema, + detectedDuringStreaming = !validatedDuringStreamStart) + } + } + + // Other standard read compatibility changes + if (!validatedDuringStreamStart || + !forceEnableStreamingReadOnReadIncompatibleSchemaChangesDuringStreamStart) { + + val schemaChange = if (options.readChangeFeed) { + CDCReader.cdcReadSchema(metadata.schema) + } else { + metadata.schema + } + + // There is a schema change. All of the files after this commit will use `schemaChange`. Hence, + // we check whether we can use `schema` (the fixed source schema we use in the same run of the + // query) to read these new files safely. + val backfilling = version < snapshotAtSourceInit.version + // We forbid the case when the schemaChange is nullable while the read schema is NOT + // nullable, or in other words, `schema` should not tighten nullability from `schemaChange`, + // because we don't ever want to read back any nulls when the read schema is non-nullable. + val shouldForbidTightenNullability = !forceEnableUnsafeReadOnNullabilityChange + if (!SchemaUtils.isReadCompatible( + schemaChange, schema, + forbidTightenNullability = shouldForbidTightenNullability, + // If a user is streaming from a column mapping table and enables the unsafe flag to ignore + // column mapping schema changes, we can allow the standard check to accept missing columns + // from the read schema in the schema change, because the only case where that happens is + // when the user renames/drops a column and doesn't care, having enabled the flag to unblock. + // This is only allowed when we are "backfilling", i.e. the stream progress is older than + // the analyzed table version. Any schema change past the analysis should still throw an + // exception, because additive schema changes MUST be taken into account. 
+ allowMissingColumns = + isStreamingFromColumnMappingTable && + allowUnsafeStreamingReadOnColumnMappingSchemaChanges && + backfilling, + // Partition column change will be ignored if user enable the unsafe flag + newPartitionColumns = if (allowUnsafeStreamingReadOnPartitionColumnChanges) Seq.empty + else newMetadata.partitionColumns, + oldPartitionColumns = if (allowUnsafeStreamingReadOnPartitionColumnChanges) Seq.empty + else oldMetadata.partitionColumns + )) { + // Only schema change later than the current read snapshot/schema can be retried, in other + // words, backfills could never be retryable, because we have no way to refresh + // the latest schema to "catch up" when the schema change happens before than current read + // schema version. + // If not backfilling, we do another check to determine retryability, in which we assume + // we will be reading using this later `schemaChange` back on the current outdated `schema`, + // and if it works (including that `schemaChange` should not tighten the nullability + // constraint from `schema`), it is a retryable exception. + val retryable = !backfilling && SchemaUtils.isReadCompatible( + schema, schemaChange, forbidTightenNullability = shouldForbidTightenNullability) + throw DeltaErrors.schemaChangedException( + schema, + schemaChange, + retryable = retryable, + Some(version), + includeStartingVersionOrTimestampMessage = options.containsStartingVersionOrTimestamp) + } + } + } +} + +/** + * A streaming source for a Delta table. + * + * When a new stream is started, delta starts by constructing a + * [[org.apache.spark.sql.delta.Snapshot]] at + * the current version of the table. This snapshot is broken up into batches until + * all existing data has been processed. Subsequent processing is done by tailing + * the change log looking for new data. This results in the streaming query returning + * the same answer as a batch query that had processed the entire dataset at any given point. + */ +case class DeltaSource( + spark: SparkSession, + deltaLog: DeltaLog, + options: DeltaOptions, + snapshotAtSourceInit: SnapshotDescriptor, + metadataPath: String, + metadataTrackingLog: Option[DeltaSourceMetadataTrackingLog] = None, + filters: Seq[Expression] = Nil) + extends DeltaSourceBase + with DeltaSourceCDCSupport + with DeltaSourceMetadataEvolutionSupport { + + private val shouldValidateOffsets = + spark.sessionState.conf.getConf(DeltaSQLConf.STREAMING_OFFSET_VALIDATION) + + // Deprecated. Please use `skipChangeCommits` from now on. + private val ignoreFileDeletion = { + if (options.ignoreFileDeletion) { + logConsole(DeltaErrors.ignoreStreamingUpdatesAndDeletesWarning(spark)) + recordDeltaEvent(deltaLog, "delta.deprecation.ignoreFileDeletion") + } + options.ignoreFileDeletion + } + + /** A check on the source table that skips commits that contain removes from the + * set of files. */ + private val skipChangeCommits = options.skipChangeCommits + + protected val excludeRegex: Option[Regex] = options.excludeRegex + + // This was checked before creating ReservoirSource + assert(schema.nonEmpty) + + protected val tableId = snapshotAtSourceInit.metadata.id + + // A metadata snapshot when starting the query. + protected var initialState: DeltaSourceSnapshot = null + protected var initialStateVersion: Long = -1L + + logInfo(s"Filters being pushed down: $filters") + + /** + * Get the changes starting from (startVersion, startIndex). The start point should not be + * included in the result. 
+ * + * @param endOffset If defined, do not return changes beyond this offset. + * If not defined, we must be scanning the log to find the next offset. + * @param verifyMetadataAction If true, we will break the stream when we detect any + * read-incompatible metadata changes. + */ + protected def getFileChanges( + fromVersion: Long, + fromIndex: Long, + isInitialSnapshot: Boolean, + endOffset: Option[DeltaSourceOffset] = None, + verifyMetadataAction: Boolean = true + ): ClosableIterator[IndexedFile] = { + + /** Returns matching files that were added on or after startVersion among delta logs. */ + def filterAndIndexDeltaLogs(startVersion: Long): ClosableIterator[IndexedFile] = { + // TODO: handle the case when failOnDataLoss = false and we are missing change log files + // in that case, we need to recompute the start snapshot and evolve the schema if needed + require(options.failOnDataLoss || !trackingMetadataChange, + "Using schema from schema tracking log cannot tolerate missing commit files.") + deltaLog.getChangeLogFiles(startVersion, options.failOnDataLoss).flatMapWithClose { + case (version, filestatus) => + // First pass reads the whole commit and closes the iterator. + val iter = DeltaSource.createRewindableActionIterator(spark, deltaLog, filestatus) + val (shouldSkipCommit, metadataOpt, protocolOpt) = iter + .processAndClose { actionsIter => + validateCommitAndDecideSkipping( + actionsIter, version, + fromVersion, endOffset, + verifyMetadataAction && !trackingMetadataChange + ) + } + // Rewind the iterator to the beginning, if the actions are cached in memory, they will + // be reused again. + iter.rewind() + // Second pass reads the commit lazily. + iter.withClose { actionsIter => + filterAndGetIndexedFiles( + actionsIter, version, shouldSkipCommit, metadataOpt, protocolOpt) + } + } + } + + var iter = if (isInitialSnapshot) { + Iterator(1, 2).flatMapWithClose { // so that the filterAndIndexDeltaLogs call is lazy + case 1 => getSnapshotAt(fromVersion).toClosable + case 2 => filterAndIndexDeltaLogs(fromVersion + 1) + } + } else { + filterAndIndexDeltaLogs(fromVersion) + } + + iter = iter.withClose { it => + it.filter { file => + file.version > fromVersion || file.index > fromIndex + } + } + + // If endOffset is provided, we are getting a batch on a constructed range so we should use + // the endOffset as the limit. + // Otherwise, we are looking for a new offset, so we try to use the latestOffset we found for + // Trigger.availableNow() as limit. We know endOffset <= lastOffsetForTriggerAvailableNow. + val lastOffsetForThisScan = endOffset.orElse(lastOffsetForTriggerAvailableNow) + + lastOffsetForThisScan.foreach { bound => + iter = iter.withClose { it => + it.takeWhile { file => + file.version < bound.reservoirVersion || + (file.version == bound.reservoirVersion && file.index <= bound.index) + } + } + } + iter + } + + /** + * Adds dummy BEGIN_INDEX and END_INDEX IndexedFiles for @version before and after the + * contents of the iterator. The contents of the iterator must be the IndexedFiles that correspond + * to this version. + */ + protected def addBeginAndEndIndexOffsetsForVersion( + version: Long, iterator: Iterator[IndexedFile]): Iterator[IndexedFile] = { + Iterator.single(IndexedFile(version, DeltaSourceOffset.BASE_INDEX, add = null)) ++ + iterator ++ + Iterator.single(IndexedFile(version, DeltaSourceOffset.END_INDEX, add = null)) + } + + /** + * This method computes the initial snapshot to read when Delta Source was initialized on a fresh + * stream. 
+ */ + protected def getSnapshotAt(version: Long): Iterator[IndexedFile] = { + if (initialState == null || version != initialStateVersion) { + super[DeltaSourceBase].cleanUpSnapshotResources() + val snapshot = getSnapshotFromDeltaLog(version) + + initialState = new DeltaSourceSnapshot(spark, snapshot, filters) + initialStateVersion = version + + // This handle a special case for schema tracking log when it's initialized but the initial + // snapshot's schema has changed, suppose: + // 1. The stream starts and looks at the initial snapshot to compute the starting offset, say + // at version 0 with schema + // 2. User renames a column, creates version 1 with schema + // 3. The read compatibility check fails during scanning version 1, initializes schema log + // using the initial snapshot's schema (, because that's the safest thing to do as we + // have not served any data from initial snapshot yet) and exits stream. + // 4. Stream restarts, since no starting offset was generated, it will retry loading the + // initial snapshot, which is now at version 1, but the tracked schema is now different + // from the "new" initial snapshot schema! Worse, since schema tracking ignores any schema + // changes inside initial snapshot, we will then be reading the files using a wrong schema! + // The below logic allows us to detect any discrepancies when reading initial snapshot using + // a tracked schema, and reinitialize the log if needed. + if (trackingMetadataChange && + initialState.snapshot.version >= readSnapshotDescriptor.version) { + updateMetadataTrackingLogAndFailTheStreamIfNeeded( + Some(initialState.snapshot.metadata), + Some(initialState.snapshot.protocol), + initialState.snapshot.version, + // The new schema should replace the previous initialized schema for initial snapshot + replace = true + ) + } + } + addBeginAndEndIndexOffsetsForVersion(version, initialState.iterator()) + } + + /** + * Narrow-waist for generating snapshot from Delta Log within Delta Source + */ + protected def getSnapshotFromDeltaLog(version: Long): Snapshot = { + try { + deltaLog.getSnapshotAt(version) + } catch { + case e: FileNotFoundException => + throw DeltaErrors.logFileNotFoundExceptionForStreamingSource(e) + } + } + + private def getStartingOffset(limits: Option[AdmissionLimits]): Option[DeltaSourceOffset] = { + + val (version, isInitialSnapshot) = getStartingVersion match { + case Some(v) => (v, false) + case None => (snapshotAtSourceInit.version, true) + } + if (version < 0) { + return None + } + + getStartingOffsetFromSpecificDeltaVersion(version, isInitialSnapshot, limits) + } + + override def getDefaultReadLimit: ReadLimit = { + AdmissionLimits().toReadLimit + } + + def toDeltaSourceOffset(offset: streaming.Offset): DeltaSourceOffset = { + DeltaSourceOffset(tableId, offset) + } + + /** + * This should only be called by the engine. Call `latestOffsetInternal` instead if you need to + * get the latest offset. 
+ */ + override def latestOffset(startOffset: streaming.Offset, limit: ReadLimit): streaming.Offset = { + val deltaStartOffset = Option(startOffset).map(toDeltaSourceOffset) + initForTriggerAvailableNowIfNeeded(deltaStartOffset) + latestOffsetInternal(deltaStartOffset, limit).orNull + } + + override protected def latestOffsetInternal( + startOffset: Option[DeltaSourceOffset], limit: ReadLimit): Option[DeltaSourceOffset] = { + val limits = AdmissionLimits(limit) + + val endOffset = startOffset.map(getNextOffsetFromPreviousOffset(_, limits)) + .getOrElse(getStartingOffset(limits)) + + val startVersion = startOffset.map(_.reservoirVersion).getOrElse(-1L) + val endVersion = endOffset.map(_.reservoirVersion).getOrElse(-1L) + lazy val offsetRangeInfo = "(latestOffsetInternal)startOffset -> endOffset:" + + s" $startOffset -> $endOffset" + if (endVersion - startVersion > 1000L) { + // Improve the log level if the source is processing a large batch. + logInfo(offsetRangeInfo) + } else { + logDebug(offsetRangeInfo) + } + if (shouldValidateOffsets && startOffset.isDefined) { + endOffset.foreach { endOffset => + DeltaSourceOffset.validateOffsets(startOffset.get, endOffset) + } + } + endOffset + } + + override def getOffset: Option[Offset] = { + throw new UnsupportedOperationException( + "latestOffset(Offset, ReadLimit) should be called instead of this method") + } + + /** + * Filter the iterator with only add files that contain data change and get indexed files. + * @return indexed add files + */ + private def filterAndGetIndexedFiles( + iterator: Iterator[Action], + version: Long, + shouldSkipCommit: Boolean, + metadataOpt: Option[Metadata], + protocolOpt: Option[Protocol]): Iterator[IndexedFile] = { + val filteredIterator = + if (shouldSkipCommit) { + Iterator.empty + } else { + iterator.collect { case a: AddFile if a.dataChange => a } + } + + var index = -1L + val indexedFiles = new Iterator[IndexedFile] { + override def hasNext: Boolean = filteredIterator.hasNext + override def next(): IndexedFile = { + index += 1 // pre-increment the index (so it starts from 0) + val add = filteredIterator.next().copy(stats = null) + IndexedFile(version, index, add) + } + } + addBeginAndEndIndexOffsetsForVersion( + version, + getMetadataOrProtocolChangeIndexedFileIterator(metadataOpt, protocolOpt, version) ++ + indexedFiles) + } + + /** + * Check stream for violating any constraints. + * + * If verifyMetadataAction = true, we will break the stream when we detect any read-incompatible + * metadata changes. + * + * @return (true if commit should be skipped, a metadata action if found) + */ + protected def validateCommitAndDecideSkipping( + actions: Iterator[Action], + version: Long, + batchStartVersion: Long, + batchEndOffsetOpt: Option[DeltaSourceOffset] = None, + verifyMetadataAction: Boolean = true + ): (Boolean, Option[Metadata], Option[Protocol]) = { + // If the batch end is at the beginning of this exact version, then we actually stop reading + // just _before_ this version. So then we can ignore the version contents entirely. + if (batchEndOffsetOpt.exists(end => + end.reservoirVersion == version && end.index == DeltaSourceOffset.BASE_INDEX)) { + return (false, None, None) + } + + /** A check on the source table that disallows changes on the source data. */ + val shouldAllowChanges = options.ignoreChanges || ignoreFileDeletion || skipChangeCommits + /** A check on the source table that disallows commits that only include deletes to the data. 
*/ + val shouldAllowDeletes = shouldAllowChanges || options.ignoreDeletes || ignoreFileDeletion + + var seenFileAdd = false + var skippedCommit = false + var metadataAction: Option[Metadata] = None + var protocolAction: Option[Protocol] = None + var removeFileActionPath: Option[String] = None + actions.foreach { + case a: AddFile if a.dataChange => + seenFileAdd = true + case r: RemoveFile if r.dataChange => + skippedCommit = skipChangeCommits + if (removeFileActionPath.isEmpty) { + removeFileActionPath = Some(r.path) + } + case m: Metadata => + if (verifyMetadataAction) { + checkReadIncompatibleSchemaChanges( + m, version, batchStartVersion, batchEndOffsetOpt.map(_.reservoirVersion)) + } + assert(metadataAction.isEmpty, + "Should not encounter two metadata actions in the same commit") + metadataAction = Some(m) + case protocol: Protocol => + deltaLog.protocolRead(protocol) + assert(protocolAction.isEmpty, + "Should not encounter two protocol actions in the same commit") + protocolAction = Some(protocol) + case _ => () + } + if (removeFileActionPath.isDefined) { + if (seenFileAdd && !shouldAllowChanges) { + throw DeltaErrors.deltaSourceIgnoreChangesError( + version, + removeFileActionPath.get, + deltaLog.dataPath.toString + ) + } else if (!seenFileAdd && !shouldAllowDeletes) { + throw DeltaErrors.deltaSourceIgnoreDeleteError( + version, + removeFileActionPath.get, + deltaLog.dataPath.toString + ) + } + } + (skippedCommit, metadataAction, protocolAction) + } + + override def getBatch(startOffsetOption: Option[Offset], end: Offset): DataFrame = { + val endOffset = toDeltaSourceOffset(end) + val startDeltaOffsetOption = startOffsetOption.map(toDeltaSourceOffset) + + val (startVersion, startIndex, isInitialSnapshot) = + extractStartingState(startDeltaOffsetOption, endOffset) + + if (startOffsetOption.contains(endOffset)) { + // This happens only if we recover from a failure and `MicroBatchExecution` tries to call + // us with the previous offsets. The returned DataFrame will be dropped immediately, so we + // can return any DataFrame. + return emptyDataFrame + } + + val offsetRangeInfo = s"(getBatch)start: $startDeltaOffsetOption end: $end" + if (endOffset.reservoirVersion - startVersion > 1000L) { + // Improve the log level if the source is processing a large batch. + logInfo(offsetRangeInfo) + } else { + logDebug(offsetRangeInfo) + } + + // Initialize schema tracking log if possible, no-op if already initialized. + // This is one of the two places can initialize schema tracking. + // This case specifically handles initialization when we are already working with an initialized + // stream. + // Here we may have two conditions: + // 1. We are dealing with the recovery getBatch() that gives us the previous committed offset + // where start and end corresponds to the previous batch. + // In this case, we should initialize the schema at the previous committed offset (endOffset), + // which can be done using the same `initializeMetadataTrackingAndExitStream` method. + // This also means we are caught up with the stream and we can start schema tracking in the + // next latestOffset call. + // 2. We are running an already-constructed batch, we need the schema to be compatible + // with the entire batch, so we also pass the batch end offset. The schema tracking log will + // only be initialized if there exists a consistent read schema for the entire batch. If such + // a consistent schema does not exist, the stream will be broken. 
This case will be rare: it can + // only happen for streams where the schema tracking log was added after the stream has already + // been running, *and* the stream was running on an older version of the DeltaSource that did + // not detect non-additive schema changes, *and* it was stopped while processing a batch that + // contained such a schema change. + // In either world, the initialization logic would find the superset compatible schema for this + // batch by scanning Delta log. + validateAndInitMetadataLogForPlannedBatchesDuringStreamStart(startVersion, endOffset) + + val createdDf = createDataFrameBetweenOffsets( + startVersion, startIndex, isInitialSnapshot, startDeltaOffsetOption, endOffset) + + createdDf + } + + /** + * Extracts the start state for a scan given an optional start offset and an end offset, so we + * know exactly where we should scan from for a batch end at the `endOffset`, invoked when: + * + * 1. We are in `getBatch` given a startOffsetOption and endOffset from streaming engine. + * 2. We are in the `init` method for every stream (re)start given a start offset for all pending + * batches and the latest planned offset, and trying to figure out if this range contains any + * non-additive schema changes. + * + * @param startOffsetOption Optional start offset, if not defined. This means we are trying to + * scan the very first batch where endOffset is the very first offset + * generated by `latestOffsets`, specifically `getStartingOffset` + * @param endOffset The end offset for a batch. + * @return (start commit version to scan from, + * start offset index to scan from, + * whether this version is part of the initial snapshot) + */ + private def extractStartingState( + startOffsetOption: Option[DeltaSourceOffset], + endOffset: DeltaSourceOffset): (Long, Long, Boolean) = { + val (startVersion, startIndex, isInitialSnapshot) = if (startOffsetOption.isEmpty) { + getStartingVersion match { + case Some(v) => + (v, DeltaSourceOffset.BASE_INDEX, false) + + case None => + if (endOffset.isInitialSnapshot) { + (endOffset.reservoirVersion, DeltaSourceOffset.BASE_INDEX, true) + } else { + assert( + endOffset.reservoirVersion > 0, s"invalid reservoirVersion in endOffset: $endOffset") + // Load from snapshot `endOffset.reservoirVersion - 1L` so that `index` in `endOffset` + // is still valid. + // It's OK to use the previous version as the updated initial snapshot, even if the + // initial snapshot might have been different from the last time when this starting + // offset was computed. + (endOffset.reservoirVersion - 1L, DeltaSourceOffset.BASE_INDEX, true) + } + } + } else { + val startOffset = startOffsetOption.get + if (!startOffset.isInitialSnapshot) { + // unpersist `snapshot` because it won't be used any more. + cleanUpSnapshotResources() + } + (startOffset.reservoirVersion, startOffset.index, startOffset.isInitialSnapshot) + } + (startVersion, startIndex, isInitialSnapshot) + } + + /** + * Centralized place for validating and initializing schema log for all pending batch(es). + * This is called only during stream start. + * + * @param startVersion Start version of the pending batch range + * @param endOffset End offset for the pending batch range. end offset >= start offset + */ + private def validateAndInitMetadataLogForPlannedBatchesDuringStreamStart( + startVersion: Long, + endOffset: DeltaSourceOffset): Unit = { + // We don't have to include the end reservoir version when the end offset is a base index, i.e. 
+ // no data commit has been marked within a constructed batch, we can simply ignore end offset + // version. This can help us avoid overblocking a potential ending offset right at a schema + // change. + val endVersionForMetadataLogInit = if (endOffset.index == DeltaSourceOffset.BASE_INDEX) { + endOffset.reservoirVersion - 1 + } else { + endOffset.reservoirVersion + } + // For eager initialization, we initialize the log right now. + if (readyToInitializeMetadataTrackingEagerly) { + initializeMetadataTrackingAndExitStream(startVersion, Some(endVersionForMetadataLogInit)) + } + + // Check for column mapping + streaming incompatible schema changes + // Note for initial snapshot, the startVersion should be the same as the latestOffset's + // version and therefore this check won't have any effect. + // This method would also handle read-compatibility checks against the pending batch(es) + // as well as lazy metadata log initialization. + checkReadIncompatibleSchemaChangeOnStreamStartOnce( + startVersion, + Some(endVersionForMetadataLogInit) + ) + } + + override def stop(): Unit = { + cleanUpSnapshotResources() + } + + // Marks that the `end` offset is done and we can safely run any actions in response to that. + // This happens AFTER `end` offset is committed by the streaming engine so we can safely fail this + // if needed, e.g. for failing the stream to conduct schema evolution. + override def commit(end: Offset): Unit = { + super.commit(end) + // IMPORTANT: for future developers, please place any work you would like to do in commit() + // before `updateSchemaTrackingLogAndFailTheStreamIfNeeded(end)` as it may throw an exception. + updateMetadataTrackingLogAndFailTheStreamIfNeeded(end) + } + + override def toString(): String = s"DeltaSource[${deltaLog.dataPath}]" + + trait DeltaSourceAdmissionBase { self: AdmissionLimits => + // This variable indicates whether a commit has already been processed by a batch or not. + var commitProcessedInBatch = false + + protected def take(files: Int, bytes: Long): Unit = { + filesToTake -= files + bytesToTake -= bytes + } + + /** + * This overloaded method checks if all the FileActions for a commit can be accommodated by + * the rate limit. + */ + def admit(indexedFiles: Seq[IndexedFile]): Boolean = { + def getSize(actions: Seq[IndexedFile]): Long = { + actions.filter(_.hasFileAction).foldLeft(0L) { (l, r) => l + r.getFileAction.getFileSize } + } + if (indexedFiles.isEmpty) { + true + } else { + // if no files have been admitted, then admit all to avoid deadlock + // else check if all of the files together satisfy the limit, only then admit + val bytesInFiles = getSize(indexedFiles) + val shouldAdmit = !commitProcessedInBatch || + (filesToTake - indexedFiles.size >= 0 && bytesToTake - bytesInFiles >= 0) + + commitProcessedInBatch = true + take(files = indexedFiles.size, bytes = bytesInFiles) + shouldAdmit + } + } + + /** + * Whether to admit the next file. Dummy IndexedFile entries with no attached file action are + * always admitted. + */ + def admit(indexedFile: IndexedFile): Boolean = { + commitProcessedInBatch = true + + if (!indexedFile.hasFileAction) { + // Don't count placeholders. They are not files. If we have empty commits, then we should + // not count the placeholders as files, or else we'll end up with under-filled batches. + return true + } + + // We always admit a file if we still have capacity _before_ we take it. 
This ensures that we + // will even admit a file when it is larger than the remaining capacity, and that we will + // admit at least one file. + val shouldAdmit = hasCapacity + take(files = 1, bytes = indexedFile.getFileAction.getFileSize) + shouldAdmit + } + + /** Returns whether the admission limits have capacity to accept more files or bytes */ + def hasCapacity: Boolean = { + filesToTake > 0 && bytesToTake > 0 + } + + } + + /** + * Class that helps control how much data should be processed by a single micro-batch. + */ + case class AdmissionLimits( + maxFiles: Option[Int] = options.maxFilesPerTrigger, + var bytesToTake: Long = options.maxBytesPerTrigger.getOrElse(Long.MaxValue) + ) extends DeltaSourceAdmissionBase { + + var filesToTake = maxFiles.getOrElse { + if (options.maxBytesPerTrigger.isEmpty) { + DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION_DEFAULT + } else { + Int.MaxValue - 8 // - 8 to prevent JVM Array allocation OOM + } + } + + def toReadLimit: ReadLimit = { + if (options.maxFilesPerTrigger.isDefined && options.maxBytesPerTrigger.isDefined) { + CompositeLimit( + ReadMaxBytes(options.maxBytesPerTrigger.get), + ReadLimit.maxFiles(options.maxFilesPerTrigger.get).asInstanceOf[ReadMaxFiles]) + } else if (options.maxBytesPerTrigger.isDefined) { + ReadMaxBytes(options.maxBytesPerTrigger.get) + } else { + ReadLimit.maxFiles( + options.maxFilesPerTrigger.getOrElse(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION_DEFAULT)) + } + } + } + + object AdmissionLimits { + + def apply(limit: ReadLimit): Option[AdmissionLimits] = limit match { + case _: ReadAllAvailable => None + case maxFiles: ReadMaxFiles => Some(new AdmissionLimits(Some(maxFiles.maxFiles()))) + case maxBytes: ReadMaxBytes => Some(new AdmissionLimits(None, maxBytes.maxBytes)) + case composite: CompositeLimit => + Some(new AdmissionLimits(Some(composite.maxFiles.maxFiles()), composite.bytes.maxBytes)) + case other => throw DeltaErrors.unknownReadLimit(other.toString()) + } + } + + /** + * Extracts whether users provided the option to time travel a relation. If a query restarts from + * a checkpoint and the checkpoint has recorded the offset, this method should never be called. + */ + protected lazy val getStartingVersion: Option[Long] = { + // Note: returning a version beyond the latest snapshot version won't be a problem as callers + // of this function won't use the version to retrieve a snapshot (refer to [[getStartingOffset]]). + val allowOutOfRange = + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_CDF_ALLOW_OUT_OF_RANGE_TIMESTAMP) + /** DeltaOption validates input and ensures that only one is provided. */ + if (options.startingVersion.isDefined) { + val v = options.startingVersion.get match { + case StartingVersionLatest => + deltaLog.update().version + 1 + case StartingVersion(version) => + // when starting from a given version, we don't need the snapshot of this version. So + // `mustBeRecreatable` is set to `false`. + deltaLog.history.checkVersionExists(version, mustBeRecreatable = false, allowOutOfRange) + version + } + Some(v) + } else if (options.startingTimestamp.isDefined) { + val tt: DeltaTimeTravelSpec = DeltaTimeTravelSpec( + timestamp = options.startingTimestamp.map(Literal(_)), + version = None, + creationSource = Some("deltaSource")) + Some(DeltaSource + .getStartingVersionFromTimestamp( + spark, + deltaLog, + tt.getTimestamp(spark.sessionState.conf), + allowOutOfRange)) + } else { + None + } + } + +} + +object DeltaSource { + + /** + * - If a commit version exactly matches the provided timestamp, we return it. 
+ * - Otherwise, we return the earliest commit version + * with a timestamp greater than the provided one. + * - If the provided timestamp is larger than the timestamp + * of any committed version, and canExceedLatest is disabled we throw an error. + * - If the provided timestamp is larger than the timestamp + * of any committed version, and canExceedLatest is enabled we return a version that is greater + * than deltaLog.snapshot.version by one + * + * @param spark - current spark session + * @param deltaLog - Delta log of the table for which we find the version. + * @param timestamp - user specified timestamp + * @param canExceedLatest - if true, version can be greater than the latest snapshot commit + * @return - corresponding version number for timestamp + */ + def getStartingVersionFromTimestamp( + spark: SparkSession, + deltaLog: DeltaLog, + timestamp: Timestamp, + canExceedLatest: Boolean = false): Long = { + val tz = spark.sessionState.conf.sessionLocalTimeZone + val commit = deltaLog.history.getActiveCommitAtTime( + timestamp, + canReturnLastCommit = true, + mustBeRecreatable = false, + canReturnEarliestCommit = true) + if (commit.timestamp >= timestamp.getTime) { + // Find the commit at the `timestamp` or the earliest commit + commit.version + } else { + // commit.timestamp is not the same, so this commit is a commit before the timestamp and + // the next version if exists should be the earliest commit after the timestamp. + // Note: `getActiveCommitAtTime` has called `update`, so we don't need to call it again. + // + // Note2: In the use case of [[CDCReader]] timestamp passed in can exceed the latest commit + // timestamp, caller doesn't expect exception, and can handle the non-existent version. + if (commit.version + 1 <= deltaLog.unsafeVolatileSnapshot.version || canExceedLatest) { + commit.version + 1 + } else { + val commitTs = new Timestamp(commit.timestamp) + val timestampFormatter = TimestampFormatter(DateTimeUtils.getTimeZone(tz)) + val tsString = DateTimeUtils.timestampToString( + timestampFormatter, DateTimeUtils.fromJavaTimestamp(commitTs)) + throw DeltaErrors.timestampGreaterThanLatestCommit(timestamp, commitTs, tsString) + } + } + } + + /** + * Read an [[ClosableIterator]] of Delta actions from file status, considering memory constraints + */ + def createRewindableActionIterator( + spark: SparkSession, + deltaLog: DeltaLog, + fileStatus: FileStatus): ClosableIterator[Action] with SupportsRewinding[Action] = { + val threshold = spark.sessionState.conf.getConf(DeltaSQLConf.LOG_SIZE_IN_MEMORY_THRESHOLD) + lazy val actions = + deltaLog.store.read(fileStatus, deltaLog.newDeltaHadoopConf()).map(Action.fromJson) + // Return a new [[CloseableIterator]] over the commit. If the commit is smaller than the + // threshold, we will read it into memory once and iterate over that every time. + // Otherwise, we read it again every time. 
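+ // Rewinding simply swaps in a freshly created delegate iterator: for commits below the size + // threshold the already-parsed in-memory actions are reused, so a rewind is cheap, while for + // larger commits each rewind re-reads the commit file through readAsIterator, trading repeated + // I/O for bounded memory.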
+ val shouldLoadIntoMemory = fileStatus.getLen < threshold + def createClosableIterator(): ClosableIterator[Action] = if (shouldLoadIntoMemory) { + // Reuse in the memory actions + actions.toIterator.toClosable + } else { + deltaLog.store.readAsIterator(fileStatus, deltaLog.newDeltaHadoopConf()) + .withClose { + _.map(Action.fromJson) + } + } + new ClosableIterator[Action] with SupportsRewinding[Action] { + var delegatedIterator: ClosableIterator[Action] = createClosableIterator() + override def hasNext: Boolean = delegatedIterator.hasNext + override def next(): Action = delegatedIterator.next() + override def close(): Unit = delegatedIterator.close() + override def rewind(): Unit = delegatedIterator = createClosableIterator() + } + } + + /** + * Scan and get the last item of the iterator. + */ + def iteratorLast[T](iter: ClosableIterator[T]): Option[T] = { + try { + var last: Option[T] = None + while (iter.hasNext) { + last = Some(iter.next()) + } + last + } finally { + iter.close() + } + } +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceCDCSupport.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceCDCSupport.scala new file mode 100644 index 00000000000..85bed2020fc --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceCDCSupport.scala @@ -0,0 +1,368 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.sources + +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.actions.DomainMetadata +import org.apache.spark.sql.delta.commands.cdc.CDCReader + +import org.apache.spark.sql.DataFrame + +/** + * Helper functions for CDC-specific handling for DeltaSource. + */ +trait DeltaSourceCDCSupport { self: DeltaSource => + + ///////////////////////// + // Nested helper class // + ///////////////////////// + + /** + * This class represents an iterator of Change metadata(AddFile, RemoveFile, AddCDCFile) + * for a particular version. + * @param fileActionsItr - Iterator of IndexedFiles for a particular commit. + * @param isInitialSnapshot - Indicates whether the commit version is the initial snapshot or not. + */ + class IndexedChangeFileSeq( + fileActionsItr: Iterator[IndexedFile], + isInitialSnapshot: Boolean) { + + private def moreThanFrom( + indexedFile: IndexedFile, fromVersion: Long, fromIndex: Long): Boolean = { + // we need to filter out files so that we get only files after the startingOffset + indexedFile.version > fromVersion || indexedFile.index > fromIndex + } + + private def lessThanEnd( + indexedFile: IndexedFile, + endOffset: Option[DeltaSourceOffset]): Boolean = { + // we need to filter out files so that they are within the end offsets. 
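+ // A file qualifies when no end offset is given, when its version is strictly before the end + // reservoir version, or when it is at (or before) that version with an index no greater than + // the end index.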
+ if (endOffset.isEmpty) { + true + } else { + indexedFile.version < endOffset.get.reservoirVersion || + (indexedFile.version <= endOffset.get.reservoirVersion && + indexedFile.index <= endOffset.get.index) + } + } + + private def noMatchesRegex(indexedFile: IndexedFile): Boolean = { + if (hasNoFileActionAndStartOrEndIndex(indexedFile)) return true + + excludeRegex.forall(_.findFirstIn(indexedFile.getFileAction.path).isEmpty) + } + + private def hasFileAction(indexedFile: IndexedFile): Boolean = { + indexedFile.getFileAction != null + } + + private def hasNoFileActionAndStartOrEndIndex(indexedFile: IndexedFile): Boolean = { + !indexedFile.hasFileAction && + (indexedFile.index == DeltaSourceOffset.BASE_INDEX || + indexedFile.index == DeltaSourceOffset.END_INDEX) + } + + private def hasAddsOrRemoves(indexedFile: IndexedFile): Boolean = { + indexedFile.add != null || indexedFile.remove != null + } + + private def isSchemaChangeIndexedFile(indexedFile: IndexedFile): Boolean = { + indexedFile.index == DeltaSourceOffset.METADATA_CHANGE_INDEX || + indexedFile.index == DeltaSourceOffset.POST_METADATA_CHANGE_INDEX + } + + private def isValidIndexedFile( + indexedFile: IndexedFile, + fromVersion: Long, + fromIndex: Long, + endOffset: Option[DeltaSourceOffset]): Boolean = { + !indexedFile.shouldSkip && + (hasFileAction(indexedFile) || + hasNoFileActionAndStartOrEndIndex(indexedFile) || + isSchemaChangeIndexedFile(indexedFile)) && + moreThanFrom(indexedFile, fromVersion, fromIndex) && + lessThanEnd(indexedFile, endOffset) && noMatchesRegex(indexedFile) && + lessThanEnd(indexedFile, lastOffsetForTriggerAvailableNow) + } + + /** + * Returns the IndexedFiles for particular commit version after rate-limiting and filtering + * out based on version boundaries. + */ + def filterFiles( + fromVersion: Long, + fromIndex: Long, + limits: Option[AdmissionLimits], + endOffset: Option[DeltaSourceOffset] = None): Iterator[IndexedFile] = { + + if (limits.isEmpty) { + return fileActionsItr.filter(isValidIndexedFile(_, fromVersion, fromIndex, endOffset)) + } + val admissionControl = limits.get + if (isInitialSnapshot) { + // NOTE: the initial snapshot can be huge hence we do not do a toSeq here. + fileActionsItr + .filter(isValidIndexedFile(_, fromVersion, fromIndex, endOffset)) + .takeWhile { admissionControl.admit(_) } + } else { + // Change data for a commit can be either recorded by a Seq[AddCDCFiles] or + // a Seq[AddFile]/ Seq[RemoveFile] + val fileActions = fileActionsItr.toSeq + + // If there exists a stopping iterator for this version, we should return right-away + fileActions.find(isSchemaChangeIndexedFile) match { + case Some(schemaChangeBarrier) => + return Seq(schemaChangeBarrier).toIterator + case _ => + } + + val cdcFiles = fileActions.filter(_.cdc != null) // get only cdc commits. + if (cdcFiles.nonEmpty) { + // CDC of commit is represented by AddCDCFile + val filteredFiles = cdcFiles + .filter(isValidIndexedFile(_, fromVersion, fromIndex, endOffset)) + // For CDC commits we either admit the entire commit or nothing at all. + // This is to avoid returning `update_preimage` and `update_postimage` in separate + // batches. + if (admissionControl.admit(filteredFiles)) { + filteredFiles.toIterator + } else { + Iterator() + } + } else { + // CDC is recorded as AddFile or RemoveFile + // We allow entries with no file actions and index as [[DeltaSourceOffset.BASE_INDEX]] + // that are used primarily to update latest offset when no other + // file action based entries are present. 
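+          // (Hypothetical case: a commit containing only non-data actions still contributes a
+          // single placeholder IndexedFile at BASE_INDEX, so the stream offset can advance past
+          // that version even though no rows are emitted.)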
+ val filteredFiles = fileActions + .filter { indexedFile => + hasAddsOrRemoves(indexedFile) || hasNoFileActionAndStartOrEndIndex(indexedFile) + } + .filter(isValidIndexedFile(_, fromVersion, fromIndex, endOffset)) + val hasDeletionVectors = fileActions.filter(_.hasFileAction).map(_.getFileAction).exists { + case add: AddFile => add.deletionVector != null + case remove: RemoveFile => remove.deletionVector != null + case _ => false + } + if (hasDeletionVectors) { + // We cannot split up add/remove pairs with Deletion Vectors, because we will get the + // wrong result. + // So in this case we behave as above with CDC files and either admit all or none. + if (admissionControl.admit(filteredFiles)) { + filteredFiles.toIterator + } else { + Iterator() + } + } else { + filteredFiles.takeWhile { admissionControl.admit(_) }.toIterator + } + } + } + } + } + + /////////////////////////////// + // Util methods for children // + /////////////////////////////// + + /** + * Get the changes from startVersion, startIndex to the end for CDC case. We need to call + * CDCReader to get the CDC DataFrame. + * + * @param startVersion - calculated starting version + * @param startIndex - calculated starting index + * @param isInitialSnapshot - whether the stream has to return the initial snapshot or not + * @param endOffset - Offset that signifies the end of the stream. + * @return the DataFrame containing the file changes (AddFile, RemoveFile, AddCDCFile) + */ + protected def getCDCFileChangesAndCreateDataFrame( + startVersion: Long, + startIndex: Long, + isInitialSnapshot: Boolean, + endOffset: DeltaSourceOffset): DataFrame = { + val changes: Iterator[(Long, Iterator[IndexedFile])] = + getFileChangesForCDC(startVersion, startIndex, isInitialSnapshot, None, Some(endOffset)) + + val groupedFileActions: Iterator[(Long, Seq[FileAction])] = + changes.map { case (v, indexFiles) => + (v, indexFiles.filter(_.hasFileAction).map { _.getFileAction }.toSeq) + } + + val cdcInfo = CDCReader.changesToDF( + readSnapshotDescriptor, + startVersion, + endOffset.reservoirVersion, + groupedFileActions, + spark, + isStreaming = true + ) + + cdcInfo.fileChangeDf + } + + /** + * Get the changes starting from (fromVersion, fromIndex). fromVersion is included. + * It returns an iterator of (log_version, fileActions) + * + * If verifyMetadataAction = true, we will break the stream when we detect any read-incompatible + * metadata changes. + */ + protected def getFileChangesForCDC( + fromVersion: Long, + fromIndex: Long, + isInitialSnapshot: Boolean, + limits: Option[AdmissionLimits], + endOffset: Option[DeltaSourceOffset], + verifyMetadataAction: Boolean = true): Iterator[(Long, Iterator[IndexedFile])] = { + + /** Returns matching files that were added on or after startVersion among delta logs. */ + def filterAndIndexDeltaLogs(startVersion: Long): Iterator[(Long, IndexedChangeFileSeq)] = { + // TODO: handle the case when failOnDataLoss = false and we are missing change log files + // in that case, we need to recompute the start snapshot and evolve the schema if needed + require(options.failOnDataLoss || !trackingMetadataChange, + "Using schema from schema tracking log cannot tolerate missing commit files.") + deltaLog.getChanges(startVersion, options.failOnDataLoss).map { case (version, actions) => + // skipIndexedFile must be applied after creating IndexedFile so that + // IndexedFile.index is consistent across all versions. 
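+        // (Hypothetical commit: actions [AddCDCFile, RemoveFile, AddFile] are always indexed
+        // 0, 1, 2; if the commit is later deemed skippable we only flip `shouldSkip`, so offsets
+        // recorded before and after that decision still point at the same positions.)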
+ val (fileActions, skipIndexedFile, metadataOpt, protocolOpt) = + filterCDCActions( + actions, version, fromVersion, endOffset.map(_.reservoirVersion), + verifyMetadataAction && !trackingMetadataChange) + val itr = addBeginAndEndIndexOffsetsForVersion(version, + getMetadataOrProtocolChangeIndexedFileIterator(metadataOpt, protocolOpt, version) ++ + fileActions.zipWithIndex.map { + case (action: AddFile, index) => + IndexedFile( + version, + index.toLong, + action, + shouldSkip = skipIndexedFile) + case (cdcFile: AddCDCFile, index) => + IndexedFile( + version, + index.toLong, + add = null, + cdc = cdcFile, + shouldSkip = skipIndexedFile) + case (remove: RemoveFile, index) => + IndexedFile( + version, + index.toLong, + add = null, + remove = remove, + shouldSkip = skipIndexedFile) + }) + (version, new IndexedChangeFileSeq(itr, isInitialSnapshot = false)) + } + } + + val iter: Iterator[(Long, IndexedChangeFileSeq)] = if (isInitialSnapshot) { + // If we are reading change data from the start of the table we need to + // get the latest snapshot of the table as well. + val snapshot: Iterator[IndexedFile] = getSnapshotAt(fromVersion).map { m => + // When we get the snapshot the dataChange is false for the AddFile actions + // We need to set it to true for it to be considered by the CDCReader. + if (m.add != null) { + m.copy(add = m.add.copy(dataChange = true)) + } else { + m + } + } + val snapshotItr: Iterator[(Long, IndexedChangeFileSeq)] = Iterator(( + fromVersion, + new IndexedChangeFileSeq(snapshot, isInitialSnapshot = true) + )) + + snapshotItr ++ filterAndIndexDeltaLogs(fromVersion + 1) + } else { + filterAndIndexDeltaLogs(fromVersion) + } + // In this case, filterFiles will consume the available capacity. We use takeWhile + // to stop the iteration when we reach the limit which will save us from reading + // unnecessary log files. + iter.takeWhile(_ => limits.forall(_.hasCapacity)).map { case (version, indexItr) => + (version, indexItr.filterFiles(fromVersion, fromIndex, limits, endOffset)) + } + } + + ///////////////////// + // Private methods // + ///////////////////// + + /** + * Filter out non CDC actions and only return CDC ones. This will either be AddCDCFiles + * or AddFile and RemoveFiles + * + * If verifyMetadataAction = true, we will break the stream when we detect any read-incompatible + * metadata changes. 
+ */ + private def filterCDCActions( + actions: Seq[Action], + version: Long, + batchStartVersion: Long, + batchEndVersionOpt: Option[Long] = None, + verifyMetadataAction: Boolean = true + ): (Seq[FileAction], Boolean, Option[Metadata], Option[Protocol]) = { + var shouldSkipIndexedFile = false + var metadataAction: Option[Metadata] = None + var protocolAction: Option[Protocol] = None + def checkAndCacheMetadata(m: Metadata): Unit = { + if (verifyMetadataAction) { + checkReadIncompatibleSchemaChanges(m, version, batchStartVersion, batchEndVersionOpt) + } + assert(metadataAction.isEmpty, + "Should not encounter two metadata actions in the same commit") + metadataAction = Some(m) + } + + if (actions.exists(_.isInstanceOf[AddCDCFile])) { + (actions.filter { + case _: AddCDCFile => true + case m: Metadata => + checkAndCacheMetadata(m) + false + case p: Protocol => + protocolAction = Some(p) + false + case _ => false + }.asInstanceOf[Seq[FileAction]], shouldSkipIndexedFile, metadataAction, protocolAction) + } else { + (actions.filter { + case a: AddFile => + a.dataChange + case r: RemoveFile => + r.dataChange + case m: Metadata => + checkAndCacheMetadata(m) + false + case protocol: Protocol => + deltaLog.protocolRead(protocol) + assert(protocolAction.isEmpty, + "Should not encounter two protocol actions in the same commit") + protocolAction = Some(protocol) + false + case commitInfo: CommitInfo => + shouldSkipIndexedFile = CDCReader.shouldSkipFileActionsInCommit(commitInfo) + false + case _: AddCDCFile | _: SetTransaction | _: DomainMetadata => + false + case null => // Some crazy future feature. Ignore + false + }.asInstanceOf[Seq[FileAction]], shouldSkipIndexedFile, metadataAction, protocolAction) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceMetadataEvolutionSupport.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceMetadataEvolutionSupport.scala new file mode 100644 index 00000000000..1f3799f3d0e --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceMetadataEvolutionSupport.scala @@ -0,0 +1,560 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.sources + +import java.util.Locale + +import scala.collection.mutable + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.{Action, Metadata, Protocol} +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.storage.ClosableIterator +import org.apache.spark.sql.delta.storage.ClosableIterator._ + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.streaming.Offset +import org.apache.spark.sql.types.StructType + +/** + * Helper functions for metadata evolution related handling for DeltaSource. + * A metadata change is one of: + * 1. Schema change + * 2. Delta table configuration change + * 3. 
Delta protocol change
+ * The documentation below uses schema change as the running example throughout.
+ *
+ * To achieve schema evolution, we intercept at different stages of the normal streaming process to:
+ * 1. Capture all schema changes inside a stream
+ * 2. Stop the latestOffset from crossing the schema change boundary
+ * 3. Ensure the batch prior to the schema change can still be served correctly
+ * 4. Ensure the stream fails if and only if the prior batch is served successfully
+ * 5. Write the new schema to the schema tracking log prior to stream failure, so that the next
+ *    time the stream restarts we will use the updated schema.
+ *
+ * Specifically,
+ * 1. During latestOffset calls, if we detect a schema change at version V, we generate a special
+ *    barrier [[DeltaSourceOffset]] X that has ver=V and index=INDEX_METADATA_CHANGE.
+ *    (We first generate an [[IndexedFile]] at this index, and that gets converted into an
+ *    equivalent [[DeltaSourceOffset]].)
+ *    [[INDEX_METADATA_CHANGE]] comes after [[INDEX_VERSION_BASE]] (the first
+ *    offset index that exists for any reservoir version) and before the offsets that represent
+ *    data changes. This ensures that we apply the schema change before processing the data
+ *    that uses that schema.
+ * 2. When we see a schema change offset X, it is treated as a barrier that ends the
+ *    current batch. The remaining data is effectively unavailable until all the source data
+ *    before the schema change has been committed.
+ * 3. Then, when [[commit]] is invoked on the schema change barrier offset X, we officially
+ *    write the new schema into the schema tracking log and fail the stream.
+ *    [[commit]] is only called after the batch ending at X is completed, so it is safe to
+ *    fail there.
+ * 4. Between the time offset X is generated and the time it is committed, there can be an
+ *    arbitrary number of calls to [[latestOffset]] attempting to fetch a new latest offset.
+ *    These calls must not generate new offsets until the schema change barrier offset has been
+ *    committed, the new schema has been written to the schema tracking log, and the stream has
+ *    been aborted and restarted.
+ *    A nuance here: the streaming engine won't [[commit]] until it sees a new offset that is
+ *    semantically different, which is why we first generate an offset X with index
+ *    INDEX_METADATA_CHANGE, followed immediately by a second barrier offset X' with index
+ *    INDEX_POST_SCHEMA_CHANGE.
+ *    In this way, we ensure:
+ *    a) The offset with index INDEX_METADATA_CHANGE is committed in the typical case.
+ *    b) Even if the streaming engine changes its behavior and ONLY the offset with index
+ *       INDEX_POST_SCHEMA_CHANGE is committed, we can still tell this is a
+ *       schema change barrier with a schema change ready to be evolved.
+ *    c) Whenever [[latestOffset]] sees a startOffset with a schema change barrier index, we can
+ *       easily tell that we should not progress past the schema change, unless the schema change
+ *       has actually happened.
+ * When a stream is restarted after a schema evolution (not initialization), it is guaranteed to
+ * have >= 2 entries in the schema log. To prevent users from shooting themselves in the foot by
+ * blindly restarting the stream without considering the implications for downstream tables, by
+ * default we do not allow the stream to restart unless the user sets a SQL conf that explicitly
+ * allows non-additive schema changes to propagate.
+ * We detect such non-additive schema changes during stream start by comparing the last schema
+ * log entry with the current one.
+ */
+trait DeltaSourceMetadataEvolutionSupport extends DeltaSourceBase { base: DeltaSource =>
+
+  /**
+   * Whether this DeltaSource is utilizing a schema log entry as its read schema.
+   *
+   * If the user explicitly turns on the flag to fall back to using the latest schema to read
+   * (i.e. the legacy mode), we ignore the schema log.
+   */
+  protected def trackingMetadataChange: Boolean =
+    !allowUnsafeStreamingReadOnColumnMappingSchemaChanges &&
+      metadataTrackingLog.flatMap(_.getCurrentTrackedMetadata).nonEmpty
+
+  /**
+   * Whether a schema tracking log is provided (and is empty), so we could initialize eagerly.
+   * This should only be used for the first write to the schema log; after that, schema tracking
+   * should no longer rely on this state.
+   */
+  protected def readyToInitializeMetadataTrackingEagerly: Boolean =
+    !allowUnsafeStreamingReadOnColumnMappingSchemaChanges &&
+      metadataTrackingLog.exists { log =>
+        log.getCurrentTrackedMetadata.isEmpty && log.initMetadataLogEagerly
+      }
+
+
+  /**
+   * This is called from getFileChangesWithRateLimit() during latestOffset().
+   */
+  protected def stopIndexedFileIteratorAtSchemaChangeBarrier(
+      fileActionScanIter: ClosableIterator[IndexedFile]): ClosableIterator[IndexedFile] = {
+    fileActionScanIter.withClose { iter =>
+      val (untilSchemaChange, fromSchemaChange) = iter.span { i =>
+        i.index != DeltaSourceOffset.METADATA_CHANGE_INDEX
+      }
+      // This will end at the schema change indexed file (inclusively).
+      // If there are no schema changes, this is a no-op.
+      untilSchemaChange ++ fromSchemaChange.take(1)
+    }
+  }
+
+  /**
+   * Check if a metadata or protocol change differs from the stream's read schema and metadata.
+   * We use:
+   * 1. A strict equality check on the schemas, to capture all schema changes, OR
+   * 2. A strict equality check on the Delta-related table configurations and the protocol.
+   * 3. In addition, the incoming metadata change is not considered a failure-causing change if we
+   *    have already persisted a schema at or past this version and the stream progress is behind
+   *    that schema version.
+   *    This could happen when we've already merged consecutive schema changes during the analysis
+   *    phase and we are using the merged schema as the read schema. All the schema changes in
+   *    between can be safely ignored because they won't contribute any data.
+   */
+  private def hasMetadataOrProtocolChangeComparedToStreamMetadata(
+      metadataChangeOpt: Option[Metadata],
+      protocolChangeOpt: Option[Protocol],
+      newSchemaVersion: Long): Boolean = {
+    if (persistedMetadataAtSourceInit.exists(_.deltaCommitVersion >= newSchemaVersion)) {
+      false
+    } else {
+      protocolChangeOpt.exists(_ != readProtocolAtSourceInit) ||
+        metadataChangeOpt.exists { newMetadata =>
+          newMetadata.schema != readSchemaAtSourceInit ||
+            newMetadata.partitionSchema != readPartitionSchemaAtSourceInit ||
+            newMetadata.configuration.filterKeys(_.startsWith("delta.")).toMap !=
+              readConfigurationsAtSourceInit.filterKeys(_.startsWith("delta.")).toMap
+        }
+    }
+  }
+
+  /**
+   * If the current stream metadata is not equal to the metadata change in [[metadataChangeOpt]],
+   * return a metadata change barrier [[IndexedFile]].
+   * Only returns something if [[trackingMetadataChange]] is true.
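+   * (Hypothetical example: if the stream's read schema was captured at version 10 and a column
+   * is renamed at version 15, the iterator for version 15 starts with a single [[IndexedFile]]
+   * at [[DeltaSourceOffset.METADATA_CHANGE_INDEX]], which later surfaces as the schema change
+   * barrier offset.)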
+ */ + protected def getMetadataOrProtocolChangeIndexedFileIterator( + metadataChangeOpt: Option[Metadata], + protocolChangeOpt: Option[Protocol], + version: Long): ClosableIterator[IndexedFile] = { + if (trackingMetadataChange && hasMetadataOrProtocolChangeComparedToStreamMetadata( + metadataChangeOpt, protocolChangeOpt, version)) { + // Create an IndexedFile with metadata change + Iterator.single(IndexedFile(version, DeltaSourceOffset.METADATA_CHANGE_INDEX, null)) + .toClosable + } else { + Iterator.empty.toClosable + } + } + + /** + * Collect all actions between start and end version, both inclusive + */ + private def collectActions( + startVersion: Long, + endVersion: Long + ): ClosableIterator[(Long, Action)] = { + deltaLog.getChangeLogFiles(startVersion, options.failOnDataLoss).takeWhile { + case (version, _) => version <= endVersion + }.flatMapWithClose { case (version, fileStatus) => + DeltaSource.createRewindableActionIterator(spark, deltaLog, fileStatus) + .map((version, _)) + .toClosable + } + } + + /** + * Given the version range for an ALREADY fetched batch, check if there are any + * read-incompatible schema changes or protocol changes. + * In this case, the streaming engine wants to getBatch(X,Y) on an existing Y that is already + * loaded and saved in the offset log in the past before requesting new offsets. Therefore we + * should verify if we could find a schema or protocol that is safe to read this constructed batch + * , which then can be used to initialize the metadata log. + * If not, there's not much we could do, even with metadata log, because unlike finding new + * offsets, we don't have a chance to "split" this batch at schema change boundaries any more. The + * streaming engine is not able to change the ranges of a batch after it has created it. + * If there are no non-additive schema changes, or incompatible protocol changes, it is safe to + * mark the metadata and protocol safe to read for all data files between startVersion and + * endVersion. + */ + private def validateAndResolveMetadataForLogInitialization( + startVersion: Long, endVersion: Long): (Metadata, Protocol) = { + val metadataChanges = collectMetadataActions(startVersion, endVersion).map(_._2) + val startSnapshot = getSnapshotFromDeltaLog(startVersion) + val startMetadata = startSnapshot.metadata + + // Try to find rename or drop columns in between, or nullability/datatype changes by using + // the last schema as the read schema and if so we cannot find a good read schema. + // Otherwise, the most recent metadata change will be the most encompassing schema as well. 
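+    // (Hypothetical history: additive changes at versions 3 and 5 can both be read with the
+    // version-5 schema, so that schema is chosen; a column rename at version 4 would mean no
+    // single schema can serve the whole range, and we throw instead.)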
+ val mostRecentMetadataChangeOpt = metadataChanges.lastOption + mostRecentMetadataChangeOpt.foreach { mostRecentMetadataChange => + val otherMetadataChanges = Seq(startMetadata) ++ metadataChanges.dropRight(1) + otherMetadataChanges.foreach { potentialSchemaChangeMetadata => + if (!DeltaColumnMapping.hasNoColumnMappingSchemaChanges( + newMetadata = mostRecentMetadataChange, + oldMetadata = potentialSchemaChangeMetadata) || + !SchemaUtils.isReadCompatible( + existingSchema = potentialSchemaChangeMetadata.schema, + readSchema = mostRecentMetadataChange.schema, + forbidTightenNullability = true)) { + throw DeltaErrors.streamingMetadataLogInitFailedIncompatibleMetadataException( + startVersion, endVersion) + } + } + } + + // Check protocol changes and use the most supportive protocol + val startProtocol = startSnapshot.protocol + val protocolChanges = collectProtocolActions(startVersion, endVersion).map(_._2) + + var mostSupportiveProtocol = startProtocol + protocolChanges.foreach { p => + if (mostSupportiveProtocol.readerAndWriterFeatureNames + .subsetOf(p.readerAndWriterFeatureNames)) { + mostSupportiveProtocol = p + } else { + // TODO: or use protocol union instead? + throw DeltaErrors.streamingMetadataLogInitFailedIncompatibleMetadataException( + startVersion, endVersion) + } + } + + (mostRecentMetadataChangeOpt.getOrElse(startMetadata), mostSupportiveProtocol) + } + + /** + * Collect a metadata action at the commit version if possible. + */ + private def collectMetadataAtVersion(version: Long): Option[Metadata] = { + collectActions(version, version).processAndClose { iter => + iter.map(_._2).collectFirst { + case a: Metadata => a + } + } + } + + protected def collectMetadataActions( + startVersion: Long, + endVersion: Long): Seq[(Long, Metadata)] = { + collectActions(startVersion, endVersion).processAndClose { iter => + iter.collect { + case (version, a: Metadata) => (version, a) + }.toSeq + } + } + + /** + * Collect a protocol action at the commit version if possible. + */ + private def collectProtocolAtVersion(version: Long): Option[Protocol] = { + collectActions(version, version).processAndClose { iter => + iter.map(_._2).collectFirst { + case a: Protocol => a + } + } + } + + protected def collectProtocolActions( + startVersion: Long, + endVersion: Long): Seq[(Long, Protocol)] = { + collectActions(startVersion, endVersion).processAndClose { iter => + iter.collect { + case (version, a: Protocol) => (version, a) + }.toSeq + } + } + + + /** + * If the given previous Delta source offset is a schema change offset, returns the appropriate + * next offset. This should be called before trying any other means of determining the next + * offset. + * If this returns None, then there is no schema change, and the caller should determine the next + * offset in the normal way. + */ + protected def getNextOffsetFromPreviousOffsetIfPendingSchemaChange( + previousOffset: DeltaSourceOffset): Option[DeltaSourceOffset] = { + // Check if we've generated a previous offset with schema change (i.e. offset X in class doc) + // Then, we will generate offset X' as mentioned in the class doc. + if (previousOffset.index == DeltaSourceOffset.METADATA_CHANGE_INDEX) { + return Some(previousOffset.copy(index = DeltaSourceOffset.POST_METADATA_CHANGE_INDEX)) + } + // If the previous offset is already POST the schema change and schema evolution has not + // occurred, simply block as no-op. 
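+    // (Hypothetical walkthrough: a previous offset X' = (version 15, POST_METADATA_CHANGE_INDEX)
+    // keeps being returned as the latest offset until the schema log is updated for version 15
+    // and the stream restarts with the evolved schema.)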
+    if (previousOffset.index == DeltaSourceOffset.POST_METADATA_CHANGE_INDEX &&
+        hasMetadataOrProtocolChangeComparedToStreamMetadata(
+          collectMetadataAtVersion(previousOffset.reservoirVersion),
+          collectProtocolAtVersion(previousOffset.reservoirVersion),
+          previousOffset.reservoirVersion)) {
+      return Some(previousOffset)
+    }
+
+    // Otherwise, no special handling
+    None
+  }
+
+  /**
+   * Initialize the schema tracking log if an empty schema tracking log is provided.
+   * This method also checks the range between batchStartVersion and batchEndVersion to ensure we
+   * have a safe schema to initialize in the log.
+   * @param batchStartVersion Start version of the batch of data to be processed. The metadata at
+   *                          this version should typically be safe to use for processing the
+   *                          incoming data.
+   * @param batchEndVersionOpt Optionally, if we are looking at a constructed batch with an
+   *                           existing end offset, we additionally verify that there are no
+   *                           read-incompatible changes within the batch range.
+   * @param alwaysFailUponLogInitialized Whether we should always fail with the schema evolution
+   *                                     exception once the log is initialized.
+   */
+  protected def initializeMetadataTrackingAndExitStream(
+      batchStartVersion: Long,
+      batchEndVersionOpt: Option[Long] = None,
+      alwaysFailUponLogInitialized: Boolean = false): Unit = {
+    // If possible, initialize the metadata log with the desired start metadata instead of failing.
+    // If a `batchEndVersion` is provided, we also need to verify that there are no incompatible
+    // schema changes in the constructed batch; if there are, we cannot find a proper schema to
+    // initialize the schema log with.
+    val (version, metadata, protocol) = batchEndVersionOpt.map { endVersion =>
+      val (validMetadata, validProtocol) =
+        validateAndResolveMetadataForLogInitialization(batchStartVersion, endVersion)
+      // `endVersion` should be valid for initialization
+      (endVersion, validMetadata, validProtocol)
+    }.getOrElse {
+      val startSnapshot = getSnapshotFromDeltaLog(batchStartVersion)
+      (startSnapshot.version, startSnapshot.metadata, startSnapshot.protocol)
+    }
+
+    val newMetadata = PersistedMetadata(tableId, version, metadata, protocol, metadataPath)
+    // Always initialize the metadata log
+    metadataTrackingLog.get.writeNewMetadata(newMetadata)
+    if (hasMetadataOrProtocolChangeComparedToStreamMetadata(
+        Some(metadata), Some(protocol), version) || alwaysFailUponLogInitialized) {
+      // But trigger the evolution exception when there's a difference
+      throw DeltaErrors.streamingMetadataEvolutionException(
+        newMetadata.dataSchema,
+        newMetadata.tableConfigurations.get,
+        newMetadata.protocol.get
+      )
+    }
+  }
+
+  /**
+   * Update the current stream schema in the schema tracking log and fail the stream.
+   * This is called during commit().
+   * It's OK to fail during commit() because, per streaming semantics, the batch ending at offset
+   * `end` should have already been processed completely.
+   */
+  protected def updateMetadataTrackingLogAndFailTheStreamIfNeeded(end: Offset): Unit = {
+    val offset = DeltaSourceOffset(tableId, end)
+    if (trackingMetadataChange &&
+        (offset.index == DeltaSourceOffset.METADATA_CHANGE_INDEX ||
+          offset.index == DeltaSourceOffset.POST_METADATA_CHANGE_INDEX)) {
+      // The offset must point to a metadata or protocol change action
+      val changedMetadataOpt = collectMetadataAtVersion(offset.reservoirVersion)
+      val changedProtocolOpt = collectProtocolAtVersion(offset.reservoirVersion)
+
+      // Evolve the schema when the schema is indeed different from the current stream schema.
We + // need to check this because we could potentially generate two offsets before schema + // evolution each with different indices. + // Typically streaming engine will commit the first one and evolve the schema log, however, + // to be absolutely safe, we also consider the case when the first is skipped and only the + // second one is committed. + // If the first one is committed (typically), the stream will fail and restart with the + // evolved schema, then we should NOT fail/evolve again when we commit the second offset. + updateMetadataTrackingLogAndFailTheStreamIfNeeded( + changedMetadataOpt, changedProtocolOpt, offset.reservoirVersion) + } + } + + /** + * Write a new potentially changed metadata into the metadata tracking log. Then fail the stream + * to allow reanalysis if there are changes. + * @param changedMetadataOpt Potentially changed metadata action + * @param changedProtocolOpt Potentially changed protocol action + * @param version The version of change + */ + protected def updateMetadataTrackingLogAndFailTheStreamIfNeeded( + changedMetadataOpt: Option[Metadata], + changedProtocolOpt: Option[Protocol], + version: Long, + replace: Boolean = false): Unit = { + if (hasMetadataOrProtocolChangeComparedToStreamMetadata( + changedMetadataOpt, changedProtocolOpt, version)) { + + val schemaToPersist = PersistedMetadata( + deltaLog.tableId, + version, + changedMetadataOpt.getOrElse(readSnapshotDescriptor.metadata), + changedProtocolOpt.getOrElse(readSnapshotDescriptor.protocol), + metadataPath + ) + // Update schema log + if (replace) { + metadataTrackingLog.get.writeNewMetadata(schemaToPersist, replaceCurrent = true) + } else { + metadataTrackingLog.get.writeNewMetadata(schemaToPersist) + } + // Fail the stream with schema evolution exception + throw DeltaErrors.streamingMetadataEvolutionException( + schemaToPersist.dataSchema, + schemaToPersist.tableConfigurations.get, + schemaToPersist.protocol.get + ) + } + } +} + +object NonAdditiveSchemaChangeTypes { + // Rename -> caused by a single column rename + val SCHEMA_CHANGE_RENAME = "RENAME COLUMN" + // Drop -> caused by a single column drop + val SCHEMA_CHANGE_DROP = "DROP COLUMN" + // A combination of rename and drop columns -> can be caused by a complete overwrite + val SCHEMA_CHANGE_RENAME_AND_DROP = "RENAME AND DROP COLUMN" +} + +object DeltaSourceMetadataEvolutionSupport { + + /** + * Determine the non-additive schema change type for an incoming schema change. None if it's + * additive. 
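+   * (Hypothetical schemas: going from (a, b, c) to (a, b, c2), where c2 keeps c's column mapping
+   * physical column, is classified as a RENAME; going to (a, b) is a DROP; an overwrite that does
+   * both is RENAME AND DROP.)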
+ */ + private def determineNonAdditiveSchemaChangeType( + newSchema: StructType, oldSchema: StructType): Option[String] = { + val isRenameColumn = DeltaColumnMapping.isRenameColumnOperation(newSchema, oldSchema) + val isDropColumn = DeltaColumnMapping.isDropColumnOperation(newSchema, oldSchema) + if (isRenameColumn && isDropColumn) { + Some(NonAdditiveSchemaChangeTypes.SCHEMA_CHANGE_RENAME_AND_DROP) + } else if (isRenameColumn) { + Some(NonAdditiveSchemaChangeTypes.SCHEMA_CHANGE_RENAME) + } else if (isDropColumn) { + Some(NonAdditiveSchemaChangeTypes.SCHEMA_CHANGE_DROP) + } else { + None + } + } + + def getCheckpointHash(path: String): Int = path.hashCode + + final val SQL_CONF_UNBLOCK_ALL = "allowSourceColumnRenameAndDrop" + final val SQL_CONF_UNBLOCK_RENAME = "allowSourceColumnRename" + final val SQL_CONF_UNBLOCK_DROP = "allowSourceColumnDrop" + + // scalastyle:off + /** + * Given a non-additive operation type from a previous schema evolution, check we can process + * using the new schema given any SQL conf users have explicitly set to unblock. + * The SQL conf can take one of following formats: + * 1. spark.databricks.delta.streaming.allowSourceColumnRenameAndDrop = true + * -> allows all non-additive schema changes to propagate. + * 2. spark.databricks.delta.streaming.allowSourceColumnRenameAndDrop.$checkpointHash = true + * -> allows all non-additive schema changes to propagate for this particular stream + * 3. spark.databricks.delta.streaming.allowSourceColumnRenameAndDrop.$checkpointHash = $deltaVersion + * + * The `allowSourceColumnRenameAndDrop` can be replaced with: + * 1. `allowSourceColumnRename` to just allow column rename + * 2. `allowSourceColumnDrop` to just allow column drops + * + * We will check for any of these configs given the non-additive operation, and throw a proper + * error message to instruct the user to set the SQL conf if they would like to unblock. + * + * @param metadataPath The path to the source-unique metadata location under checkpoint + * @param currentSchema The current persisted schema + * @param previousSchema The previous persisted schema + */ + // scalastyle:on + protected[sources] def validateIfSchemaChangeCanBeUnblockedWithSQLConf( + spark: SparkSession, + metadataPath: String, + currentSchema: PersistedMetadata, + previousSchema: PersistedMetadata): Unit = { + val sqlConfPrefix = s"${DeltaSQLConf.SQL_CONF_PREFIX}.streaming" + val checkpointHash = getCheckpointHash(metadataPath) + + def getConf(key: String): Option[String] = + Option(spark.sessionState.conf.getConfString(key, null)) + .map(_.toLowerCase(Locale.ROOT)) + + def getConfPairsToAllowSchemaChange( + allowSchemaChange: String, schemaChangeVersion: Long): Seq[(String, String)] = + Seq( + (s"$sqlConfPrefix.$allowSchemaChange", "always"), + (s"$sqlConfPrefix.$allowSchemaChange.ckpt_$checkpointHash", "always"), + (s"$sqlConfPrefix.$allowSchemaChange.ckpt_$checkpointHash", schemaChangeVersion.toString) + ) + + // The start version of a possible series of consecutive schema changes. + val previousSchemaChangeVersion = previousSchema.deltaCommitVersion + // The end version of a possible series of consecutive schema changes. 
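+    // Example (hypothetical values): if consecutive schema changes were merged so that
+    // previousSchemaChangeVersion = 12 and currentSchemaChangeVersion = 15, a detected column
+    // drop could be unblocked for this stream with either
+    //   spark.databricks.delta.streaming.allowSourceColumnDrop = "always"
+    // or, scoped to this checkpoint and schema change version,
+    //   spark.databricks.delta.streaming.allowSourceColumnDrop.ckpt_<checkpointHash> = "15"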
+ val currentSchemaChangeVersion = currentSchema.deltaCommitVersion + val confPairsToAllowAllSchemaChange = + getConfPairsToAllowSchemaChange(SQL_CONF_UNBLOCK_ALL, currentSchemaChangeVersion) + + determineNonAdditiveSchemaChangeType( + currentSchema.dataSchema, previousSchema.dataSchema).foreach { + case NonAdditiveSchemaChangeTypes.SCHEMA_CHANGE_DROP => + val validConfKeysValuePair = + getConfPairsToAllowSchemaChange(SQL_CONF_UNBLOCK_DROP, currentSchemaChangeVersion) ++ + confPairsToAllowAllSchemaChange + if (!validConfKeysValuePair.exists(p => getConf(p._1).contains(p._2))) { + // Throw error to prompt user to set the correct confs + throw DeltaErrors.cannotContinueStreamingPostSchemaEvolution( + NonAdditiveSchemaChangeTypes.SCHEMA_CHANGE_DROP, + previousSchemaChangeVersion, + currentSchemaChangeVersion, + checkpointHash, + SQL_CONF_UNBLOCK_ALL, + SQL_CONF_UNBLOCK_DROP) + } + case NonAdditiveSchemaChangeTypes.SCHEMA_CHANGE_RENAME => + val validConfKeysValuePair = + getConfPairsToAllowSchemaChange(SQL_CONF_UNBLOCK_RENAME, currentSchemaChangeVersion) ++ + confPairsToAllowAllSchemaChange + if (!validConfKeysValuePair.exists(p => getConf(p._1).contains(p._2))) { + // Throw error to prompt user to set the correct confs + throw DeltaErrors.cannotContinueStreamingPostSchemaEvolution( + NonAdditiveSchemaChangeTypes.SCHEMA_CHANGE_RENAME, + previousSchemaChangeVersion, + currentSchemaChangeVersion, + checkpointHash, + SQL_CONF_UNBLOCK_ALL, + SQL_CONF_UNBLOCK_RENAME) + } + case NonAdditiveSchemaChangeTypes.SCHEMA_CHANGE_RENAME_AND_DROP => + val validConfKeysValuePair = confPairsToAllowAllSchemaChange + if (!validConfKeysValuePair.exists(p => getConf(p._1).contains(p._2))) { + // Throw error to prompt user to set the correct confs + throw DeltaErrors.cannotContinueStreamingPostSchemaEvolution( + NonAdditiveSchemaChangeTypes.SCHEMA_CHANGE_RENAME_AND_DROP, + previousSchemaChangeVersion, + currentSchemaChangeVersion, + checkpointHash, + SQL_CONF_UNBLOCK_ALL, + SQL_CONF_UNBLOCK_ALL) + } + } + } + +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceMetadataTrackingLog.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceMetadataTrackingLog.scala new file mode 100644 index 00000000000..df3e4317424 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceMetadataTrackingLog.scala @@ -0,0 +1,359 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.sources + +// scalastyle:off import.ordering.noEmptyLine +import java.io.InputStream + +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.streaming.{JsonSchemaSerializer, PartitionAndDataSchema, SchemaTrackingLog} +import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaOptions, SnapshotDescriptor} +import org.apache.spark.sql.delta.actions.{Action, FileAction, Metadata, Protocol} +import org.apache.spark.sql.delta.storage.ClosableIterator._ +import org.apache.spark.sql.delta.util.JsonUtils +import com.fasterxml.jackson.annotation.JsonIgnore +import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import org.apache.hadoop.fs.Path + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.types.{DataType, StructType} +// scalastyle:on import.ordering.noEmptyLine + +/** + * A [[PersistedMetadata]] is an entry in Delta streaming source schema log, which can be used to + * read data files during streaming. + * + * @param tableId Delta table id + * @param deltaCommitVersion Delta commit version in which this change is captured. It does not + * necessarily have to be the commit when there's an actual change, e.g. + * during initialization. + * The invariant is that the metadata must be read-compatible with the + * table snapshot at this version. + * @param dataSchemaJson Full schema json + * @param partitionSchemaJson Partition schema json + * @param sourceMetadataPath The checkpoint path that is unique to each source. + * @param tableConfigurations The configurations of the table inside the metadata when the schema + * change was detected. It is used to correctly create the right file + * format when we use a particular schema to read. + * Default to None for backward compatibility. + * @param protocolJson JSON of the protocol change if any. + * Default to None for backward compatibility. + * @param previousMetadataSeqNum When defined, it points to the batch ID / seq num for the previous + * metadata in the log sequence. It is used when we could not reliably + * tell if the currentBatchId - 1 is indeed the previous schema evolution, + * e.g. when we are merging consecutive schema changes during the analysis + * phase and we are appending an extra schema after the merge to the log. + * Default to None for backward compatibility. 
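+ *
+ * A serialized entry might look like the following (illustrative values only, schema JSON
+ * elided):
+ * {{{
+ *   {"tableId":"<table-uuid>","deltaCommitVersion":15,"dataSchemaJson":"...",
+ *    "partitionSchemaJson":"...","sourceMetadataPath":"<checkpoint>/sources/0",
+ *    "tableConfigurations":{"delta.columnMapping.mode":"name"},"protocolJson":"..."}
+ * }}}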
+ */ +case class PersistedMetadata( + tableId: String, + deltaCommitVersion: Long, + dataSchemaJson: String, + partitionSchemaJson: String, + sourceMetadataPath: String, + tableConfigurations: Option[Map[String, String]] = None, + protocolJson: Option[String] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + previousMetadataSeqNum: Option[Long] = None) extends PartitionAndDataSchema { + + private def parseSchema(schemaJson: String): StructType = { + try { + DataType.fromJson(schemaJson).asInstanceOf[StructType] + } catch { + case NonFatal(_) => + throw DeltaErrors.failToParseSchemaLog + } + } + + @JsonIgnore + lazy val dataSchema: StructType = parseSchema(dataSchemaJson) + + @JsonIgnore + lazy val partitionSchema: StructType = parseSchema(partitionSchemaJson) + + @JsonIgnore + lazy val protocol: Option[Protocol] = + protocolJson.map(Action.fromJson).map(_.asInstanceOf[Protocol]) + + def validateAgainstSnapshot(snapshot: SnapshotDescriptor): Unit = { + if (snapshot.deltaLog.tableId != tableId) { + throw DeltaErrors.incompatibleSchemaLogDeltaTable(tableId, snapshot.deltaLog.tableId) + } + } + +} + +object PersistedMetadata { + val VERSION = 1 + val EMPTY_JSON = "{}" + + def fromJson(json: String): PersistedMetadata = JsonUtils.fromJson[PersistedMetadata](json) + + def apply( + tableId: String, + deltaCommitVersion: Long, + metadata: Metadata, + protocol: Protocol, + sourceMetadataPath: String): PersistedMetadata = { + PersistedMetadata(tableId, deltaCommitVersion, + metadata.schema.json, metadata.partitionSchema.json, + // The schema is bound to the specific source + sourceMetadataPath, + // Table configurations come from the Metadata action + Some(metadata.configuration), + Some(protocol.json) + ) + } +} + +/** + * Tracks the metadata changes for a particular Delta streaming source in a particular stream, + * it is utilized to save and lookup the correct metadata during streaming from a Delta table. + * This schema log is NOT meant to be shared across different Delta streaming source instances. + * + * @param rootMetadataLocation Metadata log location + * @param sourceSnapshot Delta source snapshot for the Delta streaming source + * @param sourceMetadataPathOpt The source metadata path that is used during streaming execution. + * @param initMetadataLogEagerly If true, initialize metadata log as early as possible, otherwise, + * initialize only when detecting non-additive schema change. + */ +class DeltaSourceMetadataTrackingLog private( + sparkSession: SparkSession, + rootMetadataLocation: String, + sourceSnapshot: SnapshotDescriptor, + sourceMetadataPathOpt: Option[String] = None, + val initMetadataLogEagerly: Boolean = true) { + + import org.apache.spark.sql.delta.streaming.SchemaTrackingExceptions._ + + protected val schemaSerializer = + new JsonSchemaSerializer[PersistedMetadata](PersistedMetadata.VERSION) { + override def deserialize(in: InputStream): PersistedMetadata = + try super.deserialize(in) catch { + case FailedToDeserializeException => + throw DeltaErrors.failToDeserializeSchemaLog(rootMetadataLocation) + } + } + + protected val trackingLog = + new SchemaTrackingLog[PersistedMetadata]( + sparkSession, rootMetadataLocation, schemaSerializer) + + // Validate schema at log init + trackingLog.getCurrentTrackedSchema.foreach(_.validateAgainstSnapshot(sourceSnapshot)) + + /** + * Get the global latest metadata for this metadata location. 
+ * Visible for testing + */ + private[delta] def getLatestMetadata: Option[PersistedMetadata] = + trackingLog.getLatest().map(_._2) + + /** + * Get the current schema that is being tracked by this schema log. This is typically the latest + * schema log entry to the best of this schema log's knowledge. + */ + def getCurrentTrackedMetadata: Option[PersistedMetadata] = + trackingLog.getCurrentTrackedSchema + + /** + * Get the logically-previous tracked seq num by this schema log. + * Considering the prev pointer from the latest entry if defined. + */ + private def getPreviousTrackedSeqNum: Long = { + getCurrentTrackedMetadata.flatMap(_.previousMetadataSeqNum) match { + case Some(previousSeqNum) => previousSeqNum + case None => trackingLog.getCurrentTrackedSeqNum - 1 + } + } + + /** + * Get the logically-previous tracked schema entry by this schema log. + * DeltaSource requires it to compare the previous schema with the latest schema to determine if + * an automatic stream restart is allowed. + */ + def getPreviousTrackedMetadata: Option[PersistedMetadata] = + trackingLog.getTrackedSchemaAtSeqNum(getPreviousTrackedSeqNum) + + /** + * Track a new schema to the log. + * + * @param newMetadata The incoming new metadata with schema. + * @param replaceCurrent If true, we will set a previous seq num pointer on the incoming metadata + * change pointing to the previous seq num of the current latest metadata. + * So that once the new metadata is written, getPreviousTrackedMetadata() + * will return the updated reference. + * If a previous metadata does not exist, this is noop. + */ + def writeNewMetadata( + newMetadata: PersistedMetadata, + replaceCurrent: Boolean = false): PersistedMetadata = { + try { + trackingLog.addSchemaToLog( + if (replaceCurrent && getCurrentTrackedMetadata.isDefined) { + newMetadata.copy(previousMetadataSeqNum = Some(getPreviousTrackedSeqNum)) + } else newMetadata + ) + } catch { + case FailedToEvolveSchema => + throw DeltaErrors.sourcesWithConflictingSchemaTrackingLocation( + rootMetadataLocation, sourceSnapshot.deltaLog.dataPath.toString) + } + } +} + +object DeltaSourceMetadataTrackingLog extends Logging { + + def fullMetadataTrackingLocation( + rootSchemaTrackingLocation: String, + tableId: String, + sourceTrackingId: Option[String] = None): String = { + val subdir = s"_schema_log_$tableId" + sourceTrackingId.map(n => s"_$n").getOrElse("") + new Path(rootSchemaTrackingLocation, subdir).toString + } + + /** + * Create a schema log instance for a schema location. + * The schema location is constructed as `$rootMetadataLocation/_schema_log_$tableId` + * a suffix of `_$sourceTrackingId` is appended if provided to further differentiate the sources. + * + * @param mergeConsecutiveSchemaChanges Defined during analysis phase. + * @param sourceMetadataPathOpt Defined during execution phase. + */ + def create( + sparkSession: SparkSession, + rootMetadataLocation: String, + sourceSnapshot: SnapshotDescriptor, + sourceTrackingId: Option[String] = None, + sourceMetadataPathOpt: Option[String] = None, + mergeConsecutiveSchemaChanges: Boolean = false, + initMetadataLogEagerly: Boolean = true): DeltaSourceMetadataTrackingLog = { + val metadataTrackingLocation = fullMetadataTrackingLocation( + rootMetadataLocation, sourceSnapshot.deltaLog.tableId, sourceTrackingId) + val log = new DeltaSourceMetadataTrackingLog( + sparkSession, + metadataTrackingLocation, + sourceSnapshot, + sourceMetadataPathOpt, + initMetadataLogEagerly + ) + + // During initialize schema log, validate against: + // 1. 
table snapshot to check for partition and tahoe id mismatch + // 2. source metadata path to ensure we are not using the wrong schema log for the source + log.getCurrentTrackedMetadata.foreach { schema => + schema.validateAgainstSnapshot(sourceSnapshot) + if (sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_STREAMING_SCHEMA_TRACKING_METADATA_PATH_CHECK_ENABLED)) { + sourceMetadataPathOpt.foreach { metadataPath => + require(metadataPath == schema.sourceMetadataPath, + s"The Delta source metadata path used for execution '${metadataPath}' is different " + + s"from the one persisted for previous processing '${schema.sourceMetadataPath}'. " + + s"Please check if the schema location has been reused across different streaming " + + s"sources. Pick a new `${DeltaOptions.SCHEMA_TRACKING_LOCATION}` or use " + + s"`${DeltaOptions.STREAMING_SOURCE_TRACKING_ID}` to " + + s"distinguish between streaming sources.") + } + } + } + + // The consecutive schema merging logic is run in the *analysis* phase, when we figure the final + // schema to read for the streaming dataframe. + if (mergeConsecutiveSchemaChanges && log.getCurrentTrackedMetadata.isDefined) { + // If enable schema merging, skim ahead on consecutive schema changes and use the latest one + // to update the log again if possible. + // We add the prev pointer to the merged schema so that SQL conf validation logic later can + // reliably fetch the previous read schema and the latest schema and then be able to determine + // if it's OK for the stream to proceed. + getMergedConsecutiveMetadataChanges( + sparkSession, + sourceSnapshot.deltaLog, + log.getCurrentTrackedMetadata.get + ).foreach { mergedSchema => + log.writeNewMetadata(mergedSchema, replaceCurrent = true) + } + } + + // The validation is ran in *execution* phase where the metadata path becomes available. + // While loading the current persisted schema, validate against previous persisted schema + // to check if the stream can move ahead with the custom SQL conf. + (log.getPreviousTrackedMetadata, log.getCurrentTrackedMetadata, sourceMetadataPathOpt) match { + case (Some(prev), Some(curr), Some(metadataPath)) => + DeltaSourceMetadataEvolutionSupport + .validateIfSchemaChangeCanBeUnblockedWithSQLConf(sparkSession, metadataPath, curr, prev) + case _ => + } + + log + } + + /** + * Speculate ahead and find the next merged consecutive metadata change if possible. + * A metadata change is either: + * 1. A [[Metadata]] action change. OR + * 2. A [[Protocol]] change. + */ + private def getMergedConsecutiveMetadataChanges( + spark: SparkSession, + deltaLog: DeltaLog, + currentMetadata: PersistedMetadata): Option[PersistedMetadata] = { + val currentMetadataVersion = currentMetadata.deltaCommitVersion + // We start from the currentSchemaVersion so that we can stop early in case the current + // version still has file actions that potentially needs to be processed. 
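+    // (Hypothetical log: with metadata-only commits at versions 11, 12 and 13 and the first
+    // data commit at version 14, a schema tracked at version 11 is merged forward to version 13
+    // in one step, so the stream evolves its schema once instead of three times.)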
+ val untilMetadataChange = + deltaLog.getChangeLogFiles(currentMetadataVersion).map { case (version, fileStatus) => + var metadataAction: Option[Metadata] = None + var protocolAction: Option[Protocol] = None + var hasFileAction = false + DeltaSource.createRewindableActionIterator(spark, deltaLog, fileStatus) + .processAndClose { actionsIter => + actionsIter.foreach { + case m: Metadata => metadataAction = Some(m) + case p: Protocol => protocolAction = Some(p) + case _: FileAction => hasFileAction = true + case _ => + } + } + (!hasFileAction && (metadataAction.isDefined || protocolAction.isDefined), + version, metadataAction, protocolAction) + }.takeWhile(_._1) + DeltaSource.iteratorLast(untilMetadataChange.toClosable) + .flatMap { case (_, version, metadataOpt, protocolOpt) => + if (version == currentMetadataVersion) { + None + } else { + log.info(s"Looked ahead from version $currentMetadataVersion and " + + s"will use metadata at version $version to read Delta stream.") + Some( + currentMetadata.copy( + deltaCommitVersion = version, + dataSchemaJson = + metadataOpt.map(_.schema.json).getOrElse(currentMetadata.dataSchemaJson), + partitionSchemaJson = + metadataOpt.map(_.partitionSchema.json) + .getOrElse(currentMetadata.partitionSchemaJson), + tableConfigurations = metadataOpt.map(_.configuration) + .orElse(currentMetadata.tableConfigurations), + protocolJson = protocolOpt.map(_.json).orElse(currentMetadata.protocolJson) + ) + ) + } + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceOffset.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceOffset.scala new file mode 100644 index 00000000000..4658df75455 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceOffset.scala @@ -0,0 +1,288 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.sources + +// scalastyle:off import.ordering.noEmptyLine +import java.io.IOException + +import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog} +import org.apache.spark.sql.delta.util.JsonUtils +import com.fasterxml.jackson.core.{JsonGenerator, JsonParseException, JsonParser, JsonProcessingException} +import com.fasterxml.jackson.databind.{DeserializationContext, SerializerProvider} +import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonSerialize} +import com.fasterxml.jackson.databind.deser.std.StdDeserializer +import com.fasterxml.jackson.databind.exc.InvalidFormatException +import com.fasterxml.jackson.databind.ser.std.StdSerializer + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2} +import org.apache.spark.sql.execution.streaming.Offset + +/** + * Tracks how far we processed in when reading changes from the [[DeltaLog]]. + * + * Note this class retains the naming of `Reservoir` to maintain compatibility + * with serialized offsets from the beta period. 
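+ *
+ * A serialized offset in the offset log might look like the following (illustrative values):
+ * {{{
+ *   {"sourceVersion":1,"reservoirId":"<table-uuid>","reservoirVersion":15,"index":-1,
+ *    "isStartingVersion":false}
+ * }}}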
+ * + * @param reservoirId The id of the table we are reading from. Used to detect + * misconfiguration when restarting a query. + * @param reservoirVersion The version of the table that we are current processing. + * @param index The index in the sequence of AddFiles in this version. Used to + * break large commits into multiple batches. This index is created by + * sorting on modificationTimestamp and path. + * @param isInitialSnapshot Whether this offset points into an initial full table snapshot at the + * provided reservoir version rather than into the changes at that version. + * When starting a new query, we first process all data present in the + * table at the start and then move on to processing new data that has + * arrived. + */ +@JsonDeserialize(using = classOf[DeltaSourceOffset.Deserializer]) +@JsonSerialize(using = classOf[DeltaSourceOffset.Serializer]) +case class DeltaSourceOffset private( + reservoirId: String, + reservoirVersion: Long, + index: Long, + isInitialSnapshot: Boolean + ) extends Offset with Comparable[DeltaSourceOffset] { + + import DeltaSourceOffset._ + + assert(index != -1, "Index should never be -1, it should be set to the BASE_INDEX instead.") + + override def json: String = { + JsonUtils.toJson(this) + } + + /** + * Compare two DeltaSourceOffsets which are on the same table. + * @return 0 for equivalent offsets. negative if this offset is less than `otherOffset`. Positive + * if this offset is greater than `otherOffset` + */ + def compare(otherOffset: DeltaSourceOffset): Int = { + assert(reservoirId == otherOffset.reservoirId, "Comparing offsets that do not refer to the" + + " same table is disallowed.") + implicitly[Ordering[(Long, Long)]].compare((reservoirVersion, index), + (otherOffset.reservoirVersion, otherOffset.index)) + } + override def compareTo(o: DeltaSourceOffset): Int = { + compare(o) + } +} + +object DeltaSourceOffset extends Logging { + + private[DeltaSourceOffset] val VERSION_1 = 1 + private[DeltaSourceOffset] val VERSION_2 = 2 // reserved + // Serialization version 3 adds support for schema change index values. + private[DeltaSourceOffset] val VERSION_3 = 3 + + private[DeltaSourceOffset] val CURRENT_VERSION = VERSION_3 + + // The base index within each reservoirVersion. This offset indicates the offset before all + // changes in the reservoirVersion. All other offsets within the reservoirVersion have an index + // that is higher than the base index. + // + // This index is for VERSION_3+. Unless there are other fields that force the version to be >=3, + // it should NOT be serialized into offset log for backward compatibility. Instead, we serialize + // this as INDEX_VERSION_BASE_V1, and set source version lower accordingly. It gets converted back + // to the VERSION_3 value at deserialization time, so that we only use the V3 value in memory. + private[DeltaSourceOffset] val BASE_INDEX_V3: Long = -100 + + // The V1 base index that should be serialized into the offset log + private[DeltaSourceOffset] val BASE_INDEX_V1: Long = -1 + + // The base index version clients of DeltaSourceOffset should use + val BASE_INDEX: Long = BASE_INDEX_V3 + + // The index for an IndexedFile that also contains a metadata change. (from VERSION_3) + val METADATA_CHANGE_INDEX: Long = -20 + // The index for an IndexedFile that is right after a metadata change. (from VERSION_3) + val POST_METADATA_CHANGE_INDEX: Long = -19 + + // A value close to the end of the Long space. 
This is used to indicate that we are at the end of + // a reservoirVersion and need to move on to the next one. This should never be serialized into + // the offset log. + val END_INDEX: Long = Long.MaxValue - 100 + + /** + * The ONLY external facing constructor to create a DeltaSourceOffset in memory. + * @param reservoirId Table id + * @param reservoirVersion Table commit version + * @param index File action index in the commit version + * @param isInitialSnapshot Whether this offset is still in initial snapshot + */ + def apply( + reservoirId: String, + reservoirVersion: Long, + index: Long, + isInitialSnapshot: Boolean + ): DeltaSourceOffset = { + // TODO should we detect `reservoirId` changes when a query is running? + new DeltaSourceOffset( + reservoirId, + reservoirVersion, + index, + isInitialSnapshot + ) + } + + /** + * Validate and parse a DeltaSourceOffset from its serialized format + * @param reservoirId Table id + * @param offset Raw streaming offset + */ + def apply(reservoirId: String, offset: OffsetV2): DeltaSourceOffset = { + offset match { + case o: DeltaSourceOffset => o + case s => + val o = JsonUtils.mapper.readValue[DeltaSourceOffset](s.json) + if (o.reservoirId != reservoirId) { + throw DeltaErrors.differentDeltaTableReadByStreamingSource( + newTableId = reservoirId, oldTableId = o.reservoirId) + } + o + } + } + + /** + * Validate offsets to make sure we always move forward. Moving backward may make the query + * re-process data and cause data duplication. + */ + def validateOffsets(previousOffset: DeltaSourceOffset, currentOffset: DeltaSourceOffset): Unit = { + if (previousOffset.isInitialSnapshot == false && currentOffset.isInitialSnapshot == true) { + throw new IllegalStateException( + s"Found invalid offsets: 'isInitialSnapshot' flipped incorrectly. " + + s"Previous: $previousOffset, Current: $currentOffset") + } + if (previousOffset.reservoirVersion > currentOffset.reservoirVersion) { + throw new IllegalStateException( + s"Found invalid offsets: 'reservoirVersion' moved back. " + + s"Previous: $previousOffset, Current: $currentOffset") + } + if (previousOffset.reservoirVersion == currentOffset.reservoirVersion && + previousOffset.index > currentOffset.index) { + throw new IllegalStateException( + s"Found invalid offsets. 'index' moved back. " + + s"Previous: $previousOffset, Current: $currentOffset") + } + } + + def isMetadataChangeIndex(index: Long): Boolean = + index == METADATA_CHANGE_INDEX || index == POST_METADATA_CHANGE_INDEX + + /** + * This is a 1:1 copy of [[DeltaSourceOffset]] used for JSON serialization. Our serializers only + * want to adjust some field values and then serialize in the normal way. But we cannot access the + * "default" serializers once we've overridden them. So instead, we use a separate case class that + * gets serialized "as-is". + */ + private case class DeltaSourceOffsetForSerialization private( + sourceVersion: Long, + reservoirId: String, + reservoirVersion: Long, + index: Long, + // This stores isInitialSnapshot. + // This was confusingly called "starting version" in earlier versions, even though enabling + // the option "startingVersion" actually causes this to be disabled. We still have to + // serialize it using the old name for backward compatibility. 
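+      // Illustrative round trip: an in-memory offset with isInitialSnapshot = true is written to
+      // the offset log as "isStartingVersion": true, and read back as isInitialSnapshot = true.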
+ isStartingVersion: Boolean + ) + + class Deserializer + extends StdDeserializer[DeltaSourceOffset](classOf[DeltaSourceOffset]) { + @throws[IOException] + @throws[JsonProcessingException] + override def deserialize(p: JsonParser, ctxt: DeserializationContext): DeltaSourceOffset = { + val o = try { + p.readValueAs(classOf[DeltaSourceOffsetForSerialization]) + } catch { + case e: Throwable if e.isInstanceOf[JsonParseException] || + e.isInstanceOf[InvalidFormatException] => + // The version may be there with a different format, or something else might be off. + throw DeltaErrors.invalidSourceOffsetFormat() + } + + if (o.sourceVersion < VERSION_1) { + throw DeltaErrors.invalidSourceVersion(o.sourceVersion.toString) + } + if (o.sourceVersion > CURRENT_VERSION) { + throw DeltaErrors.invalidFormatFromSourceVersion(o.sourceVersion, CURRENT_VERSION) + } + if (o.sourceVersion == VERSION_2) { + // Version 2 is reserved. + throw DeltaErrors.invalidSourceVersion(o.sourceVersion.toString) + } + // Always upgrade to use the current latest INDEX_VERSION_BASE + val offsetIndex = if (o.sourceVersion < VERSION_3 && o.index == BASE_INDEX_V1) { + logDebug(s"upgrading offset to use latest version base index") + BASE_INDEX + } else { + o.index + } + assert(offsetIndex != END_INDEX, "Should not deserialize END_INDEX") + + // Leverage the only external facing constructor to initialize with latest sourceVersion + DeltaSourceOffset( + reservoirId = o.reservoirId, + reservoirVersion = o.reservoirVersion, + index = offsetIndex, + isInitialSnapshot = o.isStartingVersion + ) + } + } + + class Serializer + extends StdSerializer[DeltaSourceOffset](classOf[DeltaSourceOffset]) { + + @throws[IOException] + override def serialize( + o: DeltaSourceOffset, + gen: JsonGenerator, + provider: SerializerProvider): Unit = { + assert(o.index != END_INDEX, "Should not serialize END_INDEX") + + // We handle a few backward compatibility scenarios during Serialization here: + // 1. [Backward compatibility] If the source index is a schema changing base index, then + // replace it with index = -1 and use VERSION_1. This allows older Delta to at least be + // able to read the non-schema-changes stream offsets. + // This needs to happen during serialization time so we won't be looking at a downgraded + // index right away when we need to utilize this offset in memory. + // 2. [Backward safety] If the source index is a new schema changing index, then use + // VERSION_3. Older Delta would explode upon seeing this, but that's the safe thing to do. + val minVersion = { + if (DeltaSourceOffset.isMetadataChangeIndex(o.index)) { + VERSION_3 + } + else { + VERSION_1 + } + } + val downgradedIndex = if (o.index == BASE_INDEX) { + BASE_INDEX_V1 + } else { + o.index + } + gen.writeObject(DeltaSourceOffsetForSerialization( + sourceVersion = minVersion, + reservoirId = o.reservoirId, + reservoirVersion = o.reservoirVersion, + index = downgradedIndex, + isStartingVersion = o.isInitialSnapshot + )) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceUtils.scala new file mode 100644 index 00000000000..ba3ae46d824 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSourceUtils.scala @@ -0,0 +1,105 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.sources + +import java.util.Locale + +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.sources +import org.apache.spark.sql.sources.Filter + +object DeltaSourceUtils { + val NAME = "delta" + val ALT_NAME = "delta" + + // Batch relations don't pass partitioning columns to `CreatableRelationProvider`s, therefore + // as a hack, we pass in the partitioning columns among the options. + val PARTITIONING_COLUMNS_KEY = "__partition_columns" + + + // The metadata key recording the generation expression in a generated column's `StructField`. + val GENERATION_EXPRESSION_METADATA_KEY = "delta.generationExpression" + + + val IDENTITY_INFO_ALLOW_EXPLICIT_INSERT = "delta.identity.allowExplicitInsert" + val IDENTITY_INFO_START = "delta.identity.start" + val IDENTITY_INFO_STEP = "delta.identity.step" + val IDENTITY_INFO_HIGHWATERMARK = "delta.identity.highWaterMark" + + def isDeltaDataSourceName(name: String): Boolean = { + name.toLowerCase(Locale.ROOT) == NAME || name.toLowerCase(Locale.ROOT) == ALT_NAME + } + + /** Check whether this table is a Delta table based on information from the Catalog. */ + def isDeltaTable(provider: Option[String]): Boolean = { + provider.exists(isDeltaDataSourceName) + } + + /** Creates Spark literals from a value exposed by the public Spark API. */ + private def createLiteral(value: Any): expressions.Literal = value match { + case v: String => expressions.Literal.create(v) + case v: Int => expressions.Literal.create(v) + case v: Byte => expressions.Literal.create(v) + case v: Short => expressions.Literal.create(v) + case v: Long => expressions.Literal.create(v) + case v: Double => expressions.Literal.create(v) + case v: Float => expressions.Literal.create(v) + case v: Boolean => expressions.Literal.create(v) + case v: java.sql.Date => expressions.Literal.create(v) + case v: java.sql.Timestamp => expressions.Literal.create(v) + case v: BigDecimal => expressions.Literal.create(v) + } + + /** Translates the public Spark Filter APIs into Spark internal expressions. 
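+   *
+   * For example (illustrative), translating the two filters
+   *   Array(sources.GreaterThan("a", 3), sources.IsNotNull("b"))
+   * yields the catalyst expression
+   *   And(GreaterThan(UnresolvedAttribute("a"), Literal(3)), IsNotNull(UnresolvedAttribute("b"))).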
*/ + def translateFilters(filters: Array[Filter]): Expression = filters.map { + case sources.EqualTo(attribute, value) => + expressions.EqualTo(UnresolvedAttribute(attribute), expressions.Literal.create(value)) + case sources.EqualNullSafe(attribute, value) => + expressions.EqualNullSafe(UnresolvedAttribute(attribute), expressions.Literal.create(value)) + case sources.GreaterThan(attribute, value) => + expressions.GreaterThan(UnresolvedAttribute(attribute), expressions.Literal.create(value)) + case sources.GreaterThanOrEqual(attribute, value) => + expressions.GreaterThanOrEqual( + UnresolvedAttribute(attribute), expressions.Literal.create(value)) + case sources.LessThan(attribute, value) => + expressions.LessThan(UnresolvedAttribute(attribute), expressions.Literal.create(value)) + case sources.LessThanOrEqual(attribute, value) => + expressions.LessThanOrEqual(UnresolvedAttribute(attribute), expressions.Literal.create(value)) + case sources.In(attribute, values) => + expressions.In(UnresolvedAttribute(attribute), values.map(createLiteral)) + case sources.IsNull(attribute) => expressions.IsNull(UnresolvedAttribute(attribute)) + case sources.IsNotNull(attribute) => expressions.IsNotNull(UnresolvedAttribute(attribute)) + case sources.Not(otherFilter) => expressions.Not(translateFilters(Array(otherFilter))) + case sources.And(filter1, filter2) => + expressions.And(translateFilters(Array(filter1)), translateFilters(Array(filter2))) + case sources.Or(filter1, filter2) => + expressions.Or(translateFilters(Array(filter1)), translateFilters(Array(filter2))) + case sources.StringStartsWith(attribute, value) => + new expressions.Like( + UnresolvedAttribute(attribute), expressions.Literal.create(s"${value}%")) + case sources.StringEndsWith(attribute, value) => + new expressions.Like( + UnresolvedAttribute(attribute), expressions.Literal.create(s"%${value}")) + case sources.StringContains(attribute, value) => + new expressions.Like( + UnresolvedAttribute(attribute), expressions.Literal.create(s"%${value}%")) + case sources.AlwaysTrue() => expressions.Literal.TrueLiteral + case sources.AlwaysFalse() => expressions.Literal.FalseLiteral + }.reduce(expressions.And) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/limits.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/limits.scala new file mode 100644 index 00000000000..277a4505c6e --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/limits.scala @@ -0,0 +1,38 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.sources + + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.read.streaming.{ReadLimit, ReadMaxFiles} +import org.apache.spark.sql.internal.SQLConf + +/** A read limit that admits a soft-max of `maxBytes` per micro-batch. 
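+ *
+ * For example (an illustrative reading of the soft max): with maxBytes = 10 MB, files are
+ * admitted until the running total first reaches 10 MB; because the limit is soft, a single
+ * file larger than 10 MB is still admitted so the stream can make progress.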
*/ +case class ReadMaxBytes(maxBytes: Long) extends ReadLimit + +/** + * A read limit that admits the given soft-max of `bytes` or max `maxFiles`, once `minFiles` + * has been reached. Prior to that anything is admitted. + */ +case class CompositeLimit( + bytes: ReadMaxBytes, + maxFiles: ReadMaxFiles, + minFiles: ReadMinFiles = ReadMinFiles(-1)) extends ReadLimit + + +/** A read limit that admits a min of `minFiles` per micro-batch. */ +case class ReadMinFiles(minFiles: Int) extends ReadLimit diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/ArrayAccumulator.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/ArrayAccumulator.scala new file mode 100644 index 00000000000..3d76df07ad7 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/ArrayAccumulator.scala @@ -0,0 +1,60 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.stats + +// scalastyle:off import.ordering.noEmptyLine + +import org.apache.spark.util.AccumulatorV2 + +/** + * An accumulator that keeps arrays of counts. Counts from multiple partitions + * are merged by index. -1 indicates a null and is handled using TVL (-1 + N = -1) + */ +class ArrayAccumulator(val size: Int) extends AccumulatorV2[(Int, Long), Array[Long]] { + + protected val counts = new Array[Long](size) + + override def isZero: Boolean = counts.forall(_ == 0) + override def copy(): AccumulatorV2[(Int, Long), Array[Long]] = { + val newCopy = new ArrayAccumulator(size) + (0 until size).foreach(i => newCopy.counts(i) = counts(i)) + newCopy + } + override def reset(): Unit = (0 until size).foreach(counts(_) = 0) + override def add(v: (Int, Long)): Unit = { + if (v._2 == -1 || counts(v._1) == -1) { + counts(v._1) = -1 + } else { + counts(v._1) += v._2 + } + } + override def merge(o: AccumulatorV2[(Int, Long), Array[Long]]): Unit = { + val other = o.asInstanceOf[ArrayAccumulator] + assert(size == other.size) + + (0 until size).foreach(i => { + if (counts(i) == -1 || other.counts(i) == -1) { + counts(i) = -1 + } else { + counts(i) += other.counts(i) + } + }) + } + override def value: Array[Long] = counts + +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/AutoCompactPartitionStats.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/AutoCompactPartitionStats.scala new file mode 100644 index 00000000000..d9e24d89a45 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/AutoCompactPartitionStats.scala @@ -0,0 +1,375 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta.stats
+
+import scala.collection.mutable
+import scala.util.control.NonFatal
+
+import org.apache.spark.sql.delta.actions.{Action, AddFile, FileAction, RemoveFile}
+import org.apache.spark.sql.delta.hooks.AutoCompactPartitionReserve
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * A collector used to aggregate auto-compaction stats for a single commit. The expectation
+ * is to spin this up for a commit and then merge those local stats with the global stats.
+ */
+trait AutoCompactPartitionStatsCollector {
+  def collectPartitionStatsForAdd(file: AddFile): Unit
+  def collectPartitionStatsForRemove(file: RemoveFile): Unit
+  def finalizeStats(tableId: String): Unit
+}
+
+/**
+ * This singleton object collects the table partition statistics for each commit that creates
+ * AddFile or RemoveFile objects.
+ * To control memory usage, at most `maxNumTablePartitions` partition entries are kept per table
+ * and 'maxNumPartitions' partition entries across all tables.
+ * Note:
+ * 1. Since the number of partitions tracked per table is limited, the least recently used
+ *    table partitions are evicted once that limit is reached.
+ * 2. If all 'maxNumPartitions' are occupied, the partition stats of the least recently used
+ *    tables are evicted until the number of used partitions falls back below 'maxNumPartitions'.
+ * 3. Un-partitioned tables are treated as tables with a single partition.
+ * @param maxNumTablePartitions The hash space of the partition key, used to bound memory usage
+ *                              per table.
+ * @param maxNumPartitions The maximum number of partitions that can be occupied.
+ */
+class AutoCompactPartitionStats(
+    private var maxNumTablePartitions: Int,
+    private var maxNumPartitions: Int
+) {
+
+  /**
+   * This class stores the state of one table partition. This state includes:
+   * -- the number of small files,
+   * -- the thread assigned to compact this partition, and
+   * -- whether the partition was compacted.
+   *
+   * Note: Since this class keeps track of the statistics of the table partition and the state of
+   * the auto compaction thread that works on it, any method that accesses any attribute of this
+   * class needs to be protected by a synchronized context.
+   */
+  class PartitionStat(
+      var numFiles: Long,
+      var wasAutoCompacted: Boolean = false) {
+
+    /**
+     * Determine whether this partition can be auto-compacted based on the number of small files
+     * or if this [[AutoCompactPartitionStats]] instance has not auto-compacted it yet.
+     * @param minNumFiles The minimum number of files this table-partition should have to trigger
+     *                    Auto Compaction in case it has already been compacted once.
+     */
+    def hasSufficientSmallFilesOrHasNotBeenCompacted(minNumFiles: Long): Boolean =
+      !wasAutoCompacted || hasSufficientFiles(minNumFiles)
+
+    def hasSufficientFiles(minNumFiles: Long): Boolean = numFiles >= minNumFiles
+  }
+
+  /**
+   * This hash table stores all partition states of a table; the key is the hash code
+   * of the partition, and the value is a [[PartitionStat]] object.
+   */
+  type TablePartitionStats = mutable.LinkedHashMap[Int, PartitionStat]
+
+  // The hash map to store the number of small files in each partition.
+  // -- Key is the hash code of the partition value.
+  // -- Value is the number of small files inside the corresponding partition.
+  type PartitionFilesMap = mutable.LinkedHashMap[Int, Long]
+
+  type PartitionKey = Map[String, String]
+
+  type PartitionKeySet = Set[Map[String, String]]
+
+  // This is a simple LRU cache that stores the table partition statistics.
+  // Workspace private to enable testing.
+  private[delta] val tablePartitionStatsCache =
+    new mutable.LinkedHashMap[String, TablePartitionStats]()
+
+  // The number of partitions in this cache.
+  private[delta] var numUsedPartitions = 0
+
+  /**
+   * Helper class used to track auto-compaction stats for the AddFile and RemoveFile actions of a
+   * single run whose size is at or below a passed-in minimum file size.
+   * If the collector runs into any non-fatal errors, it will invoke the error reporter on the
+   * error and then skip further execution.
+   *
+   * @param minFileSize Minimum file size threshold; files at or below it are tracked for
+   *                    auto-compact stats
+   * @param errorReporter Function that reports the first error, if any
+   * @return A collector object that tracks the Add/Remove file actions of the current commit.
+   */
+  def createStatsCollector(
+      minFileSize: Long,
+      errorReporter: Throwable => Unit):
+    AutoCompactPartitionStatsCollector = new AutoCompactPartitionStatsCollector {
+    private val inputPartitionFiles = new PartitionFilesMap()
+    private var shouldCollect = true
+
+    /**
+     * If the file is no larger than the specified min file size, updates the partition file map
+     * of stats with add or remove actions. If we encounter an error during stats collection,
+     * the remaining files will not be collected either.
+     */
+    private def collectPartitionStatsForFile(file: FileAction, addSub: Int): Unit = {
+      try {
+        val minSizeThreshold = minFileSize
+        if (shouldCollect &&
+            file.estLogicalFileSize.getOrElse(file.getFileSize) <= minSizeThreshold
+        ) {
+          updatePartitionFileCounter(inputPartitionFiles, file.partitionValues, addSub)
+        }
+      } catch {
+        case NonFatal(e) =>
+          errorReporter(e)
+          shouldCollect = false
+      }
+    }
+    /**
+     * Adds one file to all the appropriate partition counters.
+     */
+    override def collectPartitionStatsForAdd(file: AddFile): Unit = {
+      collectPartitionStatsForFile(file, addSub = 1)
+    }
+    /**
+     * Removes one file from all the appropriate partition counters.
+     */
+    override def collectPartitionStatsForRemove(file: RemoveFile): Unit = {
+      collectPartitionStatsForFile(file, addSub = -1)
+    }
+
+    /**
+     * Merges the current collector's stats with the global one.
+     */
+    override def finalizeStats(tableId: String): Unit = {
+      try {
+        if (shouldCollect) merge(tableId, inputPartitionFiles.filter(_._2 != 0))
+      } catch {
+        case NonFatal(e) => errorReporter(e)
+      }
+    }
+  }
+
+  /**
+   * This method merges the `inputPartitionFiles` of the current committed transaction into the
+   * global cache of table partition stats. After the merge completes, `tableId` is moved to the
+   * most recently used position. If the number of occupied partitions exceeds
+   * `maxNumPartitions`, the least recently used tables are evicted.
+   *
+   * @param tableId The path of the table that contains `inputPartitionFiles`.
+   * @param inputPartitionFiles The number of files qualified for Auto Compaction in
+   *                            each partition.
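+   *
+   * Illustrative example (hypothetical numbers): merging Map(hash(partition) -> 3) for a table
+   * whose cached entry for that partition already counts 5 small files raises the count to 8 and
+   * moves both the partition entry and the table to the most recently used position.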
+ */ + def merge(tableId: String, inputPartitionFiles: PartitionFilesMap): Unit = { + if (inputPartitionFiles.isEmpty) return + synchronized { + tablePartitionStatsCache.get(tableId) match { + case Some(cachedPartitionStates) => + // If the table is already stored, merges inputPartitionFiles' content to + // existing PartitionFilesMap. + for ((partitionHashCode, numFilesDelta) <- inputPartitionFiles) { + assert(numFilesDelta != 0) + cachedPartitionStates.get(partitionHashCode) match { + case Some(partitionState) => + // If there is an entry of partitionHashCode, updates its number of files + // and moves it to the most recently used slot. + partitionState.numFiles += numFilesDelta + moveAccessedPartitionToMru(cachedPartitionStates, partitionHashCode, partitionState) + case None => + if (numFilesDelta > 0) { + // New table partition is always in the most recently used entry. + cachedPartitionStates.put(partitionHashCode, new PartitionStat(numFilesDelta)) + numUsedPartitions += 1 + } + } + } + // Move the accessed table to MRU position and evicts the LRU partitions from it + // if necessary. + moveAccessedTableToMru(tableId, cachedPartitionStates) + case None => + // If it is new table, just create new entry. + val newPartitionStates = inputPartitionFiles + .filter { case (_, numFiles) => numFiles > 0 } + .map { case (partitionHashCode, numFiles) => + (partitionHashCode, new PartitionStat(numFiles)) + } + tablePartitionStatsCache.put(tableId, newPartitionStates) + numUsedPartitions += newPartitionStates.size + moveAccessedTableToMru(tableId, newPartitionStates) + } + evictLruTablesIfNecessary() + } + } + + /** Move the accessed table partition to the most recently used position. */ + private def moveAccessedPartitionToMru( + cachedPartitionFiles: TablePartitionStats, + partitionHashCode: Int, + partitionState: PartitionStat): Unit = { + cachedPartitionFiles.remove(partitionHashCode) + if (partitionState.numFiles <= 0) { + numUsedPartitions -= 1 + } else { + // If the newNumFiles is not empty, add it back and make it to be the + // most recently used entry. + cachedPartitionFiles.put(partitionHashCode, partitionState) + } + } + + /** Move the accessed table to the most recently used position. */ + private def moveAccessedTableToMru( + tableId: String, + cachedPartitionFiles: TablePartitionStats): Unit = { + // The tablePartitionStatsCache is insertion order preserved hash table. Thus, + // removing and adding back the entry make this to be most recently used entry. + // If cachedPartitionFiles's size is empty, no need to add it back to LRU. + tablePartitionStatsCache.remove(tableId) + numUsedPartitions -= cachedPartitionFiles.size + if (cachedPartitionFiles.nonEmpty) { + // Evict the least recently used partitions' statistics from table if necessary + val numExceededPartitions = cachedPartitionFiles.size - maxNumTablePartitions + if (numExceededPartitions > 0) { + val newPartitionStats = cachedPartitionFiles.drop(numExceededPartitions) + tablePartitionStatsCache.put(tableId, newPartitionStats) + numUsedPartitions += newPartitionStats.size + } else { + tablePartitionStatsCache.put(tableId, cachedPartitionFiles) + numUsedPartitions += cachedPartitionFiles.size + } + } + } + + /** + * Evicts the Lru tables from 'tablePartitionStatsCache' until the total number of partitions + * is less than maxNumPartitions. + */ + private def evictLruTablesIfNecessary(): Unit = { + // Keep removing the least recently used table until the used partition is lower than + // threshold. 
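+    // Illustrative example (hypothetical numbers): with maxNumPartitions = 100, a cache holding
+    // table A (60 partitions, least recently used) and table B (50 partitions) is over the limit,
+    // so all of A's entries are evicted in one iteration, leaving 50 used partitions.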
+ while (numUsedPartitions > maxNumPartitions && tablePartitionStatsCache.nonEmpty) { + // Pick the least recently accessed table and remove it. + val (lruTable, tablePartitionStat) = tablePartitionStatsCache.head + numUsedPartitions -= tablePartitionStat.size + tablePartitionStatsCache.remove(lruTable) + } + } + + /** Update the file count of `PartitionFilesMap` according to the hash value of `partition`. */ + private def updatePartitionFileCounter( + partitionFileCounter: PartitionFilesMap, + partition: PartitionKey, + addSub: Int): Unit = { + partitionFileCounter.get(partition.##) match { + case Some(numFiles) => + partitionFileCounter.update(partition.##, numFiles + addSub) + case None => + partitionFileCounter.put(partition.##, addSub) + } + } + + /** Get the maximum number of files among all partitions inside table `tableId`. */ + def maxNumFilesInTable(tableId: String): Long = { + synchronized { + tablePartitionStatsCache.get(tableId) match { + case Some(partitionFileCounter) => + if (partitionFileCounter.isEmpty) { + 0 + } else { + partitionFileCounter.map(_._2.numFiles).max + } + case None => 0 + } + } + } + + /** + * @return Filter partitions from targetPartitions that have not been auto-compacted or + * that have enough small files. + */ + def filterPartitionsWithSmallFiles(tableId: String, targetPartitions: Set[PartitionKey], + minNumFiles: Long): Set[PartitionKey] = synchronized { + tablePartitionStatsCache.get(tableId).map { tablePartitionStates => + targetPartitions.filter { partitionKey => + tablePartitionStates.get(partitionKey.##).exists { partitionState => + partitionState.hasSufficientSmallFilesOrHasNotBeenCompacted(minNumFiles) + } + } + }.getOrElse(Set.empty) + } + + def markPartitionsAsCompacted(tableId: String, compactedPartitions: Set[PartitionKey]) + : Unit = synchronized { + tablePartitionStatsCache.get(tableId).foreach { tablePartitionStats => + compactedPartitions + .foreach(partitionKey => tablePartitionStats.get(partitionKey.##) + .foreach(_.wasAutoCompacted = true)) + } + } + + /** + * Collect the number of files, which are less than minFileSize, added to or removed from each + * partition from `actions`. + */ + def collectPartitionStats( + collector: AutoCompactPartitionStatsCollector, + tableId: String, + actions: Iterator[Action]): Unit = { + val acts = actions.toVector + acts.foreach { + case addFile: AddFile => collector.collectPartitionStatsForAdd(addFile) + case removeFile: RemoveFile => collector.collectPartitionStatsForRemove(removeFile) + case _ => // do nothing + } + collector.finalizeStats(tableId) + } + + /** This is test only code to reset the state of table partition statistics. */ + private[delta] def resetTestOnly(newHashSpace: Int, newMaxNumPartitions: Int): Unit = { + synchronized { + tablePartitionStatsCache.clear() + maxNumTablePartitions = newHashSpace + maxNumPartitions = newMaxNumPartitions + numUsedPartitions = 0 + AutoCompactPartitionReserve.resetTestOnly() + } + } + + /** + * This is test only code to reset all partition statistic information and keep current + * configuration. + */ + private[delta] def resetTestOnly(): Unit = resetTestOnly(maxNumTablePartitions, maxNumPartitions) +} + +object AutoCompactPartitionStats { + private var _instance: AutoCompactPartitionStats = null + + /** The thread safe constructor of singleton. 
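+   *
+   * A minimal usage sketch (illustrative; assumes an active `spark` session plus a `tableId`
+   * string and an `actions` iterator in scope, and a hypothetical 128 MB size threshold):
+   * {{{
+   *   val stats = AutoCompactPartitionStats.instance(spark)
+   *   val collector = stats.createStatsCollector(
+   *     minFileSize = 128L * 1024 * 1024,
+   *     errorReporter = (_: Throwable) => ())
+   *   stats.collectPartitionStats(collector, tableId, actions)
+   * }}}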
*/ + def instance(spark: SparkSession): AutoCompactPartitionStats = { + synchronized { + if (_instance == null) { + val config = spark.conf + val hashSpaceSize = config.get(DeltaSQLConf.DELTA_AUTO_COMPACT_MAX_TABLE_PARTITION_STATS) + val maxNumPartitions = config.get(DeltaSQLConf.DELTA_AUTO_COMPACT_PARTITION_STATS_SIZE) + _instance = new AutoCompactPartitionStats( + hashSpaceSize, maxNumPartitions + ) + } + } + _instance + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/DataSkippingPredicateBuilder.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/DataSkippingPredicateBuilder.scala new file mode 100644 index 00000000000..b28ba77a7c2 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/DataSkippingPredicateBuilder.scala @@ -0,0 +1,110 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.stats + +import org.apache.spark.sql.delta.stats.DeltaStatistics.{MAX, MIN} + +import org.apache.spark.sql.Column + +/** + * A trait that defines interfaces for a data skipping predicate builder. + * + * Note that 'IsNull', 'IsNotNull' and 'StartsWith' are handled at a column (not expression) level + * within [[DataSkippingReaderBase.DataFiltersBuilder.constructDataFilters]]. + * + * Note that the 'value' passed in for each of the interface should be [[SkippingEligibleLiteral]]. + */ +private [sql] trait DataSkippingPredicateBuilder { + /** The predicate should match any file which contains the requested point. */ + def equalTo(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] + + /** The predicate should match any file which contains anything other than the rejected point. */ + def notEqualTo(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] + + /** + * The predicate should match any file which contains values less than the requested upper bound. + */ + def lessThan(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] + + /** + * The predicate should match any file which contains values less than or equal to the requested + * upper bound. + */ + def lessThanOrEqual(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] + + /** + * The predicate should match any file which contains values larger than the requested lower + * bound. + */ + def greaterThan(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] + + /** + * The predicate should match any file which contains values larger than or equal to the requested + * lower bound. + */ + def greaterThanOrEqual(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] +} + +/** + * A collection of supported data skipping predicate builders. 
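+ *
+ * For example (illustrative), for the predicate `a = 10` the column builder below produces a
+ * file-keeping predicate equivalent to `minValues.a <= 10 AND 10 <= maxValues.a`: a file is kept
+ * whenever its min/max range could contain the value 10.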
+ */ +object DataSkippingPredicateBuilder { + /** Predicate builder for skipping eligible columns. */ + case object ColumnBuilder extends ColumnPredicateBuilder +} + +/** + * Predicate builder for skipping eligible columns. + */ +private [stats] class ColumnPredicateBuilder extends DataSkippingPredicateBuilder { + def equalTo(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] = { + statsProvider.getPredicateWithStatTypes(colPath, MIN, MAX) { (min, max) => + min <= value && value <= max + } + } + + def notEqualTo(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] = { + statsProvider.getPredicateWithStatTypes(colPath, MIN, MAX) { (min, max) => + min < value || value < max + } + } + + def lessThan(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] = + statsProvider.getPredicateWithStatType(colPath, MIN)(_ < value) + + def lessThanOrEqual(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] = + statsProvider.getPredicateWithStatType(colPath, MIN)(_ <= value) + + def greaterThan(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] = + statsProvider.getPredicateWithStatType(colPath, MAX)(_ > value) + + def greaterThanOrEqual(statsProvider: StatsProvider, colPath: Seq[String], value: Column) + : Option[DataSkippingPredicate] = + statsProvider.getPredicateWithStatType(colPath, MAX)(_ >= value) +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/DataSkippingReader.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/DataSkippingReader.scala new file mode 100644 index 00000000000..610224ac64d --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/DataSkippingReader.scala @@ -0,0 +1,1110 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.stats + +// scalastyle:off import.ordering.noEmptyLine +import java.io.Closeable + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.delta.{DeltaColumnMapping, DeltaLog, DeltaTableUtils} +import org.apache.spark.sql.delta.actions.{AddFile, Metadata} +import org.apache.spark.sql.delta.implicits._ +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.DeltaDataSkippingType.DeltaDataSkippingType +import org.apache.spark.sql.delta.stats.DeltaStatistics._ +import org.apache.spark.sql.delta.util.StateCache +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{DataFrame, _} +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} +import org.apache.spark.sql.catalyst.util.TypeUtils +import org.apache.spark.sql.execution.InSubqueryExec +import org.apache.spark.sql.expressions.SparkUserDefinedFunction +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{AtomicType, BooleanType, CalendarIntervalType, DataType, DateType, LongType, NumericType, StringType, StructField, StructType, TimestampNTZType, TimestampType} +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} + +/** + * Used to hold the list of files and scan stats after pruning files using the limit. + */ +case class ScanAfterLimit( + files: Seq[AddFile], + byteSize: Option[Long], + numPhysicalRecords: Option[Long], + numLogicalRecords: Option[Long]) + +/** + * Used in deduplicateAndFilterRemovedLocally/getFilesAndNumRecords iterator for grouping + * physical and logical number of records. + * + * @param numPhysicalRecords The number of records physically present in the file. + * @param numLogicalRecords The physical number of records minus the Deletion Vector cardinality. + */ +case class NumRecords(numPhysicalRecords: java.lang.Long, numLogicalRecords: java.lang.Long) + +/** + * Represents a stats column (MIN, MAX, etc) for a given (nested) user table column name. Used to + * keep track of which stats columns a data skipping query depends on. + * + * The `statType` is any value accepted by `getStatsColumnOpt()` (see object `DeltaStatistics`); + * `pathToColumn` is the nested name of the user column whose stats are to be accessed. + */ +private [stats] case class StatsColumn( + statType: String, + pathToColumn: Seq[String] = Nil) + +/** + * A data skipping predicate, which includes the expression itself, plus the set of stats columns + * that expression depends on. The latter is required to correctly handle missing stats, which would + * make the predicate unreliable; for details, see `DataSkippingReader.verifyStatsForFilter`. + * + * NOTE: It would be more accurate to call these "file keeping" predicates, because they specify the + * set of files a query must examine, not the set of rows a query can safely skip. + */ +private [sql] case class DataSkippingPredicate( + expr: Column, + referencedStats: Set[StatsColumn] +) + +/** + * Overloads the constructor for `DataSkippingPredicate`, allowing callers to pass referenced stats + * as individual arguments, rather than wrapped up as a Set. 
+ * + * For example, instead of this: + * + * DataSkippingPredicate(pred, Set(stat1, stat2)) + * + * We can just do: + * + * DataSkippingPredicate(pred, stat1, stat2) + */ +private [sql] object DataSkippingPredicate { + def apply(filters: Column, referencedStats: StatsColumn*): DataSkippingPredicate = { + DataSkippingPredicate(filters, referencedStats.toSet) + } +} + +/** + * An extractor that matches on access of a skipping-eligible column. We only collect stats for leaf + * columns, so internal columns of nested types are ineligible for skipping. + * + * NOTE: This check is sufficient for safe use of NULL_COUNT stats, but safe use of MIN and MAX + * stats requires additional restrictions on column data type (see SkippingEligibleLiteral). + * + * @return The path to the column and the column's data type if it exists and is eligible. + * Otherwise, return None. + */ +object SkippingEligibleColumn { + def unapply(arg: Expression): Option[(Seq[String], DataType)] = { + // Only atomic types are eligible for skipping, and args should always be resolved by now. + val eligible = arg.resolved && arg.dataType.isInstanceOf[AtomicType] + if (eligible) searchChain(arg).map(_ -> arg.dataType) else None + } + + private def searchChain(arg: Expression): Option[Seq[String]] = arg match { + case a: Attribute => Some(a.name :: Nil) + case GetStructField(child, _, Some(name)) => + searchChain(child).map(name +: _) + case g @ GetStructField(child, ord, None) if g.resolved => + searchChain(child).map(g.childSchema(ord).name +: _) + case _ => + None + } +} + +/** + * An extractor that matches on access of a skipping-eligible Literal. Delta tables track min/max + * stats for a limited set of data types, and only Literals of those types are skipping-eligible. + * + * @return The Literal, if it is eligible. Otherwise, return None. + */ +object SkippingEligibleLiteral { + def unapply(arg: Literal): Option[Column] = { + if (SkippingEligibleDataType(arg.dataType)) Some(new Column(arg)) else None + } +} + +object SkippingEligibleDataType { + // Call this directly, e.g. `SkippingEligibleDataType(dataType)` + def apply(dataType: DataType): Boolean = dataType match { + case _: NumericType | DateType | TimestampType | TimestampNTZType | StringType => true + case _ => false + } + + // Use these in `match` statements + def unapply(dataType: DataType): Option[DataType] = { + if (SkippingEligibleDataType(dataType)) Some(dataType) else None + } + + def unapply(f: StructField): Option[DataType] = unapply(f.dataType) +} + +private[delta] object DataSkippingReader { + + /** Default number of cols for which we should collect stats */ + val DATA_SKIPPING_NUM_INDEXED_COLS_DEFAULT_VALUE = 32 + + private[this] def col(e: Expression): Column = new Column(e) + def fold(e: Expression): Column = col(new Literal(e.eval(), e.dataType)) + + // Literals often used in the data skipping reader expressions. 
+ val trueLiteral: Column = col(TrueLiteral) + val falseLiteral: Column = col(FalseLiteral) + val nullStringLiteral: Column = col(new Literal(null, StringType)) + val nullBooleanLiteral: Column = col(new Literal(null, BooleanType)) + val oneMillisecondLiteralExpr: Literal = { + val oneMillisecond = new CalendarInterval(0, 0, 1000 /* micros */) + new Literal(oneMillisecond, CalendarIntervalType) + } + + val sizeCollectorInputEncoders: Seq[Option[ExpressionEncoder[_]]] = Seq( + Option(ExpressionEncoder[Boolean]()), + Option(ExpressionEncoder[java.lang.Long]()), + Option(ExpressionEncoder[java.lang.Long]()), + Option(ExpressionEncoder[java.lang.Long]())) +} + +/** + * Adds the ability to use statistics to filter the set of files based on predicates + * to a [[org.apache.spark.sql.delta.Snapshot]] of a given Delta table. + */ +trait DataSkippingReaderBase + extends DeltaScanGenerator + with StatisticsCollection + with ReadsMetadataFields + with StateCache + with DeltaLogging { + + import DataSkippingReader._ + + def allFiles: Dataset[AddFile] + def path: Path + def version: Long + def metadata: Metadata + private[delta] def sizeInBytesIfKnown: Option[Long] + def deltaLog: DeltaLog + def schema: StructType + private[delta] def numOfFilesIfKnown: Option[Long] + def redactedPath: String + + private def useStats = spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_STATS_SKIPPING) + + /** Returns a DataFrame expression to obtain a list of files with parsed statistics. */ + private def withStatsInternal0: DataFrame = { + allFiles.withColumn("stats", from_json(col("stats"), statsSchema)) + } + + private lazy val withStatsCache = + cacheDS(withStatsInternal0, s"Delta Table State with Stats #$version - $redactedPath") + + protected def withStatsInternal: DataFrame = withStatsCache.getDS + + /** All files with the statistics column dropped completely. */ + def withNoStats: DataFrame = allFiles.drop("stats") + + /** + * Returns a parsed and cached representation of files with statistics. + * + * + * @return [[DataFrame]] + */ + final def withStats: DataFrame = { + withStatsInternal + } + + /** + * Constructs a [[DataSkippingPredicate]] for isNotNull predicates. + */ + protected def constructNotNullFilter( + statsProvider: StatsProvider, + pathToColumn: Seq[String]): Option[DataSkippingPredicate] = { + val nullCountCol = StatsColumn(NULL_COUNT, pathToColumn) + val numRecordsCol = StatsColumn(NUM_RECORDS) + statsProvider.getPredicateWithStatsColumns(nullCountCol, numRecordsCol) { + (nullCount, numRecords) => nullCount < numRecords + } + } + + def withStatsDeduplicated: DataFrame = withStats + + /** + * Builds the data filters for data skipping. + */ + class DataFiltersBuilder( + protected val spark: SparkSession, + protected val dataSkippingType: DeltaDataSkippingType) + { + protected val statsProvider: StatsProvider = new StatsProvider(getStatsColumnOpt) + + // Main function for building data filters. + def apply(dataFilter: Expression): Option[DataSkippingPredicate] = + constructDataFilters(dataFilter) + + // Helper method for expression types that represent an IN-list of literal values. + // + // + // For excessively long IN-lists, we just test whether the file's min/max range overlaps the + // range spanned by the list's smallest and largest elements. 
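+    // For example (illustrative): for `x IN (7, 2, 9)` the emitted skipping predicate is
+    // equivalent to `minValues.x <= 9 AND maxValues.x >= 2`, i.e. an overlap test against the
+    // range [2, 9] spanned by the list.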
+ private def constructLiteralInListDataFilters(a: Expression, possiblyNullValues: Seq[Any]): + Option[DataSkippingPredicate] = { + // The Ordering we use for sorting cannot handle null values, and these can anyway + // be safely ignored because they will never cause an IN-list predicate to return TRUE. + val values = possiblyNullValues.filter(_ != null) + if (values.isEmpty) { + // Handle the trivial empty case even for otherwise ineligible types. + // NOTE: SQL forbids empty in-list, but InSubqueryExec could have an empty subquery result + // or IN-list may contain only NULLs. + return Some(DataSkippingPredicate(falseLiteral)) + } + + val (pathToColumn, dt, builder) = SkippingEligibleExpression.unapply(a).getOrElse { + // The expression is not eligible for skipping, and we can stop constructing data filters + // for the expression by simply returning None. + return None + } + + lazy val ordering = TypeUtils.getInterpretedOrdering(dt) + if (!SkippingEligibleDataType(dt)) { + // Don't waste time building expressions for incompatible types + None + } + else { + // Emit filters for an imprecise range test that covers the entire entire list. + val min = Literal(values.min(ordering), dt) + val max = Literal(values.max(ordering), dt) + constructDataFilters(And(GreaterThanOrEqual(max, a), LessThanOrEqual(min, a))) + } + } + + /** + * Returns a file skipping predicate expression, derived from the user query, which uses column + * statistics to prune away files that provably contain no rows the query cares about. + * + * Specifically, the filter extraction code must obey the following rules: + * + * 1. Given a query predicate `e`, `constructDataFilters(e)` must return TRUE for a file unless + * we can prove `e` will not return TRUE for any row the file might contain. For example, + * given `a = 3` and min/max stat values [0, 100], this skipping predicate is safe: + * + * AND(minValues.a <= 3, maxValues.a >= 3) + * + * Because that condition must be true for any file that might possibly contain `a = 3`; the + * skipping predicate could return FALSE only if the max is too low, or the min too high; it + * could return NULL only if a is NULL in every row of the file. In both latter cases, it is + * safe to skip the file because `a = 3` can never evaluate to TRUE. + * + * 2. It is unsafe to apply skipping to operators that can evaluate to NULL or produce an error + * for non-NULL inputs. For example, consider this query predicate involving integer + * addition: + * + * a + 1 = 3 + * + * It might be tempting to apply the standard equality skipping predicate: + * + * AND(minValues.a + 1 <= 3, 3 <= maxValues.a + 1) + * + * However, the skipping predicate would be unsound, because the addition operator could + * trigger integer overflow (e.g. minValues.a = 0 and maxValues.a = INT_MAX), even though the + * file could very well contain rows satisfying a + 1 = 3. + * + * 3. Predicates involving NOT are ineligible for skipping, because + * `Not(constructDataFilters(e))` is seldom equivalent to `constructDataFilters(Not(e))`. 
+ * For example, consider the query predicate: + * + * NOT(a = 1) + * + * A simple inversion of the data skipping predicate would be: + * + * NOT(AND(minValues.a <= 1, maxValues.a >= 1)) + * ==> OR(NOT(minValues.a <= 1), NOT(maxValues.a >= 1)) + * ==> OR(minValues.a > 1, maxValues.a < 1) + * + * By contrast, if we first combine the NOT with = to obtain + * + * a != 1 + * + * We get a different skipping predicate: + * + * NOT(AND(minValues.a = 1, maxValues.a = 1)) + * ==> OR(NOT(minValues.a = 1), NOT(maxValues.a = 1)) + * ==> OR(minValues.a != 1, maxValues.a != 1) + * + * A truth table confirms that the first (naively inverted) skipping predicate is incorrect: + * + * minValues.a + * | maxValues.a + * | | OR(minValues.a > 1, maxValues.a < 1) + * | | | OR(minValues.a != 1, maxValues.a != 1) + * 0 0 T T + * 0 1 F T !! first predicate wrongly skipped a = 0 + * 1 1 F F + * + * Fortunately, we may be able to eliminate NOT from some (branches of some) predicates: + * + * a. It is safe to push the NOT into the children of AND and OR using de Morgan's Law, e.g. + * + * NOT(AND(a, b)) ==> OR(NOT(a), NOT(B)). + * + * b. It is safe to fold NOT into other operators, when a negated form of the operator + * exists: + * + * NOT(NOT(x)) ==> x + * NOT(a == b) ==> a != b + * NOT(a > b) ==> a <= b + * + * NOTE: The skipping predicate must handle the case where min and max stats for a column are + * both NULL -- which indicates that all values in the file are NULL. Fortunately, most of the + * operators we support data skipping for are NULL intolerant, and thus trivially satisfy this + * requirement because they never return TRUE for NULL inputs. The only NULL tolerant operator + * we support -- IS [NOT] NULL -- is specifically NULL aware. + * + * NOTE: The skipping predicate does *NOT* need to worry about missing stats columns (which also + * manifest as NULL). That case is handled separately by `verifyStatsForFilter` (which disables + * skipping for any file that lacks the needed stats columns). + */ + private def constructDataFilters(dataFilter: Expression): + Option[DataSkippingPredicate] = dataFilter match { + // Push skipping predicate generation through the AND: + // + // constructDataFilters(AND(a, b)) + // ==> AND(constructDataFilters(a), constructDataFilters(b)) + // + // To see why this transformation is safe, consider that `constructDataFilters(a)` must + // evaluate to TRUE *UNLESS* we can prove that `a` would not evaluate to TRUE for any row the + // file might contain. Thus, if the rewritten form of the skipping predicate does not evaluate + // to TRUE, at least one of the skipping predicates must not have evaluated to TRUE, which in + // turn means we were able to prove that `a` and/or `b` will not evaluate to TRUE for any row + // of the file. If that is the case, then `AND(a, b)` also cannot evaluate to TRUE for any row + // of the file, which proves we have a valid data skipping predicate. + // + // NOTE: AND is special -- we can safely skip the file if one leg does not evaluate to TRUE, + // even if we cannot construct a skipping filter for the other leg. + case And(e1, e2) => + val e1Filter = constructDataFilters(e1) + val e2Filter = constructDataFilters(e2) + if (e1Filter.isDefined && e2Filter.isDefined) { + Some(DataSkippingPredicate( + e1Filter.get.expr && e2Filter.get.expr, + e1Filter.get.referencedStats ++ e2Filter.get.referencedStats)) + } else if (e1Filter.isDefined) { + e1Filter + } else { + e2Filter // possibly None + } + + // Use deMorgan's law to push the NOT past the AND. 
This is safe even with SQL tri-valued + // logic (see below), and is desirable because we cannot generally push predicate filters + // through NOT, but we *CAN* push predicate filters through AND and OR: + // + // constructDataFilters(NOT(AND(a, b))) + // ==> constructDataFilters(OR(NOT(a), NOT(b))) + // ==> OR(constructDataFilters(NOT(a)), constructDataFilters(NOT(b))) + // + // Assuming we can push the resulting NOT operations all the way down to some leaf operation + // it can fold into, the rewrite allows us to create a data skipping filter from the + // expression. + // + // a b AND(a, b) + // | | | NOT(AND(a, b)) + // | | | | OR(NOT(a), NOT(b)) + // T T T F F + // T F F T T + // T N N N N + // F F F T T + // F N F T T + // N N N N N + case Not(And(e1, e2)) => + constructDataFilters(Or(Not(e1), Not(e2))) + + // Push skipping predicate generation through OR (similar to AND case). + // + // constructDataFilters(OR(a, b)) + // ==> OR(constructDataFilters(a), constructDataFilters(b)) + // + // Similar to AND case, if the rewritten predicate does not evaluate to TRUE, then it means + // that neither `constructDataFilters(a)` nor `constructDataFilters(b)` evaluated to TRUE, + // which in turn means that neither `a` nor `b` could evaluate to TRUE for any row the file + // might contain, which proves we have a valid data skipping predicate. + // + // Unlike AND, a single leg of an OR expression provides no filtering power -- we can only + // reject a file if both legs evaluate to false. + case Or(e1, e2) => + val e1Filter = constructDataFilters(e1) + val e2Filter = constructDataFilters(e2) + if (e1Filter.isDefined && e2Filter.isDefined) { + Some(DataSkippingPredicate( + e1Filter.get.expr || e2Filter.get.expr, + e1Filter.get.referencedStats ++ e2Filter.get.referencedStats)) + } else { + None + } + + // Similar to AND, we can (and want to) push the NOT past the OR using deMorgan's law. + case Not(Or(e1, e2)) => + constructDataFilters(And(Not(e1), Not(e2))) + + // Match any file whose null count is larger than zero. + // Note DVs might result in a redundant read of a file. + // However, they cannot lead to a correctness issue. + case IsNull(SkippingEligibleColumn(a, _)) => + statsProvider.getPredicateWithStatType(a, NULL_COUNT) { nullCount => + nullCount > Literal(0L) + } + case Not(IsNull(e)) => + constructDataFilters(IsNotNull(e)) + + // Match any file whose null count is less than the row count. + case IsNotNull(SkippingEligibleColumn(a, _)) => + constructNotNullFilter(statsProvider, a) + + case Not(IsNotNull(e)) => + constructDataFilters(IsNull(e)) + + // Match any file whose min/max range contains the requested point. + case EqualTo(SkippingEligibleExpression(c, _, builder), SkippingEligibleLiteral(v)) => + builder.equalTo(statsProvider, c, v) + case EqualTo(v: Literal, a) => + constructDataFilters(EqualTo(a, v)) + + // Match any file whose min/max range contains anything other than the rejected point. + case Not(EqualTo(SkippingEligibleExpression(c, _, builder), SkippingEligibleLiteral(v))) => + builder.notEqualTo(statsProvider, c, v) + case Not(EqualTo(v: Literal, a)) => + constructDataFilters(Not(EqualTo(a, v))) + + // Rewrite `EqualNullSafe(a, NotNullLiteral)` as + // `And(IsNotNull(a), EqualTo(a, NotNullLiteral))` and rewrite `EqualNullSafe(a, null)` as + // `IsNull(a)` to let the existing logic handle it. 
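+      // For example (illustrative): `a <=> 5` is rewritten to `a IS NOT NULL AND a = 5`, while
+      // `a <=> NULL` is rewritten to `a IS NULL`, so the cases above can handle both forms.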
+ case EqualNullSafe(a, v: Literal) => + val rewrittenExpr = if (v.value != null) And(IsNotNull(a), EqualTo(a, v)) else IsNull(a) + constructDataFilters(rewrittenExpr) + case EqualNullSafe(v: Literal, a) => + constructDataFilters(EqualNullSafe(a, v)) + case Not(EqualNullSafe(a, v: Literal)) => + val rewrittenExpr = if (v.value != null) And(IsNotNull(a), EqualTo(a, v)) else IsNull(a) + constructDataFilters(Not(rewrittenExpr)) + case Not(EqualNullSafe(v: Literal, a)) => + constructDataFilters(Not(EqualNullSafe(a, v))) + + // Match any file whose min is less than the requested upper bound. + case LessThan(SkippingEligibleExpression(c, _, builder), SkippingEligibleLiteral(v)) => + builder.lessThan(statsProvider, c, v) + case LessThan(v: Literal, a) => + constructDataFilters(GreaterThan(a, v)) + case Not(LessThan(a, b)) => + constructDataFilters(GreaterThanOrEqual(a, b)) + + // Match any file whose min is less than or equal to the requested upper bound + case LessThanOrEqual(SkippingEligibleExpression(c, _, builder), SkippingEligibleLiteral(v)) => + builder.lessThanOrEqual(statsProvider, c, v) + case LessThanOrEqual(v: Literal, a) => + constructDataFilters(GreaterThanOrEqual(a, v)) + case Not(LessThanOrEqual(a, b)) => + constructDataFilters(GreaterThan(a, b)) + + // Match any file whose max is larger than the requested lower bound. + case GreaterThan(SkippingEligibleExpression(c, _, builder), SkippingEligibleLiteral(v)) => + builder.greaterThan(statsProvider, c, v) + case GreaterThan(v: Literal, a) => + constructDataFilters(LessThan(a, v)) + case Not(GreaterThan(a, b)) => + constructDataFilters(LessThanOrEqual(a, b)) + + // Match any file whose max is larger than or equal to the requested lower bound. + case GreaterThanOrEqual( + SkippingEligibleExpression(c, _, builder), SkippingEligibleLiteral(v)) => + builder.greaterThanOrEqual(statsProvider, c, v) + case GreaterThanOrEqual(v: Literal, a) => + constructDataFilters(LessThanOrEqual(a, v)) + case Not(GreaterThanOrEqual(a, b)) => + constructDataFilters(LessThan(a, b)) + + // Similar to an equality test, except comparing against a prefix of the min/max stats, and + // neither commutative nor invertible. + case StartsWith(SkippingEligibleColumn(a, _), v @ Literal(s: UTF8String, StringType)) => + statsProvider.getPredicateWithStatTypes(a, MIN, MAX) { (min, max) => + val sLen = s.numChars() + substring(min, 0, sLen) <= v && substring(max, 0, sLen) >= v + } + + // We can only handle-IN lists whose values can all be statically evaluated to literals. + case in @ In(a, values) if in.inSetConvertible => + constructLiteralInListDataFilters(a, values.map(_.asInstanceOf[Literal].value)) + + // The optimizer automatically converts all but the shortest eligible IN-lists to InSet. + case InSet(a, values) => + constructLiteralInListDataFilters(a, values.toSeq) + + // Treat IN(... subquery ...) as a normal IN-list, since the subquery already ran before now. + case in: InSubqueryExec => + // At this point the subquery has been materialized, but values() can return None if + // the subquery was bypassed at runtime. + in.values().flatMap(v => constructLiteralInListDataFilters(in.child, v.toSeq)) + + + // Remove redundant pairs of NOT + case Not(Not(e)) => + constructDataFilters(e) + + // WARNING: NOT is dangerous, because `Not(constructDataFilters(e))` is seldom equivalent to + // `constructDataFilters(Not(e))`. We must special-case every `Not(e)` we wish to support. + case Not(_) => None + + // Unknown expression type... can't use it for data skipping. 
+ case _ => None
+ }
+
+ /**
+ * An extractor that matches expressions that are eligible for data skipping predicates.
+ *
+ * @return A tuple of 1) column name referenced in the expression, 2) data type for the
+ * expression, 3) [[DataSkippingPredicateBuilder]] that builds the data skipping
+ * predicate for the expression, if the given expression is eligible.
+ * Otherwise, return None.
+ */
+ object SkippingEligibleExpression {
+ def unapply(arg: Expression)
+ : Option[(Seq[String], DataType, DataSkippingPredicateBuilder)] = arg match {
+ case SkippingEligibleColumn(c, dt) =>
+ Some((c, dt, DataSkippingPredicateBuilder.ColumnBuilder))
+ case _ => None
+ }
+ }
+ }
+
+ /**
+ * Returns an expression to access the given statistics for a specific column, or None if that
+ * stats column does not exist.
+ *
+ * @param statType One of the fields declared by object `DeltaStatistics`
+ * @param pathToColumn The components of the nested column name to get stats for.
+ */
+ final protected def getStatsColumnOpt(statType: String, pathToColumn: Seq[String] = Nil)
+ : Option[Column] = {
+ // If the requested stats type doesn't even exist, just return None right away. This can
+ // legitimately happen if we have no stats at all, or if column stats are disabled (in which
+ // case only the NUM_RECORDS stat type is available).
+ if (!statsSchema.exists(_.name == statType)) {
+ return None
+ }
+
+ // Given a set of path segments in reverse order, e.g. column a.b.c is Seq("c", "b", "a"), we
+ // use a foldRight operation to build up the requested stats column, by successively applying
+ // each new path step against both the table schema and the stats schema. We can't use the stats
+ // schema alone, because the caller-provided path segments use logical column names, while the
+ // stats schema requires physical column names. Instead, we must step into the table schema to
+ // extract that field's physical column name, and use the result to step into the stats schema.
+ //
+ // We use a three-tuple to track state. The traversal starts with the base column for the
+ // requested stat type, the stats schema for the requested stat type, and the table schema. Each
+ // step of the traversal emits the updated column, along with the stats schema and table schema
+ // elements corresponding to that column.
+ val initialState: Option[(Column, DataType, DataType)] =
+ Some((getBaseStatsColumn.getField(statType), statsSchema(statType).dataType, metadata.schema))
+ pathToColumn
+ .foldRight(initialState) {
+ // NOTE: Only match on StructType, because we cannot traverse through other DataTypes.
+ case (fieldName, Some((statCol, statsSchema: StructType, tableSchema: StructType))) =>
+ // First try to step into the table schema
+ val tableFieldOpt = tableSchema.findNestedFieldIgnoreCase(Seq(fieldName))
+
+ // If that worked, try to step into the stats schema, using its physical name
+ val statsFieldOpt = tableFieldOpt
+ .map(DeltaColumnMapping.getPhysicalName)
+ .filter(physicalFieldName => statsSchema.exists(_.name == physicalFieldName))
+ .map(statsSchema(_))
+
+ // If all that succeeds, return the new stats column and the corresponding data types.
+ statsFieldOpt.map(statsField =>
+ (statCol.getField(statsField.name), statsField.dataType, tableFieldOpt.get.dataType))
+
+ // Propagate failure if the above match failed (or if already None)
+ case _ => None
+ }
+ // Filter out non-leaf columns -- they lack stats so skipping predicates can't use them.
+ .filterNot(_._2.isInstanceOf[StructType]) + .map { + case (statCol, TimestampType, _) if statType == MAX => + // SC-22824: For timestamps, JSON serialization will truncate to milliseconds. This means + // that we must adjust 1 millisecond upwards for max stats, or we will incorrectly skip + // records that differ only in microsecond precision. (For example, a file containing only + // 01:02:03.456789 will be written with min == max == 01:02:03.456, so we must consider it + // to contain the range from 01:02:03.456 to 01:02:03.457.) + // + // There is a longer term task SC-22825 to fix the serialization problem that caused this. + // But we need the adjustment in any case to correctly read stats written by old versions. + new Column(Cast(TimeAdd(statCol.expr, oneMillisecondLiteralExpr), TimestampType)) + case (statCol, TimestampNTZType, _) if statType == MAX => + // We also apply the same adjustment of max stats that was applied to Timestamp + // for TimestampNTZ because these 2 types have the same precision in terms of time. + new Column(Cast(TimeAdd(statCol.expr, oneMillisecondLiteralExpr), TimestampNTZType)) + case (statCol, _, _) => + statCol + } + } + + /** + * Returns an expression to access the given statistics for a specific column, or a NULL + * literal expression if that column does not exist. + */ + final protected[delta] def getStatsColumnOrNullLiteral( + statType: String, + pathToColumn: Seq[String] = Nil) : Column = + getStatsColumnOpt(statType, pathToColumn).getOrElse(lit(null)) + + /** Overload for convenience working with StatsColumn helpers */ + final protected def getStatsColumnOpt(stat: StatsColumn): Option[Column] = + getStatsColumnOpt(stat.statType, stat.pathToColumn) + + /** Overload for convenience working with StatsColumn helpers */ + final protected[delta] def getStatsColumnOrNullLiteral(stat: StatsColumn): Column = + getStatsColumnOrNullLiteral(stat.statType, stat.pathToColumn) + + /** + * Returns an expression that can be used to check that the required statistics are present for a + * given file. If any required statistics are missing we must include the corresponding file. + * + * NOTE: We intentionally choose to disable skipping for any file if any required stat is missing, + * because doing it that way allows us to check each stat only once (rather than once per + * use). Checking per-use would anyway only help for tables where the number of indexed columns + * has changed over time, producing add.stats_parsed records with differing schemas. That should + * be a rare enough case to not worry about optimizing for, given that the fix requires more + * complex skipping predicates that would penalize the common case. + */ + protected def verifyStatsForFilter(referencedStats: Set[StatsColumn]): Column = { + recordFrameProfile("Delta", "DataSkippingReader.verifyStatsForFilter") { + // The NULL checks for MIN and MAX stats depend on NULL_COUNT and NUM_RECORDS. Derive those + // implied dependencies first, so the main pass can treat them like any other column. + // + // NOTE: We must include explicit NULL checks on all stats columns we access here, because our + // caller will negate the expression we return. In case a stats column is NULL, `NOT(expr)` + // must return `TRUE`, and without these NULL checks it would instead return + // `NOT(NULL)` => `NULL`. 
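As a side note on the MAX-timestamp adjustment above: the following standalone sketch (plain java.time, not the Spark Cast/TimeAdd expressions) shows why a millisecond-truncated max stat must be widened by one millisecond before it can serve as an upper bound for microsecond-precision data.

import java.time.Instant
import java.time.temporal.ChronoUnit

object MaxTimestampTruncationSketch extends App {
  // A file contains a single microsecond-precision timestamp.
  val actual = Instant.parse("2024-01-01T01:02:03.456789Z")

  // JSON stats serialization truncates to millisecond precision.
  val maxStatAsWritten = actual.truncatedTo(ChronoUnit.MILLIS) // 01:02:03.456

  // Using the truncated stat directly as an upper bound would wrongly exclude the row.
  assert(actual.isAfter(maxStatAsWritten))

  // Widening the stat by one millisecond restores a safe upper bound.
  val adjustedMax = maxStatAsWritten.plusMillis(1) // 01:02:03.457
  assert(!actual.isAfter(adjustedMax))
}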
+ referencedStats.flatMap { stat => stat match { + case StatsColumn(MIN, _) | StatsColumn(MAX, _) => + Seq(stat, StatsColumn(NULL_COUNT, stat.pathToColumn), StatsColumn(NUM_RECORDS)) + case _ => + Seq(stat) + }}.map{stat => stat match { + // A usable MIN or MAX stat must be non-NULL, unless the column is provably all-NULL + // + // NOTE: We don't care about NULL/missing NULL_COUNT and NUM_RECORDS here, because the + // separate NULL checks we emit for those columns will force the overall validation + // predicate conjunction to FALSE in that case -- AND(FALSE, ) is FALSE. + case StatsColumn(MIN, _) | StatsColumn(MAX, _) => + getStatsColumnOrNullLiteral(stat).isNotNull || + (getStatsColumnOrNullLiteral(NULL_COUNT, stat.pathToColumn) === + getStatsColumnOrNullLiteral(NUM_RECORDS)) + case _ => + // Other stats, such as NULL_COUNT and NUM_RECORDS stat, merely need to be non-NULL + getStatsColumnOrNullLiteral(stat).isNotNull + }} + .reduceLeftOption(_.and(_)) + .getOrElse(trueLiteral) + } + } + + private def buildSizeCollectorFilter(): (ArrayAccumulator, Column => Column) = { + val bytesCompressed = col("size") + val rows = getStatsColumnOrNullLiteral(NUM_RECORDS) + val dvCardinality = coalesce(col("deletionVector.cardinality"), lit(0L)) + val logicalRows = (rows - dvCardinality).as("logicalRows") + + val accumulator = new ArrayAccumulator(4) + + spark.sparkContext.register(accumulator) + + // The arguments (order and datatype) must match the encoders defined in the + // `sizeCollectorInputEncoders` value. + val collector = (include: Boolean, + bytesCompressed: java.lang.Long, + logicalRows: java.lang.Long, + rows: java.lang.Long) => { + if (include) { + accumulator.add((0, bytesCompressed)) /* count bytes of AddFiles */ + accumulator.add((1, Option(rows).map(_.toLong).getOrElse(-1L))) /* count rows in AddFiles */ + accumulator.add((2, 1)) /* count number of AddFiles */ + accumulator.add((3, Option(logicalRows) + .map(_.toLong).getOrElse(-1L))) /* count logical rows in AddFiles */ + } + include + } + val collectorUdf = SparkUserDefinedFunction( + f = collector, + dataType = BooleanType, + inputEncoders = sizeCollectorInputEncoders, + deterministic = false) + + (accumulator, collectorUdf(_: Column, bytesCompressed, logicalRows, rows)) + } + + override def filesWithStatsForScan(partitionFilters: Seq[Expression]): DataFrame = { + DeltaLog.filterFileList(metadata.partitionSchema, withStats, partitionFilters) + } + + /** + * Get all the files in this table. + * + * @param keepNumRecords Also select `stats.numRecords` in the query. + * This may slow down the query as it has to parse json. + */ + protected def getAllFiles(keepNumRecords: Boolean): Seq[AddFile] = recordFrameProfile( + "Delta", "DataSkippingReader.getAllFiles") { + val ds = if (keepNumRecords) { + withStats // use withStats instead of allFiles so the `stats` column is already parsed + // keep only the numRecords field as a Json string in the stats field + .withColumn("stats", to_json(struct(col("stats.numRecords") as "numRecords"))) + } else { + allFiles.withColumn("stats", nullStringLiteral) + } + convertDataFrameToAddFiles(ds.toDF()) + } + + /** + * Given the partition filters on the data, rewrite these filters by pointing to the metadata + * columns. 
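The size-collector filter above piggybacks metric collection on predicate evaluation: the UDF records bytes, rows, and file counts for every file it lets through and returns the include flag unchanged. Below is a stripped-down sketch of that pattern, with a plain atomic array standing in for SparkUserDefinedFunction and ArrayAccumulator.

import java.util.concurrent.atomic.AtomicLongArray

object SizeCollectorSketch extends App {
  case class FileStats(bytes: Long, rows: Long, deletedRows: Long)

  // Slots: 0 = bytes, 1 = physical rows, 2 = file count, 3 = logical rows.
  val accumulator = new AtomicLongArray(4)

  // Returns the include flag unchanged, but records sizes of included files as a side effect.
  def collectingFilter(include: Boolean, f: FileStats): Boolean = {
    if (include) {
      accumulator.addAndGet(0, f.bytes)
      accumulator.addAndGet(1, f.rows)
      accumulator.addAndGet(2, 1L)
      accumulator.addAndGet(3, f.rows - f.deletedRows)
    }
    include
  }

  val files = Seq(FileStats(100, 10, 2), FileStats(200, 20, 0), FileStats(50, 5, 0))
  val kept = files.filter(f => collectingFilter(f.bytes > 60, f))

  assert(kept.size == 2)
  assert((0 until 4).map(accumulator.get) == Seq(300L, 30L, 2L, 28L))
}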
+ */ + protected def constructPartitionFilters(filters: Seq[Expression]): Column = { + recordFrameProfile("Delta", "DataSkippingReader.constructPartitionFilters") { + val rewritten = DeltaLog.rewritePartitionFilters( + metadata.partitionSchema, spark.sessionState.conf.resolver, filters) + rewritten.reduceOption(And).map { expr => new Column(expr) }.getOrElse(trueLiteral) + } + } + + /** + * Get all the files in this table given the partition filter and the corresponding size of + * the scan. + * + * @param keepNumRecords Also select `stats.numRecords` in the query. + * This may slow down the query as it has to parse json. + */ + protected def filterOnPartitions( + partitionFilters: Seq[Expression], + keepNumRecords: Boolean): (Seq[AddFile], DataSize) = recordFrameProfile( + "Delta", "DataSkippingReader.filterOnPartitions") { + val df = if (keepNumRecords) { + // use withStats instead of allFiles so the `stats` column is already parsed + val filteredFiles = + DeltaLog.filterFileList(metadata.partitionSchema, withStats, partitionFilters) + filteredFiles + // keep only the numRecords field as a Json string in the stats field + .withColumn("stats", to_json(struct(col("stats.numRecords") as "numRecords"))) + } else { + val filteredFiles = + DeltaLog.filterFileList(metadata.partitionSchema, allFiles.toDF(), partitionFilters) + filteredFiles + .withColumn("stats", nullStringLiteral) + } + val files = convertDataFrameToAddFiles(df) + val sizeInBytesByPartitionFilters = files.map(_.size).sum + files.toSeq -> DataSize(Some(sizeInBytesByPartitionFilters), None, Some(files.size)) + } + + /** + * Given the partition and data filters, leverage data skipping statistics to find the set of + * files that need to be queried. Returns a tuple of the files and optionally the size of the + * scan that's generated if there were no filters, if there were only partition filters, and + * combined effect of partition and data filters respectively. + */ + protected def getDataSkippedFiles( + partitionFilters: Column, + dataFilters: DataSkippingPredicate, + keepNumRecords: Boolean): (Seq[AddFile], Seq[DataSize]) = recordFrameProfile( + "Delta", "DataSkippingReader.getDataSkippedFiles") { + val (totalSize, totalFilter) = buildSizeCollectorFilter() + val (partitionSize, partitionFilter) = buildSizeCollectorFilter() + val (scanSize, scanFilter) = buildSizeCollectorFilter() + + // NOTE: If any stats are missing, the value of `dataFilters` is untrustworthy -- it could be + // NULL or even just plain incorrect. We rely on `verifyStatsForFilter` to be FALSE in that + // case, forcing the overall OR to evaluate as TRUE no matter what value `dataFilters` takes. 
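Here is a toy three-valued evaluation of the keep-this-file condition built below, `dataFilters.expr || !verifyStatsForFilter(...)`, under the assumption that missing stats make the data filter evaluate to NULL. It shows why the verification term must be a definite FALSE, not NULL, for files with missing stats.

object MissingStatsKeepSketch extends App {
  type TV = Option[Boolean] // Some(true) = TRUE, Some(false) = FALSE, None = NULL

  def not3(a: TV): TV = a.map(!_)
  def or3(a: TV, b: TV): TV = (a, b) match {
    case (Some(true), _) | (_, Some(true)) => Some(true)
    case (Some(false), Some(false))        => Some(false)
    case _                                 => None
  }

  // A file survives the WHERE clause only if the overall predicate is exactly TRUE.
  def keepFile(dataFilter: TV, statsVerified: TV): Boolean =
    or3(dataFilter, not3(statsVerified)).contains(true)

  // Stats present and the skipping predicate is definitively FALSE: the file is skipped.
  assert(!keepFile(dataFilter = Some(false), statsVerified = Some(true)))

  // Stats missing: the data filter is NULL, but verification is FALSE, so the file is kept.
  assert(keepFile(dataFilter = None, statsVerified = Some(false)))

  // If verification did NOT catch the missing stats (returned TRUE), the OR would be NULL
  // and the file would be skipped incorrectly -- exactly what the explicit NULL checks prevent.
  assert(!keepFile(dataFilter = None, statsVerified = Some(true)))
}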
+ val filteredFiles = withStats.where( + totalFilter(trueLiteral) && + partitionFilter(partitionFilters) && + scanFilter(dataFilters.expr || !verifyStatsForFilter(dataFilters.referencedStats)) + ) + + val statsColumn = if (keepNumRecords) { + // keep only the numRecords field as a Json string in the stats field + to_json(struct(col("stats.numRecords") as "numRecords")) + } else nullStringLiteral + + val files = + recordFrameProfile("Delta", "DataSkippingReader.getDataSkippedFiles.collectFiles") { + val df = filteredFiles.withColumn("stats", statsColumn) + convertDataFrameToAddFiles(df) + } + files.toSeq -> Seq(DataSize(totalSize), DataSize(partitionSize), DataSize(scanSize)) + } + + private def getCorrectDataSkippingType( + dataSkippingType: DeltaDataSkippingType): DeltaDataSkippingType = { + dataSkippingType + } + + /** + * Gathers files that should be included in a scan based on the given predicates. + * Statistics about the amount of data that will be read are gathered and returned. + * Note, the statistics column that is added when keepNumRecords = true should NOT + * take into account DVs. Consumers of this method might commit the file. The semantics + * of the statistics need to be consistent across all files. + */ + override def filesForScan(filters: Seq[Expression], keepNumRecords: Boolean): DeltaScan = { + val startTime = System.currentTimeMillis() + if (filters == Seq(TrueLiteral) || filters.isEmpty || schema.isEmpty) { + recordDeltaOperation(deltaLog, "delta.skipping.none") { + // When there are no filters we can just return allFiles with no extra processing + val dataSize = DataSize( + bytesCompressed = sizeInBytesIfKnown, + rows = None, + files = numOfFilesIfKnown) + return DeltaScan( + version = version, + files = getAllFiles(keepNumRecords), + total = dataSize, + partition = dataSize, + scanned = dataSize)( + scannedSnapshot = snapshotToScan, + partitionFilters = ExpressionSet(Nil), + dataFilters = ExpressionSet(Nil), + unusedFilters = ExpressionSet(Nil), + scanDurationMs = System.currentTimeMillis() - startTime, + dataSkippingType = getCorrectDataSkippingType(DeltaDataSkippingType.noSkippingV1) + ) + } + } + + import DeltaTableUtils._ + val partitionColumns = metadata.partitionColumns + + // For data skipping, avoid using the filters that involve subqueries. + + val (subqueryFilters, flatFilters) = filters.partition { + case f => containsSubquery(f) + } + + val (partitionFilters, dataFilters) = flatFilters + .partition(isPredicatePartitionColumnsOnly(_, partitionColumns, spark)) + + if (dataFilters.isEmpty) recordDeltaOperation(deltaLog, "delta.skipping.partition") { + // When there are only partition filters we can scan allFiles + // rather than withStats and thus we skip data skipping information. 
+ val (files, scanSize) = filterOnPartitions(partitionFilters, keepNumRecords) + DeltaScan( + version = version, + files = files, + total = DataSize(sizeInBytesIfKnown, None, numOfFilesIfKnown), + partition = scanSize, + scanned = scanSize)( + scannedSnapshot = snapshotToScan, + partitionFilters = ExpressionSet(partitionFilters), + dataFilters = ExpressionSet(Nil), + unusedFilters = ExpressionSet(subqueryFilters), + scanDurationMs = System.currentTimeMillis() - startTime, + dataSkippingType = + getCorrectDataSkippingType(DeltaDataSkippingType.partitionFilteringOnlyV1) + ) + } else recordDeltaOperation(deltaLog, "delta.skipping.data") { + val finalPartitionFilters = constructPartitionFilters(partitionFilters) + + val dataSkippingType = if (partitionFilters.isEmpty) { + DeltaDataSkippingType.dataSkippingOnlyV1 + } else { + DeltaDataSkippingType.dataSkippingAndPartitionFilteringV1 + } + + val (skippingFilters, unusedFilters) = if (useStats) { + val constructDataFilters = new DataFiltersBuilder(spark, dataSkippingType) + dataFilters.map(f => (f, constructDataFilters(f))).partition(f => f._2.isDefined) + } else { + (Nil, dataFilters.map(f => (f, None))) + } + + val finalSkippingFilters = skippingFilters + .map(_._2.get) + .reduceOption((skip1, skip2) => DataSkippingPredicate( + // Fold the filters into a conjunction, while unioning their referencedStats. + skip1.expr && skip2.expr, skip1.referencedStats ++ skip2.referencedStats)) + .getOrElse(DataSkippingPredicate(trueLiteral)) + + val (files, sizes) = { + getDataSkippedFiles(finalPartitionFilters, finalSkippingFilters, keepNumRecords) + } + + DeltaScan( + version = version, + files = files, + total = sizes(0), + partition = sizes(1), + scanned = sizes(2))( + scannedSnapshot = snapshotToScan, + partitionFilters = ExpressionSet(partitionFilters), + dataFilters = ExpressionSet(skippingFilters.map(_._1)), + unusedFilters = ExpressionSet(unusedFilters.map(_._1) ++ subqueryFilters), + scanDurationMs = System.currentTimeMillis() - startTime, + dataSkippingType = getCorrectDataSkippingType(dataSkippingType) + ) + } + } + + /** + * Gathers files that should be included in a scan based on the given predicates and limit. + * This will be called only when all predicates are on partitioning columns. + * Statistics about the amount of data that will be read are gathered and returned. + */ + override def filesForScan(limit: Long, partitionFilters: Seq[Expression]): DeltaScan = + recordDeltaOperation(deltaLog, "delta.skipping.filteredLimit") { + val startTime = System.currentTimeMillis() + val finalPartitionFilters = constructPartitionFilters(partitionFilters) + + val scan = { + pruneFilesByLimit(withStats.where(finalPartitionFilters), limit) + } + + val totalDataSize = new DataSize( + sizeInBytesIfKnown, + None, + numOfFilesIfKnown, + None + ) + + val scannedDataSize = new DataSize( + scan.byteSize, + scan.numPhysicalRecords, + Some(scan.files.size), + scan.numLogicalRecords + ) + + DeltaScan( + version = version, + files = scan.files, + total = totalDataSize, + partition = null, + scanned = scannedDataSize)( + scannedSnapshot = snapshotToScan, + partitionFilters = ExpressionSet(partitionFilters), + dataFilters = ExpressionSet(Nil), + unusedFilters = ExpressionSet(Nil), + scanDurationMs = System.currentTimeMillis() - startTime, + dataSkippingType = DeltaDataSkippingType.filteredLimit + ) + } + + /** + * Get AddFile (with stats) actions corresponding to given set of paths in the Snapshot. 
+ * If a path doesn't exist in snapshot, it will be ignored and no [[AddFile]] will be returned + * for it. + * @param paths Sequence of paths for which we want to get [[AddFile]] action + * @return a sequence of addFiles for the given `paths` + */ + def getSpecificFilesWithStats(paths: Seq[String]): Seq[AddFile] = { + recordFrameProfile("Delta", "DataSkippingReader.getSpecificFilesWithStats") { + val right = paths.toDF(spark, "path") + val df = allFiles.join(right, Seq("path"), "leftsemi") + convertDataFrameToAddFiles(df) + } + } + + /** Get the files and number of records within each file, to perform limit pushdown. */ + def getFilesAndNumRecords( + df: DataFrame): Iterator[(AddFile, NumRecords)] with Closeable = recordFrameProfile( + "Delta", "DataSkippingReaderEdge.getFilesAndNumRecords") { + import org.apache.spark.sql.delta.implicits._ + + val dvCardinality = coalesce(col("deletionVector.cardinality"), lit(0L)) + val numLogicalRecords = col("stats.numRecords") - dvCardinality + + val result = df.withColumn("numPhysicalRecords", col("stats.numRecords")) // Physical + .withColumn("numLogicalRecords", numLogicalRecords) // Logical + .withColumn("stats", nullStringLiteral) + .select(struct(col("*")).as[AddFile], + col("numPhysicalRecords").as[java.lang.Long], col("numLogicalRecords").as[java.lang.Long]) + .collectAsList() + + new Iterator[(AddFile, NumRecords)] with Closeable { + private val underlying = result.iterator + override def hasNext: Boolean = underlying.hasNext + override def next(): (AddFile, NumRecords) = { + val next = underlying.next() + (next._1, NumRecords(numPhysicalRecords = next._2, numLogicalRecords = next._3)) + } + + override def close(): Unit = { + } + + } + } + + protected def convertDataFrameToAddFiles(df: DataFrame): Array[AddFile] = { + df.as[AddFile].collect() + } + + protected[delta] def pruneFilesByLimit(df: DataFrame, limit: Long): ScanAfterLimit = { + val withNumRecords = { + getFilesAndNumRecords(df) + } + + var logicalRowsToScan = 0L + var physicalRowsToScan = 0L + var bytesToScan = 0L + var bytesToIgnore = 0L + var rowsUnknown = false + + val filesAfterLimit = try { + val iter = withNumRecords + val filesToScan = ArrayBuffer[AddFile]() + val filesToIgnore = ArrayBuffer[AddFile]() + while (iter.hasNext && logicalRowsToScan < limit) { + val file = iter.next() + if (file._2.numPhysicalRecords == null || file._2.numLogicalRecords == null) { + // this file has no stats, ignore for now + bytesToIgnore += file._1.size + filesToIgnore += file._1 + } else { + physicalRowsToScan += file._2.numPhysicalRecords.toLong + logicalRowsToScan += file._2.numLogicalRecords.toLong + bytesToScan += file._1.size + filesToScan += file._1 + } + } + + // If the files that have stats do not contain enough rows, fall back to reading all files + if (logicalRowsToScan < limit && filesToIgnore.nonEmpty) { + filesToScan ++= filesToIgnore + bytesToScan += bytesToIgnore + rowsUnknown = true + } + filesToScan.toSeq + } finally { + withNumRecords.close() + } + + if (rowsUnknown) { + ScanAfterLimit(filesAfterLimit, Some(bytesToScan), None, None) + } else { + ScanAfterLimit(filesAfterLimit, Some(bytesToScan), + Some(physicalRowsToScan), Some(logicalRowsToScan)) + } + } +} + +trait DataSkippingReader extends DataSkippingReaderBase diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/DataSkippingStatsTracker.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/DataSkippingStatsTracker.scala new file mode 100644 index 00000000000..eaff49795ba --- /dev/null +++ 
b/spark/src/main/scala/org/apache/spark/sql/delta/stats/DataSkippingStatsTracker.scala @@ -0,0 +1,191 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.stats + +import scala.collection.mutable + +import org.apache.spark.sql.delta.expressions.JoinedProjection +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate._ +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.types._ +import org.apache.spark.util.SerializableConfiguration + +/** + * A [[WriteTaskStats]] that contains a map from file name to the json representation + * of the collected statistics. + */ +case class DeltaFileStatistics(stats: Map[String, String]) extends WriteTaskStats + +/** + * A per-task (i.e. one instance per executor) [[WriteTaskStatsTracker]] that collects the + * statistics defined by [[StatisticsCollection]] for files that are being written into a delta + * table. + * + * @param dataCols Resolved data (i.e. non-partitionBy) columns of the dataframe to be written. + * @param statsColExpr Resolved expression for computing all the statistics that we want to gather. + * @param rootPath The Reservoir's root path. + * @param hadoopConf Hadoop Config for being able to instantiate a [[FileSystem]]. + */ +class DeltaTaskStatisticsTracker( + dataCols: Seq[Attribute], + statsColExpr: Expression, + rootPath: Path, + hadoopConf: Configuration) extends WriteTaskStatsTracker { + + protected[this] val submittedFiles = mutable.HashMap[String, InternalRow]() + + // For example, when strings are involved, statsColExpr might look like + // struct( + // count(new Column("*")) as "numRecords" + // struct( + // substring(min(col), 0, stringPrefix)) + // ) as "minValues", + // struct( + // udf(max(col)) + // ) as "maxValues" + // ) as "stats" + + // [[DeclarativeAggregate]] is the API to the Catalyst machinery for initializing and updating + // the result of an aggregate function. We will be using it here the same way it's used during + // query execution. + + // Given the example above, aggregates would hold: Seq(count, min, max) + private val aggregates: Seq[DeclarativeAggregate] = statsColExpr.collect { + case ae: AggregateExpression if ae.aggregateFunction.isInstanceOf[DeclarativeAggregate] => + ae.aggregateFunction.asInstanceOf[DeclarativeAggregate] + } + + // The fields of aggBuffer - see below + protected val aggBufferAttrs: Seq[Attribute] = aggregates.flatMap(_.aggBufferAttributes) + + // This projection initializes aggBuffer with the neutral values for the agg fcns e.g. 
0 for sum + protected val initializeStats: MutableProjection = GenerateMutableProjection.generate( + expressions = aggregates.flatMap(_.initialValues), + inputSchema = Seq.empty, + useSubexprElimination = false + ) + + // This projection combines the intermediate results stored by aggBuffer with the values of the + // currently processed row and updates aggBuffer in place. + private val updateStats: MutableProjection = GenerateMutableProjection.generate( + expressions = JoinedProjection.bind( + aggBufferAttrs, + dataCols, + aggregates.flatMap(_.updateExpressions)), + inputSchema = Nil, + useSubexprElimination = true + ) + + // This executes the whole statsColExpr in order to compute the final stats value for the file. + // In order to evaluate it, we have to replace its aggregate functions with the corresponding + // aggregates' evaluateExpressions that basically just return the results stored in aggBuffer. + private val resultExpr: Expression = statsColExpr.transform { + case ae: AggregateExpression if ae.aggregateFunction.isInstanceOf[DeclarativeAggregate] => + ae.aggregateFunction.asInstanceOf[DeclarativeAggregate].evaluateExpression + } + + // See resultExpr above + private val getStats: Projection = UnsafeProjection.create( + exprs = Seq(resultExpr), + inputSchema = aggBufferAttrs + ) + + // This serves as input to updateStats, with aggBuffer always on the left, while the right side + // is every time replaced with the row currently being processed - see updateStats and newRow. + private val extendedRow: GenericInternalRow = new GenericInternalRow(2) + + // file path to corresponding stats encoded as json + protected val results = new collection.mutable.HashMap[String, String] + + // called once per file, executes the getStats projection + override def closeFile(filePath: String): Unit = { + // We assume file names are unique + val fileName = new Path(filePath).getName + + assert(!results.contains(fileName), s"Stats already recorded for file: $filePath") + // this is statsColExpr's output (json string) + val jsonStats = getStats(submittedFiles(filePath)).getString(0) + results += ((fileName, jsonStats)) + submittedFiles.remove(filePath) + } + + override def newPartition(partitionValues: InternalRow): Unit = { } + + protected def initializeAggBuf(buffer: SpecificInternalRow): InternalRow = + initializeStats.target(buffer).apply(EmptyRow) + + override def newFile(newFilePath: String): Unit = { + submittedFiles.getOrElseUpdate(newFilePath, { + // `buffer` is a row that will start off by holding the initial values for the agg expressions + // (see the initializeStats: Projection), will then be updated in place every time a new row + // is processed (see updateStats: Projection), and will finally serve as an input for + // computing the per-file result of statsColExpr (see getStats: Projection) + val buffer = new SpecificInternalRow(aggBufferAttrs.map(_.dataType)) + initializeAggBuf(buffer) + }) + } + + override def newRow(filePath: String, currentRow: InternalRow): Unit = { + val aggBuffer = submittedFiles(filePath) + extendedRow.update(0, aggBuffer) + extendedRow.update(1, currentRow) + updateStats.target(aggBuffer).apply(extendedRow) + } + + override def getFinalStats(taskCommitTime: Long): DeltaFileStatistics = { + submittedFiles.keys.foreach(closeFile) + submittedFiles.clear() + DeltaFileStatistics(results.toMap) + } +} + +/** + * Serializable factory class that holds together all required parameters for being able to + * instantiate a [[DeltaTaskStatisticsTracker]] on an executor. 
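A much-simplified sketch of the per-file buffer lifecycle implemented above: newFile allocates a running buffer, newRow folds a row into it, and closeFile freezes the buffer into a per-file stats string. It uses a plain mutable map and hand-written min/max/count in place of the Catalyst projections and aggregate buffers, so the names below are illustrative only.

import scala.collection.mutable

object TaskStatsTrackerSketch extends App {
  // Running aggregates for one output file.
  final case class Buffer(var numRecords: Long = 0L,
                          var min: Option[Long] = None,
                          var max: Option[Long] = None)

  final class SimpleTaskStatsTracker {
    private val openFiles = mutable.HashMap.empty[String, Buffer]
    private val results = mutable.HashMap.empty[String, String]

    def newFile(path: String): Unit = { openFiles.getOrElseUpdate(path, Buffer()); () }

    def newRow(path: String, value: Long): Unit = {
      val b = openFiles(path)
      b.numRecords += 1
      b.min = Some(b.min.fold(value)(math.min(_, value)))
      b.max = Some(b.max.fold(value)(math.max(_, value)))
    }

    def closeFile(path: String): Unit = {
      val b = openFiles.remove(path).getOrElse(sys.error(s"unknown file: $path"))
      val minStr = b.min.map(_.toString).getOrElse("null")
      val maxStr = b.max.map(_.toString).getOrElse("null")
      results += path -> s"""{"numRecords":${b.numRecords},"minValues":$minStr,"maxValues":$maxStr}"""
    }

    def finalStats: Map[String, String] = results.toMap
  }

  val tracker = new SimpleTaskStatsTracker
  tracker.newFile("part-0000")
  Seq(5L, 1L, 9L).foreach(tracker.newRow("part-0000", _))
  tracker.closeFile("part-0000")
  assert(tracker.finalStats("part-0000") == """{"numRecords":3,"minValues":1,"maxValues":9}""")
}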
+ * + * @param hadoopConf The Hadoop configuration object to use on an executor. + * @param path Root Reservoir path + * @param dataCols Resolved data (i.e. non-partitionBy) columns of the dataframe to be written. + */ +class DeltaJobStatisticsTracker( + @transient private val hadoopConf: Configuration, + @transient val path: Path, + val dataCols: Seq[Attribute], + val statsColExpr: Expression +) extends WriteJobStatsTracker { + + var recordedStats: Map[String, String] = _ + + private val srlHadoopConf = new SerializableConfiguration(hadoopConf) + private val rootUri = path.getFileSystem(hadoopConf).makeQualified(path).toUri() + + override def newTaskInstance(): WriteTaskStatsTracker = { + val rootPath = new Path(rootUri) + val hadoopConf = srlHadoopConf.value + new DeltaTaskStatisticsTracker(dataCols, statsColExpr, rootPath, hadoopConf) + } + + override def processStats(stats: Seq[WriteTaskStats], jobCommitTime: Long): Unit = { + recordedStats = stats.map(_.asInstanceOf[DeltaFileStatistics]).flatMap(_.stats).toMap + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/DeltaScan.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/DeltaScan.scala new file mode 100644 index 00000000000..082c3ef6257 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/DeltaScan.scala @@ -0,0 +1,94 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.stats + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.Snapshot +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.stats.DeltaDataSkippingType.DeltaDataSkippingType +import com.fasterxml.jackson.databind.annotation.JsonDeserialize + +import org.apache.spark.sql.catalyst.expressions._ + +/** + * DataSize describes following attributes for data that consists of a list of input files + * @param bytesCompressed total size of the data + * @param rows number of rows in the data + * @param files number of input files + * Note: Please don't add any new constructor to this class. `jackson-module-scala` always picks up + * the first constructor returned by `Class.getConstructors` but the order of the constructors list + * is non-deterministic. 
(SC-13343) + */ +case class DataSize( + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + bytesCompressed: Option[Long] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + rows: Option[Long] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + files: Option[Long] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + logicalRows: Option[Long] = None +) + +object DataSize { + def apply(a: ArrayAccumulator): DataSize = { + DataSize( + Option(a.value(0)).filterNot(_ == -1), + Option(a.value(1)).filterNot(_ == -1), + Option(a.value(2)).filterNot(_ == -1), + Option(a.value(3)).filterNot(_ == -1) + ) + } +} + +object DeltaDataSkippingType extends Enumeration { + type DeltaDataSkippingType = Value + // V1: code path in DataSkippingReader.scala, which needs StateReconstruction + // noSkipping: no skipping and get all files from the Delta table + // partitionFiltering: filtering and skipping based on partition columns + // dataSkipping: filtering and skipping based on stats columns + // limit: skipping based on limit clause in DataSkippingReader.scala + // filteredLimit: skipping based on limit clause and partition columns in DataSkippingReader.scala + val noSkippingV1, noSkippingV2, partitionFilteringOnlyV1, partitionFilteringOnlyV2, + dataSkippingOnlyV1, dataSkippingOnlyV2, dataSkippingAndPartitionFilteringV1, + dataSkippingAndPartitionFilteringV2, limit, filteredLimit = Value +} + +/** + * Used to hold details the files and stats for a scan where we have already + * applied filters and a limit. + */ +case class DeltaScan( + version: Long, + files: Seq[AddFile], + total: DataSize, + partition: DataSize, + scanned: DataSize)( + // Moved to separate argument list, to not be part of case class equals check - + // expressions can differ by exprId or ordering, but as long as same files are scanned, the + // PreparedDeltaFileIndex and HadoopFsRelation should be considered equal for reuse purposes. + val scannedSnapshot: Snapshot, + val partitionFilters: ExpressionSet, + val dataFilters: ExpressionSet, + val unusedFilters: ExpressionSet, + val scanDurationMs: Long, + val dataSkippingType: DeltaDataSkippingType) { + assert(version == scannedSnapshot.version) + + lazy val filtersUsedForSkipping: ExpressionSet = partitionFilters ++ dataFilters + lazy val allFilters: ExpressionSet = filtersUsedForSkipping ++ unusedFilters +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/DeltaScanGenerator.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/DeltaScanGenerator.scala new file mode 100644 index 00000000000..0ad154d89a3 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/DeltaScanGenerator.scala @@ -0,0 +1,40 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
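The DeltaScan definition above leans on a Scala detail worth spelling out: only the first parameter list of a case class participates in the generated equals/hashCode, so fields such as scanDurationMs can differ without breaking plan-reuse comparisons. A tiny sketch with made-up field names:

object CurriedCaseClassEqualitySketch extends App {
  // Only (version, files) participate in equals/hashCode; the second list does not.
  case class MiniScan(version: Long, files: Seq[String])(val scanDurationMs: Long)

  val a = MiniScan(7L, Seq("f1", "f2"))(scanDurationMs = 10L)
  val b = MiniScan(7L, Seq("f1", "f2"))(scanDurationMs = 99L)

  assert(a == b)                        // equal despite different durations
  assert(a.hashCode == b.hashCode)
  assert(a.scanDurationMs != b.scanDurationMs)
}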
+ */ + +package org.apache.spark.sql.delta.stats + +import org.apache.spark.sql.delta.{Snapshot, SnapshotDescriptor} + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} + +/** Trait representing a class that can generate [[DeltaScan]] given filters and a limit. */ +trait DeltaScanGenerator { + /** The snapshot that the scan is being generated on. */ + val snapshotToScan: Snapshot + + /** + * Returns a DataFrame for the given partition filters. The schema of returned DataFrame is nearly + * the same as `AddFile`, except that the `stats` field is parsed to a struct from a json string. + */ + def filesWithStatsForScan(partitionFilters: Seq[Expression]): DataFrame + + /** Returns a [[DeltaScan]] based on the given filters. */ + def filesForScan(filters: Seq[Expression], keepNumRecords: Boolean = false): DeltaScan + + /** Returns a [[DeltaScan]] based on the given partition filters and limits. */ + def filesForScan(limit: Long, partitionFilters: Seq[Expression]): DeltaScan +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/FileSizeHistogram.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/FileSizeHistogram.scala new file mode 100644 index 00000000000..2068c40b643 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/FileSizeHistogram.scala @@ -0,0 +1,115 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.stats + +import java.util.Arrays + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize + +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.types.StructType + +/** + * A Histogram class tracking the file counts and total bytes in different size ranges + * @param sortedBinBoundaries - a sorted list of bin boundaries where each element represents the + * start of the bin (included) and the next element represents the end + * of the bin (excluded) + * @param fileCounts - an array of Int representing total number of files in different bins + * @param totalBytes - an array of Long representing total number of bytes in different bins + */ +case class FileSizeHistogram( + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + sortedBinBoundaries: IndexedSeq[Long], + fileCounts: Array[Long], + totalBytes: Array[Long]) { + + require(sortedBinBoundaries.nonEmpty) + require(sortedBinBoundaries.head == 0, "The first bin should start from 0") + require(sortedBinBoundaries.length == fileCounts.length, "number of binBoundaries should be" + + " same as size of fileCounts") + require(sortedBinBoundaries.length == totalBytes.length, "number of binBoundaries should be" + + " same as size of totalBytes") + + /** + * Not intended to be used for [[Map]] structure keys. Implemented for the sole purpose of having + * an equals method, which requires overriding hashCode as well, so an incomplete hash is okay. 
+ * We only require a == b implies a.hashCode == b.hashCode + */ + override def hashCode(): Int = Arrays.hashCode(totalBytes) + + override def equals(that: Any): Boolean = that match { + case FileSizeHistogram(thatSB, thatFC, thatTB) => + sortedBinBoundaries == thatSB && + java.util.Arrays.equals(fileCounts, thatFC) && + java.util.Arrays.equals(totalBytes, thatTB) + case _ => false + } + + /** + * Insert a given value into the appropriate histogram bin + */ + def insert(fileSize: Long): Unit = { + val index = FileSizeHistogram.getBinIndex(fileSize, sortedBinBoundaries) + if (index >= 0) { + fileCounts(index) += 1 + totalBytes(index) += fileSize + } + } + + /** + * Remove a given value from the appropriate histogram bin + * @param fileSize to remove + */ + def remove(fileSize: Long): Unit = { + val index = FileSizeHistogram.getBinIndex(fileSize, sortedBinBoundaries) + if (index >= 0) { + fileCounts(index) -= 1 + totalBytes(index) -= fileSize + } + } +} + +private[delta] object FileSizeHistogram { + + /** + * Returns the index of the bin to which given fileSize belongs OR -1 if given fileSize doesn't + * belongs to any bin + */ + def getBinIndex(fileSize: Long, sortedBinBoundaries: IndexedSeq[Long]): Int = { + import scala.collection.Searching._ + // The search function on IndexedSeq uses binary search. + val searchResult = sortedBinBoundaries.search(fileSize) + searchResult match { + case Found(index) => + index + case InsertionPoint(insertionPoint) => + // insertionPoint=0 means that fileSize is lesser than min bucket of histogram + // return -1 in that case + insertionPoint - 1 + } + } + + def apply(sortedBinBoundaries: IndexedSeq[Long]): FileSizeHistogram = { + new FileSizeHistogram( + sortedBinBoundaries, + Array.fill(sortedBinBoundaries.size)(0), + Array.fill(sortedBinBoundaries.size)(0) + ) + } + + lazy val schema: StructType = ExpressionEncoder[FileSizeHistogram]().schema +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala new file mode 100644 index 00000000000..4f228c8e45f --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala @@ -0,0 +1,419 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
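A quick standalone illustration of the bin-lookup convention implemented by getBinIndex above, using the same scala.collection.Searching binary search: exact boundary hits start their own bin, values between boundaries fall into the bin on their left, and anything below the first boundary yields -1. The boundary values below are purely illustrative.

object BinIndexSketch extends App {
  import scala.collection.Searching._

  // Same convention as FileSizeHistogram.getBinIndex: each boundary starts a bin.
  def binIndex(value: Long, sortedBinBoundaries: IndexedSeq[Long]): Int =
    sortedBinBoundaries.search(value) match {
      case Found(i)              => i
      case InsertionPoint(point) => point - 1 // -1 when value is below the first boundary
    }

  val boundaries = IndexedSeq(0L, 1024L, 1024L * 1024) // [0, 1 KiB), [1 KiB, 1 MiB), [1 MiB, ...)
  assert(binIndex(0L, boundaries) == 0)      // exact first boundary
  assert(binIndex(500L, boundaries) == 0)    // inside the first bin
  assert(binIndex(1024L, boundaries) == 1)   // exact boundary starts the next bin
  assert(binIndex(2048L, boundaries) == 1)
  assert(binIndex(-1L, boundaries) == -1)    // below all bins
}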
+ */ + +package org.apache.spark.sql.delta.stats + +import java.util.Objects + +import scala.collection.mutable + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.{AddFile, Protocol} +import org.apache.spark.sql.delta.files.{TahoeFileIndex, TahoeFileIndexWithSnapshotDescriptor, TahoeLogFileIndex} +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.perf.OptimizeMetadataOnlyDeltaQuery +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.planning.PhysicalOperation +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.PROJECT +import org.apache.spark.sql.execution.datasources.{FileIndex, LogicalRelation} +import org.apache.spark.sql.types.StructType + +/** + * Before query planning, we prepare any scans over delta tables by pushing + * any projections or filters in allowing us to gather more accurate statistics + * for CBO and metering. + * + * Note the following + * - This rule also ensures that all reads from the same delta log use the same snapshot of log + * thus providing snapshot isolation. + * - If this rule is invoked within an active [[OptimisticTransaction]], then the scans are + * generated using the transaction. + */ +trait PrepareDeltaScanBase extends Rule[LogicalPlan] + with PredicateHelper + with DeltaLogging + with OptimizeMetadataOnlyDeltaQuery + with PreprocessTableWithDVs { self: PrepareDeltaScan => + + /** + * Tracks the first-access snapshots of other logs planned by this rule. The snapshots are + * the keyed by the log's unique id. Note that the lifetime of this rule is a single + * query, therefore, the map tracks the snapshots only within a query. + */ + private val scannedSnapshots = + new java.util.concurrent.ConcurrentHashMap[(String, Path), Snapshot] + + /** + * Gets the [[DeltaScanGenerator]] for the given log, which will be used to generate + * [[DeltaScan]]s. Every time this method is called on a log within the lifetime of this + * rule (i.e., the lifetime of the query for which this rule was instantiated), the returned + * generator will read a snapshot that is pinned on the first access for that log. + * + * Internally, it will use the snapshot of the file index, the snapshot of the active transaction + * (if any), or the latest snapshot of the given log. 
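The snapshot pinning described above boils down to a computeIfAbsent on a per-query map keyed by the log's identity: the first access loads a snapshot and every later access within the same query reuses it. A minimal sketch with a stand-in snapshot type (the real code keys by the log's compositeId and also consults any active transaction):

import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.atomic.AtomicInteger

object SnapshotPinningSketch extends App {
  final case class PinnedSnapshot(logId: String, version: Long)

  val loads = new AtomicInteger(0)
  def loadLatestSnapshot(logId: String): PinnedSnapshot = {
    loads.incrementAndGet()
    PinnedSnapshot(logId, version = 42L) // pretend this reads the latest version of the log
  }

  // One map per query: the first access to a log pins its snapshot for all later scans.
  val scannedSnapshots = new ConcurrentHashMap[String, PinnedSnapshot]()
  def snapshotToScan(logId: String): PinnedSnapshot =
    scannedSnapshots.computeIfAbsent(logId, id => loadLatestSnapshot(id))

  val first = snapshotToScan("table-a")
  val second = snapshotToScan("table-a")
  assert(first eq second) // same pinned snapshot instance
  assert(loads.get == 1)  // loaded only once per query
}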
+ */ + protected def getDeltaScanGenerator(index: TahoeLogFileIndex): DeltaScanGenerator = { + // The first case means that we've fixed the table snapshot for time travel + if (index.isTimeTravelQuery) return index.getSnapshot + val scanGenerator = OptimisticTransaction.getActive() + .map(_.getDeltaScanGenerator(index)) + .getOrElse { + // Will be called only when the log is accessed the first time + scannedSnapshots.computeIfAbsent(index.deltaLog.compositeId, _ => index.getSnapshot) + } + import PrepareDeltaScanBase._ + if (onGetDeltaScanGeneratorCallback != null) onGetDeltaScanGeneratorCallback(scanGenerator) + scanGenerator + } + + /** + * Helper method to generate a [[PreparedDeltaFileIndex]] + */ + protected def getPreparedIndex( + preparedScan: DeltaScan, + fileIndex: TahoeLogFileIndex): PreparedDeltaFileIndex = { + assert(fileIndex.partitionFilters.isEmpty, + "Partition filters should have been extracted by DeltaAnalysis.") + PreparedDeltaFileIndex( + spark, + fileIndex.deltaLog, + fileIndex.path, + preparedScan, + fileIndex.versionToUse) + } + + /** + * Scan files using the given `filters` and return `DeltaScan`. + * + * Note: when `limitOpt` is non empty, `filters` must contain only partition filters. Otherwise, + * it can contain arbitrary filters. See `DeltaTableScan` for more details. + */ + protected def filesForScan( + scanGenerator: DeltaScanGenerator, + limitOpt: Option[Int], + filters: Seq[Expression], + delta: LogicalRelation): DeltaScan = { + withStatusCode("DELTA", "Filtering files for query") { + if (limitOpt.nonEmpty) { + // If we trigger limit push down, the filters must be partition filters. Since + // there are no data filters, we don't need to apply Generated Columns + // optimization. See `DeltaTableScan` for more details. + return scanGenerator.filesForScan(limitOpt.get, filters) + } + val filtersForScan = + if (!GeneratedColumn.partitionFilterOptimizationEnabled(spark)) { + filters + } else { + val generatedPartitionFilters = GeneratedColumn.generatePartitionFilters( + spark, scanGenerator.snapshotToScan, filters, delta) + filters ++ generatedPartitionFilters + } + scanGenerator.filesForScan(filtersForScan) + } + } + + /** + * Prepares delta scans sequentially. + */ + protected def prepareDeltaScan(plan: LogicalPlan): LogicalPlan = { + // A map from the canonicalized form of a DeltaTableScan operator to its corresponding delta + // scan. This map is used to avoid fetching duplicate delta indexes for structurally-equal + // delta scans. + val deltaScans = new mutable.HashMap[LogicalPlan, DeltaScan]() + + transformWithSubqueries(plan) { + case scan @ DeltaTableScan(planWithRemovedProjections, filters, fileIndex, + limit, delta) => + val scanGenerator = getDeltaScanGenerator(fileIndex) + val preparedScan = deltaScans.getOrElseUpdate(planWithRemovedProjections.canonicalized, + filesForScan(scanGenerator, limit, filters, delta)) + val preparedIndex = getPreparedIndex(preparedScan, fileIndex) + optimizeGeneratedColumns(scan, preparedIndex, filters, limit, delta) + } + } + + protected def optimizeGeneratedColumns( + scan: LogicalPlan, + preparedIndex: PreparedDeltaFileIndex, + filters: Seq[Expression], + limit: Option[Int], + delta: LogicalRelation): LogicalPlan = { + if (limit.nonEmpty) { + // If we trigger limit push down, the filters must be partition filters. Since + // there are no data filters, we don't need to apply Generated Columns + // optimization. See `DeltaTableScan` for more details. 
+ return DeltaTableUtils.replaceFileIndex(scan, preparedIndex) + } + if (!GeneratedColumn.partitionFilterOptimizationEnabled(spark)) { + DeltaTableUtils.replaceFileIndex(scan, preparedIndex) + } else { + val generatedPartitionFilters = + GeneratedColumn.generatePartitionFilters(spark, preparedIndex, filters, delta) + val scanWithFilters = + if (generatedPartitionFilters.nonEmpty) { + scan transformUp { + case delta @ DeltaTable(_: TahoeLogFileIndex) => + Filter(generatedPartitionFilters.reduceLeft(And), delta) + } + } else { + scan + } + DeltaTableUtils.replaceFileIndex(scanWithFilters, preparedIndex) + } + } + + override def apply(_plan: LogicalPlan): LogicalPlan = { + var plan = _plan + + val shouldPrepareDeltaScan = ( + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_STATS_SKIPPING) + ) + val updatedPlan = if (shouldPrepareDeltaScan) { + // Should not be applied to subqueries to avoid duplicate delta jobs. + val isSubquery = isSubqueryRoot(plan) + // Should not be applied to DataSourceV2 write plans, because they'll be planned later + // through a V1 fallback and only that later planning takes place within the transaction. + val isDataSourceV2 = plan.isInstanceOf[V2WriteCommand] + if (isSubquery || isDataSourceV2) { + return plan + } + + if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED)) { + plan = optimizeQueryWithMetadata(plan) + } + prepareDeltaScan(plan) + } else { + prepareDeltaScanWithoutFileSkipping(plan) + } + preprocessTablesWithDVs(updatedPlan) + } + + protected def prepareDeltaScanWithoutFileSkipping(plan: LogicalPlan): LogicalPlan = { + // If this query is running inside an active transaction and is touching the same table + // as the transaction, then mark that the entire table as tainted to be safe. + OptimisticTransaction.getActive().foreach { txn => + val logsInPlan = plan.collect { case DeltaTable(fileIndex: TahoeFileIndex) => + fileIndex.deltaLog + } + if (logsInPlan.exists(_.isSameLogAs(txn.deltaLog))) { + txn.readWholeTable() + } + } + + // Just return the plan if statistics based skipping is off. + // It will fall back to just partition pruning at planning time. + plan + } + + /** + * This is an extractor object. See https://docs.scala-lang.org/tour/extractor-objects.html. + */ + object DeltaTableScan extends DeltaTableScan[TahoeLogFileIndex] { + + override def limitPushdownEnabled(plan: LogicalPlan): Boolean = + spark.conf.get(DeltaSQLConf.DELTA_LIMIT_PUSHDOWN_ENABLED) + + override def getPartitionColumns(fileIndex: TahoeLogFileIndex): Seq[String] = + fileIndex.snapshotAtAnalysis.metadata.partitionColumns + + override def getPartitionFilters(fileIndex: TahoeLogFileIndex): Seq[Expression] = + fileIndex.partitionFilters + } + + abstract class DeltaTableScan[FileIndexType <: FileIndex : scala.reflect.ClassTag] { + + /** + * The components of DeltaTableScanType are: + * - the plan with removed projections. We remove projections as a plan differentiator + * because it does not affect file listing results. + * - filter expressions collected by `PhysicalOperation` + * - the `FileIndexType` of the matched DeltaTable` + * - integer value of limit expression, if any + * - matched `DeltaTable` + */ + protected type DeltaTableScanType = + (LogicalPlan, Seq[Expression], FileIndexType, Option[Int], LogicalRelation) + + /** + * This is an extractor method (basically, the opposite of a constructor) which takes in an + * object `plan` and tries to give back the arguments as a [[DeltaTableScanType]]. 
+ */ + def unapply(plan: LogicalPlan): Option[DeltaTableScanType] = { + // Remove projections as a plan differentiator because it does not affect file listing + // results. Plans with the same filters but different projections therefore will not have + // duplicate delta indexes. + def canonicalizePlanForDeltaFileListing(plan: LogicalPlan): LogicalPlan = { + val planWithRemovedProjections = plan.transformWithPruning(_.containsPattern(PROJECT)) { + case p: Project if p.projectList.forall(_.isInstanceOf[AttributeReference]) => p.child + } + planWithRemovedProjections + } + + plan match { + case LocalLimit(IntegerLiteral(limit), + PhysicalOperation(_, filters, delta @ RelationFileIndex(fileIndex: FileIndexType))) + if limitPushdownEnabled(plan) && containsPartitionFiltersOnly(filters, fileIndex) => + Some((canonicalizePlanForDeltaFileListing(plan), filters, fileIndex, Some(limit), delta)) + case PhysicalOperation( + _, + filters, + delta @ RelationFileIndex(fileIndex: FileIndexType)) => + val allFilters = getPartitionFilters(fileIndex) ++ filters + Some((canonicalizePlanForDeltaFileListing(plan), allFilters, fileIndex, None, delta)) + + case _ => None + } + } + + protected def containsPartitionFiltersOnly( + filters: Seq[Expression], + fileIndex: FileIndexType): Boolean = { + val partitionColumns = getPartitionColumns(fileIndex) + import DeltaTableUtils._ + filters.forall(expr => !containsSubquery(expr) && + isPredicatePartitionColumnsOnly(expr, partitionColumns, spark)) + } + + protected def limitPushdownEnabled(plan: LogicalPlan): Boolean + + protected def getPartitionColumns(fileIndex: FileIndexType): Seq[String] + + protected def getPartitionFilters(fileIndex: FileIndexType): Seq[Expression] + } +} + +class PrepareDeltaScan(protected val spark: SparkSession) + extends PrepareDeltaScanBase + +object PrepareDeltaScanBase { + + /** + * Optional callback function that is called after `getDeltaScanGenerator` is called + * by the PrepareDeltaScan rule. This is primarily used for testing purposes. + */ + @volatile private var onGetDeltaScanGeneratorCallback: DeltaScanGenerator => Unit = _ + + /** + * Run a thunk of code with the given callback function injected into the PrepareDeltaScan rule. + * The callback function is called after `getDeltaScanGenerator` is called + * by the PrepareDeltaScan rule. This is primarily used for testing purposes. + */ + private[delta] def withCallbackOnGetDeltaScanGenerator[T]( + callback: DeltaScanGenerator => Unit)(thunk: => T): T = { + try { + onGetDeltaScanGeneratorCallback = callback + thunk + } finally { + onGetDeltaScanGeneratorCallback = null + } + } +} + +/** + * A [[TahoeFileIndex]] that uses a prepared scan to return the list of relevant files. + * This is injected into a query right before query planning by [[PrepareDeltaScan]] so that + * CBO and metering can accurately understand how much data will be read. + * + * @param versionScanned The version of the table that is being scanned, if a specific version + * has specifically been requested, e.g. by time travel. 
+ */
+case class PreparedDeltaFileIndex(
+ override val spark: SparkSession,
+ override val deltaLog: DeltaLog,
+ override val path: Path,
+ preparedScan: DeltaScan,
+ versionScanned: Option[Long])
+ extends TahoeFileIndexWithSnapshotDescriptor(spark, deltaLog, path, preparedScan.scannedSnapshot)
+ with DeltaLogging {
+
+ /**
+ * Returns all matching/valid files for the given `partitionFilters` and `dataFilters`
+ */
+ override def matchingFiles(
+ partitionFilters: Seq[Expression],
+ dataFilters: Seq[Expression]): Seq[AddFile] = {
+ val currentFilters = ExpressionSet(partitionFilters ++ dataFilters)
+ val (addFiles, eventData) = if (currentFilters == preparedScan.allFilters ||
+ currentFilters == preparedScan.filtersUsedForSkipping) {
+ // [[DeltaScan]] was created using `allFilters`, of which only `filtersUsedForSkipping`
+ // were actually used for skipping while creating the DeltaScan.
+ // If currentFilters is the same as allFilters, there is no need to recalculate files and we
+ // can reuse the previous results.
+ // If currentFilters is the same as filtersUsedForSkipping, we also don't need to recalculate
+ // files, because [[DeltaScan.files]] was calculated using filtersUsedForSkipping only;
+ // recalculating would give the same result, so we reuse the previous result in this case too.
+ val eventData = Map(
+ "reused" -> true,
+ "currentFiltersSameAsPreparedAllFilters" -> (currentFilters == preparedScan.allFilters),
+ "currentFiltersSameAsPreparedFiltersUsedForSkipping" ->
+ (currentFilters == preparedScan.filtersUsedForSkipping)
+ )
+ (preparedScan.files.distinct, eventData)
+ } else {
+ logInfo(
+ s"""
+ |Prepared scan does not match actual filters. Reselecting files to query.
+ |Prepared: ${preparedScan.allFilters}
+ |Actual: ${currentFilters}
+ """.stripMargin)
+ val eventData = Map(
+ "reused" -> false,
+ "preparedAllFilters" -> preparedScan.allFilters.mkString(","),
+ "preparedFiltersUsedForSkipping" -> preparedScan.filtersUsedForSkipping.mkString(","),
+ "currentFilters" -> currentFilters.mkString(",")
+ )
+ val files = preparedScan.scannedSnapshot.filesForScan(partitionFilters ++ dataFilters).files
+ (files, eventData)
+ }
+ recordDeltaEvent(deltaLog,
+ opType = "delta.preparedDeltaFileIndex.reuseSkippingResult",
+ data = eventData)
+ addFiles
+ }
+
+ /**
+ * Returns the list of files that will be read when scanning this relation. This call may be
+ * very expensive for large tables.
+ */ + override def inputFiles: Array[String] = + preparedScan.files.map(f => absolutePath(f.path).toString).toArray + + /** Refresh any cached file listings */ + override def refresh(): Unit = { } + + /** Sum of table file sizes, in bytes */ + override def sizeInBytes: Long = + preparedScan.scanned.bytesCompressed + .getOrElse(spark.sessionState.conf.defaultSizeInBytes) + + override def equals(other: Any): Boolean = other match { + case p: PreparedDeltaFileIndex => + p.deltaLog == deltaLog && p.path == path && p.preparedScan == preparedScan && + p.partitionSchema == partitionSchema && p.versionScanned == versionScanned + case _ => false + } + + override def hashCode(): Int = { + Objects.hash(deltaLog, path, preparedScan, partitionSchema, versionScanned) + } + +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/ReadsMetadataFields.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/ReadsMetadataFields.scala new file mode 100644 index 00000000000..6d76368b96b --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/ReadsMetadataFields.scala @@ -0,0 +1,50 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.stats + +import org.apache.spark.sql.Column +import org.apache.spark.sql.functions.col + +/** + * A mixin trait that provides access to the stats fields in the transaction log. + */ +trait ReadsMetadataFields { + /** Returns a Column that references the stats field data skipping should use */ + def getBaseStatsColumn: Column = col(getBaseStatsColumnName) + def getBaseStatsColumnName: String = "stats" +} + +/** + * A singleton of the Delta statistics field names. + */ +object DeltaStatistics { + /* The total number of records in the file. */ + val NUM_RECORDS = "numRecords" + /* The smallest (possibly truncated) value for a column. */ + val MIN = "minValues" + /* The largest (possibly truncated) value for a column. */ + val MAX = "maxValues" + /* The number of null values present for a column. */ + val NULL_COUNT = "nullCount" + /* + * Whether the column has tight or wide bounds. + * This should only be present in tables with Deletion Vectors enabled. + */ + val TIGHT_BOUNDS = "tightBounds" + + val ALL_STAT_FIELDS = Seq(NUM_RECORDS, MIN, MAX, NULL_COUNT, TIGHT_BOUNDS) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatisticsCollection.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatisticsCollection.scala new file mode 100644 index 00000000000..d07f2fbb50f --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatisticsCollection.scala @@ -0,0 +1,786 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.stats + +import java.util.Locale + +// scalastyle:off import.ordering.noEmptyLine +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.language.existentials + +import org.apache.spark.sql.delta.{Checkpoints, DeletionVectorsTableFeature, DeltaColumnMapping, DeltaColumnMappingMode, DeltaConfigs, DeltaErrors, DeltaIllegalArgumentException, DeltaLog, DeltaUDF, NoMapping} +import org.apache.spark.sql.delta.DeltaColumnMapping.COLUMN_MAPPING_PHYSICAL_NAME_KEY +import org.apache.spark.sql.delta.DeltaOperations.ComputeStats +import org.apache.spark.sql.delta.OptimisticTransaction +import org.apache.spark.sql.delta.actions.{AddFile, Metadata, Protocol} +import org.apache.spark.sql.delta.commands.DeletionVectorUtils +import org.apache.spark.sql.delta.commands.DeltaCommand +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.schema.{SchemaMergingUtils, SchemaUtils} +import org.apache.spark.sql.delta.schema.SchemaUtils.transformColumnsStructs +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.DeltaStatistics._ +import org.apache.spark.sql.delta.stats.StatisticsCollection.getIndexedColumns +import org.apache.spark.sql.util.ScalaExtensions._ + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.parser.{AbstractSqlParser, AstBuilder, ParseException, ParserUtils} +import org.apache.spark.sql.catalyst.parser.SqlBaseParser.MultipartIdentifierListContext +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +/** + * Used to report metrics on how predicates are used to prune the set of + * files that are read by a query. + * + * @param predicate A user readable version of the predicate. + * @param pruningType One of {partition, dataStats, none}. + * @param filesMissingStats The number of files that were included due to missing statistics. + * @param filesDropped The number of files that were dropped by this predicate. + */ +case class QueryPredicateReport( + predicate: String, + pruningType: String, + filesMissingStats: Long, + filesDropped: Long) + +/** Used to report details about prequery filtering of what data is scanned. */ +case class FilterMetric(numFiles: Long, predicates: Seq[QueryPredicateReport]) + +/** + * A helper trait that constructs expressions that can be used to collect global + * and column level statistics for a collection of data, given its schema. + * + * Global statistics (such as the number of records) are stored as top level columns. + * Per-column statistics (such as min/max) are stored in a struct that mirrors the + * schema of the data. + * + * To illustrate, here is an example of a data schema along with the schema of the statistics + * that would be collected. 
+ * + * Data Schema: + * {{{ + * |-- a: struct (nullable = true) + * | |-- b: struct (nullable = true) + * | | |-- c: long (nullable = true) + * }}} + * + * Collected Statistics: + * {{{ + * |-- stats: struct (nullable = true) + * | |-- numRecords: long (nullable = false) + * | |-- minValues: struct (nullable = false) + * | | |-- a: struct (nullable = false) + * | | | |-- b: struct (nullable = false) + * | | | | |-- c: long (nullable = true) + * | |-- maxValues: struct (nullable = false) + * | | |-- a: struct (nullable = false) + * | | | |-- b: struct (nullable = false) + * | | | | |-- c: long (nullable = true) + * | |-- nullCount: struct (nullable = false) + * | | |-- a: struct (nullable = false) + * | | | |-- b: struct (nullable = false) + * | | | | |-- c: long (nullable = true) + * }}} + */ +trait StatisticsCollection extends DeltaLogging { + protected def spark: SparkSession + /** The schema of the target table of this statistics collection. */ + def tableSchema: StructType + /** + * The output attributes (`outputAttributeSchema`) that are replaced with table schema with + * the physical mapping information. + * NOTE: The partition columns' definitions are not included in this schema. + */ + def outputTableStatsSchema: StructType + /** + * The schema of the output attributes of the write queries that needs to collect statistics. + * The partition columns' definitions are not included in this schema. + */ + def outputAttributeSchema: StructType + /** The statistic indexed column specification of the target delta table. */ + val statsColumnSpec: DeltaStatsColumnSpec + /** The column mapping mode of the target delta table. */ + def columnMappingMode: DeltaColumnMappingMode + + protected def protocol: Protocol + + lazy val deletionVectorsSupported = protocol.isFeatureSupported(DeletionVectorsTableFeature) + + private def effectiveSchema: StructType = if (statsColumnSpec.numIndexedColsOpt.isDefined) { + outputTableStatsSchema + } else { + tableSchema + } + + private lazy val explodedDataSchemaNames: Seq[String] = + SchemaMergingUtils.explodeNestedFieldNames(outputAttributeSchema) + + /** + * statCollectionPhysicalSchema is the schema that is composed of all the columns that have the + * stats collected with our current table configuration. + */ + lazy val statCollectionPhysicalSchema: StructType = + getIndexedColumns(explodedDataSchemaNames, statsColumnSpec, effectiveSchema, columnMappingMode) + + /** + * statCollectionLogicalSchema is the logical schema that is composed of all the columns that have + * the stats collected with our current table configuration. + */ + lazy val statCollectionLogicalSchema: StructType = + getIndexedColumns(explodedDataSchemaNames, statsColumnSpec, effectiveSchema, NoMapping) + + /** + * Traverses the [[statisticsSchema]] for the provided [[statisticsColumn]] + * and applies [[function]] to leaves. + * + * Note, for values that are outside the domain of the partial function we keep the original + * column. If the caller wants to drop the column needs to explicitly return None. 
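+ *
+ * A minimal usage sketch, assuming `nullCountSchema` and `nullCountCol` are a parsed
+ * nullCount struct schema and column (as in [[updateStatsToWideBounds]]):
+ * {{{
+ *   // Replace every leaf null count with zero while keeping the nested structure.
+ *   val zeroedNullCounts = applyFuncToStatisticsColumn(nullCountSchema, nullCountCol) {
+ *     case (_, field) if field.dataType == LongType => Some(lit(0L))
+ *   }
+ * }}}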
+ */ + def applyFuncToStatisticsColumn( + statisticsSchema: StructType, + statisticsColumn: Column)( + function: PartialFunction[(Column, StructField), Option[Column]]): Seq[Column] = { + statisticsSchema.flatMap { + case StructField(name, s: StructType, _, _) => + val column = statisticsColumn.getItem(name) + applyFuncToStatisticsColumn(s, column)(function) match { + case colSeq if colSeq.nonEmpty => Some(struct(colSeq: _*) as name) + case _ => None + } + + case structField@StructField(name, _, _, _) => + val column = statisticsColumn.getItem(name) + function.lift(column, structField).getOrElse(Some(column)).map(_.as(name)) + } + } + + /** + * Sets the TIGHT_BOUNDS column to false and converts the logical nullCount + * to a tri-state nullCount. The nullCount states are the following: + * 1) For "all-nulls" columns we set the physical nullCount which is equal to the + * physical numRecords. + * 2) "no-nulls" columns remain unchanged, i.e. zero nullCount is the same for both + * physical and logical representations. + * 3) For "some-nulls" columns, we leave the existing value. In files with wide bounds, + * the nullCount in SOME_NULLs columns is considered unknown. + * + * The file's state can transition back to tight when statistics are recomputed. In that case, + * TIGHT_BOUNDS is set back to true and nullCount back to the logical value. + * + * Note, this function gets as input parsed statistics and returns a json document + * similarly to allFiles. To further match the behavior of allFiles we always return + * a column named `stats` instead of statsColName. + * + * @param withStats A dataFrame of actions with parsed statistics. + * @param statsColName The name of the parsed statistics column. + */ + def updateStatsToWideBounds(withStats: DataFrame, statsColName: String): DataFrame = { + val dvCardinalityCol = coalesce(col("deletionVector.cardinality"), lit(0)) + val physicalNumRecordsCol = col(s"$statsColName.$NUM_RECORDS") + val logicalNumRecordsCol = physicalNumRecordsCol - dvCardinalityCol + val nullCountCol = col(s"$statsColName.$NULL_COUNT") + val tightBoundsCol = col(s"$statsColName.$TIGHT_BOUNDS") + + // Use the schema of the existing stats column. We only want to modify the existing + // nullCount stats. Note, when the column mapping mode is enabled, the schema uses + // the physical column names, not the logical names. + val nullCountSchema = withStats.schema + .apply(statsColName).dataType.asInstanceOf[StructType] + .apply(NULL_COUNT).dataType.asInstanceOf[StructType] + + // When bounds are tight and we are about to transition to wide, store the physical null count + // for ALL_NULLs columns. + val nullCountColSeq = applyFuncToStatisticsColumn(nullCountSchema, nullCountCol) { + case (c, _) => + val allNullTightBounds = tightBoundsCol && (c === logicalNumRecordsCol) + Some(when(allNullTightBounds, physicalNumRecordsCol).otherwise(c)) + } + + val allStatCols = ALL_STAT_FIELDS.map { + case f if f == TIGHT_BOUNDS => lit(false).as(TIGHT_BOUNDS) + case f if f == NULL_COUNT => struct(nullCountColSeq: _*).as(NULL_COUNT) + case f => col(s"${statsColName}.${f}") + } + + // This may be very expensive because it is rewriting JSON. + withStats + .withColumn("stats", when(col(statsColName).isNotNull, to_json(struct(allStatCols: _*)))) + .drop(col(Checkpoints.STRUCT_STATS_COL_NAME)) // Note: does not always exist. + } + + /** + * Returns a struct column that can be used to collect statistics for the current + * schema of the table. 
+ * The types we keep stats on must be consistent with DataSkippingReader.SkippingEligibleLiteral. + * If a column is missing from dataSchema (which will be filled with nulls), we will only + * collect the NULL_COUNT stats for it as the number of rows. + */ + lazy val statsCollector: Column = { + val stringPrefix = + spark.sessionState.conf.getConf(DeltaSQLConf.DATA_SKIPPING_STRING_PREFIX_LENGTH) + + // On file initialization/stat recomputation TIGHT_BOUNDS is always set to true + val tightBoundsColOpt = + Option.when(deletionVectorsSupported && + !spark.sessionState.conf.getConf(DeltaSQLConf.TIGHT_BOUND_COLUMN_ON_FILE_INIT_DISABLED)) { + lit(true).as(TIGHT_BOUNDS) + } + + val statCols = Seq( + count(new Column("*")) as NUM_RECORDS, + collectStats(MIN, statCollectionPhysicalSchema) { + // Truncate string min values as necessary + case (c, SkippingEligibleDataType(StringType), true) => + substring(min(c), 0, stringPrefix) + + // Collect all numeric min values + case (c, SkippingEligibleDataType(_), true) => + min(c) + }, + collectStats(MAX, statCollectionPhysicalSchema) { + // Truncate and pad string max values as necessary + case (c, SkippingEligibleDataType(StringType), true) => + val udfTruncateMax = + DeltaUDF.stringFromString(StatisticsCollection.truncateMaxStringAgg(stringPrefix)_) + udfTruncateMax(max(c)) + + // Collect all numeric max values + case (c, SkippingEligibleDataType(_), true) => + max(c) + }, + collectStats(NULL_COUNT, statCollectionPhysicalSchema) { + case (c, _, true) => sum(when(c.isNull, 1).otherwise(0)) + case (_, _, false) => count(new Column("*")) + }) ++ tightBoundsColOpt + + struct(statCols: _*).as("stats") + } + + + /** Returns schema of the statistics collected. */ + lazy val statsSchema: StructType = { + // In order to get the Delta min/max stats schema from table schema, we do 1) replace field + // name with physical name 2) set nullable to true 3) only keep stats eligible fields + // 4) omits metadata in table schema as Delta stats schema does not need the metadata + def getMinMaxStatsSchema(schema: StructType): Option[StructType] = { + val fields = schema.fields.flatMap { + case f@StructField(_, dataType: StructType, _, _) => + getMinMaxStatsSchema(dataType).map { newDataType => + StructField(DeltaColumnMapping.getPhysicalName(f), newDataType) + } + case f@StructField(_, SkippingEligibleDataType(dataType), _, _) => + Some(StructField(DeltaColumnMapping.getPhysicalName(f), dataType)) + case _ => None + } + if (fields.nonEmpty) Some(StructType(fields)) else None + } + + // In order to get the Delta null count schema from table schema, we do 1) replace field name + // with physical name 2) set nullable to true 3) use LongType for all fields + // 4) omits metadata in table schema as Delta stats schema does not need the metadata + def getNullCountSchema(schema: StructType): Option[StructType] = { + val fields = schema.fields.flatMap { + case f@StructField(_, dataType: StructType, _, _) => + getNullCountSchema(dataType).map { newDataType => + StructField(DeltaColumnMapping.getPhysicalName(f), newDataType) + } + case f: StructField => + Some(StructField(DeltaColumnMapping.getPhysicalName(f), LongType)) + } + if (fields.nonEmpty) Some(StructType(fields)) else None + } + + val minMaxStatsSchemaOpt = getMinMaxStatsSchema(statCollectionPhysicalSchema) + val nullCountSchemaOpt = getNullCountSchema(statCollectionPhysicalSchema) + val tightBoundsFieldOpt = + Option.when(deletionVectorsSupported)(TIGHT_BOUNDS -> BooleanType) + + val fields = + Array(NUM_RECORDS -> 
LongType) ++ + minMaxStatsSchemaOpt.map(MIN -> _) ++ + minMaxStatsSchemaOpt.map(MAX -> _) ++ + nullCountSchemaOpt.map(NULL_COUNT -> _) ++ + tightBoundsFieldOpt + + StructType(fields.map { + case (name, dataType) => StructField(name, dataType) + }) + } + + /** + * Recursively walks the given schema, constructing an expression to calculate + * multiple statistics that mirrors structure of the data. When `function` is + * defined for a given column, it return value is added to statistics structure. + * When `function` is not defined, that column is skipped. + * + * @param name The name of the top level column for this statistic (i.e. minValues). + * @param schema The schema of the data to collect statistics from. + * @param function A partial function that is passed a tuple of (column, metadata about that + * column, a flag that indicates whether the column is in the data schema). Based + * on the metadata and flag, the function can decide if the given statistic should + * be collected on the column by returning the correct aggregate expression. + * @param includeAllColumns should statistics all the columns be included? + */ + private def collectStats( + name: String, + schema: StructType, + includeAllColumns: Boolean = false)( + function: PartialFunction[(Column, StructField, Boolean), Column]): Column = { + + def collectStats( + schema: StructType, + parent: Option[Column], + parentFields: Seq[String], + function: PartialFunction[(Column, StructField, Boolean), Column]): Seq[Column] = { + schema.flatMap { + case f @ StructField(name, s: StructType, _, _) => + val column = parent.map(_.getItem(name)) + .getOrElse(new Column(UnresolvedAttribute.quoted(name))) + val stats = collectStats(s, Some(column), parentFields :+ name, function) + if (stats.nonEmpty) { + Some(struct(stats: _*) as DeltaColumnMapping.getPhysicalName(f)) + } else { + None + } + case f @ StructField(name, _, _, _) => + val fieldPath = UnresolvedAttribute(parentFields :+ name).name + val column = parent.map(_.getItem(name)) + .getOrElse(new Column(UnresolvedAttribute.quoted(name))) + // alias the column with its physical name + // Note: explodedDataSchemaNames comes from dataSchema. In the read path, dataSchema comes + // from the table's metadata.dataSchema, which is the same as tableSchema. In the + // write path, dataSchema comes from the DataFrame schema. We then assume + // TransactionWrite.writeFiles has normalized dataSchema, and + // TransactionWrite.getStatsSchema has done the column mapping for tableSchema and + // dropped the partition columns for both dataSchema and tableSchema. + function.lift((column, f, explodedDataSchemaNames.contains(fieldPath))). + map(_.as(DeltaColumnMapping.getPhysicalName(f))) + } + } + + val stats = collectStats(schema, None, Nil, function) + if (stats.nonEmpty) { + struct(stats: _*).as(name) + } else { + lit(null).as(name) + } + } +} + +/** + * Specifies the set of columns to be used for stats collection on a table. + * The `deltaStatsColumnNamesOpt` has higher priority than `numIndexedColsOpt`. Thus, if + * `deltaStatsColumnNamesOpt` is not None, StatisticsCollection would only collects file statistics + * for all columns inside it. Otherwise, `numIndexedColsOpt` is used. 
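+ *
+ * For example (illustrative values only):
+ * {{{
+ *   // Collect stats only on the named columns `a.b` and `c`:
+ *   DeltaStatsColumnSpec(
+ *     Some(Seq(UnresolvedAttribute(Seq("a", "b")), UnresolvedAttribute(Seq("c")))), None)
+ *   // Collect stats on the first 32 leaf columns of the table schema:
+ *   DeltaStatsColumnSpec(None, Some(32))
+ * }}}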
+ */ +case class DeltaStatsColumnSpec( + deltaStatsColumnNamesOpt: Option[Seq[UnresolvedAttribute]], + numIndexedColsOpt: Option[Int]) { + require(deltaStatsColumnNamesOpt.isEmpty || numIndexedColsOpt.isEmpty) +} + +object StatisticsCollection extends DeltaCommand { + /** + * The SQL grammar already includes a `multipartIdentifierList` rule for parsing a string into a + * list of multi-part identifiers. We just expose it here, with a custom parser and AstBuilder. + */ + private class SqlParser extends AbstractSqlParser { + override val astBuilder = new AstBuilder { + override def visitMultipartIdentifierList(ctx: MultipartIdentifierListContext) + : Seq[UnresolvedAttribute] = ParserUtils.withOrigin(ctx) { + ctx.multipartIdentifier.asScala.toSeq.map(typedVisit[Seq[String]]) + .map(new UnresolvedAttribute(_)) + } + } + def parseMultipartIdentifierList(sqlText: String): Seq[UnresolvedAttribute] = { + parse(sqlText) { parser => + astBuilder.visitMultipartIdentifierList(parser.multipartIdentifierList()) + } + } + } + private val parser = new SqlParser + + /** Parses a comma-separated list of column names; returns None if parsing fails. */ + def parseDeltaStatsColumnNames(deltaStatsColNames: String): Option[Seq[UnresolvedAttribute]] = { + // The parser rejects empty lists, so handle that specially here. + if (deltaStatsColNames.trim.isEmpty) return Some(Nil) + try { + Some(parser.parseMultipartIdentifierList(deltaStatsColNames)) + } catch { + case _: ParseException => None + } + } + + /** + * This method is the wrapper method to validates the DATA_SKIPPING_STATS_COLUMNS value of + * metadata. + */ + def validateDeltaStatsColumns(metadata: Metadata): Unit = { + DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS.fromMetaData(metadata).foreach { statsColumns => + StatisticsCollection.validateDeltaStatsColumns( + metadata.dataSchema, metadata.partitionColumns, statsColumns + ) + } + } + + /** + * This method validates that the data type of data skipping column supports data skipping + * based on file statistics. + * @param name The name of the data skipping column for validating data type. + * @param dataType The data type of the data skipping column. + * @param columnPaths The column paths of all valid fields. + */ + private def validateDataSkippingType( + name: String, + dataType: DataType, + columnPaths: ArrayBuffer[String]): Unit = dataType match { + case s: StructType => + s.foreach { field => + validateDataSkippingType(name + "." + field.name, field.dataType, columnPaths) + } + case SkippingEligibleDataType(_) => columnPaths.append(name) + case _ => + throw new DeltaIllegalArgumentException( + errorClass = "DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_TYPE", + messageParameters = Array(name, dataType.toString)) + } + + /** + * This method validates whether the DATA_SKIPPING_STATS_COLUMNS value satisfies following + * conditions: + * 1. Delta statistics columns must not be partitioned column. + * 2. Delta statistics column must exist in delta table's schema. + * 3. Delta statistics columns must be data skipping type. + */ + def validateDeltaStatsColumns( + schema: StructType, partitionColumns: Seq[String], deltaStatsColumnsConfigs: String): Unit = { + val partitionColumnSet = partitionColumns.map(_.toLowerCase(Locale.ROOT)).toSet + val visitedColumns = ArrayBuffer.empty[String] + parseDeltaStatsColumnNames(deltaStatsColumnsConfigs).foreach { columns => + columns.foreach { columnAttribute => + val columnFullPath = columnAttribute.nameParts + // Delta statistics columns must not be partitioned column. 
+ if (partitionColumnSet.contains(columnAttribute.name.toLowerCase(Locale.ROOT))) { + throw new DeltaIllegalArgumentException( + errorClass = "DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_PARTITIONED_COLUMN", + messageParameters = Array(columnAttribute.name)) + } + // Delta statistics column must exist in delta table's schema. + SchemaUtils.findColumnPosition(columnFullPath, schema) + // Delta statistics columns must be data skipping type. + val (prefixPath, columnName) = columnFullPath.splitAt(columnFullPath.size - 1) + transformColumnsStructs(schema, Some(columnName.head)) { + case (`prefixPath`, struct @ StructType(_), _) => + val columnField = struct(columnName.head) + validateDataSkippingType(columnAttribute.name, columnField.dataType, visitedColumns) + struct + case (_, s: StructType, _) => s + } + } + } + val duplicatedColumnNames = visitedColumns + .groupBy(identity) + .collect { case (attribute, occurrences) if occurrences.size > 1 => attribute } + .toSeq + if (duplicatedColumnNames.size > 0) { + throw new DeltaIllegalArgumentException( + errorClass = "DELTA_DUPLICATE_DATA_SKIPPING_COLUMNS", + messageParameters = Array(duplicatedColumnNames.mkString(",")) + ) + } + } + + /** + * Removes the dropped columns from delta statistics column list inside + * DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS. + * Note: This method is matching the logical name of tables with the columns inside + * DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS. + */ + def dropDeltaStatsColumns( + metadata: Metadata, + columnsToDrop: Seq[Seq[String]]): Map[String, String] = { + if (columnsToDrop.isEmpty) return Map.empty[String, String] + val deltaStatsColumnSpec = configuredDeltaStatsColumnSpec(metadata) + deltaStatsColumnSpec.deltaStatsColumnNamesOpt.map { deltaColumnsNames => + val droppedColumnSet = columnsToDrop.toSet + val deltaStatsColumnStr = deltaColumnsNames + .map(_.nameParts) + .filterNot { attributeNameParts => + droppedColumnSet.filter { droppedColumnParts => + val commonPrefix = droppedColumnParts.zip(attributeNameParts) + .takeWhile { case (left, right) => left == right } + .size + commonPrefix == droppedColumnParts.size + }.nonEmpty + } + .map(columnParts => UnresolvedAttribute(columnParts).name) + .mkString(",") + Map(DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS.key -> deltaStatsColumnStr) + }.getOrElse(Map.empty[String, String]) + } + + /** + * Rename the delta statistics column `oldColumnPath` of DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS + * to `newColumnPath`. + * Note: This method is matching the logical name of tables with the columns inside + * DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS. 
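+ *
+ * For example (illustrative column names): if the table property currently holds "a.b,c" and
+ * column `a` is renamed to `x`, then
+ * {{{
+ *   renameDeltaStatsColumn(metadata, oldColumnPath = Seq("a"), newColumnPath = Seq("x"))
+ * }}}
+ * is expected to return `Map(DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS.key -> "x.b,c")`.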
+ */ + def renameDeltaStatsColumn( + metadata: Metadata, + oldColumnPath: Seq[String], + newColumnPath: Seq[String]): Map[String, String] = { + if (oldColumnPath == newColumnPath) return Map.empty[String, String] + val deltaStatsColumnSpec = configuredDeltaStatsColumnSpec(metadata) + deltaStatsColumnSpec.deltaStatsColumnNamesOpt.map { deltaColumnsNames => + val deltaStatsColumnsPath = deltaColumnsNames + .map(_.nameParts) + .map { attributeNameParts => + val commonPrefix = oldColumnPath.zip(attributeNameParts) + .takeWhile { case (left, right) => left == right } + .size + if (commonPrefix == oldColumnPath.size) { + newColumnPath ++ attributeNameParts.takeRight(attributeNameParts.size - commonPrefix) + } else { + attributeNameParts + } + } + .map(columnParts => UnresolvedAttribute(columnParts).name) + Map( + DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS.key -> deltaStatsColumnsPath.mkString(",") + ) + }.getOrElse(Map.empty[String, String]) + } + + /** Returns the configured set of columns to be used for stats collection on a table */ + def configuredDeltaStatsColumnSpec(metadata: Metadata): DeltaStatsColumnSpec = { + val indexedColNamesOpt = DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS.fromMetaData(metadata) + val numIndexedCols = DeltaConfigs.DATA_SKIPPING_NUM_INDEXED_COLS.fromMetaData(metadata) + indexedColNamesOpt.map { indexedColNames => + DeltaStatsColumnSpec(parseDeltaStatsColumnNames(indexedColNames), None) + }.getOrElse { + DeltaStatsColumnSpec(None, Some(numIndexedCols)) + } + } + + /** + * Convert the logical name of each field to physical name according to the column mapping mode. + */ + private def convertToPhysicalName( + fullPath: String, + field: StructField, + schemaNames: Seq[String], + mappingMode: DeltaColumnMappingMode): StructField = { + // If mapping mode is NoMapping or the dataSchemaName already contains the mapped + // column name, the schema mapping can be skipped. + if (mappingMode == NoMapping || schemaNames.contains(fullPath)) return field + // Get the physical co + val physicalName = field.metadata.getString(COLUMN_MAPPING_PHYSICAL_NAME_KEY) + field.dataType match { + case structType: StructType => + val newDataType = StructType( + structType.map(child => convertToPhysicalName(fullPath, child, schemaNames, mappingMode)) + ) + field.copy(name = physicalName, dataType = newDataType) + case _ => field.copy(name = physicalName) + } + } + + /** + * Generates a filtered data schema for stats collection. + * Note: This method is matching the logical name of tables with the columns inside + * DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS. The output of the filter schema is translated into + * physical name. + * + * @param schemaNames the full name path of all columns inside `schema`. + * @param schema the original data schema. + * @param statsColPaths the specific set of columns to collect stats on. + * @param mappingMode the column mapping mode of this statistics collection. + * @param parentPath the parent column path of `schema`. 
+ * @return filtered schema + */ + private def filterSchema( + schemaNames: Seq[String], + schema: StructType, + statsColPaths: Seq[Seq[String]], + mappingMode: DeltaColumnMappingMode, + parentPath: Seq[String] = Seq.empty): StructType = { + // Find the unique column names at this nesting depth, each with its path remainders (if any) + val cols = statsColPaths.groupBy(_.head).mapValues(_.map(_.tail)) + val newSchema = schema.flatMap { field => + cols.get(field.name).flatMap { paths => + field.dataType match { + case _ if paths.forall(_.isEmpty) => + // Convert full path to lower cases to avoid schema name contains upper case + // characters. + val fullPath = (parentPath :+ field.name).mkString(".").toLowerCase(Locale.ROOT) + Some(convertToPhysicalName(fullPath, field, schemaNames, mappingMode)) + case fieldSchema: StructType => + // Convert full path to lower cases to avoid schema name contains upper case + // characters. + val fullPath = (parentPath :+ field.name).mkString(".").toLowerCase(Locale.ROOT) + val physicalName = if (mappingMode == NoMapping || schemaNames.contains(fullPath)) { + field.name + } else { + field.metadata.getString(COLUMN_MAPPING_PHYSICAL_NAME_KEY) + } + // Recurse into the child fields of this struct. + val newSchema = filterSchema( + schemaNames, + fieldSchema, + paths.filterNot(_.isEmpty), + mappingMode, + parentPath:+ field.name + ) + Some(field.copy(name = physicalName, dataType = newSchema)) + case _ => + // Filter expected a nested field and this isn't nested. No match + None + } + } + } + StructType(newSchema.toArray) + } + + /** + * Computes the set of columns to be used for stats collection on a table. Specific named columns + * take precedence, if provided; otherwise the first numIndexedColsOpt are extracted from the + * schema. + */ + def getIndexedColumns( + schemaNames: Seq[String], + spec: DeltaStatsColumnSpec, + schema: StructType, + mappingMode: DeltaColumnMappingMode): StructType = { + spec.deltaStatsColumnNamesOpt + .map { indexedColNames => + // convert all index columns to lower case characters to avoid user assigning any upper + // case characters. + val indexedColPaths = indexedColNames.map(_.nameParts.map(_.toLowerCase(Locale.ROOT))) + filterSchema(schemaNames, schema, indexedColPaths, mappingMode) + } + .getOrElse { + val numIndexedCols = spec.numIndexedColsOpt.get + if (numIndexedCols < 0) { + schema // negative means don't truncate the schema + } else { + truncateSchema(schema, numIndexedCols)._1 + } + } + } + + /** + * Generates a truncated data schema for stats collection. + * @param schema the original data schema + * @param indexedCols the maximum number of leaf columns to collect stats on + * @return truncated schema and the number of leaf columns in this schema + */ + private def truncateSchema(schema: StructType, indexedCols: Int): (StructType, Int) = { + var accCnt = 0 + var i = 0 + val fields = new ArrayBuffer[StructField]() + while (i < schema.length && accCnt < indexedCols) { + val field = schema.fields(i) + val newField = field match { + case StructField(name, st: StructType, nullable, metadata) => + val (newSt, cnt) = truncateSchema(st, indexedCols - accCnt) + accCnt += cnt + StructField(name, newSt, nullable, metadata) + case f => + accCnt += 1 + f + } + i += 1 + fields += newField + } + (StructType(fields.toSeq), accCnt) + } + + /** + * Compute the AddFile entries with delta statistics entries by aggregating the data skipping + * columns of each parquet file. 
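+ *
+ * The recomputed `stats` string on each returned [[AddFile]] is the JSON form of the struct
+ * produced by the transaction's `statsCollector`, e.g. (illustrative values):
+ * {{{
+ *   {"numRecords":3,"minValues":{"id":1},"maxValues":{"id":7},"nullCount":{"id":0}}
+ * }}}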
+ */ + private def computeNewAddFiles( + deltaLog: DeltaLog, + txn: OptimisticTransaction, + files: Seq[AddFile]): Array[AddFile] = { + val dataPath = deltaLog.dataPath + val pathToAddFileMap = generateCandidateFileMap(dataPath, files) + val persistentDVsReadable = DeletionVectorUtils.deletionVectorsReadable(txn.snapshot) + // Throw error when the table contains DVs, because existing method of stats + // recomputation doesn't work on tables with DVs. It needs to take into consideration of + // DV files (TODO). + if (persistentDVsReadable) { + throw DeltaErrors.statsRecomputeNotSupportedOnDvTables() + } + val fileDataFrame = deltaLog + .createDataFrame(txn.snapshot, addFiles = files, isStreaming = false) + .withColumn("path", col("_metadata.file_path")) + val newStats = fileDataFrame.groupBy(col("path")).agg(to_json(txn.statsCollector)) + newStats.collect().map { r => + val add = getTouchedFile(dataPath, r.getString(0), pathToAddFileMap) + add.copy(dataChange = false, stats = r.getString(1)) + } + } + + /** + * Recomputes statistics for a Delta table. This can be used to compute stats if they were never + * collected or to recompute corrupted statistics. + * @param deltaLog Delta log for the table to update. + * @param predicates Which subset of the data to recompute stats for. Predicates must use only + * partition columns. + * @param fileFilter Filter for which AddFiles to recompute stats for. + */ + def recompute( + spark: SparkSession, + deltaLog: DeltaLog, + catalogTable: Option[CatalogTable], + predicates: Seq[Expression] = Seq(Literal(true)), + fileFilter: AddFile => Boolean = af => true): Unit = { + val txn = deltaLog.startTransaction(catalogTable) + verifyPartitionPredicates(spark, txn.metadata.partitionColumns, predicates) + // Save the current AddFiles that match the predicates so we can update their stats + val files = txn.filterFiles(predicates).filter(fileFilter) + val newAddFiles = computeNewAddFiles(deltaLog, txn, files) + txn.commit(newAddFiles, ComputeStats(predicates)) + } + + /** + * Helper method to truncate the input string `x` to the given `prefixLen` length, while also + * appending the unicode max character to the end of the truncated string. This ensures that any + * value in this column is less than or equal to the max. + */ + def truncateMaxStringAgg(prefixLen: Int)(x: String): String = { + if (x == null || x.length <= prefixLen) { + x + } else { + // Grab the prefix. We want to append `\ufffd` as a tie-breaker, but that is only safe + // if the character we truncated was smaller. Keep extending the prefix until that + // condition holds, or we run off the end of the string. + // scalastyle:off nonascii + val tieBreaker = '\ufffd' + x.take(prefixLen) + x.substring(prefixLen).takeWhile(_ >= tieBreaker) + tieBreaker + // scalastyle:off nonascii + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsCollectionUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsCollectionUtils.scala new file mode 100644 index 00000000000..1dc1525f948 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsCollectionUtils.scala @@ -0,0 +1,770 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.stats + +import scala.collection.JavaConverters._ +import scala.collection.mutable +import scala.concurrent.duration.Duration +import scala.language.existentials +import scala.util.control.NonFatal + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.{DeltaColumnMapping, DeltaColumnMappingMode, DeltaErrors, IdMapping, NameMapping, NoMapping} +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.DeltaStatistics._ +import org.apache.spark.sql.delta.util.{DeltaFileOperations, JsonUtils} +import org.apache.spark.sql.delta.util.threads.DeltaThreadPool +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import org.apache.parquet.hadoop.ParquetFileReader +import org.apache.parquet.hadoop.metadata.{BlockMetaData, ParquetMetadata} +import org.apache.parquet.io.api.Binary +import org.apache.parquet.schema.LogicalTypeAnnotation._ +import org.apache.parquet.schema.PrimitiveType + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{ArrayType, LongType, MapType, StructField, StructType} +import org.apache.spark.util.SerializableConfiguration + + +object StatsCollectionUtils + extends Logging +{ + + /** A helper function to compute stats of addFiles using StatsCollector. + * + * @param spark The SparkSession used to process data. + * @param conf The Hadoop configuration used to access file system. + * @param dataPath The data path of table, to which these AddFile(s) belong. + * @param addFiles The list of target AddFile(s) to be processed. + * @param numFilesOpt The number of AddFile(s) to process if known. Speeds up the query. + * @param columnMappingMode The column mapping mode of table. + * @param dataSchema The data schema of table. + * @param statsSchema The stats schema to be collected. + * @param ignoreMissingStats Whether to ignore missing stats during computation. + * @param setBoundsToWide Whether to set bounds to wide independently of whether or not + * the files have DVs. + * + * @return A list of AddFile(s) with newly computed stats, please note the existing stats from + * the input addFiles will be ignored regardless. 
+ */ + def computeStats( + spark: SparkSession, + conf: Configuration, + dataPath: Path, + addFiles: Dataset[AddFile], + numFilesOpt: Option[Long], + columnMappingMode: DeltaColumnMappingMode, + dataSchema: StructType, + statsSchema: StructType, + ignoreMissingStats: Boolean = true, + setBoundsToWide: Boolean = false): Dataset[AddFile] = { + + val useMultiThreadedStatsCollection = spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_USE_MULTI_THREADED_STATS_COLLECTION) + val preparedAddFiles = if (useMultiThreadedStatsCollection) { + prepareRDDForMultiThreadedStatsCollection(spark, addFiles, numFilesOpt) + } else { + addFiles + } + + val parquetRebaseMode = + spark.sessionState.conf.getConf(SQLConf.PARQUET_REBASE_MODE_IN_READ) + + val stringTruncateLength = + spark.sessionState.conf.getConf(DeltaSQLConf.DATA_SKIPPING_STRING_PREFIX_LENGTH) + + val statsCollector = StatsCollector(columnMappingMode, dataSchema, statsSchema, + parquetRebaseMode, ignoreMissingStats, Some(stringTruncateLength)) + + val serializableConf = new SerializableConfiguration(conf) + val broadcastConf = spark.sparkContext.broadcast(serializableConf) + + val dataRootDir = dataPath.toString + + import org.apache.spark.sql.delta.implicits._ + preparedAddFiles.mapPartitions { addFileIter => + val defaultFileSystem = new Path(dataRootDir).getFileSystem(broadcastConf.value.value) + if (useMultiThreadedStatsCollection) { + ParallelFetchPool.parallelMap(spark, addFileIter.toSeq) { addFile => + computeStatsForFile( + addFile, + dataRootDir, + defaultFileSystem, + broadcastConf.value, + setBoundsToWide, + statsCollector) + }.toIterator + } else { + addFileIter.map { addFile => + computeStatsForFile( + addFile, + dataRootDir, + defaultFileSystem, + broadcastConf.value, + setBoundsToWide, + statsCollector) + } + } + } + } + + /** + * Prepares the underlying RDD of [[addFiles]] for multi-threaded stats collection by splitting + * them up into more partitions if necessary. + * If the number of partitions is too small, not every executor might + * receive a partition, which reduces the achievable parallelism. By increasing the number of + * partitions we can achieve more parallelism. + */ + private def prepareRDDForMultiThreadedStatsCollection( + spark: SparkSession, + addFiles: Dataset[AddFile], + numFilesOpt: Option[Long]): Dataset[AddFile] = { + + val numFiles = numFilesOpt.getOrElse(addFiles.count()) + val currNumPartitions = addFiles.rdd.getNumPartitions + val numFilesPerPartition = spark.sessionState.conf.getConf( + DeltaSQLConf.DELTA_STATS_COLLECTION_NUM_FILES_PARTITION) + + // We should not create more partitions than the cluster can currently handle. 
+ val minNumPartitions = Math.min( + spark.sparkContext.defaultParallelism, + numFiles / numFilesPerPartition + 1).toInt + // Only repartition if it would increase the achievable parallelism + if (currNumPartitions < minNumPartitions) { + addFiles.repartition(minNumPartitions) + } else { + addFiles + } + } + + private def computeStatsForFile( + addFile: AddFile, + dataRootDir: String, + defaultFileSystem: FileSystem, + config: SerializableConfiguration, + setBoundsToWide: Boolean, + statsCollector: StatsCollector): AddFile = { + val path = DeltaFileOperations.absolutePath(dataRootDir, addFile.path) + val fileStatus = if (path.toString.startsWith(dataRootDir)) { + defaultFileSystem.getFileStatus(path) + } else { + path.getFileSystem(config.value).getFileStatus(path) + } + + val (stats, metric) = statsCollector.collect( + ParquetFileReader.readFooter(config.value, fileStatus)) + + if (metric.totalMissingFields > 0 || metric.numMissingTypes > 0) { + logWarning( + s"StatsCollection of file `$path` misses fields/types: ${JsonUtils.toJson(metric)}") + } + + val statsWithTightBoundsCol = { + val hasDeletionVector = + addFile.deletionVector != null && !addFile.deletionVector.isEmpty + stats + (TIGHT_BOUNDS -> !(setBoundsToWide || hasDeletionVector)) + } + + addFile.copy(stats = JsonUtils.toJson(statsWithTightBoundsCol)) + } +} + +object ParallelFetchPool { + val NUM_THREADS_PER_CORE = 10 + val MAX_THREADS = 1024 + + val NUM_THREADS = Math.min( + Runtime.getRuntime.availableProcessors() * NUM_THREADS_PER_CORE, MAX_THREADS) + + lazy val threadPool = DeltaThreadPool("stats-collection", NUM_THREADS) + def parallelMap[T, R]( + spark: SparkSession, + items: Iterable[T])( + f: T => R): Iterable[R] = threadPool.parallelMap(spark, items)(f) +} + +/** + * A helper class to collect stats of parquet data files for Delta table and its equivalent (tables + * that can be converted into Delta table like Parquet/Iceberg table). + * + * @param dataSchema The data schema from table metadata, which is the logical schema with logical + * to physical mapping per schema field. It is used to map statsSchema to parquet + * metadata. + * @param statsSchema The schema of stats to be collected, statsSchema should follow the physical + * schema and must be generated by StatisticsCollection. + * @param parquetRebaseMode The parquet rebase mode used to parse date and timestamp. + * @param ignoreMissingStats Indicate whether to return partial result by ignoring missing stats + * or throw an exception. + * @param stringTruncateLength The optional max length of string stats to be truncated into. 
+ * + * Scala Example: + * {{{ + * import org.apache.spark.sql.delta.stats.StatsCollector + * + * val stringTruncateLength = + * spark.sessionState.conf.getConf(DeltaSQLConf.DATA_SKIPPING_STRING_PREFIX_LENGTH) + * + * val statsCollector = StatsCollector( + * snapshot.metadata.columnMappingMode, snapshot.metadata.dataSchema, snapshot.statsSchema, + * ignoreMissingStats = false, Some(stringTruncateLength)) + * + * val filesWithStats = snapshot.allFiles.map { file => + * val path = DeltaFileOperations.absolutePath(dataPath, file.path) + * val fileSystem = path.getFileSystem(hadoopConf) + * val fileStatus = fileSystem.listStatus(path).head + * + * val footer = ParquetFileReader.readFooter(hadoopConf, fileStatus) + * val (stats, _) = statsCollector.collect(footer) + * file.copy(stats = JsonUtils.toJson(stats)) + * } + * }}} + */ +abstract class StatsCollector( + dataSchema: StructType, + statsSchema: StructType, + parquetRebaseMode: String, + ignoreMissingStats: Boolean, + stringTruncateLength: Option[Int]) + extends Serializable +{ + + final val NUM_MISSING_TYPES = "numMissingTypes" + + /** + * Used to report number of missing fields per supported type and number of missing unsupported + * types in the collected statistics, currently the statistics collection supports 4 types of + * stats: NUM_RECORDS, MAX, MIN, NULL_COUNT. + * + * @param numMissingMax The number of missing fields for MAX + * @param numMissingMin The number of missing fields for MIN + * @param numMissingNullCount The number of missing fields for NULL_COUNT + * @param numMissingTypes The number of unsupported type being requested. + */ + case class StatsCollectionMetrics( + numMissingMax: Long, + numMissingMin: Long, + numMissingNullCount: Long, + numMissingTypes: Long) { + + val totalMissingFields: Long = Seq(numMissingMax, numMissingMin, numMissingNullCount).sum + } + + object StatsCollectionMetrics { + def apply(missingFieldCounts: Map[String, Long]): StatsCollectionMetrics = { + StatsCollectionMetrics( + missingFieldCounts.getOrElse(MAX, 0L), + missingFieldCounts.getOrElse(MIN, 0L), + missingFieldCounts.getOrElse(NULL_COUNT, 0L), + missingFieldCounts.getOrElse(NUM_MISSING_TYPES, 0L)) + } + } + + /** + * A list of schema physical path and corresponding struct field of leaf fields. Beside primitive + * types, Map and Array (instead of their sub-columns) are also treated as leaf fields since we + * only compute null count of them, and null is counted based on themselves instead of sub-fields. + */ + protected lazy val schemaPhysicalPathAndSchemaField: Seq[(Seq[String], StructField)] = { + def explode(schema: StructType): Seq[(Seq[String], StructField)] = { + schema.flatMap { field => + val physicalName = DeltaColumnMapping.getPhysicalName(field) + field.dataType match { + case s: StructType => + explode(s).map { case (path, field) => (Seq(physicalName) ++ path, field) } + case _ => (Seq(physicalName), field) :: Nil + } + } + } + explode(dataSchema) + } + + /** + * Returns the map from schema physical field path (field for which to collect stats) to the + * parquet metadata column index (where to collect stats). statsSchema generated by + * StatisticsCollection always use physical field paths so physical field paths are the same as + * to the ones used in statsSchema. Child class must implement this method based on delta column + * mapping mode. 
+ */ + def getSchemaPhysicalPathToParquetIndex(blockMetaData: BlockMetaData): Map[Seq[String], Int] + + /** + * Collects the stats from [[ParquetMetadata]] + * + * @param parquetMetadata The metadata of parquet file following physical schema, it contains + * statistics of row groups. + * + * @return A nested Map[String: Any] from requested stats field names to their stats field value + * and [[StatsCollectionMetrics]] counting the number of missing fields/types. + */ + final def collect( + parquetMetadata: ParquetMetadata): (Map[String, Any], StatsCollectionMetrics) = { + val blocks = parquetMetadata.getBlocks.asScala.toSeq + if (blocks.isEmpty) { + return (Map(NUM_RECORDS -> 0L), StatsCollectionMetrics(Map.empty[String, Long])) + } + + val schemaPhysicalPathToParquetIndex = getSchemaPhysicalPathToParquetIndex(blocks.head) + val dateRebaseSpec = DataSourceUtils.datetimeRebaseSpec( + parquetMetadata.getFileMetaData.getKeyValueMetaData.get, parquetRebaseMode) + val dateRebaseFunc = DataSourceUtils.createDateRebaseFuncInRead(dateRebaseSpec.mode, "Parquet") + + val missingFieldCounts = + mutable.Map(MAX -> 0L, MIN -> 0L, NULL_COUNT -> 0L, NUM_MISSING_TYPES -> 0L) + + // Collect the actual stats. + // + // The result of this operation is a tree of maps that matches the structure of the stats + // schema. The stats schema is split by stats type at the top, and each type matches the + // structure of the data schema (can be subset), so we collect per stats type. E.g. the MIN + // values are under MIN.a, MIN.b.c, MIN.b.d etc., and then the MAX values are under MAX.a, + // MAX.b.c etc. Note, we do omit here the tightBounds column and add it at a later stage. + val collectedStats = statsSchema.filter(_.name != TIGHT_BOUNDS).map { + case StructField(NUM_RECORDS, LongType, _, _) => + val numRecords = blocks.map { block => + block.getRowCount + }.sum + NUM_RECORDS -> numRecords + case StructField(MIN, statsTypeSchema: StructType, _, _) => + val (minValues, numMissingFields) = + collectStats(Seq.empty[String], statsTypeSchema, blocks, schemaPhysicalPathToParquetIndex, + ignoreMissingStats)(aggMaxOrMin(dateRebaseFunc, isMax = false)) + missingFieldCounts(MIN) += numMissingFields + MIN -> minValues + case StructField(MAX, statsTypeSchema: StructType, _, _) => + val (maxValues, numMissingFields) = + collectStats(Seq.empty[String], statsTypeSchema, blocks, schemaPhysicalPathToParquetIndex, + ignoreMissingStats)(aggMaxOrMin(dateRebaseFunc, isMax = true)) + missingFieldCounts(MAX) += numMissingFields + MAX -> maxValues + case StructField(NULL_COUNT, statsTypeSchema: StructType, _, _) => + val (nullCounts, numMissingFields) = + collectStats(Seq.empty[String], statsTypeSchema, blocks, schemaPhysicalPathToParquetIndex, + ignoreMissingStats)(aggNullCount) + missingFieldCounts(NULL_COUNT) += numMissingFields + NULL_COUNT -> nullCounts + case field: StructField => + if (ignoreMissingStats) { + missingFieldCounts(NUM_MISSING_TYPES) += 1 + field.name -> Map.empty[String, Any] + } else { + throw new UnsupportedOperationException(s"stats type not supported: ${field.name}") + } + }.toMap + + (collectedStats, StatsCollectionMetrics(missingFieldCounts.toMap)) + } + + /** + * Collects statistics by recurring through the structure of statsSchema and tracks the fields + * that we have seen so far in parentPhysicalPath. + * + * @param parentPhysicalFieldPath The absolute path of parent field with physical names. + * @param statsSchema The schema with physical names to collect stats recursively. 
+ * @param blocks The metadata of Parquet row groups, which contains the raw stats. + * @param schemaPhysicalPathToParquetIndex Map from schema path to parquet metadata column index. + * @param ignoreMissingStats Whether to ignore and log missing fields or throw an exception. + * @param aggFunc The aggregation function used to aggregate stats across row. + * + * @return A nested Map[String: Any] from schema field name to stats value and a count of missing + * fields. + * + * Here is an example of stats: + * + * stats schema: + * | -- id: INT + * | -- person: STRUCT + * | name: STRUCT + * | -- first: STRING + * | -- last: STRING + * | height: LONG + * + * The stats: + * Map( + * "id" -> 1003, + * "person" -> Map( + * "name" -> Map( + * "first" -> "Chris", + * "last" -> "Green" + * ), + * "height" -> 175L + * ) + * ) + */ + private def collectStats( + parentPhysicalFieldPath: Seq[String], + statsSchema: StructType, + blocks: Seq[BlockMetaData], + schemaPhysicalPathToParquetIndex: Map[Seq[String], Int], + ignoreMissingStats: Boolean)( + aggFunc: (Seq[BlockMetaData], Int) => Any): (Map[String, Any], Long) = { + val stats = mutable.Map.empty[String, Any] + var numMissingFields = 0L + statsSchema.foreach { + case StructField(name, dataType: StructType, _, _) => + val (map, numMissingFieldsInSubtree) = + collectStats(parentPhysicalFieldPath :+ name, dataType, blocks, + schemaPhysicalPathToParquetIndex, ignoreMissingStats)(aggFunc) + numMissingFields += numMissingFieldsInSubtree + if (map.nonEmpty) { + stats += name -> map + } + case StructField(name, _, _, _) => + val physicalFieldPath = parentPhysicalFieldPath :+ name + if (schemaPhysicalPathToParquetIndex.contains(physicalFieldPath)) { + try { + val value = aggFunc(blocks, schemaPhysicalPathToParquetIndex(physicalFieldPath)) + // None value means the stats is undefined for this field (e.g., max/min of a field, + // whose values are nulls in all blocks), we use null to be consistent with stats + // generated from SQL. + if (value != None) { + stats += name -> value + } else { + stats += name -> null + } + } catch { + case NonFatal(_) if ignoreMissingStats => numMissingFields += 1L + case exception: Throwable => throw exception + } + } else if (ignoreMissingStats) { + // Physical field path requested by stats is missing in the mapping, so it's missing from + // the parquet metadata. + numMissingFields += 1L + } else { + val columnPath = physicalFieldPath.mkString("[", ", ", "]") + throw DeltaErrors.deltaStatsCollectionColumnNotFound("all", columnPath) + } + } + + (stats.toMap, numMissingFields) + } + + /** + * The aggregation function used to collect the max and min of a column across blocks, + * dateRebaseFunc is used to adapt legacy date. + */ + private def aggMaxOrMin( + dateRebaseFunc: Int => Int, isMax: Boolean)( + blocks: Seq[BlockMetaData], index: Int): Any = { + val columnMetadata = blocks.head.getColumns.get(index) + val primitiveType = columnMetadata.getPrimitiveType + val logicalType = primitiveType.getLogicalTypeAnnotation + // Physical type of timestamp is INT96 in both Parquet and Delta. 
+ if (primitiveType.getPrimitiveTypeName == PrimitiveType.PrimitiveTypeName.INT96 || + logicalType.isInstanceOf[TimestampLogicalTypeAnnotation]) { + throw new UnsupportedOperationException( + s"max/min stats is not supported for timestamp: ${columnMetadata.getPath}") + } + + var aggregatedValue: Any = None + blocks.foreach { block => + val column = block.getColumns.get(index) + val statistics = column.getStatistics + // Skip this block if the column has null for all rows, stats is defined as long as it exists + // in even a single block. + if (statistics.hasNonNullValue) { + val currentValue = if (isMax) statistics.genericGetMax else statistics.genericGetMin + if (currentValue == null) { + throw DeltaErrors.deltaStatsCollectionColumnNotFound("max/min", column.getPath.toString) + } + + if (aggregatedValue == None) { + aggregatedValue = currentValue + } else { + // TODO: check NaN value for floating point columns. + val compareResult = currentValue.asInstanceOf[Comparable[Any]].compareTo(aggregatedValue) + if ((isMax && compareResult > 0) || (!isMax && compareResult < 0)) { + aggregatedValue = currentValue + } + } + } + } + + // All blocks have null stats for this column, returns None to indicate the stats of this + // column is undefined. + if (aggregatedValue == None) return None + + aggregatedValue match { + // String + case bytes: Binary if logicalType.isInstanceOf[StringLogicalTypeAnnotation] => + val rawString = bytes.toStringUsingUTF8 + if (stringTruncateLength.isDefined && rawString.length > stringTruncateLength.get) { + if (isMax) { + // Append tie breakers to assure that any value in this column is less than or equal to + // the max, check the helper function for more details. + StatisticsCollection.truncateMaxStringAgg(stringTruncateLength.get)(rawString) + } else { + rawString.substring(0, stringTruncateLength.get) + } + } else { + rawString + } + // Binary + case _: Binary => + throw new UnsupportedOperationException( + s"max/min stats is not supported for binary other than string: ${columnMetadata.getPath}") + // Date + case date: Integer if logicalType.isInstanceOf[DateLogicalTypeAnnotation] => + DateTimeUtils.toJavaDate(dateRebaseFunc(date)).toString + // Byte, Short, Integer and Long + case intValue @ (_: Integer | _: java.lang.Long) + if logicalType.isInstanceOf[IntLogicalTypeAnnotation] => + logicalType.asInstanceOf[IntLogicalTypeAnnotation].getBitWidth match { + case 8 => intValue.asInstanceOf[Int].toByte + case 16 => intValue.asInstanceOf[Int].toShort + case 32 => intValue.asInstanceOf[Int] + case 64 => intValue.asInstanceOf[Long] + case other => throw new UnsupportedOperationException( + s"max/min stats is not supported for $other-bits Integer: ${columnMetadata.getPath}") + } + // Decimal + case _ if logicalType.isInstanceOf[DecimalLogicalTypeAnnotation] => + throw new UnsupportedOperationException( + s"max/min stats is not supported for decimal: ${columnMetadata.getPath}") + // Integer, Long, Float and Double + case primitive @ (_: Integer | _: java.lang.Long | _: java.lang.Float | _: java.lang.Double) + if logicalType == null => primitive + // Throw an exception on the other unknown types for safety. 
+ case unknown => + throw new UnsupportedOperationException( + s"max/min stats is not supported for ${unknown.getClass.getName} with $logicalType:" + + columnMetadata.getPath.toString) + } + } + + /** The aggregation function used to count null of a column across blocks */ + private def aggNullCount(blocks: Seq[BlockMetaData], index: Int): Any = { + var count = 0L + blocks.foreach { block => + val column = block.getColumns.get(index) + val statistics = column.getStatistics + if (!statistics.isNumNullsSet) { + throw DeltaErrors.deltaStatsCollectionColumnNotFound("nullCount", column.getPath.toString) + } + count += statistics.getNumNulls + } + count.asInstanceOf[Any] + } +} + +object StatsCollector { + def apply( + columnMappingMode: DeltaColumnMappingMode, + dataSchema: StructType, + statsSchema: StructType, + parquetRebaseMode: String, + ignoreMissingStats: Boolean = true, + stringTruncateLength: Option[Int] = None): StatsCollector = { + columnMappingMode match { + case NoMapping | NameMapping => + StatsCollectorNameMapping( + dataSchema, statsSchema, parquetRebaseMode, ignoreMissingStats, stringTruncateLength) + case IdMapping => + StatsCollectorIdMapping( + dataSchema, statsSchema, parquetRebaseMode, ignoreMissingStats, stringTruncateLength) + case _ => + throw new UnsupportedOperationException( + s"$columnMappingMode mapping is currently not supported") + } + } + + private case class StatsCollectorNameMapping( + dataSchema: StructType, + statsSchema: StructType, + parquetRebaseMode: String, + ignoreMissingStats: Boolean, + stringTruncateLength: Option[Int]) + extends StatsCollector( + dataSchema, statsSchema, parquetRebaseMode, ignoreMissingStats, stringTruncateLength) { + + /** + * Maps schema physical field path to parquet metadata column index via parquet metadata column + * path in NoMapping and NameMapping modes + */ + override def getSchemaPhysicalPathToParquetIndex( + blockMetaData: BlockMetaData): Map[Seq[String], Int] = { + val parquetColumnPathToIndex = getParquetColumnPathToIndex(blockMetaData) + columnPathSchemaToParquet.collect { + // Collect mapping of fields in physical schema that actually exist in parquet metadata, + // parquet metadata can miss field due to schema evolution. In case stats collection is + // requested on a column that is missing from parquet metadata, we will catch this in + // collectStats when looking up in this map. + case (schemaPath, parquetPath) if parquetColumnPathToIndex.contains(parquetPath) => + schemaPath -> parquetColumnPathToIndex(parquetPath) + } + } + + /** + * A map from schema field path (with physical names) to parquet metadata column path of schema + * leaf fields with special handling of Array and Map. + * + * Here is an example: + * + * Data Schema (physical name in the parenthesis) + * | -- id (a4def3): INT + * | -- history (23aa42): STRUCT + * | -- cost (23ddb0): DOUBLE + * | -- events (23dda1): ARRAY[STRING] + * | -- info (abb4d2): MAP[STRING, STRING] + * + * Block Metadata: + * Columns: [ [a4def3], [23aa42, 23ddb0], [23ddb0, 23dda1, list, element], + * [abb4d2, key_value, key], [abb4d2, key_value, value] ] + * + * The mapping: + * [a4def3] -> [a4def3] + * [23aa42, 23ddb0] -> [23aa42, 23ddb0] + * [23ddb0, 23dda1] -> [23ddb0, 23dda1, list, element] + * [abb4d2] -> [abb4d2, key_value, key] + */ + private lazy val columnPathSchemaToParquet: Map[Seq[String], Seq[String]] = { + // Parquet metadata column path contains addition keywords for Array and Map. 
Here we only + // support 2 cases below since stats is not available in the other cases: + // 1. Array with non-null elements of primitive types + // 2. Map with key of primitive types + schemaPhysicalPathAndSchemaField.map { + case(path, field) => + field.dataType match { + // Here we don't check array element type and map key type for primitive type since + // parquet metadata column path always points to a primitive column. In other words, + // the type is primitive if the column path can be found in parquet metadata later. + case ArrayType(_, false) => path -> (path ++ Seq("list", "element")) + case MapType(_, _, _) => path -> (path ++ Seq("key_value", "key")) + case _ => path -> path + } + }.toMap + } + + /** + * Returns a map from parquet metadata column path to index. + * + * Here is an example: + * + * Data Schema: + * |-- id : INT + * |-- person : STRUCT + * |-- name: STRING + * |-- phone: INT + * |-- eligible: BOOLEAN + * + * Block Metadata: + * Columns: [ [id], [person, name], [person, phone], [eligible] ] + * + * The mapping: + * [id] -> 0 + * [person, name] -> 1 + * [person, phone] -> 2 + * [eligible] -> 3 + */ + private def getParquetColumnPathToIndex(block: BlockMetaData): Map[Seq[String], Int] = { + block.getColumns.asScala.zipWithIndex.map { + case (column, i) => column.getPath.toArray.toSeq -> i + }.toMap + } + } + + private case class StatsCollectorIdMapping( + dataSchema: StructType, + statsSchema: StructType, + parquetRebaseMode: String, + ignoreMissingStats: Boolean, + stringTruncateLength: Option[Int]) + extends StatsCollector( + dataSchema, statsSchema, parquetRebaseMode, ignoreMissingStats, stringTruncateLength) { + + // Define a FieldId type to better disambiguate between ids and indices in the code + type FieldId = Int + + /** + * Maps schema physical field path to parquet metadata column index via parquet metadata column + * id in IdMapping mode. + */ + override def getSchemaPhysicalPathToParquetIndex( + blockMetaData: BlockMetaData): Map[Seq[String], Int] = { + val parquetColumnIdToIndex = getParquetColumnIdToIndex(blockMetaData) + schemaPhysicalPathToColumnId.collect { + // Collect mapping of fields in physical schema that actually exist in parquet metadata, + // parquet metadata can miss field due to schema evolution and non-primitive types like Map + // and Array. In case stats collection is requested on a column that is missing from + // parquet metadata, we will catch this in collectStats when looking up in this map. + case (schemaPath, columnId) if parquetColumnIdToIndex.contains(columnId) => + schemaPath -> parquetColumnIdToIndex(columnId) + } + } + + /** + * A map from schema field path (with physical names) to parquet metadata column id of schema + * leaf fields. + * + * Here is an example: + * + * Data Schema (physical name, id in the parenthesis) + * | -- id (a4def3, 1): INT + * | -- history (23aa42, 2): STRUCT + * | -- cost (23ddb0, 3): DOUBLE + * | -- events (23dda1, 4): ARRAY[STRING] + * | -- info (abb4d2, 5): MAP[STRING, STRING] + * + * The mapping: + * [a4def3] -> 1 + * [23aa42, 23ddb0] -> 3 + * [23ddb0, 23dda1] -> 4 + * [abb4d2] -> 5 + */ + private lazy val schemaPhysicalPathToColumnId: Map[Seq[String], FieldId] = { + schemaPhysicalPathAndSchemaField.map { + case (path, field) => path -> DeltaColumnMapping.getColumnId(field) + }.toMap + } + + /** + * Returns a map from parquet metadata column id to column index by skipping columns without id. + * E.g., subfields of ARRAY and MAP don't have id assigned. 
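The IdMapping lookup above is just a composition of two maps. The standalone sketch below (illustrative values reusing the physical names and ids from the example in this file) shows how paths whose field id is absent from the Parquet footer simply drop out of the result, which `collectStats` later treats as a missing stat.

```scala
// Sketch: compose (physical path -> field id) with (field id -> parquet column index).
object IdMappingLookupExample {
  val pathToFieldId: Map[Seq[String], Int] =
    Map(Seq("a4def3") -> 1, Seq("23aa42", "23ddb0") -> 3, Seq("abb4d2") -> 5)
  val fieldIdToParquetIndex: Map[Int, Int] = Map(1 -> 0, 3 -> 1) // id 5 missing from the footer

  val pathToParquetIndex: Map[Seq[String], Int] = pathToFieldId.collect {
    case (path, id) if fieldIdToParquetIndex.contains(id) => path -> fieldIdToParquetIndex(id)
  }

  def main(args: Array[String]): Unit =
    // Map(List(a4def3) -> 0, List(23aa42, 23ddb0) -> 1); the path for id 5 is skipped.
    println(pathToParquetIndex)
}
```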
+ * + * Here is an example: + * + * Data Schema (id in the parenthesis): + * |-- id (1) : INT + * |-- person (2) : STRUCT + * |-- names (3) : ARRAY[STRING] + * |-- phones (4) : MAP[STRING, INT] + * |-- eligible (5) : BOOLEAN + * + * Block Metadata (id in the parenthesis): + * Columns: [ [id](1), [person, names, list, element](null), + * [person, phones, key_value, key](null), [person, phones, key_value, value](null), + * [eligible](5) ] + * + * The mapping: 1 -> 0, 5 -> 4 + */ + private def getParquetColumnIdToIndex(block: BlockMetaData): Map[FieldId, Int] = { + block.getColumns.asScala.zipWithIndex.collect { + // Id of parquet metadata column is not guaranteed, subfields of Map and Array don't have + // id assigned. In case id is missing and null, we skip the parquet metadata column here + // and will catch this in collectStats when looking up in this map. + case (column, i) if column.getPrimitiveType.getId != null => + column.getPrimitiveType.getId.intValue() -> i + }.toMap + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsProvider.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsProvider.scala new file mode 100644 index 00000000000..df1233068c2 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsProvider.scala @@ -0,0 +1,104 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.stats + +import org.apache.spark.sql.Column + +/** + * A helper class that provides the functionalities to create [[DataSkippingPredicate]] with + * the statistics for a column. + * + * @param getStat A function that returns an expression to access the given statistics for a + * specific column, or None if that stats column does not exist. For example, + * [[DataSkippingReaderBase.getStatsColumnOpt]] can be used here. + */ + +private [stats] class StatsProvider(getStat: StatsColumn => Option[Column]) { + /** + * Given a [[StatsColumn]], which represents a stats column for a table column, returns a + * [[DataSkippingPredicate]] which includes a data skipping expression (the result of running + * `f` on the expression of accessing the given stats) and the stats column (which the data + * skipping expression depends on), or None if the stats column does not exist. + * + * @param statCol A stats column (MIN, MAX, etc) for a table column name. + * @param f A user-provided function that returns a data skipping expression given the expression + * to access the statistics for `statCol`. + * @return A [[DataSkippingPredicate]] with a data skipping expression, or None if the given + * stats column does not exist. + */ + def getPredicateWithStatsColumn(statCol: StatsColumn) + (f: Column => Column): Option[DataSkippingPredicate] = { + for (stat <- getStat(statCol)) + yield DataSkippingPredicate(f(stat), statCol) + } + + /** A variant of [[getPredicateWithStatsColumn]] with two stats columns. 
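The for-comprehensions in `StatsProvider` are plain Option composition: a skipping predicate is only produced when every stats column it needs resolves. A minimal standalone sketch of that idea (strings stand in for Spark `Column`s; all names are illustrative):

```scala
// Sketch: a predicate exists only if all of its stats columns exist.
object OptionalPredicateExample {
  type Expr = String // stand-in for Spark's Column
  val availableStats: Map[String, Expr] =
    Map("minValues.a" -> "stats.minValues.a", "maxValues.a" -> "stats.maxValues.a")

  def getStat(statCol: String): Option[Expr] = availableStats.get(statCol)

  // Mirrors the two-column variant: both stats must resolve for the predicate to exist.
  def betweenPredicate(minCol: String, maxCol: String, literal: Int): Option[Expr] =
    for (minE <- getStat(minCol); maxE <- getStat(maxCol))
      yield s"$minE <= $literal AND $literal <= $maxE"

  def main(args: Array[String]): Unit = {
    println(betweenPredicate("minValues.a", "maxValues.a", 5)) // Some(...)
    println(betweenPredicate("minValues.b", "maxValues.b", 5)) // None: stats missing for column b
  }
}
```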
*/ + def getPredicateWithStatsColumns(statCol1: StatsColumn, statCol2: StatsColumn) + (f: (Column, Column) => Column): Option[DataSkippingPredicate] = { + for (stat1 <- getStat(statCol1); stat2 <- getStat(statCol2)) + yield DataSkippingPredicate(f(stat1, stat2), statCol1, statCol2) + } + + /** A variant of [[getPredicateWithStatsColumn]] with three stats columns. */ + def getPredicateWithStatsColumns( + statCol1: StatsColumn, + statCol2: StatsColumn, + statCol3: StatsColumn) + (f: (Column, Column, Column) => Column): Option[DataSkippingPredicate] = { + for (stat1 <- getStat(statCol1); stat2 <- getStat(statCol2); stat3 <- getStat(statCol3)) + yield DataSkippingPredicate(f(stat1, stat2, stat3), statCol1, statCol2, statCol3) + } + + /** + * Given a path to a table column and a stat type (MIN, MAX, etc.), returns a + * [[DataSkippingPredicate]] which includes a data skipping expression (the result of running + * `f` on the expression of accessing the given stats) and the stats column (which the data + * skipping expression depends on), or None if the stats column does not exist. + * + * @param pathToColumn The name of a column whose stats are to be accessed. + * @param statType The type of stats to access (MIN, MAX, etc.) + * @param f A user-provided function that returns a data skipping expression given the expression + * to access the statistics for `statCol`. + * @return A [[DataSkippingPredicate]] with a data skipping expression, or None if the given + * stats column does not exist. + */ + def getPredicateWithStatType(pathToColumn: Seq[String], statType: String) + (f: Column => Column): Option[DataSkippingPredicate] = { + getPredicateWithStatsColumn(StatsColumn(statType, pathToColumn))(f) + } + + /** A variant of [[getPredicateWithStatType]] with two stat types. */ + def getPredicateWithStatTypes(pathToColumn: Seq[String], statType1: String, statType2: String) + (f: (Column, Column) => Column): Option[DataSkippingPredicate] = { + getPredicateWithStatsColumns( + StatsColumn(statType1, pathToColumn), + StatsColumn(statType2, pathToColumn))(f) + } + + /** A variant of [[getPredicateWithStatType]] with three stat types. */ + def getPredicateWithStatTypes( + pathToColumn: Seq[String], + statType1: String, + statType2: String, + statType3: String) + (f: (Column, Column, Column) => Column): Option[DataSkippingPredicate] = { + getPredicateWithStatsColumns( + StatsColumn(statType1, pathToColumn), + StatsColumn(statType2, pathToColumn), + StatsColumn(statType3, pathToColumn))(f) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/storage/AzureLogStore.scala b/spark/src/main/scala/org/apache/spark/sql/delta/storage/AzureLogStore.scala new file mode 100644 index 00000000000..89250908967 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/storage/AzureLogStore.scala @@ -0,0 +1,57 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.storage + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkConf + +/** + * LogStore implementation for Azure. + * + * We assume the following from Azure's [[FileSystem]] implementations: + * - Rename without overwrite is atomic. + * - List-after-write is consistent. + * + * Regarding file creation, this implementation: + * - Uses atomic rename when overwrite is false; if the destination file exists or the rename + * fails, throws an exception. + * - Uses create-with-overwrite when overwrite is true. This does not make the file atomically + * visible and therefore the caller must handle partial files. + */ +class AzureLogStore(sparkConf: SparkConf, hadoopConf: Configuration) + extends HadoopFileSystemLogStore(sparkConf, hadoopConf) { + + override def write(path: Path, actions: Iterator[String], overwrite: Boolean = false): Unit = { + writeWithRename(path, actions, overwrite) + } + + override def write( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + writeWithRename(path, actions, overwrite, hadoopConf) + } + + override def invalidateCache(): Unit = {} + + override def isPartialWriteVisible(path: Path): Boolean = true + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): Boolean = true +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/storage/ClosableIterator.scala b/spark/src/main/scala/org/apache/spark/sql/delta/storage/ClosableIterator.scala new file mode 100644 index 00000000000..618972f6489 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/storage/ClosableIterator.scala @@ -0,0 +1,116 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.storage + +import java.io.Closeable + +trait SupportsRewinding[T] extends Iterator[T] { + // Overrides if class supports rewinding the iterator to the beginning efficiently. + def rewind(): Unit +} + +trait ClosableIterator[T] extends Iterator[T] with Closeable { + /** Calls f(this) and always closes the iterator afterwards. */ + def processAndClose[R](f: Iterator[T] => R): R = { + try { + f(this) + } finally { + close() + } + } +} + +object ClosableIterator { + /** + * An implicit class for applying a function to a [[ClosableIterator]] and returning the + * resulting iterator as a [[ClosableIterator]] with the original `close()` method. 
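A small usage sketch of the `ClosableIterator` contract introduced above: `processAndClose` closes the underlying resource whether or not the processing function succeeds (assumes the trait from this patch is on the classpath; the anonymous implementation is illustrative).

```scala
import org.apache.spark.sql.delta.storage.ClosableIterator

object ProcessAndCloseExample {
  def main(args: Array[String]): Unit = {
    var closed = false
    val iter = new ClosableIterator[Int] {
      private val underlying = Iterator(1, 2, 3)
      override def hasNext: Boolean = underlying.hasNext
      override def next(): Int = underlying.next()
      override def close(): Unit = { closed = true }
    }
    val sum = iter.processAndClose(_.sum) // close() runs in the finally block
    println((sum, closed))                // (6,true)
  }
}
```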
+ */ + implicit class IteratorCloseOps[A](val closableIter: ClosableIterator[A]) extends AnyVal { + def withClose[B](f: Iterator[A] => Iterator[B]): ClosableIterator[B] = new ClosableIterator[B] { + private val iter = + try { + f(closableIter) + } catch { + case e: Throwable => + closableIter.close() + throw e + } + override def next(): B = iter.next() + override def hasNext: Boolean = iter.hasNext + override def close(): Unit = closableIter.close() + } + } + + /** + * An implicit class for a `flatMap` implementation that returns a [[ClosableIterator]] + * which (a) closes inner iterators upon reaching their end, and (b) has a `close()` method + * that closes any opened and unclosed inner iterators. + */ + implicit class IteratorFlatMapCloseOp[A](val closableIter: Iterator[A]) extends AnyVal { + def flatMapWithClose[B](f: A => ClosableIterator[B]): ClosableIterator[B] = + new ClosableIterator[B] { + private var iter_curr = + if (closableIter.hasNext) { + f(closableIter.next()) + } else { + null + } + override def next(): B = { + if (!hasNext) { + throw new NoSuchElementException + } + iter_curr.next() + } + @scala.annotation.tailrec + override def hasNext: Boolean = { + if (iter_curr == null) { + false + } + else if (iter_curr.hasNext) { + true + } + else { + iter_curr.close() + if (closableIter.hasNext) { + iter_curr = f(closableIter.next()) + hasNext + } else { + iter_curr = null + false + } + } + } + override def close(): Unit = { + if (iter_curr != null) { + iter_curr.close() + } + } + } + } + + /** + * An implicit class for wrapping an iterator to be a [[ClosableIterator]] with a `close` method + * that does nothing. + */ + implicit class ClosableWrapper[A](val iter: Iterator[A]) extends AnyVal { + def toClosable: ClosableIterator[A] = new ClosableIterator[A] { + override def next(): A = iter.next() + override def hasNext: Boolean = iter.hasNext + override def close(): Unit = () + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/storage/DelegatingLogStore.scala b/spark/src/main/scala/org/apache/spark/sql/delta/storage/DelegatingLogStore.scala new file mode 100644 index 00000000000..e839f55246c --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/storage/DelegatingLogStore.scala @@ -0,0 +1,191 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.storage + +import java.util.Locale + +import scala.collection.mutable + +import org.apache.spark.sql.delta.DeltaErrors +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.SparkEnv + + +/** + * A delegating LogStore used to dynamically resolve LogStore implementation based + * on the scheme of paths. 
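A standalone sketch of the per-scheme resolution this class performs: an explicit per-scheme conf wins, then a built-in default for the scheme, then the HDFS default. The class names below are the defaults named later in this file; the conf map is illustrative.

```scala
object SchemeResolutionExample {
  val perSchemeConf: Map[String, String] = Map.empty // nothing configured explicitly
  val builtInDefaults: Map[String, String] = Map(
    "s3" -> "io.delta.storage.S3SingleDriverLogStore",
    "abfss" -> "io.delta.storage.AzureLogStore")
  val fallback = "io.delta.storage.HDFSLogStore"

  def resolve(scheme: String): String =
    perSchemeConf.get(scheme).orElse(builtInDefaults.get(scheme)).getOrElse(fallback)

  def main(args: Array[String]): Unit = {
    println(resolve("s3"))   // io.delta.storage.S3SingleDriverLogStore
    println(resolve("file")) // io.delta.storage.HDFSLogStore (fallback)
  }
}
```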
+ */ +class DelegatingLogStore(hadoopConf: Configuration) + extends LogStore with DeltaLogging { + + private val sparkConf = SparkEnv.get.conf + + // Map scheme to the corresponding LogStore resolved and created. Accesses to this map need + // synchronization This could be accessed by multiple threads because it is shared through + // shared DeltaLog instances. + private val schemeToLogStoreMap = mutable.Map.empty[String, LogStore] + + private lazy val defaultLogStore = createLogStore(DelegatingLogStore.defaultHDFSLogStoreClassName) + + // Creates a LogStore with given LogStore class name. + private def createLogStore(className: String): LogStore = { + LogStore.createLogStoreWithClassName(className, sparkConf, hadoopConf) + } + + // Create LogStore based on the scheme of `path`. + private def schemeBasedLogStore(path: Path): LogStore = { + val store = Option(path.toUri.getScheme) match { + case Some(origScheme) => + val scheme = origScheme.toLowerCase(Locale.ROOT) + this.synchronized { + if (schemeToLogStoreMap.contains(scheme)) { + schemeToLogStoreMap(scheme) + } else { + // Resolve LogStore class based on the following order: + // 1. Scheme conf if set. + // 2. Defaults for scheme if exists. + // 3. Default. + val logStoreClassNameOpt = LogStore.getLogStoreConfValue( // we look for all viable keys + LogStore.logStoreSchemeConfKey(scheme), sparkConf) + .orElse(DelegatingLogStore.getDefaultLogStoreClassName(scheme)) + val logStore = logStoreClassNameOpt.map(createLogStore(_)).getOrElse(defaultLogStore) + schemeToLogStoreMap += scheme -> logStore + + val actualLogStoreClassName = logStore match { + case lsa: LogStoreAdaptor => s"LogStoreAdapter(${lsa.logStoreImpl.getClass.getName})" + case _ => logStore.getClass.getName + } + logInfo(s"LogStore `$actualLogStoreClassName` is used for scheme `$scheme`") + + logStore + } + } + case _ => defaultLogStore + } + store + } + + def getDelegate(path: Path): LogStore = schemeBasedLogStore(path) + + ////////////////////////// + // Public API Overrides // + ////////////////////////// + + override def read(path: Path): Seq[String] = { + getDelegate(path).read(path) + } + + override def read(path: Path, hadoopConf: Configuration): Seq[String] = { + getDelegate(path).read(path, hadoopConf) + } + + override def readAsIterator(path: Path): ClosableIterator[String] = { + getDelegate(path).readAsIterator(path) + } + + override def readAsIterator(path: Path, hadoopConf: Configuration): ClosableIterator[String] = { + getDelegate(path).readAsIterator(path, hadoopConf) + } + + override def write( + path: Path, + actions: Iterator[String], + overwrite: Boolean): Unit = { + getDelegate(path).write(path, actions, overwrite) + } + + override def write( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + getDelegate(path).write(path, actions, overwrite, hadoopConf) + } + + override def listFrom(path: Path): Iterator[FileStatus] = { + getDelegate(path).listFrom(path) + } + + override def listFrom(path: Path, hadoopConf: Configuration): Iterator[FileStatus] = { + getDelegate(path).listFrom(path, hadoopConf) + } + + override def invalidateCache(): Unit = { + this.synchronized { + schemeToLogStoreMap.foreach { entry => + entry._2.invalidateCache() + } + } + defaultLogStore.invalidateCache() + } + + override def resolvePathOnPhysicalStorage(path: Path): Path = { + getDelegate(path).resolvePathOnPhysicalStorage(path) + } + + override def resolvePathOnPhysicalStorage(path: Path, hadoopConf: Configuration): Path = { + 
getDelegate(path).resolvePathOnPhysicalStorage(path, hadoopConf) + } + + override def isPartialWriteVisible(path: Path): Boolean = { + getDelegate(path).isPartialWriteVisible(path) + } + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): Boolean = { + getDelegate(path).isPartialWriteVisible(path, hadoopConf) + } +} + +object DelegatingLogStore { + + try { + // load any arbitrary delta-storage class to ensure the dependency has been included + classOf[io.delta.storage.LogStore] + } catch { + case e: NoClassDefFoundError => + throw DeltaErrors.missingDeltaStorageJar(e) + } + + /** + * Java LogStore (io.delta.storage) implementations are now the default. + */ + val defaultS3LogStoreClassName = classOf[io.delta.storage.S3SingleDriverLogStore].getName + val defaultAzureLogStoreClassName = classOf[io.delta.storage.AzureLogStore].getName + val defaultHDFSLogStoreClassName = classOf[io.delta.storage.HDFSLogStore].getName + val defaultGCSLogStoreClassName = classOf[io.delta.storage.GCSLogStore].getName + + // Supported schemes with default. + val s3Schemes = Set("s3", "s3a", "s3n") + val azureSchemes = Set("abfs", "abfss", "adl", "wasb", "wasbs") + val gsSchemes = Set("gs") + + // Returns the default LogStore class name for `scheme`. + // None if we do not have a default for it. + def getDefaultLogStoreClassName(scheme: String): Option[String] = { + if (s3Schemes.contains(scheme)) { + return Some(defaultS3LogStoreClassName) + } else if (DelegatingLogStore.azureSchemes(scheme: String)) { + return Some(defaultAzureLogStoreClassName) + } else if (DelegatingLogStore.gsSchemes(scheme: String)) { + return Some(defaultGCSLogStoreClassName) + } + None + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/storage/HDFSLogStore.scala b/spark/src/main/scala/org/apache/spark/sql/delta/storage/HDFSLogStore.scala new file mode 100644 index 00000000000..bca0a5b5592 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/storage/HDFSLogStore.scala @@ -0,0 +1,169 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.storage + +import java.io.IOException +import java.nio.charset.StandardCharsets.UTF_8 +import java.util.EnumSet + +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.DeltaErrors +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs._ +import org.apache.hadoop.fs.CreateFlag.CREATE +import org.apache.hadoop.fs.Options.{ChecksumOpt, CreateOpts} + +import org.apache.spark.SparkConf +import org.apache.spark.internal.Logging + +/** + * The [[LogStore]] implementation for HDFS, which uses Hadoop [[FileContext]] API's to + * provide the necessary atomic and durability guarantees: + * + * 1. Atomic visibility of files: `FileContext.rename` is used write files which is atomic for HDFS. + * + * 2. Consistent file listing: HDFS file listing is consistent. 
+ */ +class HDFSLogStore(sparkConf: SparkConf, defaultHadoopConf: Configuration) + extends HadoopFileSystemLogStore(sparkConf, defaultHadoopConf) with Logging{ + + @deprecated("call the method that asks for a Hadoop Configuration object instead") + protected def getFileContext(path: Path): FileContext = { + FileContext.getFileContext(path.toUri, getHadoopConfiguration) + } + + protected def getFileContext(path: Path, hadoopConf: Configuration): FileContext = { + FileContext.getFileContext(path.toUri, hadoopConf) + } + + val noAbstractFileSystemExceptionMessage = "No AbstractFileSystem" + + override def write(path: Path, actions: Iterator[String], overwrite: Boolean = false): Unit = { + write(path, actions, overwrite, getHadoopConfiguration) + } + + override def write( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + val isLocalFs = path.getFileSystem(hadoopConf).isInstanceOf[RawLocalFileSystem] + if (isLocalFs) { + // We need to add `synchronized` for RawLocalFileSystem as its rename will not throw an + // exception when the target file exists. Hence we must make sure `exists + rename` in + // `writeInternal` for RawLocalFileSystem is atomic in our tests. + synchronized { + writeInternal(path, actions, overwrite, hadoopConf) + } + } else { + // rename is atomic and also will fail when the target file exists. Not need to add the extra + // `synchronized`. + writeInternal(path, actions, overwrite, hadoopConf) + } + } + + private def writeInternal( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + val fc: FileContext = try { + getFileContext(path, hadoopConf) + } catch { + case e: IOException if e.getMessage.contains(noAbstractFileSystemExceptionMessage) => + val newException = DeltaErrors.incorrectLogStoreImplementationException(sparkConf, e) + logError(newException.getMessage, newException.getCause) + throw newException + } + if (!overwrite && fc.util.exists(path)) { + // This is needed for the tests to throw error with local file system + throw DeltaErrors.fileAlreadyExists(path.toString) + } + + val tempPath = createTempPath(path) + var streamClosed = false // This flag is to avoid double close + var renameDone = false // This flag is to save the delete operation in most of cases. + val stream = fc.create( + tempPath, EnumSet.of(CREATE), CreateOpts.checksumParam(ChecksumOpt.createDisabled())) + + try { + actions.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write) + stream.close() + streamClosed = true + try { + val renameOpt = if (overwrite) Options.Rename.OVERWRITE else Options.Rename.NONE + fc.rename(tempPath, path, renameOpt) + renameDone = true + // TODO: this is a workaround of HADOOP-16255 - remove this when HADOOP-16255 is resolved + tryRemoveCrcFile(fc, tempPath) + } catch { + case e: org.apache.hadoop.fs.FileAlreadyExistsException => + throw DeltaErrors.fileAlreadyExists(path.toString) + } + } finally { + if (!streamClosed) { + stream.close() + } + if (!renameDone) { + fc.delete(tempPath, false) + } + } + + msyncIfSupported(path, hadoopConf) + } + + /** + * Normally when using HDFS with an Observer NameNode setup, there would be read after write + * consistency within a single process, so the write would be guaranteed to be visible on the + * next read. 
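The `writeInternal` above follows the classic write-to-temp-then-rename-without-overwrite pattern. Below is a minimal sketch of the same idea using `java.nio` only for illustration; the patch itself uses Hadoop's `FileContext`, and the exact exception thrown by `ATOMIC_MOVE` can vary by file system.

```scala
import java.nio.charset.StandardCharsets.UTF_8
import java.nio.file.{FileAlreadyExistsException, Files, Paths, StandardCopyOption}

object AtomicPutIfAbsentExample {
  def putIfAbsent(dir: String, name: String, lines: Seq[String]): Unit = {
    val target = Paths.get(dir, name)
    val temp = Paths.get(dir, s".$name.${java.util.UUID.randomUUID}.tmp")
    Files.write(temp, lines.map(_ + "\n").mkString.getBytes(UTF_8))
    try {
      // ATOMIC_MOVE without REPLACE_EXISTING fails if `target` already exists.
      Files.move(temp, target, StandardCopyOption.ATOMIC_MOVE)
    } catch {
      case e: FileAlreadyExistsException =>
        Files.deleteIfExists(temp) // clean up the temp file, mirroring the finally block above
        throw e
    }
  }
}
```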
However, since we are using the FileContext API for writing (for atomic rename), + * and the FileSystem API for reading (for more compatibility with various file systems), we + * are essentially using two separate clients that are not guaranteed to be kept in sync. + * Therefore we "msync" the FileSystem instance, which is cached across all uses of the same + * protocol/host combination, to make sure the next read through the HDFSLogStore can see this + * write. + * Any underlying FileSystem that is not the DistributedFileSystem will simply throw an + * UnsupportedOperationException, which can be ignored. Additionally, if an older version of + * Hadoop is being used that does not include msync, a NoSuchMethodError will be thrown while + * looking up the method, which can also be safely ignored. + */ + private def msyncIfSupported(path: Path, hadoopConf: Configuration): Unit = { + try { + val fs = path.getFileSystem(hadoopConf) + val msync = fs.getClass.getMethod("msync") + msync.invoke(fs) + } catch { + case NonFatal(_) => // ignore, calling msync is best effort + } + } + + private def tryRemoveCrcFile(fc: FileContext, path: Path): Unit = { + try { + val checksumFile = new Path(path.getParent, s".${path.getName}.crc") + if (fc.util.exists(checksumFile)) { + // checksum file exists, deleting it + fc.delete(checksumFile, true) + } + } catch { + case NonFatal(_) => // ignore, we are removing crc file as "best-effort" + } + } + + override def isPartialWriteVisible(path: Path): Boolean = true + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): Boolean = true +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/storage/HadoopFileSystemLogStore.scala b/spark/src/main/scala/org/apache/spark/sql/delta/storage/HadoopFileSystemLogStore.scala new file mode 100644 index 00000000000..210885f1716 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/storage/HadoopFileSystemLogStore.scala @@ -0,0 +1,179 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.storage + +import java.io.{BufferedReader, InputStreamReader} +import java.nio.charset.StandardCharsets.UTF_8 +import java.nio.file.FileAlreadyExistsException +import java.util.UUID + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.DeltaErrors +import org.apache.commons.io.IOUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, FSDataInputStream, Path} + +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.SparkSession + +/** + * Default implementation of [[LogStore]] for Hadoop [[FileSystem]] implementations. 
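`msyncIfSupported` above is a best-effort reflective call: invoke a method only if it exists at runtime and swallow any failure. A tiny standalone sketch of that pattern (the `StringBuilder` example is purely illustrative):

```scala
import scala.util.control.NonFatal

object BestEffortInvokeExample {
  def invokeIfPresent(target: AnyRef, methodName: String): Unit =
    try {
      target.getClass.getMethod(methodName).invoke(target)
    } catch {
      case NonFatal(_) => // method missing or unsupported: ignore, this is best effort
    }

  def main(args: Array[String]): Unit = {
    invokeIfPresent(new StringBuilder("x"), "reverse") // exists: invoked, result ignored
    invokeIfPresent(new StringBuilder("x"), "msync")   // missing: silently ignored
  }
}
```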
+ */ +abstract class HadoopFileSystemLogStore( + sparkConf: SparkConf, + hadoopConf: Configuration) extends LogStore { + + def this(sc: SparkContext) = this(sc.getConf, sc.hadoopConfiguration) + + protected def getHadoopConfiguration: Configuration = { + // scalastyle:off deltahadoopconfiguration + SparkSession.getActiveSession.map(_.sessionState.newHadoopConf()).getOrElse(hadoopConf) + // scalastyle:on deltahadoopconfiguration + } + + override def read(path: Path): Seq[String] = { + read(path, getHadoopConfiguration) + } + + override def read(path: Path, hadoopConf: Configuration): Seq[String] = { + readStream(open(path, hadoopConf)) + } + + override def readAsIterator(path: Path): ClosableIterator[String] = { + readAsIterator(path, getHadoopConfiguration) + } + + override def readAsIterator(path: Path, hadoopConf: Configuration): ClosableIterator[String] = + readStreamAsIterator(open(path, hadoopConf)) + + private def open(path: Path, hadoopConf: Configuration): FSDataInputStream = + path.getFileSystem(hadoopConf).open(path) + + private def readStream(stream: FSDataInputStream): Seq[String] = { + try { + val reader = new BufferedReader(new InputStreamReader(stream, UTF_8)) + IOUtils.readLines(reader).asScala.map(_.trim).toSeq + } finally { + stream.close() + } + } + + private def readStreamAsIterator(stream: FSDataInputStream): ClosableIterator[String] = { + val reader = new BufferedReader(new InputStreamReader(stream, UTF_8)) + new LineClosableIterator(reader) + } + + override def listFrom(path: Path): Iterator[FileStatus] = { + listFrom(path, getHadoopConfiguration) + } + + override def listFrom(path: Path, hadoopConf: Configuration): Iterator[FileStatus] = { + val fs = path.getFileSystem(hadoopConf) + if (!fs.exists(path.getParent)) { + throw DeltaErrors.fileOrDirectoryNotFoundException(s"${path.getParent}") + } + val files = fs.listStatus(path.getParent) + files.filter(_.getPath.getName >= path.getName).sortBy(_.getPath.getName).iterator + } + + override def resolvePathOnPhysicalStorage(path: Path): Path = { + resolvePathOnPhysicalStorage(path, getHadoopConfiguration) + } + + override def resolvePathOnPhysicalStorage(path: Path, hadoopConf: Configuration): Path = { + path.getFileSystem(hadoopConf).makeQualified(path) + } + + /** + * An internal write implementation that uses FileSystem.rename(). + * + * This implementation should only be used for the underlying file systems that support atomic + * renames, e.g., Azure is OK but HDFS is not. + */ + @deprecated("call the method that asks for a Hadoop Configuration object instead") + protected def writeWithRename( + path: Path, actions: Iterator[String], overwrite: Boolean = false): Unit = { + writeWithRename(path, actions, overwrite, getHadoopConfiguration) + } + + /** + * An internal write implementation that uses FileSystem.rename(). + * + * This implementation should only be used for the underlying file systems that support atomic + * renames, e.g., Azure is OK but HDFS is not. 
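The `listFrom` implementation above reduces to a simple contract on file names: keep everything lexicographically greater than or equal to the start name and return it sorted. A standalone sketch with illustrative Delta-log-style names:

```scala
object ListFromExample {
  def listFrom(names: Seq[String], start: String): Seq[String] =
    names.filter(_ >= start).sorted

  def main(args: Array[String]): Unit =
    // List(00000000000000000010.json, 00000000000000000011.checkpoint.parquet, 00000000000000000011.json)
    println(listFrom(
      Seq("00000000000000000009.json", "00000000000000000010.json",
          "00000000000000000011.json", "00000000000000000011.checkpoint.parquet"),
      "00000000000000000010.json"))
}
```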
+ */ + protected def writeWithRename( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + val fs = path.getFileSystem(hadoopConf) + + if (!fs.exists(path.getParent)) { + throw DeltaErrors.fileOrDirectoryNotFoundException(s"${path.getParent}") + } + if (overwrite) { + val stream = fs.create(path, true) + try { + actions.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write) + } finally { + stream.close() + } + } else { + if (fs.exists(path)) { + throw DeltaErrors.fileAlreadyExists(path.toString) + } + val tempPath = createTempPath(path) + var streamClosed = false // This flag is to avoid double close + var renameDone = false // This flag is to save the delete operation in most of cases. + val stream = fs.create(tempPath) + try { + actions.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write) + stream.close() + streamClosed = true + try { + if (fs.rename(tempPath, path)) { + renameDone = true + } else { + if (fs.exists(path)) { + throw DeltaErrors.fileAlreadyExists(path.toString) + } else { + throw DeltaErrors.cannotRenamePath(tempPath.toString, path.toString) + } + } + } catch { + case _: org.apache.hadoop.fs.FileAlreadyExistsException => + throw DeltaErrors.fileAlreadyExists(path.toString) + } + } finally { + if (!streamClosed) { + stream.close() + } + if (!renameDone) { + fs.delete(tempPath, false) + } + } + } + } + + protected def createTempPath(path: Path): Path = { + new Path(path.getParent, s".${path.getName}.${UUID.randomUUID}.tmp") + } + + override def invalidateCache(): Unit = {} +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/storage/LineClosableIterator.scala b/spark/src/main/scala/org/apache/spark/sql/delta/storage/LineClosableIterator.scala new file mode 100644 index 00000000000..7ad8281a26c --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/storage/LineClosableIterator.scala @@ -0,0 +1,74 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.storage + +import java.io.Reader + +import org.apache.spark.sql.delta.DeltaErrors +import org.apache.commons.io.IOUtils + +/** + * Turn a `Reader` to `ClosableIterator` which can be read on demand. Each element is + * a trimmed line. + */ +class LineClosableIterator(_reader: Reader) extends ClosableIterator[String] { + private val reader = IOUtils.toBufferedReader(_reader) + // Whether `nextValue` is valid. If it's invalid, we should try to read the next line. + private var gotNext = false + // The next value to return when `next` is called. This is valid only if `getNext` is true. + private var nextValue: String = _ + // Whether the reader is closed. + private var closed = false + // Whether we have consumed all data in the reader. + private var finished = false + + override def hasNext: Boolean = { + if (!finished) { + // Check whether we have closed the reader before reading. 
Even if `nextValue` is valid, we + // still don't return `nextValue` after a reader is closed. Otherwise, it would be confusing. + if (closed) { + throw DeltaErrors.iteratorAlreadyClosed() + } + if (!gotNext) { + val nextLine = reader.readLine() + if (nextLine == null) { + finished = true + close() + } else { + nextValue = nextLine.trim + } + gotNext = true + } + } + !finished + } + + override def next(): String = { + if (!hasNext) { + throw new NoSuchElementException("End of stream") + } + gotNext = false + nextValue + } + + override def close(): Unit = { + if (!closed) { + closed = true + reader.close() + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/storage/LocalLogStore.scala b/spark/src/main/scala/org/apache/spark/sql/delta/storage/LocalLogStore.scala new file mode 100644 index 00000000000..8a78296b26a --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/storage/LocalLogStore.scala @@ -0,0 +1,73 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.storage + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkConf + +/** + * Default [[LogStore]] implementation (should be used for testing only!). + * + * Production users should specify the appropriate [[LogStore]] implementation in Spark properties. + * + * We assume the following from [[org.apache.hadoop.fs.FileSystem]] implementations: + * - Rename without overwrite is atomic. + * - List-after-write is consistent. + * + * Regarding file creation, this implementation: + * - Uses atomic rename when overwrite is false; if the destination file exists or the rename + * fails, throws an exception. + * - Uses create-with-overwrite when overwrite is true. This does not make the file atomically + * visible and therefore the caller must handle partial files. + */ +class LocalLogStore(sparkConf: SparkConf, hadoopConf: Configuration) + extends HadoopFileSystemLogStore(sparkConf: SparkConf, hadoopConf: Configuration) { + + /** + * This write implementation needs to wrap `writeWithRename` with `synchronized` as the rename() + * for [[org.apache.hadoop.fs.RawLocalFileSystem]] doesn't throw an exception when the target file + * exists. Hence we must make sure `exists + rename` in `writeWithRename` is atomic in our tests. + */ + override def write(path: Path, actions: Iterator[String], overwrite: Boolean = false): Unit = { + synchronized { + writeWithRename(path, actions, overwrite) + } + } + + /** + * This write implementation needs to wrap `writeWithRename` with `synchronized` as the rename() + * for [[org.apache.hadoop.fs.RawLocalFileSystem]] doesn't throw an exception when the target file + * exists. Hence we must make sure `exists + rename` in `writeWithRename` is atomic in our tests.
+ */ + override def write( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + synchronized { + writeWithRename(path, actions, overwrite, hadoopConf) + } + } + + override def invalidateCache(): Unit = {} + + override def isPartialWriteVisible(path: Path): Boolean = true + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): Boolean = true +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/storage/LogStore.scala b/spark/src/main/scala/org/apache/spark/sql/delta/storage/LogStore.scala new file mode 100644 index 00000000000..1012778991e --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/storage/LogStore.scala @@ -0,0 +1,472 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.storage + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.util.Utils + +/** + * General interface for all critical file system operations required to read and write the + * [[DeltaLog]]. The correctness of the [[DeltaLog]] is predicated on the atomicity and + * durability guarantees of the implementation of this interface. Specifically, + * + * 1. Atomic visibility of files: Any file written through this store must + * be made visible atomically. In other words, this should not generate partial files. + * + * 2. Mutual exclusion: Only one writer must be able to create (or rename) a file at the final + * destination. + * + * 3. Consistent listing: Once a file has been written in a directory, all future listings for + * that directory must return that file. + */ +trait LogStore { + + /** + * Load the given file and return a `Seq` of lines. The line break will be removed from each + * line. This method will load the entire file into the memory. Call `readAsIterator` if possible + * as its implementation may be more efficient. + */ + @deprecated("call the method that asks for a Hadoop Configuration object instead") + final def read(path: String): Seq[String] = read(new Path(path)) + + /** + * Load the given file and return a `Seq` of lines. The line break will be removed from each + * line. This method will load the entire file into the memory. Call `readAsIterator` if possible + * as its implementation may be more efficient. + */ + @deprecated("call the method that asks for a Hadoop Configuration object instead") + def read(path: Path): Seq[String] + + /** + * Load the given file and return a `Seq` of lines. The line break will be removed from each + * line. This method will load the entire file into the memory. Call `readAsIterator` if possible + * as its implementation may be more efficient. 
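When a log file may be large, `readAsIterator` avoids loading everything into memory; pairing it with `processAndClose` (from `ClosableIterator` earlier in this patch) keeps the stream closed on all paths. A sketch, assuming a `LogStore` instance and Hadoop configuration are already available:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.delta.storage.LogStore

object ReadAsIteratorUsage {
  def firstNonEmptyLine(logStore: LogStore, deltaFile: Path, hadoopConf: Configuration): Option[String] = {
    // processAndClose closes the iterator in a finally block, even if the body throws.
    logStore.readAsIterator(deltaFile, hadoopConf).processAndClose { lines =>
      lines.find(_.nonEmpty)
    }
  }
}
```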
+ * + * Note: The default implementation ignores the `hadoopConf` parameter to provide the backward + * compatibility. Subclasses should override this method and use `hadoopConf` properly to support + * passing Hadoop file system configurations through DataFrame options. + */ + def read(path: Path, hadoopConf: Configuration): Seq[String] = read(path) + + /** + * Load the given file represented by `fileStatus` and return a `Seq` of lines. + * The line break will be removed from each line. + * + * Note: Using a stale `FileStatus` may get an incorrect result. + */ + final def read(fileStatus: FileStatus, hadoopConf: Configuration): Seq[String] = { + val iter = readAsIterator(fileStatus, hadoopConf) + try { + iter.toIndexedSeq + } finally { + iter.close() + } + } + + /** + * Load the given file and return an iterator of lines. The line break will be removed from each + * line. The default implementation calls `read` to load the entire file into the memory. + * An implementation should provide a more efficient approach if possible. For example, the file + * content can be loaded on demand. + */ + @deprecated("call the method that asks for a Hadoop Configuration object instead") + final def readAsIterator(path: String): ClosableIterator[String] = { + readAsIterator(new Path(path)) + } + + /** + * Load the given file and return an iterator of lines. The line break will be removed from each + * line. The default implementation calls `read` to load the entire file into the memory. + * An implementation should provide a more efficient approach if possible. For example, the file + * content can be loaded on demand. + * + * Note: the returned [[ClosableIterator]] should be closed when it's no longer used to avoid + * resource leak. + */ + @deprecated("call the method that asks for a Hadoop Configuration object instead") + def readAsIterator(path: Path): ClosableIterator[String] = { + val iter = read(path).iterator + new ClosableIterator[String] { + + override def hasNext: Boolean = iter.hasNext + + override def next(): String = iter.next() + + override def close(): Unit = {} + } + } + + /** + * Load the given file and return an iterator of lines. The line break will be removed from each + * line. The default implementation calls `read` to load the entire file into the memory. + * An implementation should provide a more efficient approach if possible. For example, the file + * content can be loaded on demand. + * + * Note: the returned [[ClosableIterator]] should be closed when it's no longer used to avoid + * resource leak. + * + * Note: The default implementation ignores the `hadoopConf` parameter to provide the backward + * compatibility. Subclasses should override this method and use `hadoopConf` properly to support + * passing Hadoop file system configurations through DataFrame options. + */ + def readAsIterator(path: Path, hadoopConf: Configuration): ClosableIterator[String] = { + readAsIterator(path) + } + + /** + * Load the file represented by given fileStatus and return an iterator of lines. The line break + * will be removed from each line. + * + * Note-1: the returned [[ClosableIterator]] should be closed when it's no longer used to avoid + * resource leak. + * + * Note-2: Using a stale `FileStatus` may get an incorrect result. + */ + def readAsIterator( + fileStatus: FileStatus, + hadoopConf: Configuration): ClosableIterator[String] = { + readAsIterator(fileStatus.getPath, hadoopConf) + } + + /** + * Write the given `actions` to the given `path` without overwriting any existing file. 
+ * Implementation must throw [[java.nio.file.FileAlreadyExistsException]] exception if the file + * already exists. Furthermore, implementation must ensure that the entire file is made + * visible atomically, that is, it should not generate partial files. + */ + @deprecated("call the method that asks for a Hadoop Configuration object instead") + final def write(path: String, actions: Iterator[String]): Unit = write(new Path(path), actions) + + /** + * Write the given `actions` to the given `path` with or without overwrite as indicated. + * Implementation must throw [[java.nio.file.FileAlreadyExistsException]] exception if the file + * already exists and overwrite = false. Furthermore, implementation must ensure that the + * entire file is made visible atomically, that is, it should not generate partial files. + */ + @deprecated("call the method that asks for a Hadoop Configuration object instead") + def write(path: Path, actions: Iterator[String], overwrite: Boolean = false): Unit + + /** + * Write the given `actions` to the given `path` with or without overwrite as indicated. + * Implementation must throw [[java.nio.file.FileAlreadyExistsException]] exception if the file + * already exists and overwrite = false. Furthermore, implementation must ensure that the + * entire file is made visible atomically, that is, it should not generate partial files. + * + * Note: The default implementation ignores the `hadoopConf` parameter to provide the backward + * compatibility. Subclasses should override this method and use `hadoopConf` properly to support + * passing Hadoop file system configurations through DataFrame options. + */ + def write( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + write(path, actions, overwrite) + } + + /** + * List the paths in the same directory that are lexicographically greater or equal to + * (UTF-8 sorting) the given `path`. The result should also be sorted by the file name. + */ + @deprecated("call the method that asks for a Hadoop Configuration object instead") + final def listFrom(path: String): Iterator[FileStatus] = + listFrom(new Path(path)) + + /** + * List the paths in the same directory that are lexicographically greater or equal to + * (UTF-8 sorting) the given `path`. The result should also be sorted by the file name. + */ + @deprecated("call the method that asks for a Hadoop Configuration object instead") + def listFrom(path: Path): Iterator[FileStatus] + + /** + * List the paths in the same directory that are lexicographically greater or equal to + * (UTF-8 sorting) the given `path`. The result should also be sorted by the file name. + * + * Note: The default implementation ignores the `hadoopConf` parameter to provide the backward + * compatibility. Subclasses should override this method and use `hadoopConf` properly to support + * passing Hadoop file system configurations through DataFrame options. + */ + def listFrom(path: Path, hadoopConf: Configuration): Iterator[FileStatus] = listFrom(path) + + /** Invalidate any caching that the implementation may be using */ + def invalidateCache(): Unit + + /** Resolve the fully qualified path for the given `path`. */ + @deprecated("call the method that asks for a Hadoop Configuration object instead") + def resolvePathOnPhysicalStorage(path: Path): Path = { + throw new UnsupportedOperationException() + } + + /** + * Resolve the fully qualified path for the given `path`. 
+ * + * Note: The default implementation ignores the `hadoopConf` parameter to provide the backward + * compatibility. Subclasses should override this method and use `hadoopConf` properly to support + * passing Hadoop file system configurations through DataFrame options. + */ + def resolvePathOnPhysicalStorage(path: Path, hadoopConf: Configuration): Path = { + resolvePathOnPhysicalStorage(path) + } + + /** + * Whether a partial write is visible when writing to `path`. + * + * As this depends on the underlying file system implementations, we require the input of `path` + * here in order to identify the underlying file system, even though in most cases a log store + * only deals with one file system. + * + * The default value is only provided here for legacy reasons, which will be removed. + * Any LogStore implementation should override this instead of relying on the default. + */ + @deprecated("call the method that asks for a Hadoop Configuration object instead") + def isPartialWriteVisible(path: Path): Boolean = true + + /** + * Whether a partial write is visible when writing to `path`. + * + * As this depends on the underlying file system implementations, we require the input of `path` + * here in order to identify the underlying file system, even though in most cases a log store + * only deals with one file system. + * + * The default value is only provided here for legacy reasons, which will be removed. + * Any LogStore implementation should override this instead of relying on the default. + * + * Note: The default implementation ignores the `hadoopConf` parameter to provide the backward + * compatibility. Subclasses should override this method and use `hadoopConf` properly to support + * passing Hadoop file system configurations through DataFrame options. + */ + def isPartialWriteVisible(path: Path, hadoopConf: Configuration): Boolean = { + isPartialWriteVisible(path) + } +} + +object LogStore extends LogStoreProvider + with Logging { + + + def apply(spark: SparkSession): LogStore = { + // scalastyle:off deltahadoopconfiguration + // Ensure that the LogStore's hadoopConf has the values from the SQLConf. + // This ensures that io.delta.storage LogStore (Java) hadoopConf's are configured correctly. + apply(spark.sparkContext.getConf, spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + } + + def apply(sparkConf: SparkConf, hadoopConf: Configuration): LogStore = { + createLogStore(sparkConf, hadoopConf) + } + + // Creates a LogStore with the given LogStore class name and configurations. + def createLogStoreWithClassName( + className: String, + sparkConf: SparkConf, + hadoopConf: Configuration): LogStore = { + if (className == classOf[DelegatingLogStore].getName) { + new DelegatingLogStore(hadoopConf) + } else { + val logStoreClass = Utils.classForName(className) + if (classOf[io.delta.storage.LogStore].isAssignableFrom(logStoreClass)) { + new LogStoreAdaptor(logStoreClass.getConstructor(classOf[Configuration]) + .newInstance(hadoopConf)) + } else { + logStoreClass.getConstructor(classOf[SparkConf], classOf[Configuration]) + .newInstance(sparkConf, hadoopConf).asInstanceOf[LogStore] + } + } + } +} + +trait LogStoreProvider { + val logStoreClassConfKey: String = "spark.delta.logStore.class" + val defaultLogStoreClass: String = classOf[DelegatingLogStore].getName + + // The conf key for setting the LogStore implementation for `scheme`. 
+ def logStoreSchemeConfKey(scheme: String): String = s"spark.delta.logStore.${scheme}.impl" + + /** + * We accept keys both with and without the `spark.` prefix to maintain compatibility across the + * Delta ecosystem + * @param key the spark-prefixed key to access + */ + def getLogStoreConfValue(key: String, sparkConf: SparkConf): Option[String] = { + // verifyLogStoreConfs already validated that if both keys exist the values are the same when + // the LogStore was instantiated + sparkConf.getOption(key) + .orElse(sparkConf.getOption(key.stripPrefix("spark."))) + } + + def createLogStore(spark: SparkSession): LogStore = { + LogStore(spark) + } + + /** + * Check for conflicting LogStore configs in the spark configuration. + * + * To maintain compatibility across the Delta ecosystem, we accept keys both with and without the + * "spark." prefix. This means for setting the class conf, we accept both + * "spark.delta.logStore.class" and "delta.logStore.class" and for scheme confs we accept both + * "spark.delta.logStore.${scheme}.impl" and "delta.logStore.${scheme}.impl" + * + * If a conf is set both with and without the spark prefix, it must be set to the same value, + * otherwise we throw an error. + */ + def verifyLogStoreConfs(sparkConf: SparkConf): Unit = { + // check LogStore class conf key + val classConf = sparkConf.getOption(logStoreClassConfKey.stripPrefix("spark.")) + classConf.foreach { nonPrefixValue => + sparkConf.getOption(logStoreClassConfKey).foreach { prefixValue => + // Both the spark-prefixed and non-spark-prefixed key is present in the sparkConf. Check + // that they store the same value, otherwise throw an error. + if (prefixValue != nonPrefixValue) { + throw DeltaErrors.inconsistentLogStoreConfs( + Seq((logStoreClassConfKey.stripPrefix("spark."), nonPrefixValue), + (logStoreClassConfKey, prefixValue))) + } + } + } + + // check LogStore scheme conf keys + val schemeConfs = sparkConf.getAllWithPrefix("delta.logStore.") + .filter(_._1.endsWith(".impl")) + schemeConfs.foreach { case (nonPrefixKey, nonPrefixValue) => + val prefixKey = logStoreSchemeConfKey(nonPrefixKey.stripSuffix(".impl")) + sparkConf.getOption(prefixKey).foreach { prefixValue => + // Both the spark-prefixed and non-spark-prefixed key is present in the sparkConf. Check + // that they store the same value, otherwise throw an error. + if (prefixValue != nonPrefixValue) { + throw DeltaErrors.inconsistentLogStoreConfs( + Seq(("delta.logStore." + nonPrefixKey, nonPrefixValue), (prefixKey, prefixValue))) + } + } + } + } + + def checkLogStoreConfConflicts(sparkConf: SparkConf): Unit = { + val sparkPrefixLogStoreConfs = sparkConf.getAllWithPrefix("spark.delta.logStore.") + .map(kv => "spark.delta.logStore." + kv._1 -> kv._2) + val nonSparkPrefixLogStoreConfs = sparkConf.getAllWithPrefix("delta.logStore.") + .map(kv => "delta.logStore." 
+ kv._1 -> kv._2) + val (classConf, otherConf) = (sparkPrefixLogStoreConfs ++ nonSparkPrefixLogStoreConfs) + .partition(v => v._1.endsWith("class")) + val schemeConf = otherConf.filter(_._1.endsWith(".impl")) + if (classConf.nonEmpty && schemeConf.nonEmpty) { + throw DeltaErrors.logStoreConfConflicts(classConf, schemeConf) + } + } + + def createLogStore(sparkConf: SparkConf, hadoopConf: Configuration): LogStore = { + checkLogStoreConfConflicts(sparkConf) + verifyLogStoreConfs(sparkConf) + val logStoreClassName = getLogStoreConfValue(logStoreClassConfKey, sparkConf) + .getOrElse(defaultLogStoreClass) + LogStore.createLogStoreWithClassName(logStoreClassName, sparkConf, hadoopConf) + } +} + +/** + * An adaptor from the new public LogStore API to the old private LogStore API. The old LogStore + * API is still used in most places. Before we move all of them to the new API, adapting from + * the new API to the old API is a cheap way to ensure that implementations of both APIs work. + * + * @param logStoreImpl An implementation of the new public LogStore API. + */ +class LogStoreAdaptor(val logStoreImpl: io.delta.storage.LogStore) extends LogStore { + + private def getHadoopConfiguration: Configuration = { + // scalastyle:off deltahadoopconfiguration + SparkSession.getActiveSession.map(_.sessionState.newHadoopConf()) + .getOrElse(logStoreImpl.initHadoopConf()) + // scalastyle:on deltahadoopconfiguration + } + + override def read(path: Path): Seq[String] = { + read(path, getHadoopConfiguration) + } + + override def read(path: Path, hadoopConf: Configuration): Seq[String] = { + var iter: io.delta.storage.CloseableIterator[String] = null + try { + iter = logStoreImpl.read(path, hadoopConf) + val contents = iter.asScala.toArray + contents + } finally { + if (iter != null) { + iter.close + } + } + } + + override def readAsIterator(path: Path): ClosableIterator[String] = { + readAsIterator(path, getHadoopConfiguration) + } + + override def readAsIterator(path: Path, hadoopConf: Configuration): ClosableIterator[String] = { + val iter = logStoreImpl.read(path, hadoopConf) + new ClosableIterator[String] { + override def close(): Unit = iter.close + override def hasNext: Boolean = iter.hasNext + override def next(): String = iter.next + } + } + + override def write(path: Path, actions: Iterator[String], overwrite: Boolean): Unit = { + write(path, actions, overwrite, getHadoopConfiguration) + } + + override def write( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + logStoreImpl.write(path, actions.asJava, overwrite, hadoopConf) + } + + override def listFrom(path: Path): Iterator[FileStatus] = { + listFrom(path, getHadoopConfiguration) + } + + override def listFrom(path: Path, hadoopConf: Configuration): Iterator[FileStatus] = { + logStoreImpl.listFrom(path, hadoopConf).asScala + } + + override def invalidateCache(): Unit = {} + + override def resolvePathOnPhysicalStorage(path: Path): Path = { + resolvePathOnPhysicalStorage(path, getHadoopConfiguration) + } + + override def resolvePathOnPhysicalStorage(path: Path, hadoopConf: Configuration): Path = { + logStoreImpl.resolvePathOnPhysicalStorage(path, hadoopConf) + } + + override def isPartialWriteVisible(path: Path): Boolean = { + isPartialWriteVisible(path, getHadoopConfiguration) + } + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): Boolean = { + logStoreImpl.isPartialWriteVisible(path, hadoopConf) + } +} diff --git 
a/spark/src/main/scala/org/apache/spark/sql/delta/storage/S3SingleDriverLogStore.scala b/spark/src/main/scala/org/apache/spark/sql/delta/storage/S3SingleDriverLogStore.scala new file mode 100644 index 00000000000..528625cc573 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/storage/S3SingleDriverLogStore.scala @@ -0,0 +1,266 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.storage + +import java.io.FileNotFoundException +import java.net.URI +import java.nio.charset.StandardCharsets.UTF_8 +import java.util.concurrent.{ConcurrentHashMap, TimeUnit} + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.DeltaErrors +import org.apache.spark.sql.delta.util.FileNames +import com.google.common.cache.CacheBuilder +import com.google.common.io.CountingOutputStream +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs._ + +import org.apache.spark.SparkConf + +/** + * Single Spark-driver/JVM LogStore implementation for S3. + * + * We assume the following from S3's [[FileSystem]] implementations: + * - File writing on S3 is all-or-nothing, whether overwrite or not. + * - List-after-write can be inconsistent. + * + * Regarding file creation, this implementation: + * - Opens a stream to write to S3 (regardless of the overwrite option). + * - Failures during stream write may leak resources, but may never result in partial writes. + * + * Regarding directory listing, this implementation: + * - returns a list by merging the files listed from S3 and recently-written files from the cache. + */ +class S3SingleDriverLogStore( + sparkConf: SparkConf, + hadoopConf: Configuration) extends HadoopFileSystemLogStore(sparkConf, hadoopConf) { + import S3SingleDriverLogStore._ + + private def resolved(path: Path, hadoopConf: Configuration): (FileSystem, Path) = { + val fs = path.getFileSystem(hadoopConf) + val resolvedPath = stripUserInfo(fs.makeQualified(path)) + (fs, resolvedPath) + } + + private def getPathKey(resolvedPath: Path): Path = { + stripUserInfo(resolvedPath) + } + + private def stripUserInfo(path: Path): Path = { + val uri = path.toUri + val newUri = new URI( + uri.getScheme, + null, + uri.getHost, + uri.getPort, + uri.getPath, + uri.getQuery, + uri.getFragment) + new Path(newUri) + } + + /** + * Merge two iterators of [[FileStatus]] into a single iterator ordered by file path name. + * In case both iterators have [[FileStatus]]s for the same file path, keep the one from + * `iterWithPrecedence` and discard that from `iter`. + */ + private def mergeFileIterators( + iter: Iterator[FileStatus], + iterWithPrecedence: Iterator[FileStatus]): Iterator[FileStatus] = { + (iter.map(f => (f.getPath, f)).toMap ++ iterWithPrecedence.map(f => (f.getPath, f))) + .values + .toSeq + .sortBy(_.getPath.getName) + .iterator + } + + /** + * List files starting from `resolvedPath` (inclusive) in the same directory. 
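+ * Only entries from the in-memory `writtenPathCache` are returned here; `listFromInternal`
+ * merges them with the file system listing, so files recently written by this JVM stay visible
+ * even when the S3 listing lags behind (see the class-level comment on list-after-write
+ * inconsistency).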
+ */ + private def listFromCache(fs: FileSystem, resolvedPath: Path) = { + val pathKey = getPathKey(resolvedPath) + writtenPathCache + .asMap() + .asScala + .iterator + .filter { case (path, _) => + path.getParent == pathKey.getParent() && path.getName >= pathKey.getName } + .map { case (path, fileMetadata) => + new FileStatus( + fileMetadata.length, + false, + 1, + fs.getDefaultBlockSize(path), + fileMetadata.modificationTime, + path) + } + } + + /** + * List files starting from `resolvedPath` (inclusive) in the same directory, which merges + * the file system list and the cache list when `useCache` is on, otherwise + * use file system list only. + */ + private def listFromInternal(fs: FileSystem, resolvedPath: Path, useCache: Boolean = true) = { + val parentPath = resolvedPath.getParent + if (!fs.exists(parentPath)) { + throw DeltaErrors.fileOrDirectoryNotFoundException(parentPath.toString) + } + val listedFromFs = + fs.listStatus(parentPath).filter(_.getPath.getName >= resolvedPath.getName).iterator + val listedFromCache = if (useCache) listFromCache(fs, resolvedPath) else Iterator.empty + + // File statuses listed from file system take precedence + mergeFileIterators(listedFromCache, listedFromFs) + } + + override def listFrom(path: Path): Iterator[FileStatus] = { + listFrom(path, getHadoopConfiguration) + } + + /** + * List files starting from `resolvedPath` (inclusive) in the same directory. + */ + override def listFrom(path: Path, hadoopConf: Configuration): Iterator[FileStatus] = { + val (fs, resolvedPath) = resolved(path, hadoopConf) + listFromInternal(fs, resolvedPath) + } + + /** + * Check if the path is an initial version of a Delta log. + */ + private def isInitialVersion(path: Path): Boolean = { + FileNames.isDeltaFile(path) && FileNames.deltaVersion(path) == 0L + } + + /** + * Check if a path exists. Normally we check both the file system and the cache, but when the + * path is the first version of a Delta log, we ignore the cache. + */ + private def exists(fs: FileSystem, resolvedPath: Path): Boolean = { + // Ignore the cache for the first file of a Delta log + listFromInternal(fs, resolvedPath, useCache = !isInitialVersion(resolvedPath)) + .take(1) + .exists(_.getPath.getName == resolvedPath.getName) + } + + override def write(path: Path, actions: Iterator[String], overwrite: Boolean = false): Unit = { + write(path, actions, overwrite, getHadoopConfiguration) + } + + override def write( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + val (fs, resolvedPath) = resolved(path, hadoopConf) + val lockedPath = getPathKey(resolvedPath) + acquirePathLock(lockedPath) + try { + if (exists(fs, resolvedPath) && !overwrite) { + throw new java.nio.file.FileAlreadyExistsException(resolvedPath.toUri.toString) + } + val stream = new CountingOutputStream(fs.create(resolvedPath, overwrite)) + actions.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write) + stream.close() + + // When a Delta log starts afresh, all cached files in that Delta log become obsolete, + // so we remove them from the cache. 
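+ // ("Starts afresh" means the file being written is version 0 of the log, typically a path
+ // such as _delta_log/00000000000000000000.json; see `isInitialVersion` above.)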
+ if (isInitialVersion(resolvedPath)) { + val obsoleteFiles = writtenPathCache + .asMap() + .asScala + .keys + .filter(_.getParent == lockedPath.getParent()) + .asJava + + writtenPathCache.invalidateAll(obsoleteFiles) + } + + // Cache the information of written files to help fix the inconsistency in future listings + writtenPathCache.put(lockedPath, + FileMetadata(stream.getCount(), System.currentTimeMillis())) + } catch { + // Convert Hadoop's FileAlreadyExistsException to Java's FileAlreadyExistsException + case e: org.apache.hadoop.fs.FileAlreadyExistsException => + throw new java.nio.file.FileAlreadyExistsException(e.getMessage) + } finally { + releasePathLock(lockedPath) + } + } + + override def isPartialWriteVisible(path: Path): Boolean = false + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): Boolean = false + + override def invalidateCache(): Unit = { + writtenPathCache.invalidateAll() + } +} + +object S3SingleDriverLogStore { + /** + * A global path lock to ensure that no concurrent writers writing to the same path in the same + * JVM. + */ + private val pathLock = new ConcurrentHashMap[Path, AnyRef]() + + /** + * A global cache that records the metadata of the files recently written. + * As list-after-write may be inconsistent on S3, we can use the files in the cache + * to fix the inconsistent file listing. + */ + private val writtenPathCache = + CacheBuilder.newBuilder() + .expireAfterAccess(120, TimeUnit.MINUTES) + .build[Path, FileMetadata]() + + /** + * Release the lock for the path after writing. + * + * Note: the caller should resolve the path to make sure we are locking the correct absolute path. + */ + private def releasePathLock(resolvedPath: Path): Unit = { + val lock = pathLock.remove(resolvedPath) + lock.synchronized { + lock.notifyAll() + } + } + + /** + * Acquire a lock for the path before writing. + * + * Note: the caller should resolve the path to make sure we are locking the correct absolute path. + */ + private def acquirePathLock(resolvedPath: Path): Unit = { + while (true) { + val lock = pathLock.putIfAbsent(resolvedPath, new Object) + if (lock == null) return + lock.synchronized { + while (pathLock.get(resolvedPath) == lock) { + lock.wait() + } + } + } + } +} + +/** + * The file metadata to be stored in the cache. + */ +case class FileMetadata(length: Long, modificationTime: Long) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/storage/dv/DeletionVectorStore.scala b/spark/src/main/scala/org/apache/spark/sql/delta/storage/dv/DeletionVectorStore.scala new file mode 100644 index 00000000000..537c95f81bb --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/storage/dv/DeletionVectorStore.scala @@ -0,0 +1,240 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql.delta.storage.dv
+
+import java.io.{Closeable, DataInputStream}
+import java.net.URI
+import java.nio.charset.StandardCharsets.UTF_8
+import java.util.UUID
+import java.util.zip.CRC32
+
+import org.apache.spark.sql.delta.DeltaErrors
+import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor
+import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArray, StoredBitmap}
+import org.apache.spark.sql.delta.util.PathWithFileSystem
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path}
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.paths.SparkPath
+import org.apache.spark.util.Utils
+
+trait DeletionVectorStore extends Logging {
+ /**
+ * Read a Deletion Vector and parse it as [[RoaringBitmapArray]].
+ */
+ def read(
+ dvDescriptor: DeletionVectorDescriptor,
+ tablePath: Path): RoaringBitmapArray =
+ StoredBitmap.create(dvDescriptor, tablePath).load(this)
+
+ /**
+ * Read a Deletion Vector and parse it as [[RoaringBitmapArray]].
+ */
+ def read(path: Path, offset: Int, size: Int): RoaringBitmapArray
+
+ /**
+ * Returns a writer that can be used to write multiple deletion vectors to the file at `path`.
+ */
+ def createWriter(path: PathWithFileSystem): DeletionVectorStore.Writer
+
+ /**
+ * Returns the full path for a DV with the `fileId` UUID under `targetPath`.
+ *
+ * Optionally, prepend a `prefix` to the name.
+ */
+ def generateFileNameInTable(
+ targetPath: PathWithFileSystem,
+ fileId: UUID,
+ prefix: String = ""): PathWithFileSystem = {
+ DeletionVectorStore.assembleDeletionVectorPathWithFileSystem(targetPath, fileId, prefix)
+ }
+
+ /**
+ * Return a new unique path under `targetPath`.
+ *
+ * Optionally, prepend a `prefix` to the name.
+ */
+ def generateUniqueNameInTable(
+ targetPath: PathWithFileSystem,
+ prefix: String = ""): PathWithFileSystem =
+ generateFileNameInTable(targetPath, UUID.randomUUID(), prefix)
+
+ /**
+ * Creates a [[PathWithFileSystem]] instance
+ * by using the configuration of this `DeletionVectorStore` instance.
+ */
+ def pathWithFileSystem(path: Path): PathWithFileSystem
+}
+
+/**
+ * Trait containing the utility methods and constants needed for [[DeletionVectorStore]].
+ */
+trait DeletionVectorStoreUtils {
+ final val DV_FILE_FORMAT_VERSION_ID_V1: Byte = 1
+
+ /** The length of a DV checksum. See [[calculateChecksum()]]. */
+ final val CHECKSUM_LEN = 4
+ /** The size of the stored length of a DV. */
+ final val DATA_SIZE_LEN = 4
+
+ // DV Format: [data size (4 bytes)][bitmap data][checksum (4 bytes)]
+ def getTotalSizeOfDVFieldsInFile(bitmapDataSize: Int): Int = {
+ DATA_SIZE_LEN + bitmapDataSize + CHECKSUM_LEN
+ }
+
+ /** Convert the given String path to a Hadoop Path. Please make sure the path is not escaped. */
+ def unescapedStringToPath(path: String): Path = SparkPath.fromPathString(path).toPath
+
+ /** Convert the given String path to a Hadoop Path. Please make sure the path is escaped. */
+ def escapedStringToPath(path: String): Path = SparkPath.fromUrlString(path).toPath
+
+ /** Convert the given Hadoop path to a String path, handling special characters properly. */
+ def pathToEscapedString(path: Path): String = SparkPath.fromPath(path).urlEncoded
+
+ /**
+ * Calculate the checksum of a serialized deletion vector. We use CRC32, which produces a 4-byte
+ * checksum, but the CRC32 implementation conforms to the Java Checksum interface, which requires
+ * a long. However, the high-order bytes are zero, so it is safe to cast to Int here.
This will result in negative + * checksums, but this is not a problem because we only care about equality. + */ + def calculateChecksum(data: Array[Byte]): Int = { + val crc = new CRC32() + crc.update(data) + crc.getValue.toInt + } + + /** + * Read a serialized deletion vector from a data stream. + */ + def readRangeFromStream(reader: DataInputStream, size: Int): Array[Byte] = { + val sizeAccordingToFile = reader.readInt() + if (size != sizeAccordingToFile) { + throw DeltaErrors.deletionVectorSizeMismatch() + } + + val buffer = new Array[Byte](size) + reader.readFully(buffer) + + val expectedChecksum = reader.readInt() + val actualChecksum = calculateChecksum(buffer) + if (expectedChecksum != actualChecksum) { + throw DeltaErrors.deletionVectorChecksumMismatch() + } + + buffer + } + + /** + * Same as `assembleDeletionVectorPath`, but keeps the new path bundled with the fs. + */ + def assembleDeletionVectorPathWithFileSystem( + targetParentPathWithFileSystem: PathWithFileSystem, + id: UUID, + prefix: String = ""): PathWithFileSystem = { + targetParentPathWithFileSystem.copy(path = + DeletionVectorDescriptor.assembleDeletionVectorPath( + targetParentPathWithFileSystem.path, id, prefix)) + } + + /** Descriptor for a serialized Deletion Vector in a file. */ + case class DVRangeDescriptor(offset: Int, length: Int, checksum: Int) + + trait Writer extends Closeable { + /** + * Appends the serialized deletion vector in `data` to the file, and returns the offset in the + * file that the deletion vector was written to and its checksum. + */ + def write(data: Array[Byte]): DVRangeDescriptor + + /** + * Returns UTF-8 encoded path of the file that is being written by this writer. + */ + def serializedPath: Array[Byte] + + /** + * Closes this writer. After calling this method it is no longer valid to call write (or close). + * This method must always be called when the owner of this writer is done writing deletion + * vectors. + */ + def close(): Unit + } +} + +object DeletionVectorStore extends DeletionVectorStoreUtils { + /** Create a new instance of [[DeletionVectorStore]] from the given Hadoop configuration. */ + private[delta] def createInstance( + hadoopConf: Configuration): DeletionVectorStore = + new HadoopFileSystemDVStore(hadoopConf) +} + +/** + * Default [[DeletionVectorStore]] implementation for Hadoop [[FileSystem]] implementations. + * + * Note: This class must be thread-safe, + * because we sometimes write multiple deletion vectors in parallel through the same store. + */ +class HadoopFileSystemDVStore(hadoopConf: Configuration) + extends DeletionVectorStore { + + override def read(path: Path, offset: Int, size: Int): RoaringBitmapArray = { + val fs = path.getFileSystem(hadoopConf) + val buffer = Utils.tryWithResource(fs.open(path)) { reader => + reader.seek(offset) + DeletionVectorStore.readRangeFromStream(reader, size) + } + RoaringBitmapArray.readFrom(buffer) + } + + override def createWriter(path: PathWithFileSystem): DeletionVectorStore.Writer = { + new DeletionVectorStore.Writer { + // Lazily create the writer for the deletion vectors, so that we don't write an empty file + // in case all deletion vectors are empty. 
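+ //
+ // Resulting on-disk layout, as written below: one format-version byte for the file, then one
+ // [4-byte length][bitmap data][4-byte CRC32 checksum] record per `write` call; the returned
+ // DVRangeDescriptor.offset points at the length field of that record.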
+ private var outputStream: FSDataOutputStream = _ + + override def write(data: Array[Byte]): DeletionVectorStore.DVRangeDescriptor = { + if (outputStream == null) { + val overwrite = false // `create` Java API does not support named parameters + outputStream = path.fs.create(path.path, overwrite) + outputStream.writeByte(DeletionVectorStore.DV_FILE_FORMAT_VERSION_ID_V1) + } + val dvRange = DeletionVectorStore.DVRangeDescriptor( + offset = outputStream.size(), + length = data.length, + checksum = DeletionVectorStore.calculateChecksum(data) + ) + log.debug(s"Writing DV range to file: Path=${path.path}, Range=${dvRange}") + outputStream.writeInt(data.length) + outputStream.write(data) + outputStream.writeInt(dvRange.checksum) + dvRange + } + + override val serializedPath: Array[Byte] = + DeletionVectorStore.pathToEscapedString(path.path).getBytes(UTF_8) + + override def close(): Unit = { + if (outputStream != null) { + outputStream.close() + } + } + } + } + + override def pathWithFileSystem(path: Path): PathWithFileSystem = + PathWithFileSystem.withConf(path, hadoopConf) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/streaming/SchemaTrackingLog.scala b/spark/src/main/scala/org/apache/spark/sql/delta/streaming/SchemaTrackingLog.scala new file mode 100644 index 00000000000..3cb685f7726 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/streaming/SchemaTrackingLog.scala @@ -0,0 +1,181 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.streaming + +import java.io.{InputStream, OutputStream} +import java.nio.charset.StandardCharsets._ + +import scala.io.{Source => IOSource} +import scala.reflect.ClassTag + +import org.apache.spark.sql.delta.util.JsonUtils +import com.fasterxml.jackson.annotation.JsonIgnore + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.streaming.{HDFSMetadataLog, MetadataVersionUtil} +import org.apache.spark.sql.types.{DataType, StructType} + +/** + * A serializable schema with a partition schema and a data schema. + */ +trait PartitionAndDataSchema { + + @JsonIgnore + def dataSchema: DataType + + @JsonIgnore + def partitionSchema: StructType +} + +/** + * A schema serializer handles the SerDe of a [[PartitionAndDataSchema]] + */ +sealed trait SchemaSerializer[T <: PartitionAndDataSchema] { + def serdeVersion: Int + + def serialize(schema: T, outputStream: OutputStream): Unit + + def deserialize(in: InputStream): T +} + +/** + *A schema serializer that reads/writes schema using the following format: + * {SERDE_VERSION} + * {JSON of the serializable schema} + */ +class JsonSchemaSerializer[T <: PartitionAndDataSchema: ClassTag: Manifest] + (override val serdeVersion: Int) extends SchemaSerializer[T] { + + import SchemaTrackingExceptions._ + + val EMPTY_JSON = "{}" + + /** + * Deserializes the log entry from input stream. 
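+ * The expected wire format (mirroring `serialize` below) is the version line followed by the
+ * JSON payload, e.g. for `serdeVersion = 1` (illustrative):
+ * {{{
+ *   v1
+ *   { ... JSON of the schema entry ... }
+ * }}}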
+ * @throws FailedToDeserializeException
+ */
+ override def deserialize(in: InputStream): T = {
+ // Called inside a try-finally where the underlying stream is closed in the caller
+ val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines()
+
+ if (!lines.hasNext) {
+ throw FailedToDeserializeException
+ }
+
+ MetadataVersionUtil.validateVersion(lines.next(), serdeVersion)
+ val schemaJson = if (lines.hasNext) lines.next() else EMPTY_JSON
+ JsonUtils.fromJson(schemaJson)
+ }
+
+ override def serialize(metadata: T, out: OutputStream): Unit = {
+ // Called inside a try-finally where the underlying stream is closed in the caller
+ out.write(s"v${serdeVersion}".getBytes(UTF_8))
+ out.write('\n')
+
+ // Write metadata
+ out.write(JsonUtils.toJson(metadata).getBytes(UTF_8))
+ }
+}
+
+/**
+ * The underlying class for a streaming log that keeps track of a sequence of schema changes.
+ *
+ * It keeps track of the sequence of schema changes that this log is aware of, and it detects any
+ * concurrent modifications to the schema log to prevent accidents on a best-effort basis.
+ */
+class SchemaTrackingLog[T <: PartitionAndDataSchema: ClassTag: Manifest](
+ sparkSession: SparkSession,
+ path: String,
+ schemaSerializer: SchemaSerializer[T])
+ extends HDFSMetadataLog[T](sparkSession, path) {
+
+ import SchemaTrackingExceptions._
+
+ // The schema and version detected when this log is initialized
+ private val schemaAndSeqNumAtLogInit: Option[(Long, T)] = getLatest()
+
+ // Next schema version to write; this should be updated after each schema evolution.
+ // This allows HDFSMetadataLog to detect concurrent schema log updates on a best-effort basis.
+ private var currentSeqNum: Long = schemaAndSeqNumAtLogInit.map(_._1).getOrElse(-1L)
+ private var nextSeqNumToWrite: Long = currentSeqNum + 1
+
+ // The current persisted schema this log has been tracking. Note that this does NOT necessarily
+ // always equal the globally latest schema. Attempting to commit to a schema version that
+ // already exists is illegal.
+ // Subclasses can leverage this to compare the differences.
+ private var currentTrackedSchema: Option[T] = schemaAndSeqNumAtLogInit.map(_._2)
+
+
+ /**
+ * Get the latest tracked schema entry by this schema log.
+ */
+ def getCurrentTrackedSchema: Option[T] = currentTrackedSchema
+
+ /**
+ * Get the latest tracked schema batch ID / seq num by this log.
+ */
+ def getCurrentTrackedSeqNum: Long = currentSeqNum
+
+ /**
+ * Get the tracked schema at the specified seq num.
+ */
+ def getTrackedSchemaAtSeqNum(seqNum: Long): Option[T] = get(seqNum)
+
+ /**
+ * Deserializes the log entry from input stream.
+ * @throws FailedToDeserializeException
+ */
+ override protected def deserialize(in: InputStream): T =
+ schemaSerializer.deserialize(in).asInstanceOf[T]
+
+ override protected def serialize(metadata: T, out: OutputStream): Unit =
+ schemaSerializer.serialize(metadata, out)
+
+ /**
+ * Main API to actually write the log entry to the schema log. Clients can leverage this
+ * to save their new schema to the log.
+ * @throws FailedToEvolveSchema
+ * @param newSchema New persisted schema
+ */
+ def addSchemaToLog(newSchema: T): T = {
+ // Write to schema log
+ logInfo(s"Writing a new metadata version $nextSeqNumToWrite in the metadata log")
+ if (currentTrackedSchema.contains(newSchema)) {
+ // Record a warning if schema has not changed
+ logWarning(s"Schema didn't change after schema evolution. 
" + + s"currentSchema = ${currentTrackedSchema}.") + return newSchema + } + // Similar to how MicrobatchExecution detects concurrent checkpoint updates + if (!add(nextSeqNumToWrite, newSchema)) { + throw FailedToEvolveSchema + } + + currentTrackedSchema = Some(newSchema) + currentSeqNum = nextSeqNumToWrite + nextSeqNumToWrite += 1 + newSchema + } +} + +object SchemaTrackingExceptions { + // Designated exceptions + val FailedToDeserializeException = + new RuntimeException("Failed to deserialize schema log") + val FailedToEvolveSchema = + new RuntimeException("Failed to add schema entry to log. Concurrent operations detected.") +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/tablefeatures/tableChanges.scala b/spark/src/main/scala/org/apache/spark/sql/delta/tablefeatures/tableChanges.scala new file mode 100644 index 00000000000..d1068fa99a0 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/tablefeatures/tableChanges.scala @@ -0,0 +1,26 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.tablefeatures + +import org.apache.spark.sql.connector.catalog.TableChange + +/** + * Change to remove a feature from a table. + * @param featureName The name of the feature + * @param truncateHistory When true we set the minimum log retention period and clean up metadata. + */ +case class DropFeature(featureName: String, truncateHistory: Boolean) extends TableChange {} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/AnalysisHelper.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/AnalysisHelper.scala new file mode 100644 index 00000000000..73f9f610dc4 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/AnalysisHelper.scala @@ -0,0 +1,119 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaErrors + +import org.apache.spark.sql.{AnalysisException, Dataset, Row, SparkSession} +import org.apache.spark.sql.catalyst.analysis.AnalysisErrorAt +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan + +trait AnalysisHelper { + import AnalysisHelper._ + + // Keeping the following two methods for backward compatibility with previous Delta versions. 
+ protected def tryResolveReferences( + sparkSession: SparkSession)( + expr: Expression, + planContainingExpr: LogicalPlan): Expression = + tryResolveReferencesForExpressions(sparkSession)(Seq(expr), planContainingExpr.children).head + + protected def tryResolveReferencesForExpressions( + sparkSession: SparkSession, + exprs: Seq[Expression], + planContainingExpr: LogicalPlan): Seq[Expression] = + tryResolveReferencesForExpressions(sparkSession)(exprs, planContainingExpr.children) + + /** + * Resolve expressions using the attributes provided by `planProvidingAttrs`. Throw an error if + * failing to resolve any expressions. + */ + protected def resolveReferencesForExpressions( + sparkSession: SparkSession, + exprs: Seq[Expression], + planProvidingAttrs: LogicalPlan): Seq[Expression] = { + val resolvedExprs = + tryResolveReferencesForExpressions(sparkSession)(exprs, Seq(planProvidingAttrs)) + resolvedExprs.foreach { expr => + if (!expr.resolved) { + throw new AnalysisException( + s"cannot resolve ${expr.sql} given $planProvidingAttrs") + } + } + resolvedExprs + } + + /** + * Resolve expressions using the attributes provided by `planProvidingAttrs`, ignoring errors. + */ + protected def tryResolveReferencesForExpressions( + sparkSession: SparkSession)( + exprs: Seq[Expression], + plansProvidingAttrs: Seq[LogicalPlan]): Seq[Expression] = { + val newPlan = FakeLogicalPlan(exprs, plansProvidingAttrs) + sparkSession.sessionState.analyzer.execute(newPlan) match { + case FakeLogicalPlan(resolvedExprs, _) => + // Return even if it did not successfully resolve + resolvedExprs + case _ => + // This is unexpected + throw DeltaErrors.analysisException( + s"Could not resolve expression $exprs", plan = Some(newPlan)) + } + } + + protected def toDataset(sparkSession: SparkSession, logicalPlan: LogicalPlan): Dataset[Row] = { + Dataset.ofRows(sparkSession, logicalPlan) + } + + protected def improveUnsupportedOpError(f: => Unit): Unit = { + val possibleErrorMsgs = Seq( + "is only supported with v2 table", // full error: DELETE is only supported with v2 tables + "is not supported temporarily", // full error: UPDATE TABLE is not supported temporarily + "Table does not support read", + "Table implementation does not support writes" + ).map(_.toLowerCase()) + + def isExtensionOrCatalogError(error: Exception): Boolean = { + possibleErrorMsgs.exists { m => + error.getMessage != null && error.getMessage.toLowerCase().contains(m) + } + } + + try { f } catch { + case e: Exception if isExtensionOrCatalogError(e) => + throw DeltaErrors.configureSparkSessionWithExtensionAndCatalog(Some(e)) + } + } + +} + +object AnalysisHelper { + /** LogicalPlan to help resolve the given expression */ + case class FakeLogicalPlan( + exprs: Seq[Expression], + children: Seq[LogicalPlan]) + extends LogicalPlan + { + override def output: Seq[Attribute] = Nil + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[LogicalPlan]): FakeLogicalPlan = copy(children = newChildren) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/BinPackingIterator.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/BinPackingIterator.scala new file mode 100644 index 00000000000..50a031dcf20 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/BinPackingIterator.scala @@ -0,0 +1,65 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import scala.collection.generic.Sizing +import scala.collection.mutable.ArrayBuffer + +/** + * Iterator that packs objects in `inputIter` to create bins that have a total size of + * 'targetSize'. Each [[T]] object may contain multiple inputs that are always packed into a + * single bin. [[T]] instances must inherit from [[Sizing]] and define what is their size. + */ +class BinPackingIterator[T <: Sizing]( + inputIter: Iterator[T], + targetSize: Long) + extends Iterator[Seq[T]] { + + private val currentBin = new ArrayBuffer[T]() + private var sizeOfCurrentBin = 0L + + override def hasNext: Boolean = inputIter.hasNext || currentBin.nonEmpty + + override def next(): Seq[T] = { + var resultBin: Seq[T] = null + while (inputIter.hasNext && resultBin == null) { + val input = inputIter.next() + + val sizeOfCurrentFile = input.size + + // Start a new bin if the deletion vectors for the current Parquet file corresponding to + // `row` causes us to go over the target file size. + if (currentBin.nonEmpty && + sizeOfCurrentBin + sizeOfCurrentFile > targetSize) { + resultBin = currentBin.toVector + sizeOfCurrentBin = 0L + currentBin.clear() + } + + currentBin += input + sizeOfCurrentBin += sizeOfCurrentFile + } + + // Finish the last bin. + if (resultBin == null && !inputIter.hasNext) { + resultBin = currentBin.toVector + currentBin.clear() + } + + resultBin + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/BinPackingUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/BinPackingUtils.scala new file mode 100644 index 00000000000..32480e1e721 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/BinPackingUtils.scala @@ -0,0 +1,58 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import scala.collection.mutable.ArrayBuffer + +object BinPackingUtils { + /** + * Takes a sequence of items and groups them such that the size of each group is + * less than the specified maxBinSize. + */ + @inline def binPackBySize[I, V]( + elements: Seq[I], + sizeGetter: I => Long, + valueGetter: I => V, + maxBinSize: Long): Seq[Seq[V]] = { + val bins = new ArrayBuffer[Seq[V]]() + + val currentBin = new ArrayBuffer[V]() + var currentSize = 0L + + elements.sortBy(sizeGetter).foreach { element => + val size = sizeGetter(element) + // Generally, a bin is a group of existing files, whose total size does not exceed the + // desired maxFileSize. They will be coalesced into a single output file. 
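+ //
+ // Worked example (illustrative): with element sizes 10, 20, 30, 50 and maxBinSize = 60, the
+ // sorted pass fills the first bin to exactly 60 with [10, 20, 30]; the next element then
+ // trips the condition below, yielding the bins [[10, 20, 30], [50]].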
+ if ((currentSize >= maxBinSize) || size + currentSize > maxBinSize) { + if (currentBin.nonEmpty) { + bins += currentBin.toVector + currentBin.clear() + } + currentBin += valueGetter(element) + currentSize = size + } else { + currentBin += valueGetter(element) + currentSize += size + } + } + + if (currentBin.nonEmpty) { + bins += currentBin.toVector + } + bins.toSeq + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/Codec.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/Codec.scala new file mode 100644 index 00000000000..ec8921c37db --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/Codec.scala @@ -0,0 +1,208 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import java.nio.ByteBuffer +import java.nio.charset.StandardCharsets.US_ASCII +import java.util.UUID + +import com.google.common.primitives.UnsignedInteger + +/** Additional codecs not supported by Apache Commons Codecs. */ +object Codec { + + def uuidToBytes(id: UUID): Array[Byte] = uuidToByteBuffer(id).array() + + def uuidFromBytes(bytes: Array[Byte]): UUID = { + require(bytes.length == 16) + uuidFromByteBuffer(ByteBuffer.wrap(bytes)) + } + + def uuidToByteBuffer(id: UUID): ByteBuffer = { + val buffer = ByteBuffer.allocate(16) + buffer.putLong(id.getMostSignificantBits) + buffer.putLong(id.getLeastSignificantBits) + buffer.rewind() + buffer + } + + def uuidFromByteBuffer(buffer: ByteBuffer): UUID = { + require(buffer.remaining() >= 16) + val highBits = buffer.getLong + val lowBits = buffer.getLong + new UUID(highBits, lowBits) + } + + /** + * This implements Base85 using the 4 byte block aligned encoding and character set from Z85. + * + * @see https://rfc.zeromq.org/spec/32/ + */ + object Base85Codec { + + final val ENCODE_MAP: Array[Byte] = { + val chars = ('0' to '9') ++ ('a' to 'z') ++ ('A' to 'Z') ++ ".-:+=^!/*?&<>()[]{}@%$#" + chars.map(_.toByte).toArray + } + + lazy val DECODE_MAP: Array[Byte] = { + require(ENCODE_MAP.length - 1 <= Byte.MaxValue) + // The bitmask is the same as largest possible value, so the length of the array must + // be one greater. + val map: Array[Byte] = Array.fill(ASCII_BITMASK + 1)(-1) + for ((b, i) <- ENCODE_MAP.zipWithIndex) { + map(b) = i.toByte + } + map + } + + final val BASE: Long = 85L + final val BASE_2ND_POWER: Long = 7225L // 85^2 + final val BASE_3RD_POWER: Long = 614125L // 85^3 + final val BASE_4TH_POWER: Long = 52200625L // 85^4 + final val ASCII_BITMASK: Int = 0x7F + + // UUIDs always encode into 20 characters. + final val ENCODED_UUID_LENGTH: Int = 20 + + /** Encode a 16 byte UUID. */ + def encodeUUID(id: UUID): String = { + val buffer = uuidToByteBuffer(id) + encodeBlocks(buffer) + } + + /** + * Decode a 16 byte UUID. */ + def decodeUUID(encoded: String): UUID = { + val buffer = decodeBlocks(encoded) + uuidFromByteBuffer(buffer) + } + + /** + * Encode an arbitrary byte array. 
+ * + * Unaligned input will be padded to a multiple of 4 bytes. + */ + def encodeBytes(input: Array[Byte]): String = { + if (input.length % 4 == 0) { + encodeBlocks(ByteBuffer.wrap(input)) + } else { + val alignedLength = ((input.length + 4) / 4) * 4 + val buffer = ByteBuffer.allocate(alignedLength) + buffer.put(input) + while (buffer.hasRemaining) { + buffer.put(0.asInstanceOf[Byte]) + } + buffer.rewind() + encodeBlocks(buffer) + } + } + + /** + * Encode an arbitrary byte array using 4 byte blocks. + * + * Expects the input to be 4 byte aligned. + */ + private def encodeBlocks(buffer: ByteBuffer): String = { + require(buffer.remaining() % 4 == 0) + val numBlocks = buffer.remaining() / 4 + // Every 4 byte block gets encoded into 5 bytes/chars + val outputLength = numBlocks * 5 + val output: Array[Byte] = Array.ofDim(outputLength) + var outputIndex = 0 + + while (buffer.hasRemaining) { + var sum: Long = buffer.getInt & 0x00000000ffffffffL + output(outputIndex) = ENCODE_MAP((sum / BASE_4TH_POWER).toInt) + sum %= BASE_4TH_POWER + output(outputIndex + 1) = ENCODE_MAP((sum / BASE_3RD_POWER).toInt) + sum %= BASE_3RD_POWER + output(outputIndex + 2) = ENCODE_MAP((sum / BASE_2ND_POWER).toInt) + sum %= BASE_2ND_POWER + output(outputIndex + 3) = ENCODE_MAP((sum / BASE).toInt) + output(outputIndex + 4) = ENCODE_MAP((sum % BASE).toInt) + outputIndex += 5 + } + + new String(output, US_ASCII) + } + + /** + * Decode an arbitrary byte array. + * + * Only `outputLength` bytes will be returned. + * Any extra bytes, such as padding added because the input was unaligned, will be dropped. + */ + def decodeBytes(encoded: String, outputLength: Int): Array[Byte] = { + val result = decodeBlocks(encoded) + if (result.remaining() > outputLength) { + // Only read the expected number of bytes. + val output: Array[Byte] = Array.ofDim(outputLength) + result.get(output) + output + } else { + result.array() + } + } + + /** + * Decode an arbitrary byte array. + * + * Output may contain padding bytes, if the input was not 4 byte aligned. + * Use [[decodeBytes]] in that case and specify the expected number of output bytes + * without padding. + */ + def decodeAlignedBytes(encoded: String): Array[Byte] = decodeBlocks(encoded).array() + + /** + * Decode an arbitrary byte array. + * + * Output may contain padding bytes, if the input was not 4 byte aligned. + */ + private def decodeBlocks(encoded: String): ByteBuffer = { + val input = encoded.toCharArray + require(input.length % 5 == 0, "Input should be 5 character aligned.") + val buffer = ByteBuffer.allocate(input.length / 5 * 4) + + // A mechanism to detect invalid characters in the input while decoding, that only has a + // single conditional at the very end, instead of branching for every character. 
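+ //
+ // Any character outside 7-bit ASCII, and any ASCII character that DECODE_MAP maps to -1,
+ // leaves bits outside ASCII_BITMASK in `canary`, so the single `require` after the loop
+ // rejects the whole input.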
+ var canary: Int = 0 + def decodeInputChar(i: Int): Long = { + val c = input(i) + canary |= c // non-ascii char has bits outside of ASCII_BITMASK + val b = DECODE_MAP(c & ASCII_BITMASK) + canary |= b // invalid char maps to -1, which has bits outside ASCII_BITMASK + b.toLong + } + + var inputIndex = 0 + while (buffer.hasRemaining) { + var sum = 0L + sum += decodeInputChar(inputIndex) * BASE_4TH_POWER + sum += decodeInputChar(inputIndex + 1) * BASE_3RD_POWER + sum += decodeInputChar(inputIndex + 2) * BASE_2ND_POWER + sum += decodeInputChar(inputIndex + 3) * BASE + sum += decodeInputChar(inputIndex + 4) + buffer.putInt(sum.toInt) + inputIndex += 5 + } + require((canary & ~ASCII_BITMASK) == 0, s"Input is not valid Z85: $encoded") + buffer.rewind() + buffer + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/DatasetRefCache.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/DatasetRefCache.scala new file mode 100644 index 00000000000..3d9bdbdbebe --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/DatasetRefCache.scala @@ -0,0 +1,57 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +// scalastyle:off import.ordering.noEmptyLine +import java.util.concurrent.atomic.AtomicReference + +import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} + +/** + * A [[Dataset]] reference cache to automatically create new [[Dataset]] objects when the active + * [[SparkSession]] changes. This is useful when sharing objects holding [[Dataset]] references + * cross multiple sessions. Without this, using a [[Dataset]] that holds a stale session may change + * the active session and cause multiple issues (e.g., if we switch to a stale session coming from a + * notebook that has been detached, we may not be able to use built-in functions because those are + * cleaned up). + * + * The `creator` function will be called to create a new [[Dataset]] object when the old one has a + * different session than the current active session. Note that one MUST use SparkSession.active + * in the creator() if creator() needs to use Spark session. + * + * Unlike [[StateCache]], this class only caches the [[Dataset]] reference and doesn't cache the + * underlying `RDD`. + * + * WARNING: If there are many concurrent Spark sessions and each session calls 'get' multiple times, + * then the cost of creator becomes more noticeable as everytime it switch the active + * session, the older session needs to call creator again when it becomes active. + * + * @param creator a function to create [[Dataset]]. 
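+ * @example A minimal sketch (illustrative only):
+ * {{{
+ *   val cachedDf = new DatasetRefCache(() => SparkSession.active.range(100).toDF())
+ *   cachedDf.get.count() // a fresh Dataset is created if the active session has changed
+ * }}}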
+ */ +class DatasetRefCache[T](creator: () => Dataset[T]) { + + private val holder = new AtomicReference[Dataset[T]] + + def get: Dataset[T] = Option(holder.get()) + .filter(_.sparkSession eq SparkSession.active) + .getOrElse { + val df = creator() + holder.set(df) + df + } +} + diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/DateFormatter.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/DateFormatter.scala new file mode 100644 index 00000000000..54013c788ec --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/DateFormatter.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.delta.util + +import java.time.{Instant, ZoneId} +import java.util.Locale + +import org.apache.spark.sql.delta.util.DateTimeUtils.instantToDays + +/** + * Forked from [[org.apache.spark.sql.catalyst.util.DateFormatter]] + */ +sealed trait DateFormatter extends Serializable { + def parse(s: String): Int // returns days since epoch + def format(days: Int): String +} + +class Iso8601DateFormatter( + pattern: String, + locale: Locale) extends DateFormatter with DateTimeFormatterHelper { + + @transient + private lazy val formatter = getOrCreateFormatter(pattern, locale) + private val UTC = ZoneId.of("UTC") + + private def toInstant(s: String): Instant = { + val temporalAccessor = formatter.parse(s) + toInstantWithZoneId(temporalAccessor, UTC) + } + + override def parse(s: String): Int = instantToDays(toInstant(s)) + + override def format(days: Int): String = { + val instant = Instant.ofEpochSecond(days * DateTimeUtils.SECONDS_PER_DAY) + formatter.withZone(UTC).format(instant) + } +} + +object DateFormatter { + val defaultPattern: String = "yyyy-MM-dd" + val defaultLocale: Locale = Locale.US + + def apply(format: String, locale: Locale): DateFormatter = { + new Iso8601DateFormatter(format, locale) + } + + def apply(format: String): DateFormatter = apply(format, defaultLocale) + + def apply(): DateFormatter = apply(defaultPattern) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/DateTimeFormatterHelper.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/DateTimeFormatterHelper.scala new file mode 100644 index 00000000000..3e1ad3fe892 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/DateTimeFormatterHelper.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.delta.util + +import java.time._ +import java.time.chrono.IsoChronology +import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle} +import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries} +import java.util.Locale +import java.util.concurrent.Callable + +import org.apache.spark.sql.delta.util.DateTimeFormatterHelper._ +import com.google.common.cache.CacheBuilder + +/** + * Forked from [[org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper]] + */ +trait DateTimeFormatterHelper { + protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor, zoneId: ZoneId): Instant = { + val localTime = if (temporalAccessor.query(TemporalQueries.localTime) == null) { + LocalTime.ofNanoOfDay(0) + } else { + LocalTime.from(temporalAccessor) + } + val localDate = LocalDate.from(temporalAccessor) + val localDateTime = LocalDateTime.of(localDate, localTime) + val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId) + Instant.from(zonedDateTime) + } + + // Gets a formatter from the cache or creates new one. The buildFormatter method can be called + // a few times with the same parameters in parallel if the cache does not contain values + // associated to those parameters. Since the formatter is immutable, it does not matter. + // In this way, synchronised is intentionally omitted in this method to make parallel calls + // less synchronised. + // The Cache.get method is not used here to avoid creation of additional instances of Callable. + protected def getOrCreateFormatter(pattern: String, locale: Locale): DateTimeFormatter = { + val key = (pattern, locale) + cache.get(key, new Callable[DateTimeFormatter] { def call = buildFormatter(pattern, locale) }) + } +} + +private object DateTimeFormatterHelper { + val cache = CacheBuilder.newBuilder() + .maximumSize(128) + .build[(String, Locale), DateTimeFormatter]() + + def createBuilder(): DateTimeFormatterBuilder = { + new DateTimeFormatterBuilder().parseCaseInsensitive() + } + + def toFormatter(builder: DateTimeFormatterBuilder, locale: Locale): DateTimeFormatter = { + builder + .parseDefaulting(ChronoField.ERA, 1) + .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) + .parseDefaulting(ChronoField.DAY_OF_MONTH, 1) + .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) + .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) + .toFormatter(locale) + .withChronology(IsoChronology.INSTANCE) + .withResolverStyle(ResolverStyle.STRICT) + } + + def buildFormatter(pattern: String, locale: Locale): DateTimeFormatter = { + val builder = createBuilder().appendPattern(pattern) + toFormatter(builder, locale) + } + + lazy val fractionFormatter: DateTimeFormatter = { + val builder = createBuilder() + .append(DateTimeFormatter.ISO_LOCAL_DATE) + .appendLiteral(' ') + .appendValue(ChronoField.HOUR_OF_DAY, 2).appendLiteral(':') + .appendValue(ChronoField.MINUTE_OF_HOUR, 2).appendLiteral(':') + .appendValue(ChronoField.SECOND_OF_MINUTE, 2) + .appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true) + toFormatter(builder, TimestampFormatter.defaultLocale) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/DateTimeUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/DateTimeUtils.scala new file mode 100644 index 00000000000..ce3ab4f4a08 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/DateTimeUtils.scala @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.delta.util + +import java.sql.Timestamp +import java.time._ +import java.util.TimeZone +import java.util.concurrent.TimeUnit._ + +/** + * Forked from [[org.apache.spark.sql.catalyst.util.DateTimeUtils]]. + * Only included the methods that are used by Delta and added after Spark 2.4. + */ + +/** + * Helper functions for converting between internal and external date and time representations. + * Dates are exposed externally as java.sql.Date and are represented internally as the number of + * dates since the Unix epoch (1970-01-01). Timestamps are exposed externally as java.sql.Timestamp + * and are stored internally as longs, which are capable of storing timestamps with microsecond + * precision. + */ +object DateTimeUtils { + + // we use Int and Long internally to represent [[DateType]] and [[TimestampType]] + type SQLDate = Int + type SQLTimestamp = Long + + // Pre-calculated values can provide an opportunity of additional optimizations + // to the compiler like constants propagation and folding. + final val NANOS_PER_MICROS: Long = 1000 + final val MICROS_PER_MILLIS: Long = 1000 + final val MILLIS_PER_SECOND: Long = 1000 + final val SECONDS_PER_DAY: Long = 24 * 60 * 60 + final val MICROS_PER_SECOND: Long = MILLIS_PER_SECOND * MICROS_PER_MILLIS + final val NANOS_PER_MILLIS: Long = NANOS_PER_MICROS * MICROS_PER_MILLIS + final val NANOS_PER_SECOND: Long = NANOS_PER_MICROS * MICROS_PER_SECOND + final val MICROS_PER_DAY: Long = SECONDS_PER_DAY * MICROS_PER_SECOND + final val MILLIS_PER_MINUTE: Long = 60 * MILLIS_PER_SECOND + final val MILLIS_PER_HOUR: Long = 60 * MILLIS_PER_MINUTE + final val MILLIS_PER_DAY: Long = SECONDS_PER_DAY * MILLIS_PER_SECOND + + def defaultTimeZone(): TimeZone = TimeZone.getDefault + + def getTimeZone(timeZoneId: String): TimeZone = { + val zoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS) + TimeZone.getTimeZone(zoneId) + } + + // Converts Timestamp to string according to Hive TimestampWritable convention. 
+ def timestampToString(tf: TimestampFormatter, us: SQLTimestamp): String = { + tf.format(us) + } + + def instantToMicros(instant: Instant): Long = { + val us = Math.multiplyExact(instant.getEpochSecond, MICROS_PER_SECOND) + val result = Math.addExact(us, NANOSECONDS.toMicros(instant.getNano)) + result + } + + def microsToInstant(us: Long): Instant = { + val secs = Math.floorDiv(us, MICROS_PER_SECOND) + val mos = Math.floorMod(us, MICROS_PER_SECOND) + Instant.ofEpochSecond(secs, mos * NANOS_PER_MICROS) + } + + def instantToDays(instant: Instant): Int = { + val seconds = instant.getEpochSecond + val days = Math.floorDiv(seconds, SECONDS_PER_DAY) + days.toInt + } + + /** + * Returns the number of micros since epoch from java.sql.Timestamp. + */ + def fromJavaTimestamp(t: Timestamp): SQLTimestamp = { + if (t != null) { + MILLISECONDS.toMicros(t.getTime) + NANOSECONDS.toMicros(t.getNanos()) % NANOS_PER_MICROS + } else { + 0L + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaEncoders.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaEncoders.scala new file mode 100644 index 00000000000..4fb074ebde8 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaEncoders.scala @@ -0,0 +1,116 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import scala.reflect.runtime.universe.TypeTag + +import org.apache.spark.sql.delta.{DeltaHistory, DeltaHistoryManager, SerializableFileStatus, SnapshotState} +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.commands.convert.ConvertTargetFile +import org.apache.spark.sql.delta.sources.IndexedFile + +import org.apache.spark.sql.Encoder +import org.apache.spark.sql.catalyst.catalog.CatalogTypes +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder + +private[delta] class DeltaEncoder[T: TypeTag] { + private lazy val _encoder = ExpressionEncoder[T]() + + def get: Encoder[T] = { + _encoder.copy() + } +} + +/** + * Define a few `Encoder`s to reuse in Delta in order to avoid touching Scala reflection after + * warming up. This will be mixed into `org.apache.spark.sql.delta.implicits`. Use + * `import org.apache.spark.sql.delta.implicits._` to use these `Encoder`s. 
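+ * + * A minimal illustrative sketch (assuming an active `SparkSession` named `spark`): + * {{{ + * import org.apache.spark.sql.delta.implicits._ + * // Resolves the reusable `addFileEncoder` instead of deriving a new encoder via reflection. + * val addFiles = spark.emptyDataset[AddFile] + * }}}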
+ */ +private[delta] trait DeltaEncoders { + private lazy val _BooleanEncoder = new DeltaEncoder[Boolean] + implicit def booleanEncoder: Encoder[Boolean] = _BooleanEncoder.get + + private lazy val _IntEncoder = new DeltaEncoder[Int] + implicit def intEncoder: Encoder[Int] = _IntEncoder.get + + private lazy val _longEncoder = new DeltaEncoder[Long] + implicit def longEncoder: Encoder[Long] = _longEncoder.get + + private lazy val _stringEncoder = new DeltaEncoder[String] + implicit def stringEncoder: Encoder[String] = _stringEncoder.get + + private lazy val _longLongEncoder = new DeltaEncoder[(Long, Long)] + implicit def longLongEncoder: Encoder[(Long, Long)] = _longLongEncoder.get + + private lazy val _stringLongEncoder = new DeltaEncoder[(String, Long)] + implicit def stringLongEncoder: Encoder[(String, Long)] = _stringLongEncoder.get + + private lazy val _stringStringEncoder = new DeltaEncoder[(String, String)] + implicit def stringStringEncoder: Encoder[(String, String)] = _stringStringEncoder.get + + private lazy val _javaLongEncoder = new DeltaEncoder[java.lang.Long] + implicit def javaLongEncoder: Encoder[java.lang.Long] = _javaLongEncoder.get + + private lazy val _singleActionEncoder = new DeltaEncoder[SingleAction] + implicit def singleActionEncoder: Encoder[SingleAction] = _singleActionEncoder.get + + private lazy val _addFileEncoder = new DeltaEncoder[AddFile] + implicit def addFileEncoder: Encoder[AddFile] = _addFileEncoder.get + + private lazy val _removeFileEncoder = new DeltaEncoder[RemoveFile] + implicit def removeFileEncoder: Encoder[RemoveFile] = _removeFileEncoder.get + + private lazy val _pmvEncoder = new DeltaEncoder[(Protocol, Metadata, Long)] + implicit def pmvEncoder: Encoder[(Protocol, Metadata, Long)] = _pmvEncoder.get + + private lazy val _v2CheckpointActionsEncoder = new DeltaEncoder[(CheckpointMetadata, SidecarFile)] + implicit def v2CheckpointActionsEncoder: Encoder[(CheckpointMetadata, SidecarFile)] = + _v2CheckpointActionsEncoder.get + + private lazy val _serializableFileStatusEncoder = new DeltaEncoder[SerializableFileStatus] + implicit def serializableFileStatusEncoder: Encoder[SerializableFileStatus] = + _serializableFileStatusEncoder.get + + private lazy val _indexedFileEncoder = new DeltaEncoder[IndexedFile] + implicit def indexedFileEncoder: Encoder[IndexedFile] = _indexedFileEncoder.get + + private lazy val _addFileWithIndexEncoder = new DeltaEncoder[(AddFile, Long)] + implicit def addFileWithIndexEncoder: Encoder[(AddFile, Long)] = _addFileWithIndexEncoder.get + + private lazy val _addFileWithSourcePathEncoder = new DeltaEncoder[(AddFile, String)] + implicit def addFileWithSourcePathEncoder: Encoder[(AddFile, String)] = + _addFileWithSourcePathEncoder.get + + private lazy val _deltaHistoryEncoder = new DeltaEncoder[DeltaHistory] + implicit def deltaHistoryEncoder: Encoder[DeltaHistory] = _deltaHistoryEncoder.get + + private lazy val _historyCommitEncoder = new DeltaEncoder[DeltaHistoryManager.Commit] + implicit def historyCommitEncoder: Encoder[DeltaHistoryManager.Commit] = _historyCommitEncoder.get + + private lazy val _snapshotStateEncoder = new DeltaEncoder[SnapshotState] + implicit def snapshotStateEncoder: Encoder[SnapshotState] = _snapshotStateEncoder.get + + private lazy val _convertTargetFileEncoder = new DeltaEncoder[ConvertTargetFile] + implicit def convertTargetFileEncoder: Encoder[ConvertTargetFile] = + _convertTargetFileEncoder.get + + private lazy val _fsPartitionSpecEncoder = + new DeltaEncoder[(SerializableFileStatus, 
CatalogTypes.TablePartitionSpec)] + implicit def fsPartitionSpecEncoder + : Encoder[(SerializableFileStatus, CatalogTypes.TablePartitionSpec)] + = _fsPartitionSpecEncoder.get +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaFileOperations.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaFileOperations.scala new file mode 100644 index 00000000000..48946ae2235 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaFileOperations.scala @@ -0,0 +1,463 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import java.io.{FileNotFoundException, IOException} +import java.net.URI +import java.util.Locale + +import scala.util.Random +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.{DeltaErrors, SerializableFileStatus} +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.storage.LogStore +import org.apache.commons.io.IOUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileAlreadyExistsException, FileStatus, FileSystem, FSDataInputStream, Path} +import org.apache.hadoop.io.IOUtils.copyBytes +import org.apache.parquet.format.converter.ParquetMetadataConverter.SKIP_ROW_GROUPS +import org.apache.parquet.hadoop.{Footer, ParquetFileReader} + +import org.apache.spark.{SparkEnv, SparkException, TaskContext} +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.execution.streaming.CheckpointFileManager +import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream +import org.apache.spark.util.{SerializableConfiguration, ThreadUtils} + +/** + * Some utility methods on files, directories, and paths. + */ +object DeltaFileOperations extends DeltaLogging { + /** + * Create an absolute path from `child` using the `basePath` if the child is a relative path. + * Return `child` if it is an absolute path. + * + * @param basePath Base path to prepend to `child` if child is a relative path. + * Note: It is assumed that the basePath does not have any escaped characters and + * is directly readable by Hadoop APIs. + * @param child Child path to append to `basePath` if child is a relative path. + * Note: It is assumed that the child is escaped, that is, all special chars that + * need escaping by URI standards are already escaped. + * @return Absolute path without escaped chars that is directly readable by Hadoop APIs.
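+ * For example (with illustrative paths), `absolutePath("s3://bucket/table", "part-00000.parquet")` + * returns `s3://bucket/table/part-00000.parquet`, while an already-absolute child such as + * `s3://other-bucket/part-00000.parquet` is returned unchanged.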
+ */ + def absolutePath(basePath: String, child: String): Path = { + // scalastyle:off pathfromuri + val p = new Path(new URI(child)) + if (p.isAbsolute) { + p + } else { + val merged = new Path(basePath, p) + // URI resolution strips the final `/` in `p` if it exists + val mergedUri = merged.toUri.toString + if (child.endsWith("/") && !mergedUri.endsWith("/")) { + new Path(new URI(mergedUri + "/")) + } else { + merged + } + } + // scalastyle:on pathfromuri + } + + /** + * Given a path `child`: + * 1. Returns `child` if the path is already relative + * 2. Tries relativizing `child` with respect to `basePath` + * a) If the `child` doesn't live within the same base path, returns `child` as is + * b) If `child` lives in a different FileSystem, throws an exception + * Note that `child` may physically be pointing to a path within `basePath`, but may logically + * belong to a different FileSystem, e.g. DBFS mount points and direct S3 paths. + */ + def tryRelativizePath( + fs: FileSystem, + basePath: Path, + child: Path, + ignoreError: Boolean = false): Path = { + // We can map multiple schemes to the same `FileSystem` class, but `FileSystem.getScheme` is + // usually just a hard-coded string. Hence, we need to use the scheme of the URI that we use to + // create the FileSystem here. + if (child.isAbsolute) { + try { + new Path(fs.makeQualified(basePath).toUri.relativize(fs.makeQualified(child).toUri)) + } catch { + case _: IllegalArgumentException if ignoreError => + // ES-85571: when the file system failed to make the child path qualified, + // it means the child path exists in a different file system + // (a different authority or schema). This usually happens when the file is coming + // from the across buckets or across cloud storage system shallow clone. + // When ignoreError being set to true, not try to relativize this path, + // ignore the error and just return `child` as is. + child + case e: IllegalArgumentException => + logError(s"Failed to relativize the path ($child) " + + s"with the base path ($basePath) and the file system URI (${fs.getUri})", e) + throw DeltaErrors.failRelativizePath(child.toString) + } + } else { + child + } + } + + /** Check if the thrown exception is a throttling error. */ + private def isThrottlingError(t: Throwable): Boolean = { + Option(t.getMessage).exists(_.toLowerCase(Locale.ROOT).contains("slow down")) + } + + private def randomBackoff( + opName: String, + t: Throwable, + base: Int = 100, + jitter: Int = 1000): Unit = { + val sleepTime = Random.nextInt(jitter) + base + logWarning(s"Sleeping for $sleepTime ms to rate limit $opName", t) + Thread.sleep(sleepTime) + } + + /** Iterate through the contents of directories. + * + * If `listAsDirectories` is enabled, then we consider each path in `subDirs` to be directories, + * and we list files under that path. If, for example, "a/b" is provided, we would attempt to + * list "a/b/1.txt", "a/b/c/2.txt", and so on. We would not list "a/c", since it's not the same + * directory as "a/b". + * If not, we consider that path to be a filename, and we list paths in the same directory with + * names after that path. So, if "a/b" is provided, we would list "a/b/1.txt", "a/c", "a/d", and + * so on. However a file like "a/a.txt" would not be listed, because lexically it appears before + * "a/b". 
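+ * Listing retries with a randomized backoff when the storage system reports throttling + * ("slow down") errors, and directories that no longer exist yield an empty iterator.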
+ */ + private def listUsingLogStore( + logStore: LogStore, + hadoopConf: Configuration, + subDirs: Iterator[String], + recurse: Boolean, + hiddenDirNameFilter: String => Boolean, + hiddenFileNameFilter: String => Boolean, + listAsDirectories: Boolean = true): Iterator[SerializableFileStatus] = { + + def list(dir: String, tries: Int): Iterator[SerializableFileStatus] = { + logInfo(s"Listing $dir") + try { + val path = if (listAsDirectories) new Path(dir, "\u0000") else new Path(dir + "\u0000") + logStore.listFrom(path, hadoopConf) + .filterNot{ f => + val name = f.getPath.getName + if (f.isDirectory) hiddenDirNameFilter(name) else hiddenFileNameFilter(name) + }.map(SerializableFileStatus.fromStatus) + } catch { + case NonFatal(e) if isThrottlingError(e) && tries > 0 => + randomBackoff("listing", e) + list(dir, tries - 1) + case e: FileNotFoundException => + // Can happen when multiple GCs are running concurrently or due to eventual consistency + Iterator.empty + } + } + + val filesAndDirs = subDirs.flatMap { dir => + list(dir, tries = 10) + } + + if (recurse) { + recurseDirectories( + logStore, hadoopConf, filesAndDirs, hiddenDirNameFilter, hiddenFileNameFilter) + } else { + filesAndDirs + } + } + + /** Given an iterator of files and directories, recurse directories with its contents. */ + private def recurseDirectories( + logStore: LogStore, + hadoopConf: Configuration, + filesAndDirs: Iterator[SerializableFileStatus], + hiddenDirNameFilter: String => Boolean, + hiddenFileNameFilter: String => Boolean): Iterator[SerializableFileStatus] = { + filesAndDirs.flatMap { + case dir: SerializableFileStatus if dir.isDir => + Iterator.single(dir) ++ + listUsingLogStore( + logStore, + hadoopConf, + Iterator.single(dir.path), + recurse = true, + hiddenDirNameFilter, + hiddenFileNameFilter) + case file => + Iterator.single(file) + } + } + + /** + * The default filter for hidden files. Files names beginning with _ or . are considered hidden. + * @param fileName + * @return true if the file is hidden + */ + def defaultHiddenFileFilter(fileName: String): Boolean = { + fileName.startsWith("_") || fileName.startsWith(".") + } + + /** + * Recursively lists all the files and directories for the given `subDirs` in a scalable manner. + * + * @param spark The SparkSession + * @param subDirs Absolute path of the subdirectories to list + * @param hadoopConf The Hadoop Configuration to get a FileSystem instance + * @param hiddenDirNameFilter A function that returns true when the directory should be considered + * hidden and excluded from results. Defaults to checking for prefixes + * of "." or "_". + * @param hiddenFileNameFilter A function that returns true when the file should be considered + * hidden and excluded from results. Defaults to checking for prefixes + * of "." or "_". + * @param listAsDirectories Whether to treat the paths in subDirs as directories, where all files + * that are children to the path will be listed. If false, the paths are + * treated as filenames, and files under the same folder with filenames + * after the path will be listed instead. 
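+ * + * A minimal illustrative call (the table path and the broadcast `conf` below are hypothetical): + * {{{ + * val files = DeltaFileOperations.recursiveListDirs(spark, Seq("s3://bucket/table"), conf) + * }}} + * @return A `Dataset` of [[SerializableFileStatus]] for all listed files and directories.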
+ */ + def recursiveListDirs( + spark: SparkSession, + subDirs: Seq[String], + hadoopConf: Broadcast[SerializableConfiguration], + hiddenDirNameFilter: String => Boolean = defaultHiddenFileFilter, + hiddenFileNameFilter: String => Boolean = defaultHiddenFileFilter, + fileListingParallelism: Option[Int] = None, + listAsDirectories: Boolean = true): Dataset[SerializableFileStatus] = { + import org.apache.spark.sql.delta.implicits._ + if (subDirs.isEmpty) return spark.emptyDataset[SerializableFileStatus] + val listParallelism = fileListingParallelism.getOrElse(spark.sparkContext.defaultParallelism) + val dirsAndFiles = spark.sparkContext.parallelize(subDirs).mapPartitions { dirs => + val logStore = LogStore(SparkEnv.get.conf, hadoopConf.value.value) + listUsingLogStore( + logStore, + hadoopConf.value.value, + dirs, + recurse = false, + hiddenDirNameFilter, hiddenFileNameFilter, listAsDirectories) + }.repartition(listParallelism) // Initial list of subDirs may be small + + val allDirsAndFiles = dirsAndFiles.mapPartitions { firstLevelDirsAndFiles => + val logStore = LogStore(SparkEnv.get.conf, hadoopConf.value.value) + recurseDirectories( + logStore, + hadoopConf.value.value, + firstLevelDirsAndFiles, + hiddenDirNameFilter, + hiddenFileNameFilter) + } + spark.createDataset(allDirsAndFiles) + } + + /** + * Recursively and incrementally lists files with filenames after `listFilename` by alphabetical + * order. Helpful if you only want to list new files instead of the entire directory. + * + * Files located within `topDir` with filenames lexically after `listFilename` will be included, + * even if they may be located in parent/sibling folders of `listFilename`. + * + * @param spark The SparkSession + * @param listFilename Absolute path to a filename from which new files are listed (exclusive) + * @param topDir Absolute path to the original starting directory + * @param hadoopConf The Hadoop Configuration to get a FileSystem instance + * @param hiddenDirNameFilter A function that returns true when the directory should be considered + * hidden and excluded from results. Defaults to checking for prefixes + * of "." or "_". + * @param hiddenFileNameFilter A function that returns true when the file should be considered + * hidden and excluded from results. Defaults to checking for prefixes + * of "." or "_". + */ + def recursiveListFrom( + spark: SparkSession, + listFilename: String, + topDir: String, + hadoopConf: Broadcast[SerializableConfiguration], + hiddenDirNameFilter: String => Boolean = defaultHiddenFileFilter, + hiddenFileNameFilter: String => Boolean = defaultHiddenFileFilter, + fileListingParallelism: Option[Int] = None): Dataset[SerializableFileStatus] = { + + // Add folders from `listPath` to the depth before `topPath`, so as to ensure new folders/files + // in the parent directories are also included in the listing. + // If there are no new files, listing from parent directories are expected to be constant time. + val subDirs = getAllTopComponents(new Path(listFilename), new Path(topDir)) + + recursiveListDirs(spark, subDirs, hadoopConf, hiddenDirNameFilter, hiddenFileNameFilter, + fileListingParallelism, listAsDirectories = false) + } + + /** + * Lists the directory locally using LogStore without launching a spark job. Returns an iterator + * from LogStore. 
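+ * Hidden entries are excluded via `dirFilter` and `fileFilter`, both of which default to + * [[defaultHiddenFileFilter]].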
+ */ + def localListDirs( + hadoopConf: Configuration, + dirs: Seq[String], + recursive: Boolean = true, + dirFilter: String => Boolean = defaultHiddenFileFilter, + fileFilter: String => Boolean = defaultHiddenFileFilter): Iterator[SerializableFileStatus] = { + val logStore = LogStore(SparkEnv.get.conf, hadoopConf) + listUsingLogStore( + logStore, hadoopConf, dirs.toIterator, recurse = recursive, dirFilter, fileFilter) + } + + /** + * Incrementally lists files with filenames after `listDir` by alphabetical order. Helpful if you + * only want to list new files instead of the entire directory. + * Listed locally using LogStore without launching a spark job. Returns an iterator from LogStore. + */ + def localListFrom( + hadoopConf: Configuration, + listFilename: String, + topDir: String, + recursive: Boolean = true, + dirFilter: String => Boolean = defaultHiddenFileFilter, + fileFilter: String => Boolean = defaultHiddenFileFilter): Iterator[SerializableFileStatus] = { + val logStore = LogStore(SparkEnv.get.conf, hadoopConf) + val listDirs = getAllTopComponents(new Path(listFilename), new Path(topDir)) + listUsingLogStore(logStore, hadoopConf, listDirs.toIterator, recurse = recursive, + dirFilter, fileFilter, listAsDirectories = false) + } + + /** + * Tries deleting a file or directory non-recursively. If the file/folder doesn't exist, + * that's fine, a separate operation may be deleting files/folders. If a directory is non-empty, + * we shouldn't delete it. FileSystem implementations throw an `IOException` in those cases, + * which we return as a "we failed to delete". + * + * Listing on S3 is not consistent after deletes, therefore in case the `delete` returns `false`, + * because the file didn't exist, then we still return `true`. Retries on S3 rate limits up to 3 + * times. + */ + def tryDeleteNonRecursive(fs: FileSystem, path: Path, tries: Int = 3): Boolean = { + try fs.delete(path, false) catch { + case _: FileNotFoundException => true + case _: IOException => false + case NonFatal(e) if isThrottlingError(e) && tries > 0 => + randomBackoff("deletes", e) + tryDeleteNonRecursive(fs, path, tries - 1) + } + } + + /** + * Returns all the levels of sub directories that `path` has with respect to `base`. For example: + * getAllSubDirectories("/base", "/base/a/b/c") => + * (Iterator("/base/a", "/base/a/b"), "/base/a/b/c") + */ + def getAllSubDirectories(base: String, path: String): (Iterator[String], String) = { + val baseSplits = base.split(Path.SEPARATOR) + val pathSplits = path.split(Path.SEPARATOR).drop(baseSplits.length) + val it = Iterator.tabulate(pathSplits.length - 1) { i => + (baseSplits ++ pathSplits.take(i + 1)).mkString(Path.SEPARATOR) + } + (it, path) + } + + /** Register a task failure listener to delete a temp file in our best effort. */ + def registerTempFileDeletionTaskFailureListener( + conf: Configuration, + tempPath: Path): Unit = { + val tc = TaskContext.get() + if (tc == null) { + throw DeltaErrors.sparkTaskThreadNotFound + } + tc.addTaskFailureListener { (_, _) => + // Best effort to delete the temp file + try { + tempPath.getFileSystem(conf).delete(tempPath, false /* = recursive */) + } catch { + case NonFatal(e) => + logError(s"Failed to delete $tempPath", e) + } + () // Make the compiler happy + } + } + + /** + * Reads Parquet footers in multi-threaded manner. + * If the config "spark.sql.files.ignoreCorruptFiles" is set to true, we will ignore the corrupted + * files when reading footers. 
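+ * Footers are read on a pool of up to 8 threads; corrupted files are either skipped or + * surfaced as an error depending on `ignoreCorruptFiles`.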
+ */ + def readParquetFootersInParallel( + conf: Configuration, + partFiles: Seq[FileStatus], + ignoreCorruptFiles: Boolean): Seq[Footer] = { + ThreadUtils.parmap(partFiles, "readingParquetFooters", 8) { currentFile => + try { + // Skips row group information since we only need the schema. + // ParquetFileReader.readFooter throws RuntimeException, instead of IOException, + // when it can't read the footer. + Some(new Footer(currentFile.getPath(), + ParquetFileReader.readFooter( + conf, currentFile, SKIP_ROW_GROUPS))) + } catch { case e: RuntimeException => + if (ignoreCorruptFiles) { + logWarning(s"Skipped the footer in the corrupted file: $currentFile", e) + None + } else { + throw DeltaErrors.failedReadFileFooter(currentFile.toString, e) + } + } + }.flatten + } + + /** + * Get all parent directory paths from `listDir` until `topDir` (exclusive). + * For example, if `topDir` is "/folder/" and `currDir` is "/folder/a/b/c", we would return + * "/folder/a/b/c", "/folder/a/b" and "/folder/a". + */ + def getAllTopComponents(listDir: Path, topDir: Path): List[String] = { + var ret: List[String] = List() + var currDir = listDir + while (currDir.depth() > topDir.depth()) { + ret = ret :+ currDir.toString + val parent = currDir.getParent + currDir = parent + } + ret + } + + /** Expose `org.apache.spark.util.ThreadUtils.runInNewThread` to use in Delta code. */ + def runInNewThread[T]( + threadName: String, + isDaemon: Boolean = true)(body: => T): T = { + ThreadUtils.runInNewThread(threadName, isDaemon)(body) + } + + /** + * Returns a `Dataset[AddFile]`, where all the `AddFile` actions have absolute paths. The files + * may have already had absolute paths, in which case they are left unchanged. Else, they are + * prepended with the `qualifiedSourcePath`. + * + * @param qualifiedTablePath Fully qualified path of Delta table root + * @param files List of `AddFile` instances + */ + def makePathsAbsolute( + qualifiedTablePath: String, + files: Dataset[AddFile]): Dataset[AddFile] = { + import org.apache.spark.sql.delta.implicits._ + files.mapPartitions { fileList => + fileList.map { addFile => + val fileSource = DeltaFileOperations.absolutePath(qualifiedTablePath, addFile.path) + if (addFile.deletionVector != null) { + val absoluteDV = addFile.deletionVector.copyWithAbsolutePath(new Path(qualifiedTablePath)) + addFile.copy(path = fileSource.toUri.toString, deletionVector = absoluteDV) + } else { + addFile.copy(path = fileSource.toUri.toString) + } + } + } + } + +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaProgressReporter.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaProgressReporter.scala new file mode 100644 index 00000000000..0cab3f46616 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaProgressReporter.scala @@ -0,0 +1,58 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.util + +import org.apache.spark.SparkContext +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession + +trait DeltaProgressReporter extends Logging { + /** + * Report a log to indicate some command is running. + */ + def withStatusCode[T]( + statusCode: String, + defaultMessage: String, + data: Map[String, Any] = Map.empty)(body: => T): T = { + logInfo(s"$statusCode: $defaultMessage") + val t = withJobDescription(defaultMessage)(body) + logInfo(s"$statusCode: Done") + t + } + /** + * Wrap various delta operations to provide a more meaningful name in Spark UI + * This only has an effect if {{{body}}} actually runs a Spark job + * @param jobDesc a short description of the operation + */ + private def withJobDescription[U](jobDesc: String)(body: => U): U = { + val sc = SparkSession.active.sparkContext + // will prefix jobDesc with whatever the user specified in the job description + // of the higher level operation that triggered this delta operation + val oldDesc = sc.getLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION) + val suffix = if (oldDesc == null) { + "" + } else { + s" $oldDesc:" + } + try { + sc.setJobDescription(s"Delta:$suffix $jobDesc") + body + } finally { + sc.setJobDescription(oldDesc) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaSparkPlanUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaSparkPlanUtils.scala new file mode 100644 index 00000000000..e114d1ca3da --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaSparkPlanUtils.scala @@ -0,0 +1,165 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import org.apache.spark.sql.delta.{DeltaTable, DeltaTableReadPredicate} + +import org.apache.spark.sql.catalyst.expressions.{Exists, Expression, InSubquery, LateralSubquery, ScalarSubquery, UserDefinedExpression} +import org.apache.spark.sql.catalyst.plans.logical.{Distinct, Filter, LeafNode, LogicalPlan, OneRowRelation, Project, SubqueryAlias, Union} +import org.apache.spark.sql.execution.datasources.LogicalRelation + + +trait DeltaSparkPlanUtils { + import DeltaSparkPlanUtils._ + + protected def planContainsOnlyDeltaScans(source: LogicalPlan): Boolean = + findFirstNonDeltaScan(source).isEmpty + + protected def findFirstNonDeltaScan(source: LogicalPlan): Option[LogicalPlan] = { + source match { + case l: LogicalRelation => + l match { + case DeltaTable(_) => None + case _ => Some(l) + } + case OneRowRelation() => None + case leaf: LeafNode => Some(leaf) // Any other LeafNode is a non Delta scan. + case node => collectFirst(node.children, findFirstNonDeltaScan) + } + } + + /** + * Returns `true` if `plan` has a safe level of determinism. This is a conservative + * approximation of `plan` being a truly deterministic query. 
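+ * Non-deterministic expressions, subqueries over non-Delta scans, user-defined expressions + * (unless allowed via [[DeltaSparkPlanUtils.CheckDeterministicOptions]]) and unrecognized plan + * nodes are all treated as non-deterministic.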
+ * + */ + protected def planIsDeterministic( + plan: LogicalPlan, + checkDeterministicOptions: CheckDeterministicOptions): Boolean = + findFirstNonDeterministicNode(plan, checkDeterministicOptions).isEmpty + + type PlanOrExpression = Either[LogicalPlan, Expression] + + /** + * Returns a part of the `plan` that does not have a safe level of determinism. + * This is a conservative approximation of `plan` being a truly deterministic query. + */ + protected def findFirstNonDeterministicNode( + plan: LogicalPlan, + checkDeterministicOptions: CheckDeterministicOptions): Option[PlanOrExpression] = { + plan match { + // This is very restrictive, allowing only deterministic filters and projections directly + // on top of a Delta Table. + case Distinct(child) => findFirstNonDeterministicNode(child, checkDeterministicOptions) + case Project(projectList, child) => + findFirstNonDeterministicChildNode(projectList, checkDeterministicOptions) orElse { + findFirstNonDeterministicNode(child, checkDeterministicOptions) + } + case Filter(cond, child) => + findFirstNonDeterministicNode(cond, checkDeterministicOptions) orElse { + findFirstNonDeterministicNode(child, checkDeterministicOptions) + } + case Union(children, _, _) => collectFirst[LogicalPlan, PlanOrExpression]( + children, + c => findFirstNonDeterministicNode(c, checkDeterministicOptions)) + case SubqueryAlias(_, child) => + findFirstNonDeterministicNode(child, checkDeterministicOptions) + case DeltaTable(_) => None + case OneRowRelation() => None + case node => Some(Left(node)) + } + } + + protected def findFirstNonDeterministicChildNode( + children: Seq[Expression], + checkDeterministicOptions: CheckDeterministicOptions): Option[PlanOrExpression] = + collectFirst[Expression, PlanOrExpression]( + children, + c => findFirstNonDeterministicNode(c, checkDeterministicOptions)) + + protected def findFirstNonDeterministicNode( + child: Expression, + checkDeterministicOptions: CheckDeterministicOptions): Option[PlanOrExpression] = { + child match { + case SubqueryExpression(plan) => + findFirstNonDeltaScan(plan).map(Left(_)) + .orElse(findFirstNonDeterministicNode(plan, checkDeterministicOptions)) + case _: UserDefinedExpression if !checkDeterministicOptions.allowDeterministicUdf => + Some(Right(child)) + case p => + collectFirst[Expression, PlanOrExpression]( + p.children, + c => findFirstNonDeterministicNode(c, checkDeterministicOptions)) orElse { + if (p.deterministic) None else Some(Right(p)) + } + } + } + + protected def collectFirst[In, Out]( + input: Iterable[In], + recurse: In => Option[Out]): Option[Out] = { + input.foldLeft(Option.empty[Out]) { case (acc, value) => + acc.orElse(recurse(value)) + } + } + + /** Extractor object for the subquery plan of expressions that contain subqueries. */ + object SubqueryExpression { + def unapply(expr: Expression): Option[LogicalPlan] = expr match { + case subquery: ScalarSubquery => Some(subquery.plan) + case exists: Exists => Some(exists.plan) + case subquery: InSubquery => Some(subquery.query.plan) + case subquery: LateralSubquery => Some(subquery.plan) + case _ => None + } + } + + /** Returns whether the read predicates of a transaction contain any deterministic UDFs. 
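+ * Both data predicates and partition predicates are inspected unless `partitionedOnly` is set, + * in which case only partition predicates are checked.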
*/ + def containsDeterministicUDF( + predicates: Seq[DeltaTableReadPredicate], partitionedOnly: Boolean): Boolean = { + if (partitionedOnly) { + predicates.exists { + _.partitionPredicates.exists(containsDeterministicUDF) + } + } else { + predicates.exists { p => + p.dataPredicates.exists(containsDeterministicUDF) || + p.partitionPredicates.exists(containsDeterministicUDF) + } + } + } + + /** Returns whether an expression contains any deterministic UDFs. */ + def containsDeterministicUDF(expr: Expression): Boolean = expr.exists { + case udf: UserDefinedExpression => udf.deterministic + case _ => false + } +} + + +object DeltaSparkPlanUtils { + /** + * Options for deciding whether plans contain non-deterministic nodes and expressions. + * + * @param allowDeterministicUdf If true, allow UDFs that are marked by users as deterministic. + * If false, always treat them as non-deterministic to be more + * defensive against user bugs. + */ + case class CheckDeterministicOptions( + allowDeterministicUdf: Boolean + ) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/FileNames.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/FileNames.scala new file mode 100644 index 00000000000..6d487c1b047 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/FileNames.scala @@ -0,0 +1,204 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import java.util.UUID + +import org.apache.hadoop.fs.{FileStatus, Path} + +/** Helper for creating file names for specific commits / checkpoints. */ +object FileNames { + + val deltaFileRegex = raw"(\d+)\.json".r + val compactedDeltaFileRegex = raw"(\d+).(\d+).compacted.json".r + val checksumFileRegex = raw"(\d+)\.crc".r + val checkpointFileRegex = raw"(\d+)\.checkpoint((\.\d+\.\d+)?\.parquet|\.[^.]+\.(json|parquet))".r + + val deltaFilePattern = deltaFileRegex.pattern + val compactedDeltaFilePattern = compactedDeltaFileRegex.pattern + val checksumFilePattern = checksumFileRegex.pattern + val checkpointFilePattern = checkpointFileRegex.pattern + + /** Returns the delta (json format) path for a given delta file. */ + def deltaFile(path: Path, version: Long): Path = new Path(path, f"$version%020d.json") + + /** Returns the path for a given sample file */ + def sampleFile(path: Path, version: Long): Path = new Path(path, f"$version%020d") + + /** Returns the path to the checksum file for the given version. */ + def checksumFile(path: Path, version: Long): Path = new Path(path, f"$version%020d.crc") + + /** Returns the path to the compacted delta file for the given version range. */ + def compactedDeltaFile( + path: Path, + fromVersion: Long, + toVersion: Long): Path = { + new Path(path, f"$fromVersion%020d.$toVersion%020d.compacted.json") + } + + /** Returns the version for the given delta path. 
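+ * For example, a delta file named `00000000000000000010.json` corresponds to version 10.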
*/ + def deltaVersion(path: Path): Long = path.getName.split("\\.")(0).toLong + def deltaVersion(file: FileStatus): Long = deltaVersion(file.getPath) + + /** Returns the version for the given checksum file. */ + def checksumVersion(path: Path): Long = path.getName.stripSuffix(".crc").toLong + def checksumVersion(file: FileStatus): Long = checksumVersion(file.getPath) + + def compactedDeltaVersions(path: Path): (Long, Long) = { + val parts = path.getName.split("\\.") + (parts(0).toLong, parts(1).toLong) + } + def compactedDeltaVersions(file: FileStatus): (Long, Long) = compactedDeltaVersions(file.getPath) + + /** + * Returns the prefix of all delta log files for the given version. + * + * Intended for use with listFrom to get all files from this version onwards. The returned Path + * will not exist as a file. + */ + def listingPrefix(path: Path, version: Long): Path = new Path(path, f"$version%020d.") + + /** + * Returns the path for a singular checkpoint up to the given version. + * + * In a future protocol version this path will stop being written. + */ + def checkpointFileSingular(path: Path, version: Long): Path = + new Path(path, f"$version%020d.checkpoint.parquet") + + /** + * Returns the paths for all parts of the checkpoint up to the given version. + * + * In a future protocol version we will write this path instead of checkpointFileSingular. + * + * Example of the format: 00000000000000004915.checkpoint.0000000020.0000000060.parquet is + * checkpoint part 20 out of 60 for the snapshot at version 4915. Zero padding is for + * lexicographic sorting. + */ + def checkpointFileWithParts(path: Path, version: Long, numParts: Int): Seq[Path] = { + Range(1, numParts + 1) + .map(i => new Path(path, f"$version%020d.checkpoint.$i%010d.$numParts%010d.parquet")) + } + + def numCheckpointParts(path: Path): Option[Int] = { + val segments = path.getName.split("\\.") + + if (segments.size != 5) None else Some(segments(3).toInt) + } + + def isCheckpointFile(path: Path): Boolean = checkpointFilePattern.matcher(path.getName).matches() + def isCheckpointFile(file: FileStatus): Boolean = isCheckpointFile(file.getPath) + + def isDeltaFile(path: Path): Boolean = deltaFilePattern.matcher(path.getName).matches() + def isDeltaFile(file: FileStatus): Boolean = isDeltaFile(file.getPath) + + def isChecksumFile(path: Path): Boolean = checksumFilePattern.matcher(path.getName).matches() + def isChecksumFile(file: FileStatus): Boolean = isChecksumFile(file.getPath) + + def isCompactedDeltaFile(path: Path): Boolean = + compactedDeltaFilePattern.matcher(path.getName).matches() + def isCompactedDeltaFile(file: FileStatus): Boolean = isCompactedDeltaFile(file.getPath) + + def checkpointVersion(path: Path): Long = path.getName.split("\\.")(0).toLong + def checkpointVersion(file: FileStatus): Long = checkpointVersion(file.getPath) + + object CompactedDeltaFile { + def unapply(f: FileStatus): Option[(FileStatus, Long, Long)] = + unapply(f.getPath).map { case (_, startVersion, endVersion) => (f, startVersion, endVersion) } + def unapply(path: Path): Option[(Path, Long, Long)] = path.getName match { + case compactedDeltaFileRegex(lo, hi) => Some(path, lo.toLong, hi.toLong) + case _ => None + } + } + + + /** + * Get the version of the checkpoint, checksum or delta file. Returns None if an unexpected + * file type is seen. 
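+ * For compacted delta files, the end version of the compacted range is returned.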
+ */ + def getFileVersionOpt(path: Path): Option[Long] = path match { + case DeltaFile(_, version) => Some(version) + case ChecksumFile(_, version) => Some(version) + case CheckpointFile(_, version) => Some(version) + case CompactedDeltaFile(_, _, endVersion) => Some(endVersion) + case _ => None + } + + /** + * Get the version of the checkpoint, checksum or delta file. Throws an error if an unexpected + * file type is seen. These unexpected files should be filtered out to ensure forward + * compatibility in cases where new file types are added, but without an explicit protocol + * upgrade. + */ + def getFileVersion(path: Path): Long = { + getFileVersionOpt(path).getOrElse { + // scalastyle:off throwerror + throw new AssertionError( + s"Unexpected file type found in transaction log: $path") + // scalastyle:on throwerror + } + } + def getFileVersion(file: FileStatus): Long = getFileVersion(file.getPath) + + object DeltaFile { + def unapply(f: FileStatus): Option[(FileStatus, Long)] = + unapply(f.getPath).map { case (_, version) => (f, version) } + def unapply(path: Path): Option[(Path, Long)] = { + deltaFileRegex.unapplySeq(path.getName).map(path -> _.head.toLong) + } + } + object ChecksumFile { + def unapply(f: FileStatus): Option[(FileStatus, Long)] = + unapply(f.getPath).map { case (_, version) => (f, version) } + def unapply(path: Path): Option[(Path, Long)] = + checksumFileRegex.unapplySeq(path.getName).map(path -> _.head.toLong) + } + object CheckpointFile { + def unapply(f: FileStatus): Option[(FileStatus, Long)] = + unapply(f.getPath).map { case (_, version) => (f, version) } + def unapply(path: Path): Option[(Path, Long)] = { + checkpointFileRegex.unapplySeq(path.getName).map(path -> _.head.toLong) + } + } + + object FileType extends Enumeration { + val DELTA, CHECKPOINT, CHECKSUM, COMPACTED_DELTA, OTHER = Value + } + + /** File path for a new V2 Checkpoint Json file */ + def newV2CheckpointJsonFile(path: Path, version: Long): Path = + new Path(path, f"$version%020d.checkpoint.${UUID.randomUUID.toString}.json") + + /** File path for a new V2 Checkpoint Parquet file */ + def newV2CheckpointParquetFile(path: Path, version: Long): Path = + new Path(path, f"$version%020d.checkpoint.${UUID.randomUUID.toString}.parquet") + + /** File path for a V2 Checkpoint's Sidecar file */ + def newV2CheckpointSidecarFile( + logPath: Path, + version: Long, + numParts: Int, + currentPart: Int): Path = { + val basePath = sidecarDirPath(logPath) + val uuid = UUID.randomUUID.toString + new Path(basePath, f"$version%020d.checkpoint.$currentPart%010d.$numParts%010d.$uuid.parquet") + } + + val SIDECAR_SUBDIR = "_sidecars" + /** Returns path to the sidecar directory */ + def sidecarDirPath(logPath: Path): Path = new Path(logPath, SIDECAR_SUBDIR) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/JsonUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/JsonUtils.scala new file mode 100644 index 00000000000..2da273a9bd3 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/JsonUtils.scala @@ -0,0 +1,45 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import com.fasterxml.jackson.annotation.JsonInclude.Include +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import com.fasterxml.jackson.module.scala.{DefaultScalaModule, ScalaObjectMapper} + +/** Useful json functions used around the Delta codebase. */ +object JsonUtils { + /** Used to convert between classes and JSON. */ + lazy val mapper = { + val _mapper = new ObjectMapper with ScalaObjectMapper + _mapper.setSerializationInclusion(Include.NON_ABSENT) + _mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + _mapper.registerModule(DefaultScalaModule) + _mapper + } + + def toJson[T: Manifest](obj: T): String = { + mapper.writeValueAsString(obj) + } + + def toPrettyJson[T: Manifest](obj: T): String = { + mapper.writerWithDefaultPrettyPrinter().writeValueAsString(obj) + } + + def fromJson[T: Manifest](json: String): T = { + mapper.readValue[T](json) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala new file mode 100644 index 00000000000..6f870e30ca5 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala @@ -0,0 +1,745 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.util + +import java.lang.{Double => JDouble, Long => JLong} +import java.math.{BigDecimal => JBigDecimal} +import java.util.{Locale, TimeZone} + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer +import scala.util.Try + +import org.apache.spark.sql.delta.{DeltaAnalysisException, DeltaErrors} +import org.apache.hadoop.fs.Path + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal} +import org.apache.spark.sql.catalyst.types.DataTypeUtils +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.types._ + +/** + * This file is forked from [[org.apache.spark.sql.execution.datasources.PartitioningUtils]]. + */ + + +// In open-source Apache Spark, PartitionPath is defined as +// +// case class PartitionPath(values: InternalRow, path: Path) +// +// but in Databricks we use a different representation where the Path is stored as a String +// and converted back to a Path only when read. This significantly cuts memory consumption because +// Hadoop Path objects are heavyweight. See SC-7591 for details. +object PartitionPath { + // Used only in tests: + def apply(values: InternalRow, path: String): PartitionPath = { + // Roundtrip through `new Path` to ensure any normalization done there is applied: + apply(values, new Path(path)) + } + + def apply(values: InternalRow, path: Path): PartitionPath = { + new PartitionPath(values, path.toString) + } +} + +/** + * Holds a directory in a partitioned collection of files as well as the partition values + * in the form of a Row. Before scanning, the files at `path` need to be enumerated. + */ +class PartitionPath private (val values: InternalRow, val pathStr: String) { + // Note: this isn't a case class because we don't want to have a public apply() method which + // accepts a string. The goal is to force every value stored in `pathStr` to have gone through + // a `new Path(...).toString` to ensure that canonicalization / normalization has taken place. + def path: Path = new Path(pathStr) + def withNewValues(newValues: InternalRow): PartitionPath = { + new PartitionPath(newValues, pathStr) + } + override def equals(other: Any): Boolean = other match { + case that: PartitionPath => values == that.values && pathStr == that.pathStr + case _ => false + } + override def hashCode(): Int = { + (values, pathStr).hashCode() + } + override def toString: String = { + s"PartitionPath($values, $pathStr)" + } +} + +case class PartitionSpec( + partitionColumns: StructType, + partitions: Seq[PartitionPath]) + +object PartitionSpec { + val emptySpec = PartitionSpec(StructType(Seq.empty[StructField]), Seq.empty[PartitionPath]) +} + +private[delta] object PartitionUtils { + + val timestampPartitionPattern = "yyyy-MM-dd HH:mm:ss[.S]" + + case class PartitionValues(columnNames: Seq[String], literals: Seq[Literal]) + { + require(columnNames.size == literals.size) + } + + import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.{escapePathName, unescapePathName, DEFAULT_PARTITION_NAME} + + /** + * Given a group of qualified paths, tries to parse them and returns a partition specification. 
+ * For example, given: + * {{{ + * hdfs://:/path/to/partition/a=1/b=hello/c=3.14 + * hdfs://:/path/to/partition/a=2/b=world/c=6.28 + * }}} + * it returns: + * {{{ + * PartitionSpec( + * partitionColumns = StructType( + * StructField(name = "a", dataType = IntegerType, nullable = true), + * StructField(name = "b", dataType = StringType, nullable = true), + * StructField(name = "c", dataType = DoubleType, nullable = true)), + * partitions = Seq( + * Partition( + * values = Row(1, "hello", 3.14), + * path = "hdfs://:/path/to/partition/a=1/b=hello/c=3.14"), + * Partition( + * values = Row(2, "world", 6.28), + * path = "hdfs://:/path/to/partition/a=2/b=world/c=6.28"))) + * }}} + */ + def parsePartitions( + paths: Seq[Path], + typeInference: Boolean, + basePaths: Set[Path], + userSpecifiedSchema: Option[StructType], + caseSensitive: Boolean, + validatePartitionColumns: Boolean, + timeZoneId: String): PartitionSpec = { + parsePartitions(paths, typeInference, basePaths, userSpecifiedSchema, caseSensitive, + validatePartitionColumns, DateTimeUtils.getTimeZone(timeZoneId)) + } + + def parsePartitions( + paths: Seq[Path], + typeInference: Boolean, + basePaths: Set[Path], + userSpecifiedSchema: Option[StructType], + caseSensitive: Boolean, + validatePartitionColumns: Boolean, + timeZone: TimeZone): PartitionSpec = { + val userSpecifiedDataTypes = if (userSpecifiedSchema.isDefined) { + val nameToDataType = userSpecifiedSchema.get.fields.map(f => f.name -> f.dataType).toMap + if (!caseSensitive) { + CaseInsensitiveMap(nameToDataType) + } else { + nameToDataType + } + } else { + Map.empty[String, DataType] + } + + // SPARK-26990: use user specified field names if case insensitive. + val userSpecifiedNames = if (userSpecifiedSchema.isDefined && !caseSensitive) { + CaseInsensitiveMap(userSpecifiedSchema.get.fields.map(f => f.name -> f.name).toMap) + } else { + Map.empty[String, String] + } + + val dateFormatter = DateFormatter() + val timestampFormatter = TimestampFormatter(timestampPartitionPattern, timeZone) + // First, we need to parse every partition's path and see if we can find partition values. + val (partitionValues, optDiscoveredBasePaths) = paths.map { path => + parsePartition(path, typeInference, basePaths, userSpecifiedDataTypes, + validatePartitionColumns, timeZone, dateFormatter, timestampFormatter) + }.unzip + + // We create pairs of (path -> path's partition value) here + // If the corresponding partition value is None, the pair will be skipped + val pathsWithPartitionValues = paths.zip(partitionValues).flatMap(x => x._2.map(x._1 -> _)) + + if (pathsWithPartitionValues.isEmpty) { + // This dataset is not partitioned. + PartitionSpec.emptySpec + } else { + // This dataset is partitioned. We need to check whether all partitions have the same + // partition columns and resolve potential type conflicts. + + // Check if there is conflicting directory structure. + // For the paths such as: + // var paths = Seq( + // "hdfs://host:9000/invalidPath", + // "hdfs://host:9000/path/a=10/b=20", + // "hdfs://host:9000/path/a=10.5/b=hello") + // It will be recognised as conflicting directory structure: + // "hdfs://host:9000/invalidPath" + // "hdfs://host:9000/path" + // TODO: Selective case sensitivity. + val discoveredBasePaths = optDiscoveredBasePaths.flatten.map(_.toString.toLowerCase()) + assert( + discoveredBasePaths.distinct.size == 1, + "Conflicting directory structures detected. 
Suspicious paths:\b" + + discoveredBasePaths.distinct.mkString("\n\t", "\n\t", "\n\n") + + "If provided paths are partition directories, please set " + + "\"basePath\" in the options of the data source to specify the " + + "root directory of the table. If there are multiple root directories, " + + "please load them separately and then union them.") + + val resolvedPartitionValues = + resolvePartitions(pathsWithPartitionValues, caseSensitive, timeZone) + + // Creates the StructType which represents the partition columns. + val fields = { + val PartitionValues(columnNames, literals) = resolvedPartitionValues.head + columnNames.zip(literals).map { case (name, Literal(_, dataType)) => + // We always assume partition columns are nullable since we've no idea whether null values + // will be appended in the future. + val resultName = userSpecifiedNames.getOrElse(name, name) + val resultDataType = userSpecifiedDataTypes.getOrElse(name, dataType) + StructField(resultName, resultDataType, nullable = true) + } + } + + // Finally, we create `Partition`s based on paths and resolved partition values. + val partitions = resolvedPartitionValues.zip(pathsWithPartitionValues).map { + case (PartitionValues(_, literals), (path, _)) => + PartitionPath(InternalRow.fromSeq(literals.map(_.value)), path) + } + + PartitionSpec(StructType(fields), partitions) + } + } + + /** + * Parses a single partition, returns column names and values of each partition column, also + * the path when we stop partition discovery. For example, given: + * {{{ + * path = hdfs://:/path/to/partition/a=42/b=hello/c=3.14 + * }}} + * it returns the partition: + * {{{ + * PartitionValues( + * Seq("a", "b", "c"), + * Seq( + * Literal.create(42, IntegerType), + * Literal.create("hello", StringType), + * Literal.create(3.14, DoubleType))) + * }}} + * and the path when we stop the discovery is: + * {{{ + * hdfs://:/path/to/partition + * }}} + */ + def parsePartition( + path: Path, + typeInference: Boolean, + basePaths: Set[Path], + userSpecifiedDataTypes: Map[String, DataType], + validatePartitionColumns: Boolean, + timeZone: TimeZone, + dateFormatter: DateFormatter, + timestampFormatter: TimestampFormatter): (Option[PartitionValues], Option[Path]) = { + val columns = ArrayBuffer.empty[(String, Literal)] + // Old Hadoop versions don't have `Path.isRoot` + var finished = path.getParent == null + // currentPath is the current path that we will use to parse partition column value. + var currentPath: Path = path + + while (!finished) { + // Sometimes (e.g., when speculative task is enabled), temporary directories may be left + // uncleaned. Here we simply ignore them. + if (currentPath.getName.toLowerCase(Locale.ROOT) == "_temporary") { + return (None, None) + } + + if (basePaths.contains(currentPath)) { + // If the currentPath is one of base paths. We should stop. + finished = true + } else { + // Let's say currentPath is a path of "/table/a=1/", currentPath.getName will give us a=1. + // Once we get the string, we try to parse it and find the partition column and value. + val maybeColumn = + parsePartitionColumn(currentPath.getName, typeInference, userSpecifiedDataTypes, + validatePartitionColumns, timeZone, dateFormatter, timestampFormatter) + maybeColumn.foreach(columns += _) + + // Now, we determine if we should stop. + // When we hit any of the following cases, we will stop: + // - In this iteration, we could not parse the value of partition column and value, + // i.e. maybeColumn is None, and columns is not empty. 
At here we check if columns is + // empty to handle cases like /table/a=1/_temporary/something (we need to find a=1 in + // this case). + // - After we get the new currentPath, this new currentPath represent the top level dir + // i.e. currentPath.getParent == null. For the example of "/table/a=1/", + // the top level dir is "/table". + finished = + (maybeColumn.isEmpty && columns.nonEmpty) || currentPath.getParent == null + + if (!finished) { + // For the above example, currentPath will be "/table/". + currentPath = currentPath.getParent + } + } + } + + if (columns.isEmpty) { + (None, Some(path)) + } else { + val (columnNames, values) = columns.reverse.unzip + (Some(PartitionValues(columnNames.toSeq, values.toSeq)), Some(currentPath)) + } + } + + private def parsePartitionColumn( + columnSpec: String, + typeInference: Boolean, + userSpecifiedDataTypes: Map[String, DataType], + validatePartitionColumns: Boolean, + timeZone: TimeZone, + dateFormatter: DateFormatter, + timestampFormatter: TimestampFormatter): Option[(String, Literal)] = { + val equalSignIndex = columnSpec.indexOf('=') + if (equalSignIndex == -1) { + None + } else { + val columnName = unescapePathName(columnSpec.take(equalSignIndex)) + assert(columnName.nonEmpty, s"Empty partition column name in '$columnSpec'") + + val rawColumnValue = columnSpec.drop(equalSignIndex + 1) + assert(rawColumnValue.nonEmpty, s"Empty partition column value in '$columnSpec'") + + val literal = if (userSpecifiedDataTypes.contains(columnName)) { + // SPARK-26188: if user provides corresponding column schema, get the column value without + // inference, and then cast it as user specified data type. + val dataType = userSpecifiedDataTypes(columnName) + val columnValueLiteral = inferPartitionColumnValue( + rawColumnValue, + false, + timeZone, + dateFormatter, + timestampFormatter) + val columnValue = columnValueLiteral.eval() + val castedValue = Cast(columnValueLiteral, dataType, Option(timeZone.getID)).eval() + if (validatePartitionColumns && columnValue != null && castedValue == null) { + throw DeltaErrors.partitionColumnCastFailed( + columnValue.toString, dataType.toString, columnName) + } + Literal.create(castedValue, dataType) + } else { + inferPartitionColumnValue( + rawColumnValue, + typeInference, + timeZone, + dateFormatter, + timestampFormatter) + } + Some(columnName -> literal) + } + } + + /** + * Given a partition path fragment, e.g. `fieldOne=1/fieldTwo=2`, returns a parsed spec + * for that fragment as a `TablePartitionSpec`, e.g. `Map(("fieldOne", "1"), ("fieldTwo", "2"))`. + */ + def parsePathFragment(pathFragment: String): TablePartitionSpec = { + parsePathFragmentAsSeq(pathFragment).toMap + } + + /** + * Given a partition path fragment, e.g. `fieldOne=1/fieldTwo=2`, returns a parsed spec + * for that fragment as a `Seq[(String, String)]`, e.g. + * `Seq(("fieldOne", "1"), ("fieldTwo", "2"))`. + */ + def parsePathFragmentAsSeq(pathFragment: String): Seq[(String, String)] = { + pathFragment.stripPrefix("data/").split("/").map { kv => + val pair = kv.split("=", 2) + (unescapePathName(pair(0)), unescapePathName(pair(1))) + } + } + + /** + * This is the inverse of parsePathFragment(). 
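+ *
+ * For example (illustrative only), a spec of Map("fieldOne" -> "1", "fieldTwo" -> "2") with a
+ * partition schema ordered as (fieldOne, fieldTwo) yields the fragment "fieldOne=1/fieldTwo=2".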
+ */ + def getPathFragment(spec: TablePartitionSpec, partitionSchema: StructType): String = { + partitionSchema.map { field => + escapePathName(field.name) + "=" + escapePathName(spec(field.name)) + }.mkString("/") + } + + def getPathFragment(spec: TablePartitionSpec, partitionColumns: Seq[Attribute]): String = { + getPathFragment(spec, DataTypeUtils.fromAttributes(partitionColumns)) + } + + /** + * Normalize the column names in partition specification, w.r.t. the real partition column names + * and case sensitivity. e.g., if the partition spec has a column named `monTh`, and there is a + * partition column named `month`, and it's case insensitive, we will normalize `monTh` to + * `month`. + */ + def normalizePartitionSpec[T]( + partitionSpec: Map[String, T], + partColNames: Seq[String], + tblName: String, + resolver: Resolver): Map[String, T] = { + val normalizedPartSpec = partitionSpec.toSeq.map { case (key, value) => + val normalizedKey = partColNames.find(resolver(_, key)).getOrElse { + throw DeltaErrors.invalidPartitionColumn(key, tblName) + } + normalizedKey -> value + } + + checkColumnNameDuplication( + normalizedPartSpec.map(_._1), "in the partition schema", resolver) + + normalizedPartSpec.toMap + } + + /** + * Resolves possible type conflicts between partitions by up-casting "lower" types using + * [[findWiderTypeForPartitionColumn]]. + */ + def resolvePartitions( + pathsWithPartitionValues: Seq[(Path, PartitionValues)], + caseSensitive: Boolean, + timeZone: TimeZone): Seq[PartitionValues] = { + if (pathsWithPartitionValues.isEmpty) { + Seq.empty + } else { + val partColNames = if (caseSensitive) { + pathsWithPartitionValues.map(_._2.columnNames) + } else { + pathsWithPartitionValues.map(_._2.columnNames.map(_.toLowerCase())) + } + assert( + partColNames.distinct.size == 1, + listConflictingPartitionColumns(pathsWithPartitionValues)) + + // Resolves possible type conflicts for each column + val values = pathsWithPartitionValues.map(_._2) + val columnCount = values.head.columnNames.size + val resolvedValues = (0 until columnCount).map { i => + resolveTypeConflicts(values.map(_.literals(i)), timeZone) + } + + // Fills resolved literals back to each partition + values.zipWithIndex.map { case (d, index) => + d.copy(literals = resolvedValues.map(_(index))) + } + } + } + + def listConflictingPartitionColumns( + pathWithPartitionValues: Seq[(Path, PartitionValues)]): String = { + val distinctPartColNames = pathWithPartitionValues.map(_._2.columnNames).distinct + + def groupByKey[K, V](seq: Seq[(K, V)]): Map[K, Iterable[V]] = + seq.groupBy { case (key, _) => key }.mapValues(_.map { case (_, value) => value }).toMap + + val partColNamesToPaths = groupByKey(pathWithPartitionValues.map { + case (path, partValues) => partValues.columnNames -> path + }) + + val distinctPartColLists = distinctPartColNames.map(_.mkString(", ")).zipWithIndex.map { + case (names, index) => + s"Partition column name list #$index: $names" + } + + // Lists out those non-leaf partition directories that also contain files + val suspiciousPaths = distinctPartColNames.sortBy(_.length).flatMap(partColNamesToPaths) + + s"Conflicting partition column names detected:\n" + + distinctPartColLists.mkString("\n\t", "\n\t", "\n\n") + + "For partitioned table directories, data files should only live in leaf directories.\n" + + "And directories at the same level should have the same partition column name.\n" + + "Please check the following directories for unexpected files or " + + "inconsistent partition column names:\n" + + 
suspiciousPaths.map("\t" + _).mkString("\n", "\n", "") + } + + // scalastyle:off line.size.limit + /** + * Converts a string to a [[Literal]] with automatic type inference. Currently only supports + * [[NullType]], [[IntegerType]], [[LongType]], [[DoubleType]], [[DecimalType]], [[DateType]] + * [[TimestampType]], and [[StringType]]. + * + * When resolving conflicts, it follows the table below: + * + * +--------------------+-------------------+-------------------+-------------------+--------------------+------------+---------------+---------------+------------+ + * | InputA \ InputB | NullType | IntegerType | LongType | DecimalType(38,0)* | DoubleType | DateType | TimestampType | StringType | + * +--------------------+-------------------+-------------------+-------------------+--------------------+------------+---------------+---------------+------------+ + * | NullType | NullType | IntegerType | LongType | DecimalType(38,0) | DoubleType | DateType | TimestampType | StringType | + * | IntegerType | IntegerType | IntegerType | LongType | DecimalType(38,0) | DoubleType | StringType | StringType | StringType | + * | LongType | LongType | LongType | LongType | DecimalType(38,0) | StringType | StringType | StringType | StringType | + * | DecimalType(38,0)* | DecimalType(38,0) | DecimalType(38,0) | DecimalType(38,0) | DecimalType(38,0) | StringType | StringType | StringType | StringType | + * | DoubleType | DoubleType | DoubleType | StringType | StringType | DoubleType | StringType | StringType | StringType | + * | DateType | DateType | StringType | StringType | StringType | StringType | DateType | TimestampType | StringType | + * | TimestampType | TimestampType | StringType | StringType | StringType | StringType | TimestampType | TimestampType | StringType | + * | StringType | StringType | StringType | StringType | StringType | StringType | StringType | StringType | StringType | + * +--------------------+-------------------+-------------------+-------------------+--------------------+------------+---------------+---------------+------------+ + * Note that, for DecimalType(38,0)*, the table above intentionally does not cover all other + * combinations of scales and precisions because currently we only infer decimal type like + * `BigInteger`/`BigInt`. For example, 1.1 is inferred as double type. + */ + // scalastyle:on line.size.limit + def inferPartitionColumnValue( + raw: String, + typeInference: Boolean, + timeZone: TimeZone, + dateFormatter: DateFormatter, + timestampFormatter: TimestampFormatter): Literal = { + val decimalTry = Try { + // `BigDecimal` conversion can fail when the `field` is not a form of number. + val bigDecimal = new JBigDecimal(raw) + // It reduces the cases for decimals by disallowing values having scale (eg. `1.1`). + require(bigDecimal.scale <= 0) + // `DecimalType` conversion can fail when + // 1. The precision is bigger than 38. + // 2. scale is bigger than precision. + Literal(bigDecimal) + } + + val dateTry = Try { + // try and parse the date, if no exception occurs this is a candidate to be resolved as + // DateType + dateFormatter.parse(raw) + // SPARK-23436: Casting the string to date may still return null if a bad Date is provided. + // This can happen since DateFormat.parse may not use the entire text of the given string: + // so if there are extra-characters after the date, it returns correctly. 
+ // We need to check that we can cast the raw string since we later can use Cast to get + // the partition values with the right DataType (see + // org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex.inferPartitioning) + val dateValue = Cast(Literal(raw), DateType).eval() + // Disallow DateType if the cast returned null + require(dateValue != null) + Literal.create(dateValue, DateType) + } + + val timestampTry = Try { + val unescapedRaw = unescapePathName(raw) + // try and parse the date, if no exception occurs this is a candidate to be resolved as + // TimestampType + timestampFormatter.parse(unescapedRaw) + // SPARK-23436: see comment for date + val timestampValue = Cast(Literal(unescapedRaw), TimestampType, Some(timeZone.getID)).eval() + // Disallow TimestampType if the cast returned null + require(timestampValue != null) + Literal.create(timestampValue, TimestampType) + } + + if (typeInference) { + // First tries integral types + Try(Literal.create(Integer.parseInt(raw), IntegerType)) + .orElse(Try(Literal.create(JLong.parseLong(raw), LongType))) + .orElse(decimalTry) + // Then falls back to fractional types + .orElse(Try(Literal.create(JDouble.parseDouble(raw), DoubleType))) + // Then falls back to date/timestamp types + .orElse(timestampTry) + .orElse(dateTry) + // Then falls back to string + .getOrElse { + if (raw == DEFAULT_PARTITION_NAME) { + Literal.default(NullType) + } else { + Literal.create(unescapePathName(raw), StringType) + } + } + } else { + if (raw == DEFAULT_PARTITION_NAME) { + Literal.default(NullType) + } else { + Literal.create(unescapePathName(raw), StringType) + } + } + } + + def validatePartitionColumn( + schema: StructType, + partitionColumns: Seq[String], + caseSensitive: Boolean): Unit = { + checkColumnNameDuplication( + partitionColumns, + "in the partition columns", + caseSensitive) + + partitionColumnsSchema(schema, partitionColumns, caseSensitive).foreach { + field => field.dataType match { + case _: AtomicType => // OK + case _ => throw DeltaErrors.cannotUseDataTypeForPartitionColumnError(field) + } + } + + if (partitionColumns.nonEmpty && partitionColumns.size == schema.fields.length) { + throw new DeltaAnalysisException( + errorClass = "DELTA_CANNOT_USE_ALL_COLUMNS_FOR_PARTITION", + Array.empty) + } + } + + def partitionColumnsSchema( + schema: StructType, + partitionColumns: Seq[String], + caseSensitive: Boolean): StructType = { + val equality = columnNameEquality(caseSensitive) + StructType(partitionColumns.map { col => + schema.find(f => equality(f.name, col)).getOrElse { + val schemaCatalog = schema.catalogString + throw DeltaErrors.missingPartitionColumn(col, schemaCatalog) + } + }).asNullable + } + + def mergeDataAndPartitionSchema( + dataSchema: StructType, + partitionSchema: StructType, + caseSensitive: Boolean): (StructType, Map[String, StructField]) = { + val overlappedPartCols = mutable.Map.empty[String, StructField] + partitionSchema.foreach { partitionField => + val partitionFieldName = getColName(partitionField, caseSensitive) + if (dataSchema.exists(getColName(_, caseSensitive) == partitionFieldName)) { + overlappedPartCols += partitionFieldName -> partitionField + } + } + + // When data and partition schemas have overlapping columns, the output + // schema respects the order of the data schema for the overlapping columns, and it + // respects the data types of the partition schema. + // `HadoopFsRelation` will be mapped to `FileSourceScanExec`, which always output + // all the partition columns physically. 
Here we need to make sure the final schema + // contains all the partition columns. + val fullSchema = + StructType(dataSchema.map(f => overlappedPartCols.getOrElse(getColName(f, caseSensitive), f)) ++ + partitionSchema.filterNot(f => overlappedPartCols.contains(getColName(f, caseSensitive)))) + (fullSchema, overlappedPartCols.toMap) + } + + def getColName(f: StructField, caseSensitive: Boolean): String = { + if (caseSensitive) { + f.name + } else { + f.name.toLowerCase(Locale.ROOT) + } + } + + private def columnNameEquality(caseSensitive: Boolean): (String, String) => Boolean = { + if (caseSensitive) { + org.apache.spark.sql.catalyst.analysis.caseSensitiveResolution + } else { + org.apache.spark.sql.catalyst.analysis.caseInsensitiveResolution + } + } + + /** + * Given a collection of [[Literal]]s, resolves possible type conflicts by + * [[findWiderTypeForPartitionColumn]]. + */ + private def resolveTypeConflicts(literals: Seq[Literal], timeZone: TimeZone): Seq[Literal] = { + val litTypes = literals.map(_.dataType) + val desiredType = litTypes.reduce(findWiderTypeForPartitionColumn) + + literals.map { case l @ Literal(_, dataType) => + Literal.create(Cast(l, desiredType, Some(timeZone.getID)).eval(), desiredType) + } + } + + /** + * Type widening rule for partition column types. It is similar to + * [[TypeCoercion.findWiderTypeForTwo]] but the main difference is that here we disallow + * precision loss when widening double/long and decimal, and fall back to string. + */ + private val findWiderTypeForPartitionColumn: (DataType, DataType) => DataType = { + case (DoubleType, _: DecimalType) | (_: DecimalType, DoubleType) => StringType + case (DoubleType, LongType) | (LongType, DoubleType) => StringType + case (t1, t2) => TypeCoercion.findWiderTypeForTwo(t1, t2).getOrElse(StringType) + } + + /** The methods below are forked from [[org.apache.spark.sql.util.SchemaUtils]] */ + + /** + * Checks if input column names have duplicate identifiers. This throws an exception if + * the duplication exists. + * + * @param columnNames column names to check + * @param colType column type name, used in an exception message + * @param resolver resolver used to determine if two identifiers are equal + */ + def checkColumnNameDuplication( + columnNames: Seq[String], colType: String, resolver: Resolver): Unit = { + checkColumnNameDuplication(columnNames, colType, isCaseSensitiveAnalysis(resolver)) + } + + /** + * Checks if input column names have duplicate identifiers. This throws an exception if + * the duplication exists. 
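+ * For example (illustrative), Seq("id", "ID") is reported as a duplicate when the check is
+ * case insensitive, but not when it is case sensitive.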
+ * + * @param columnNames column names to check + * @param colType column type name, used in an exception message + * @param caseSensitiveAnalysis whether duplication checks should be case sensitive or not + */ + def checkColumnNameDuplication( + columnNames: Seq[String], colType: String, caseSensitiveAnalysis: Boolean): Unit = { + // scalastyle:off caselocale + val names = if (caseSensitiveAnalysis) columnNames else columnNames.map(_.toLowerCase) + // scalastyle:on caselocale + if (names.distinct.length != names.length) { + val duplicateColumns = names.groupBy(identity).collect { + case (x, ys) if ys.length > 1 => s"`$x`" + } + throw DeltaErrors.foundDuplicateColumnsException(colType, + duplicateColumns.mkString(", ")) + } + } + + // Returns true if a given resolver is case-sensitive + private def isCaseSensitiveAnalysis(resolver: Resolver): Boolean = { + if (resolver == caseSensitiveResolution) { + true + } else if (resolver == caseInsensitiveResolution) { + false + } else { + sys.error("A resolver to check if two identifiers are equal must be " + + "`caseSensitiveResolution` or `caseInsensitiveResolution` in o.a.s.sql.catalyst.") + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/PathWithFileSystem.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/PathWithFileSystem.scala new file mode 100644 index 00000000000..c6b7a9a7988 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/PathWithFileSystem.scala @@ -0,0 +1,55 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} + +/** + * Bundling the `Path` with the `FileSystem` instance ensures + * that we never pass the wrong file system with the path to a function + * at compile time. + */ +case class PathWithFileSystem private (path: Path, fs: FileSystem) { + + /** + * Extends the path with `s` + * + * The resulting path must be on the same filesystem. + */ + def withSuffix(s: String): PathWithFileSystem = new PathWithFileSystem(new Path(path, s), fs) + + /** + * Qualify `path`` using `fs` + */ + def makeQualified(): PathWithFileSystem = { + val qualifiedPath = fs.makeQualified(path) + PathWithFileSystem(qualifiedPath, fs) + } +} + +object PathWithFileSystem { + + /** + * Create a new `PathWithFileSystem` instance by calling `getFileSystem` + * on `path` with the given `hadoopConf`. 
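+ *
+ * Illustrative usage (the path is an example only):
+ * `PathWithFileSystem.withConf(new Path("/tmp/delta-table"), hadoopConf)` resolves the file
+ * system for "/tmp/delta-table" once and carries it alongside the path.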
+ */ + def withConf(path: Path, hadoopConf: Configuration): PathWithFileSystem = { + val fs = path.getFileSystem(hadoopConf) + PathWithFileSystem(path, fs) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/SetAccumulator.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/SetAccumulator.scala new file mode 100644 index 00000000000..13585dac0ce --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/SetAccumulator.scala @@ -0,0 +1,56 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import org.apache.spark.util.AccumulatorV2 + +/** + * Accumulator to collect distinct elements as a set. + */ +class SetAccumulator[T] extends AccumulatorV2[T, java.util.Set[T]] { + private var _set: java.util.Set[T] = _ + + private def getOrCreate = { + _set = Option(_set).getOrElse(java.util.Collections.synchronizedSet(new java.util.HashSet[T]())) + _set + } + + override def isZero: Boolean = this.synchronized(getOrCreate.isEmpty) + + override def reset(): Unit = this.synchronized { + _set = null + } + override def add(v: T): Unit = this.synchronized(getOrCreate.add(v)) + + override def merge(other: AccumulatorV2[T, java.util.Set[T]]): Unit = other match { + case o: SetAccumulator[T] => this.synchronized(getOrCreate.addAll(o.value)) + case _ => throw new UnsupportedOperationException( + s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}") + } + + override def value: java.util.Set[T] = this.synchronized { + java.util.Collections.unmodifiableSet(new java.util.HashSet[T](getOrCreate)) + } + + override def copy(): AccumulatorV2[T, java.util.Set[T]] = { + val newAcc = new SetAccumulator[T]() + this.synchronized { + newAcc.getOrCreate.addAll(getOrCreate) + } + newAcc + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/StateCache.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/StateCache.scala new file mode 100644 index 00000000000..391cf56710d --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/StateCache.scala @@ -0,0 +1,120 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.util + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.delta.Snapshot +import org.apache.spark.sql.delta.metering.DeltaLogging +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.execution.{LogicalRDD, SQLExecution} +import org.apache.spark.storage.StorageLevel + +/** + * Machinary that caches the reconstructed state of a Delta table + * using the RDD cache. The cache is designed so that the first access + * will materialize the results. However once uncache is called, + * all data will be flushed and will not be cached again. + */ +trait StateCache extends DeltaLogging { + protected def spark: SparkSession + + /** If state RDDs for this snapshot should still be cached. */ + private var _isCached = true + /** A list of RDDs that we need to uncache when we are done with this snapshot. */ + private val cached = ArrayBuffer[RDD[_]]() + + /** Method to expose the value of _isCached for testing. */ + private[delta] def isCached: Boolean = _isCached + + private val storageLevel = StorageLevel.fromString( + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_SNAPSHOT_CACHE_STORAGE_LEVEL)) + + class CachedDS[A](ds: Dataset[A], name: String) { + // While we cache RDD to avoid re-computation in different spark sessions, `Dataset` can only be + // reused by the session that created it to avoid session pollution. So we use `DatasetRefCache` + // to re-create a new `Dataset` when the active session is changed. This is an optimization for + // single-session scenarios to avoid the overhead of `Dataset` creation which can take 100ms. + private val cachedDs = cached.synchronized { + if (isCached) { + val qe = ds.queryExecution + val rdd = SQLExecution.withNewExecutionId(qe, Some(s"Cache $name")) { + val rdd = recordFrameProfile("Delta", "CachedDS.toRdd") { + // toRdd should always trigger execution + qe.toRdd.map(_.copy()) + } + rdd.setName(name) + rdd.persist(storageLevel) + } + cached += rdd + val dsCache = new DatasetRefCache(() => { + val logicalRdd = LogicalRDD(qe.analyzed.output, rdd)(spark) + Dataset.ofRows(spark, logicalRdd) + }) + Some(dsCache) + } else { + None + } + } + + /** + * Retrieves the cached RDD in Dataframe form. + * + * If a RDD cache is available, + * - return the cached DF if called from the same session in which the cached DF is created, or + * - reconstruct the DF using the RDD cache if called from a different session. + * + * If no RDD cache is available, + * - return a copy of the original DF with updated spark session. + * + * Since a cached DeltaLog can be accessed from multiple Spark sessions, this interface makes + * sure that the original Spark session in the cached DF does not leak into the current active + * sessions. + */ + def getDF: DataFrame = { + if (cached.synchronized(isCached) && cachedDs.isDefined) { + cachedDs.get.get + } else { + Dataset.ofRows(spark, ds.queryExecution.logical) + } + } + + /** + * Retrieves the cached RDD as a strongly-typed Dataset. + */ + def getDS: Dataset[A] = getDF.as[A](ds.exprEnc) + } + + /** + * Create a CachedDS instance for the given Dataset and the name. + */ + def cacheDS[A](ds: Dataset[A], name: String): CachedDS[A] = recordFrameProfile( + "Delta", "CachedDS.cacheDS") { + new CachedDS[A](ds, name) + } + + /** Drop any cached data for this [[Snapshot]]. 
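+ * After this call, `getDF` rebuilds Datasets from the original logical plan instead of the
+ * (now unpersisted) cached RDDs, and nothing is cached again.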
*/ + def uncache(): Unit = cached.synchronized { + if (isCached) { + _isCached = false + cached.foreach(_.unpersist(blocking = false)) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/TimestampFormatter.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/TimestampFormatter.scala new file mode 100644 index 00000000000..6f4bd8465b7 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/TimestampFormatter.scala @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.delta.util + +import java.text.ParseException +import java.time._ +import java.time.format.DateTimeParseException +import java.time.temporal.TemporalQueries +import java.util.{Locale, TimeZone} + +import org.apache.spark.sql.delta.util.DateTimeUtils.instantToMicros + +/** + * Forked from [[org.apache.spark.sql.catalyst.util.TimestampFormatter]] + */ +sealed trait TimestampFormatter extends Serializable { + /** + * Parses a timestamp in a string and converts it to microseconds. + * + * @param s - string with timestamp to parse + * @return microseconds since epoch. 
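+ * For example (illustrative, assuming the default pattern and a UTC time zone),
+ * "1970-01-01 00:00:01" parses to 1000000L.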
+ * @throws ParseException can be thrown by legacy parser + * @throws DateTimeParseException can be thrown by new parser + * @throws DateTimeException unable to obtain local date or time + */ + @throws(classOf[ParseException]) + @throws(classOf[DateTimeParseException]) + @throws(classOf[DateTimeException]) + def parse(s: String): Long + def format(us: Long): String +} + +class Iso8601TimestampFormatter( + pattern: String, + timeZone: TimeZone, + locale: Locale) extends TimestampFormatter with DateTimeFormatterHelper { + @transient + protected lazy val formatter = getOrCreateFormatter(pattern, locale) + + private def toInstant(s: String): Instant = { + val temporalAccessor = formatter.parse(s) + if (temporalAccessor.query(TemporalQueries.offset()) == null) { + toInstantWithZoneId(temporalAccessor, timeZone.toZoneId) + } else { + Instant.from(temporalAccessor) + } + } + + override def parse(s: String): Long = instantToMicros(toInstant(s)) + + override def format(us: Long): String = { + val instant = DateTimeUtils.microsToInstant(us) + formatter.withZone(timeZone.toZoneId).format(instant) + } +} + +/** + * The formatter parses/formats timestamps according to the pattern `yyyy-MM-dd HH:mm:ss.[..fff..]` + * where `[..fff..]` is a fraction of second up to microsecond resolution. The formatter does not + * output trailing zeros in the fraction. For example, the timestamp `2019-03-05 15:00:01.123400` is + * formatted as the string `2019-03-05 15:00:01.1234`. + * + * @param timeZone the time zone in which the formatter parses or format timestamps + */ +class FractionTimestampFormatter(timeZone: TimeZone) + extends Iso8601TimestampFormatter("", timeZone, TimestampFormatter.defaultLocale) { + + @transient + override protected lazy val formatter = DateTimeFormatterHelper.fractionFormatter +} + +object TimestampFormatter { + val defaultPattern: String = "yyyy-MM-dd HH:mm:ss" + val defaultLocale: Locale = Locale.US + + def apply(format: String, timeZone: TimeZone, locale: Locale): TimestampFormatter = { + new Iso8601TimestampFormatter(format, timeZone, locale) + } + + def apply(format: String, timeZone: TimeZone): TimestampFormatter = { + apply(format, timeZone, defaultLocale) + } + + def apply(timeZone: TimeZone): TimestampFormatter = { + apply(defaultPattern, timeZone, defaultLocale) + } + + def getFractionFormatter(timeZone: TimeZone): TimestampFormatter = { + new FractionTimestampFormatter(timeZone) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/Utils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/Utils.scala new file mode 100644 index 00000000000..df5649adb24 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/Utils.scala @@ -0,0 +1,71 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.util + +import scala.util.Random + +import org.apache.spark.sql.delta.DeltaConfigs +import org.apache.spark.sql.delta.actions.Metadata + +import org.apache.spark.sql.{Column, Dataset} +import org.apache.spark.sql.catalyst.expressions.ElementAt +import org.apache.spark.sql.functions.lit + +/** + * Various utility methods used by Delta. + */ +object Utils { + + /** Measures the time taken by function `f` */ + def timedMs[T](f: => T): (T, Long) = { + val start = System.currentTimeMillis() + val res = f + val duration = System.currentTimeMillis() - start + (res, duration) + } + + /** Returns the length of the random prefix to use for the data files of a Delta table. */ + def getRandomPrefixLength(metadata: Metadata): Int = { + if (DeltaConfigs.RANDOMIZE_FILE_PREFIXES.fromMetaData(metadata)) { + DeltaConfigs.RANDOM_PREFIX_LENGTH.fromMetaData(metadata) + } else { + 0 + } + } + + /** Generates a string created of `randomPrefixLength` alphanumeric characters. */ + def getRandomPrefix(numChars: Int): String = { + Random.alphanumeric.take(numChars).mkString + } + + /** + * Indicates whether Delta is currently running unit tests. + */ + def isTesting: Boolean = { + System.getenv("DELTA_TESTING") != null + } + + /** + * Returns value for the given key in value if column is a map and the key is present, NULL + * otherwise. + */ + def try_element_at(mapColumn: Column, key: Any): Column = { + Column { + ElementAt(mapColumn.expr, lit(key).expr, failOnError = false) + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/DeltaThreadPool.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/DeltaThreadPool.scala new file mode 100644 index 00000000000..f5f6e973940 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/DeltaThreadPool.scala @@ -0,0 +1,151 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util.threads + +import java.util.concurrent._ + +import scala.concurrent.duration.Duration +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.DeltaErrors +import org.apache.spark.sql.delta.metering.DeltaLogging + +import org.apache.spark.SparkException +import org.apache.spark.sql.SparkSession +import org.apache.spark.util.ThreadUtils +import org.apache.spark.util.ThreadUtils.namedThreadFactory + +/** A wrapper for [[ThreadPoolExecutor]] whose tasks run with the caller's [[SparkSession]]. */ +private[delta] class DeltaThreadPool(tpe: ThreadPoolExecutor) { + /** Submits a task for execution and returns a [[Future]] representing that task. */ + def submit[T](spark: SparkSession)(body: => T): Future[T] = { + tpe.submit { () => spark.withActive(body) } + } + + /** + * Executes `f` on each element of `items` as a task and returns the result. + * Throws [[SparkException]] on error. 
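+ *
+ * Illustrative usage: `pool.parallelMap(spark, Seq(1, 2, 3))(_ + 1)` evaluates the function on
+ * the thread pool and returns the results (2, 3, 4) in input order.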
+ */ + def parallelMap[T, R]( + spark: SparkSession, + items: Iterable[T])( + f: T => R): Iterable[R] = { + // Materialize a list of futures, to ensure they all got submitted before we start waiting. + val futures = items.map(i => submit(spark)(f(i))).toList + futures.map(f => ThreadUtils.awaitResult(f, Duration.Inf)).toSeq + } + + def submitNonFateSharing[T](f: SparkSession => T): NonFateSharingFuture[T] = + new NonFateSharingFuture(this)(f) +} + + +/** Convenience constructor that creates a [[ThreadPoolExecutor]] with sensible defaults. */ +private[delta] object DeltaThreadPool { + def apply(prefix: String, numThreads: Int): DeltaThreadPool = + new DeltaThreadPool(newDaemonCachedThreadPool(prefix, numThreads)) + + /** + * Create a cached thread pool whose max number of threads is `maxThreadNumber`. Thread names + * are formatted as prefix-ID, where ID is a unique, sequentially assigned integer. + */ + def newDaemonCachedThreadPool( + prefix: String, + maxThreadNumber: Int): ThreadPoolExecutor = { + val keepAliveSeconds = 60 + val queueSize = Integer.MAX_VALUE + val threadFactory = namedThreadFactory(prefix) + val threadPool = new SparkThreadLocalForwardingThreadPoolExecutor( + maxThreadNumber, // corePoolSize: the max number of threads to create before queuing the tasks + maxThreadNumber, // maximumPoolSize: because we use LinkedBlockingDeque, this one is not used + keepAliveSeconds, + TimeUnit.SECONDS, + new LinkedBlockingQueue[Runnable](queueSize), + threadFactory) + threadPool.allowCoreThreadTimeOut(true) + threadPool + } +} + +/** + * A future invocation of `f` which avoids "fate sharing" of errors, in case multiple threads could + * wait on the future's result. + * + * The future is only launched if a [[SparkSession]] is available. + * + * If the future succeeds, any thread can consume the result. + * + * If the future fails, threads will just invoke `f` directly -- except that fatal errors will + * propagate (once) if the caller is from the same [[SparkSession]] that created the future. + */ +class NonFateSharingFuture[T](pool: DeltaThreadPool)(f: SparkSession => T) + extends DeltaLogging { + + // Submit `f` as a future if a spark session is available + @volatile private var futureOpt = SparkSession.getActiveSession.map { spark => + spark -> pool.submit(spark) { f(spark) } + } + + def get(timeout: Duration): T = { + // Prefer to get a prefetched result from the future, but never fail because of it. + val futureResult = futureOpt.flatMap { case (ownerSession, future) => + try { + Some(ThreadUtils.awaitResult(future, timeout)) + } catch { + // NOTE: ThreadUtils.awaitResult wraps all non-fatal exceptions other than TimeoutException + // with SparkException. Meanwhile, Java Future.get only throws four exceptions: + // ExecutionException (non-fatal, wrapped, and itself wraps any Throwable from the task + // itself), CancellationException (non-fatal, wrapped), InterruptedException (fatal, not + // wrapped), and TimeoutException (non-fatal, but not wrapped). Thus, any "normal" failure + // of the future will surface as SparkException(ExecutionException(OriginalException)). 
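+ // The matches below unwrap those layers: fatal task errors are rethrown (once) to the
+ // session that created the future, while other failures simply fall back to invoking `f`
+ // directly.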
+ case outer: SparkException => outer.getCause match { + case e: CancellationException => + logWarning("Future was cancelled") + futureOpt = None + None + case inner: ExecutionException if inner.getCause != null => inner.getCause match { + case NonFatal(e) => + logWarning("Future threw non-fatal exception", e) + futureOpt = None + None + case e: Throwable => + logWarning("Future threw fatal error", e) + if (ownerSession eq SparkSession.active) { + futureOpt = None + throw e + } + None + } + } + case e: TimeoutException => + logWarning("Timed out waiting for future") + None + case NonFatal(e) => + logWarning("Unknown failure while waiting for future", e) + None + } + } + + futureResult.getOrElse { + // Future missing or failed, so fall back to direct execution. + SparkSession.getActiveSession match { + case Some(spark) => f(spark) + case _ => throw DeltaErrors.sparkSessionNotSetException() + } + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingThreadPoolExecutor.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingThreadPoolExecutor.scala new file mode 100644 index 00000000000..dbb126ccaea --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingThreadPoolExecutor.scala @@ -0,0 +1,118 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util.threads + +import java.util.Properties +import java.util.concurrent._ + +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkContext, TaskContext} +import org.apache.spark.internal.Logging +import org.apache.spark.util.{Utils => SparkUtils} + +/** + * Implementation of ThreadPoolExecutor that captures the Spark ThreadLocals present at submit time + * and inserts them into the thread before executing the provided runnable. + */ +class SparkThreadLocalForwardingThreadPoolExecutor( + corePoolSize: Int, + maximumPoolSize: Int, + keepAliveTime: Long, + unit: TimeUnit, + workQueue: BlockingQueue[Runnable], + threadFactory: ThreadFactory, + rejectedExecutionHandler: RejectedExecutionHandler = new ThreadPoolExecutor.AbortPolicy) + extends ThreadPoolExecutor( + corePoolSize, maximumPoolSize, keepAliveTime, + unit, workQueue, threadFactory, rejectedExecutionHandler) { + + override def execute(command: Runnable): Unit = + super.execute(new SparkThreadLocalCapturingRunnable(command)) +} + + +trait SparkThreadLocalCapturingHelper extends Logging { + // At the time of creating this instance we capture the task context and command context. 
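+ // The captured values are re-installed on the worker thread by runWithCaptured below, and the
+ // previous values are restored once the wrapped body completes.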
+ val capturedTaskContext = TaskContext.get() + val sparkContext = SparkContext.getActive + // Capture an immutable threadsafe snapshot of the current local properties + val capturedProperties = sparkContext + .map(sc => CapturedSparkThreadLocals.toValuesArray( + SparkUtils.cloneProperties(sc.getLocalProperties))) + + def runWithCaptured[T](body: => T): T = { + // Save the previous contexts, overwrite them with the captured contexts, and then restore the + // previous when execution completes. + // This has the unfortunate side effect of writing nulls to these thread locals if they were + // empty beforehand. + val previousTaskContext = TaskContext.get() + val previousProperties = sparkContext.map(_.getLocalProperties) + + TaskContext.setTaskContext(capturedTaskContext) + for { + p <- capturedProperties + sc <- sparkContext + } { + sc.setLocalProperties(CapturedSparkThreadLocals.toProperties(p)) + } + + try { + body + } catch { + case t: Throwable => + logError(s"Exception in thread ${Thread.currentThread().getName}", t) + throw t + } finally { + TaskContext.setTaskContext(previousTaskContext) + for { + p <- previousProperties + sc <- sparkContext + } { + sc.setLocalProperties(p) + } + } + } +} + +class CapturedSparkThreadLocals extends SparkThreadLocalCapturingHelper + +object CapturedSparkThreadLocals { + def apply(): CapturedSparkThreadLocals = { + new CapturedSparkThreadLocals() + } + + def toProperties(props: Array[(String, String)]): Properties = { + val resultProps = new Properties() + for ((key, value) <- props) { + resultProps.put(key, value) + } + resultProps + } + + def toValuesArray(props: Properties): Array[(String, String)] = { + props.asScala.toArray + } + +} + +class SparkThreadLocalCapturingRunnable(runnable: Runnable) + extends Runnable with SparkThreadLocalCapturingHelper { + override def run(): Unit = { + runWithCaptured(runnable.run()) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/util/ScalaExtensions.scala b/spark/src/main/scala/org/apache/spark/sql/util/ScalaExtensions.scala new file mode 100644 index 00000000000..df350560cf7 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/util/ScalaExtensions.scala @@ -0,0 +1,55 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.util + +/** Extension utility classes for built-in Scala functionality. */ +object ScalaExtensions { + + implicit class OptionExt[T](opt: Option[T]) { + /** + * Execute `f` on the content of `opt`, if `opt.isDefined`. + * + * This is basically a rename of `opt.foreach`, but with better readability. + */ + def ifDefined(f: T => Unit): Unit = opt.foreach(f) + } + + implicit class OptionExtCompanion(opt: Option.type) { + /** + * When a given condition is true, evaluates the a argument and returns Some(a). + * When the condition is false, a is not evaluated and None is returned. 
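+ *
+ * Illustrative usage: `Option.when(n > 0)(100 / n)` returns Some(100 / n) for positive `n` and
+ * None otherwise, without evaluating the division when `n <= 0`.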
+ */ + def when[A](cond: Boolean)(a: => A): Option[A] = if (cond) Some(a) else None + + /** + * When a given condition is false, evaluates the a argument and returns Some(a). + * When the condition is true, a is not evaluated and None is returned. + */ + def whenNot[A](cond: Boolean)(a: => A): Option[A] = if (!cond) Some(a) else None + + /** Sum up all the `options`, substituting `default` for each `None`. */ + def sum[N : Numeric](default: N)(options: Option[N]*): N = + options.map(_.getOrElse(default)).sum + } + + implicit class AnyExt(any: Any) { + /** + * Applies the partial function to any if it is defined and ignores the result if any. + */ + def condDo(pf: PartialFunction[Any, Unit]): Unit = scala.PartialFunction.condOpt(any)(pf) + } +} diff --git a/spark/src/test/java/io/delta/sql/JavaDeltaSparkSessionExtensionSuite.java b/spark/src/test/java/io/delta/sql/JavaDeltaSparkSessionExtensionSuite.java new file mode 100644 index 00000000000..2fe1d12c8d5 --- /dev/null +++ b/spark/src/test/java/io/delta/sql/JavaDeltaSparkSessionExtensionSuite.java @@ -0,0 +1,45 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sql; + +import org.apache.spark.sql.SparkSession; +import org.apache.spark.util.Utils; +import org.junit.Test; + +import java.io.IOException; + +public class JavaDeltaSparkSessionExtensionSuite { + + @Test + public void testSQLConf() throws IOException { + SparkSession spark = SparkSession.builder() + .appName("JavaDeltaSparkSessionExtensionSuiteUsingSQLConf") + .master("local[2]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .getOrCreate(); + try { + String input = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "input") + .getCanonicalPath(); + spark.range(1, 10).write().format("delta").save(input); + spark.sql("vacuum delta.`" + input + "`"); + } finally { + spark.stop(); + } + } +} diff --git a/spark/src/test/java/io/delta/tables/JavaDeltaTableBuilderSuite.java b/spark/src/test/java/io/delta/tables/JavaDeltaTableBuilderSuite.java new file mode 100644 index 00000000000..01db2cadf2d --- /dev/null +++ b/spark/src/test/java/io/delta/tables/JavaDeltaTableBuilderSuite.java @@ -0,0 +1,215 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.tables; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.spark.sql.delta.DeltaLog; +import org.apache.hadoop.fs.Path; +import org.apache.spark.sql.*; + +import org.apache.spark.util.Utils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.apache.spark.sql.delta.DeltaSQLCommandJavaTest; + +import static org.apache.spark.sql.types.DataTypes.*; + +public class JavaDeltaTableBuilderSuite implements DeltaSQLCommandJavaTest { + + private transient SparkSession spark; + private transient String input; + + + @Before + public void setUp() { + // Trigger static initializer of TestData + spark = buildSparkSession(); + } + + @After + public void tearDown() { + if (spark != null) { + spark.stop(); + spark = null; + } + } + + private DeltaTable buildTable(DeltaTableBuilder builder) { + return builder.addColumn("c1", "int") + .addColumn("c2", IntegerType) + .addColumn("c3", "string", false) + .addColumn("c4", StringType, true) + .addColumn(DeltaTable.columnBuilder(spark, "c5") + .dataType("bigint") + .comment("foo") + .nullable(false) + .build() + ) + .addColumn(DeltaTable.columnBuilder(spark, "c6") + .dataType(LongType) + .generatedAlwaysAs("c5 + 10") + .build() + ).execute(); + } + + private DeltaTable createTable(boolean ifNotExists, String tableName) { + DeltaTableBuilder builder; + if (ifNotExists) { + builder = DeltaTable.createIfNotExists(); + } else { + builder = DeltaTable.create(); + } + if (tableName.startsWith("delta.`")) { + tableName = tableName.substring("delta.`".length()); + String location = tableName.substring(0, tableName.length() - 1); + builder = builder.location(location); + DeltaLog.forTable(spark, location).clearCache(); + } else { + builder = builder.tableName(tableName); + DeltaLog.forTable(spark, new Path(tableName)).clearCache(); + } + return buildTable(builder); + } + + private DeltaTable replaceTable(boolean orCreate, String tableName) { + DeltaTableBuilder builder; + if (orCreate) { + builder = DeltaTable.createOrReplace(); + } else { + builder = DeltaTable.replace(); + } + if (tableName.startsWith("delta.`")) { + tableName = tableName.substring("delta.`".length()); + String location = tableName.substring(0, tableName.length() - 1); + builder = builder.location(location); + } else { + builder = builder.tableName(tableName); + } + return buildTable(builder); + } + + private void verifyGeneratedColumn(String tableName, DeltaTable deltaTable) { + String cmd = String.format("INSERT INTO %s (c1, c2, c3, c4, c5, c6) %s", tableName, + "VALUES (1, 2, 'a', 'c', 1, 11)"); + spark.sql(cmd); + Map set = new HashMap() {{ + put("c5", "10"); + }}; + deltaTable.updateExpr("c6 = 11", set); + assert(deltaTable.toDF().select("c6").collectAsList().get(0).getLong(0) == 20); + } + + @Test + public void testCreateTable() { + try { + // Test creating DeltaTable by name + DeltaTable table = createTable(false, "deltaTable"); + verifyGeneratedColumn("deltaTable", table); + } finally { + spark.sql("DROP TABLE IF EXISTS deltaTable"); + } + // Test creating DeltaTable by path. + String input = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "input") + .toString(); + DeltaTable table2 = createTable(false, String.format("delta.`%s`", input)); + verifyGeneratedColumn(String.format("delta.`%s`", input), table2); + } + + @Test + public void testCreateTableIfNotExists() { + // Ignore table creation if already exsits. 
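+ // (createIfNotExists() should leave an existing table untouched; the checkAnswer call
+ // against deltaTable2 below verifies that the previously written data survives.)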
+ List data = Arrays.asList("hello", "world"); + Dataset dataDF = spark.createDataset(data, Encoders.STRING()).toDF(); + try { + // Test creating DeltaTable by name - not exists. + DeltaTable table = createTable(true, "deltaTable"); + verifyGeneratedColumn("deltaTable", table); + + dataDF.write().format("delta").mode("overwrite").saveAsTable("deltaTable2"); + + // Table 2 should be the old table saved by path. + DeltaTable table2 = DeltaTable.createIfNotExists().tableName("deltaTable2") + .addColumn("value", "string") + .execute(); + QueryTest$.MODULE$.checkAnswer(table2.toDF(), dataDF.collectAsList()); + } finally { + spark.sql("DROP TABLE IF EXISTS deltaTable"); + spark.sql("DROP TABLE IF EXISTS deltaTable2"); + } + // Test creating DeltaTable by path. + String input = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "input") + .toString(); + dataDF.write().format("delta").mode("overwrite").save(input); + DeltaTable table = createTable(true, String.format("delta.`%s`", input)); + QueryTest$.MODULE$.checkAnswer(table.toDF(), dataDF.collectAsList()); + } + + @Test + public void testCreateTableWithExistingSchema() { + try { + // Test create table with an existing schema. + List data = Arrays.asList("hello", "world"); + Dataset dataDF = spark.createDataset(data, Encoders.STRING()).toDF(); + + DeltaLog.forTable(spark, new Path("deltaTable")).clearCache(); + DeltaTable table = DeltaTable.create().tableName("deltaTable") + .addColumns(dataDF.schema()) + .execute(); + dataDF.write().format("delta").mode("append").saveAsTable("deltaTable"); + + QueryTest$.MODULE$.checkAnswer(table.toDF(), dataDF.collectAsList()); + } finally { + spark.sql("DROP TABLE IF EXISTS deltaTable"); + } + } + + @Test + public void testReplaceTable() { + try { + // create a table first + spark.sql("CREATE TABLE deltaTable (col1 int) USING delta"); + // Test replacing DeltaTable by name + DeltaTable table = replaceTable(false, "deltaTable"); + verifyGeneratedColumn("deltaTable", table); + } finally { + spark.sql("DROP TABLE IF EXISTS deltaTable"); + } + String input = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "input") + .toString(); + List data = Arrays.asList("hello", "world"); + Dataset dataDF = spark.createDataset(data, Encoders.STRING()).toDF(); + dataDF.write().format("delta").mode("overwrite").save(input); + DeltaTable table = replaceTable(false, String.format("delta.`%s`", input)); + verifyGeneratedColumn(String.format("delta.`%s`", input), table); + } + + @Test + public void testCreateOrReplaceTable() { + try { + // Test creating DeltaTable by name if table to be replaced does not exist. + DeltaTable table = replaceTable(true, "deltaTable"); + verifyGeneratedColumn("deltaTable", table); + } finally { + spark.sql("DROP TABLE IF EXISTS deltaTable"); + } + } +} diff --git a/spark/src/test/java/io/delta/tables/JavaDeltaTableSuite.java b/spark/src/test/java/io/delta/tables/JavaDeltaTableSuite.java new file mode 100644 index 00000000000..de3f859500c --- /dev/null +++ b/spark/src/test/java/io/delta/tables/JavaDeltaTableSuite.java @@ -0,0 +1,88 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.tables; + +import java.util.Arrays; +import java.util.List; + +import org.apache.spark.sql.test.*; +import org.apache.spark.sql.*; + +import org.apache.spark.util.Utils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.apache.spark.sql.delta.DeltaSQLCommandJavaTest; + +public class JavaDeltaTableSuite implements DeltaSQLCommandJavaTest { + + private transient SparkSession spark; + private transient String input; + + + @Before + public void setUp() { + // Trigger static initializer of TestData + spark = buildSparkSession(); + } + + @After + public void tearDown() { + if (spark != null) { + spark.stop(); + spark = null; + } + } + + @Test + public void testAPI() { + try { + String input = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "input").toString(); + List data = Arrays.asList("hello", "world"); + Dataset dataDF = spark.createDataset(data, Encoders.STRING()).toDF(); + List dataRows = dataDF.collectAsList(); + dataDF.write().format("delta").mode("overwrite").save(input); + + // Test creating DeltaTable by path + DeltaTable table1 = DeltaTable.forPath(spark, input); + QueryTest$.MODULE$.checkAnswer(table1.toDF(), dataRows); + + // Test creating DeltaTable by path picks up active SparkSession + DeltaTable table2 = DeltaTable.forPath(input); + QueryTest$.MODULE$.checkAnswer(table2.toDF(), dataRows); + + dataDF.write().format("delta").mode("overwrite").saveAsTable("deltaTable"); + + // Test creating DeltaTable by name + DeltaTable table3 = DeltaTable.forName(spark, "deltaTable"); + QueryTest$.MODULE$.checkAnswer(table3.toDF(), dataRows); + + // Test creating DeltaTable by name + DeltaTable table4 = DeltaTable.forName("deltaTable"); + QueryTest$.MODULE$.checkAnswer(table4.toDF(), dataRows); + + // Test DeltaTable.as() creates subquery alias + QueryTest$.MODULE$.checkAnswer(table2.as("tbl").toDF().select("tbl.value"), dataRows); + + // Test DeltaTable.isDeltaTable() is true for a Delta file path. + Assert.assertTrue(DeltaTable.isDeltaTable(input)); + } finally { + spark.sql("DROP TABLE IF EXISTS deltaTable"); + } + } +} diff --git a/spark/src/test/java/org/apache/spark/sql/delta/DeleteJavaSuite.java b/spark/src/test/java/org/apache/spark/sql/delta/DeleteJavaSuite.java new file mode 100644 index 00000000000..3f8604255f7 --- /dev/null +++ b/spark/src/test/java/org/apache/spark/sql/delta/DeleteJavaSuite.java @@ -0,0 +1,110 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import scala.Tuple2; + +import io.delta.tables.DeltaTable; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.sql.*; +import org.apache.spark.util.Utils; + +public class DeleteJavaSuite implements DeltaSQLCommandJavaTest { + + private transient SparkSession spark; + private transient String tempPath; + + @Before + public void setUp() { + spark = buildSparkSession(); + tempPath = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString(); + } + + @After + public void tearDown() { + if (spark != null) { + spark.stop(); + spark = null; + } + } + + @Test + public void testWithoutCondition() { + Dataset<Row> targetTable = createKVDataSet( + Arrays.asList(tuple2(1, 10), tuple2(2, 20), tuple2(3, 30), tuple2(4, 40)), + "key", "value"); + targetTable.write().format("delta").save(tempPath); + DeltaTable target = DeltaTable.forPath(spark, tempPath); + + target.delete(); + + List<Row> expectedAnswer = new ArrayList<>(); + QueryTest$.MODULE$.checkAnswer(target.toDF(), expectedAnswer); + } + + @Test + public void testWithCondition() { + Dataset<Row> targetTable = createKVDataSet( + Arrays.asList(tuple2(1, 10), tuple2(2, 20), tuple2(3, 30), tuple2(4, 40)), + "key", "value"); + targetTable.write().format("delta").save(tempPath); + DeltaTable target = DeltaTable.forPath(spark, tempPath); + + target.delete("key = 1 or key = 2"); + + List<Row> expectedAnswer = createKVDataSet( + Arrays.asList(tuple2(3, 30), tuple2(4, 40))).collectAsList(); + QueryTest$.MODULE$.checkAnswer(target.toDF(), expectedAnswer); + } + + @Test + public void testWithColumnCondition() { + Dataset<Row> targetTable = createKVDataSet( + Arrays.asList(tuple2(1, 10), tuple2(2, 20), tuple2(3, 30), tuple2(4, 40)), + "key", "value"); + targetTable.write().format("delta").save(tempPath); + DeltaTable target = DeltaTable.forPath(spark, tempPath); + + target.delete(functions.expr("key = 1 or key = 2")); + + List<Row> expectedAnswer = createKVDataSet( + Arrays.asList(tuple2(3, 30), tuple2(4, 40))).collectAsList(); + QueryTest$.MODULE$.checkAnswer(target.toDF(), expectedAnswer); + } + + private Dataset<Row> createKVDataSet( + List<Tuple2<Integer, Integer>> data, String keyName, String valueName) { + Encoder<Tuple2<Integer, Integer>> encoder = Encoders.tuple(Encoders.INT(), Encoders.INT()); + return spark.createDataset(data, encoder).toDF(keyName, valueName); + } + + private Dataset<Row> createKVDataSet(List<Tuple2<Integer, Integer>> data) { + Encoder<Tuple2<Integer, Integer>> encoder = Encoders.tuple(Encoders.INT(), Encoders.INT()); + return spark.createDataset(data, encoder).toDF(); + } + + private <T1, T2> Tuple2<T1, T2> tuple2(T1 t1, T2 t2) { + return new Tuple2<>(t1, t2); + } +} diff --git a/spark/src/test/java/org/apache/spark/sql/delta/DeltaSQLCommandJavaTest.java b/spark/src/test/java/org/apache/spark/sql/delta/DeltaSQLCommandJavaTest.java new file mode 100644 index 00000000000..22a86f37caa --- /dev/null +++ b/spark/src/test/java/org/apache/spark/sql/delta/DeltaSQLCommandJavaTest.java @@ -0,0 +1,32 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta; + +import org.apache.spark.sql.SparkSession; + +public interface DeltaSQLCommandJavaTest { + default SparkSession buildSparkSession() { + // Set the configurations as DeltaSQLCommandTest + return SparkSession.builder() + .appName("JavaDeltaSparkSessionExtensionSuiteUsingSQLConf") + .master("local[2]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .getOrCreate(); + } +} diff --git a/spark/src/test/java/org/apache/spark/sql/delta/MergeIntoJavaSuite.java b/spark/src/test/java/org/apache/spark/sql/delta/MergeIntoJavaSuite.java new file mode 100644 index 00000000000..b4ffb080970 --- /dev/null +++ b/spark/src/test/java/org/apache/spark/sql/delta/MergeIntoJavaSuite.java @@ -0,0 +1,187 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import scala.Tuple2; + +import io.delta.tables.DeltaTable; + +import org.apache.spark.sql.*; +import org.apache.spark.util.Utils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.sql.test.TestSparkSession; +import org.apache.spark.sql.delta.catalog.DeltaCatalog; +import org.apache.spark.sql.internal.SQLConf; + +public class MergeIntoJavaSuite implements Serializable { + private transient TestSparkSession spark; + private transient String tempPath; + + @Before + public void setUp() { + spark = new TestSparkSession(); + tempPath = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString(); + spark.sqlContext().conf().setConfString(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION().key(), DeltaCatalog.class.getCanonicalName()); + } + + @After + public void tearDown() { + spark.stop(); + spark = null; + } + + @Test + public void checkBasicApi() { + Dataset<Row> targetTable = createKVDataSet( + Arrays.asList(tuple2(1, 10), tuple2(2, 20)), "key1", "value1"); + targetTable.write().format("delta").save(tempPath); + + Dataset<Row> sourceTable = createKVDataSet( + Arrays.asList(tuple2(1, 100), tuple2(3, 30)), "key2", "value2"); + + DeltaTable target = DeltaTable.forPath(spark, tempPath); + Map<String, String> updateMap = new HashMap<String, String>() {{ + put("key1", "key2"); + put("value1", "value2"); + }}; + Map<String, String> insertMap = new HashMap<String, String>() {{ + put("key1", "key2"); + put("value1", "value2"); + }}; + target.merge(sourceTable, "key1 = key2") + .whenMatched() + .updateExpr(updateMap) + .whenNotMatched() + .insertExpr(insertMap) + .execute(); + + List<Row> expectedAnswer = createKVDataSet( + Arrays.asList(tuple2(1, 100), tuple2(2, 20), tuple2(3, 30))).collectAsList(); + + QueryTest$.MODULE$.checkAnswer(target.toDF(), expectedAnswer); + } + + @Test + public void checkExtendedApi() { + Dataset<Row> targetTable = createKVDataSet( + Arrays.asList(tuple2(1, 10), tuple2(2, 20)), "key1", "value1"); + targetTable.write().format("delta").save(tempPath); + + Dataset<Row> sourceTable = createKVDataSet( + Arrays.asList(tuple2(1, 100), tuple2(3, 30)), "key2", "value2"); + + DeltaTable target = DeltaTable.forPath(spark, tempPath); + Map<String, String> updateMap = new HashMap<String, String>() {{ + put("key1", "key2"); + put("value1", "value2"); + }}; + Map<String, String> insertMap = new HashMap<String, String>() {{ + put("key1", "key2"); + put("value1", "value2"); + }}; + target.merge(sourceTable, "key1 = key2") + .whenMatched("key1 = 4").delete() + .whenMatched("key2 = 1") + .updateExpr(updateMap) + .whenNotMatched("key2 = 3") + .insertExpr(insertMap) + .execute(); + + List<Row> expectedAnswer = createKVDataSet( + Arrays.asList(tuple2(1, 100), tuple2(2, 20), tuple2(3, 30))).collectAsList(); + + QueryTest$.MODULE$.checkAnswer(target.toDF(), expectedAnswer); + } + + @Test + public void checkExtendedApiWithColumn() { + Dataset<Row> targetTable = createKVDataSet( + Arrays.asList(tuple2(1, 10), tuple2(2, 20), tuple2(4, 40)), "key1", "value1"); + targetTable.write().format("delta").save(tempPath); + + Dataset<Row> sourceTable = createKVDataSet( + Arrays.asList(tuple2(1, 100), tuple2(3, 30), tuple2(4, 41)), "key2", "value2"); + + DeltaTable target = DeltaTable.forPath(spark, tempPath); + Map<String, Column> updateMap = new HashMap<String, Column>() {{ + put("key1", functions.col("key2")); + put("value1", functions.col("value2")); + }}; + Map<String, Column> insertMap = new HashMap<String, Column>() {{ + put("key1", functions.col("key2")); + put("value1",
functions.col("value2")); + }}; + target.merge(sourceTable, functions.expr("key1 = key2")) + .whenMatched(functions.expr("key1 = 4")).delete() + .whenMatched(functions.expr("key2 = 1")) + .update(updateMap) + .whenNotMatched(functions.expr("key2 = 3")) + .insert(insertMap) + .execute(); + + List<Row> expectedAnswer = createKVDataSet( + Arrays.asList(tuple2(1, 100), tuple2(2, 20), tuple2(3, 30))).collectAsList(); + + QueryTest$.MODULE$.checkAnswer(target.toDF(), expectedAnswer); + } + + @Test + public void checkUpdateAllAndInsertAll() { + Dataset<Row> targetTable = createKVDataSet(Arrays.asList( + tuple2(1, 10), tuple2(2, 20), tuple2(4, 40), tuple2(5, 50)), "key", "value"); + targetTable.write().format("delta").save(tempPath); + + Dataset<Row> sourceTable = createKVDataSet(Arrays.asList( + tuple2(1, 100), tuple2(3, 30), tuple2(4, 41), tuple2(5, 51), tuple2(6, 60)), + "key", "value"); + + DeltaTable target = DeltaTable.forPath(spark, tempPath); + target.as("t").merge(sourceTable.as("s"), functions.expr("t.key = s.key")) + .whenMatched().updateAll() + .whenNotMatched().insertAll() + .execute(); + + List<Row> expectedAnswer = createKVDataSet(Arrays.asList(tuple2(1, 100), tuple2(2, 20), + tuple2(3, 30), tuple2(4, 41), tuple2(5, 51), tuple2(6, 60))).collectAsList(); + + QueryTest$.MODULE$.checkAnswer(target.toDF(), expectedAnswer); + } + + private Dataset<Row> createKVDataSet( + List<Tuple2<Integer, Integer>> data, String keyName, String valueName) { + Encoder<Tuple2<Integer, Integer>> encoder = Encoders.tuple(Encoders.INT(), Encoders.INT()); + return spark.createDataset(data, encoder).toDF(keyName, valueName); + } + + private Dataset<Row> createKVDataSet(List<Tuple2<Integer, Integer>> data) { + Encoder<Tuple2<Integer, Integer>> encoder = Encoders.tuple(Encoders.INT(), Encoders.INT()); + return spark.createDataset(data, encoder).toDF(); + } + + private <T1, T2> Tuple2<T1, T2> tuple2(T1 t1, T2 t2) { + return new Tuple2<>(t1, t2); + } +} diff --git a/spark/src/test/java/org/apache/spark/sql/delta/UpdateJavaSuite.java b/spark/src/test/java/org/apache/spark/sql/delta/UpdateJavaSuite.java new file mode 100644 index 00000000000..a68d60547d2 --- /dev/null +++ b/spark/src/test/java/org/apache/spark/sql/delta/UpdateJavaSuite.java @@ -0,0 +1,133 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.spark.sql.delta; + +import java.util.*; + +import scala.Tuple2; + +import io.delta.tables.DeltaTable; +import org.junit.*; + +import org.apache.spark.sql.*; +import org.apache.spark.util.Utils; + +public class UpdateJavaSuite implements DeltaSQLCommandJavaTest { + private transient SparkSession spark; + private transient String tempPath; + + @Before + public void setUp() { + spark = buildSparkSession(); + tempPath = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString(); + } + + @After + public void tearDown() { + if (spark != null) { + spark.stop(); + spark = null; + } + } + + @Test + public void testWithoutCondition() { + Dataset<Row> targetTable = createKVDataSet( + Arrays.asList(tuple2(1, 10), tuple2(2, 20), tuple2(3, 30), tuple2(4, 40)), + "key", "value"); + targetTable.write().format("delta").save(tempPath); + DeltaTable target = DeltaTable.forPath(spark, tempPath); + + Map<String, String> set = new HashMap<String, String>() {{ + put("key", "100"); + }}; + target.updateExpr(set); + + List<Row> expectedAnswer = createKVDataSet(Arrays.asList( + tuple2(100, 10), tuple2(100, 20), tuple2(100, 30), tuple2(100, 40))).collectAsList(); + QueryTest$.MODULE$.checkAnswer(target.toDF(), expectedAnswer); + } + + @Test + public void testWithoutConditionUsingColumn() { + Dataset<Row> targetTable = createKVDataSet( + Arrays.asList(tuple2(1, 10), tuple2(2, 20), tuple2(3, 30), tuple2(4, 40)), + "key", "value"); + targetTable.write().format("delta").save(tempPath); + DeltaTable target = DeltaTable.forPath(spark, tempPath); + + Map<String, Column> set = new HashMap<String, Column>() {{ + put("key", functions.expr("100")); + }}; + target.update(set); + + List<Row> expectedAnswer = createKVDataSet(Arrays.asList( + tuple2(100, 10), tuple2(100, 20), tuple2(100, 30), tuple2(100, 40))).collectAsList(); + QueryTest$.MODULE$.checkAnswer(target.toDF(), expectedAnswer); + } + + @Test + public void testWithCondition() { + Dataset<Row> targetTable = createKVDataSet( + Arrays.asList(tuple2(1, 10), tuple2(2, 20), tuple2(3, 30), tuple2(4, 40)), + "key", "value"); + targetTable.write().format("delta").save(tempPath); + DeltaTable target = DeltaTable.forPath(spark, tempPath); + + Map<String, String> set = new HashMap<String, String>() {{ + put("key", "100"); + }}; + target.updateExpr("key = 1 or key = 2", set); + + List<Row> expectedAnswer = createKVDataSet(Arrays.asList( + tuple2(100, 10), tuple2(100, 20), tuple2(3, 30), tuple2(4, 40))).collectAsList(); + QueryTest$.MODULE$.checkAnswer(target.toDF(), expectedAnswer); + } + + @Test + public void testWithConditionUsingColumn() { + Dataset<Row> targetTable = createKVDataSet( + Arrays.asList(tuple2(1, 10), tuple2(2, 20), tuple2(3, 30), tuple2(4, 40)), + "key", "value"); + targetTable.write().format("delta").save(tempPath); + DeltaTable target = DeltaTable.forPath(spark, tempPath); + + Map<String, Column> set = new HashMap<String, Column>() {{ + put("key", functions.expr("100")); + }}; + target.update(functions.expr("key = 1 or key = 2"), set); + + List<Row> expectedAnswer = createKVDataSet(Arrays.asList( + tuple2(100, 10), tuple2(100, 20), tuple2(3, 30), tuple2(4, 40))).collectAsList(); + QueryTest$.MODULE$.checkAnswer(target.toDF(), expectedAnswer); + } + + private Dataset<Row> createKVDataSet( + List<Tuple2<Integer, Integer>> data, String keyName, String valueName) { + Encoder<Tuple2<Integer, Integer>> encoder = Encoders.tuple(Encoders.INT(), Encoders.INT()); + return spark.createDataset(data, encoder).toDF(keyName, valueName); + } + + private Dataset<Row> createKVDataSet(List<Tuple2<Integer, Integer>> data) { + Encoder<Tuple2<Integer, Integer>> encoder = Encoders.tuple(Encoders.INT(), Encoders.INT()); + return spark.createDataset(data, encoder).toDF(); + } + + private <T1, T2> Tuple2<T1, T2> tuple2(T1 t1, T2
t2) { + return new Tuple2<>(t1, t2); + } +} diff --git a/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/.part-00000-74e02f0d-e727-46e5-8d74-779d2abd616e-c000.snappy.parquet.crc b/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/.part-00000-74e02f0d-e727-46e5-8d74-779d2abd616e-c000.snappy.parquet.crc new file mode 100644 index 00000000000..c179a3acc57 Binary files /dev/null and b/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/.part-00000-74e02f0d-e727-46e5-8d74-779d2abd616e-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/_delta_log/.00000000000000000000.json.crc b/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 00000000000..90adb629610 Binary files /dev/null and b/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/_delta_log/.00000000000000000000.json.crc differ diff --git a/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/_delta_log/00000000000000000000.crc b/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/_delta_log/00000000000000000000.crc new file mode 100644 index 00000000000..9b75903d1bd --- /dev/null +++ b/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/_delta_log/00000000000000000000.crc @@ -0,0 +1 @@ +{"tableSizeBytes":422,"numFiles":1,"numMetadata":1,"numProtocol":1,"numTransactions":0} diff --git a/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/_delta_log/00000000000000000000.json b/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..da1b4eb2931 --- /dev/null +++ b/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1617557139648,"operation":"CREATE TABLE AS SELECT","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[]","properties":"{}"},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"422","numOutputRows":"0"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"027fb01c-94aa-4cab-87cb-5aab6aec6d17","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"delta.generationExpression\":\"c1 + 1\"}}]}","partitionColumns":[],"configuration":{},"createdTime":1617557137253}} +{"add":{"path":"part-00000-74e02f0d-e727-46e5-8d74-779d2abd616e-c000.snappy.parquet","partitionValues":{},"size":422,"modificationTime":1617557139000,"dataChange":true,"stats":"{\"numRecords\":0,\"minValues\":{},\"maxValues\":{},\"nullCount\":{}}"}} diff --git a/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/part-00000-74e02f0d-e727-46e5-8d74-779d2abd616e-c000.snappy.parquet b/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/part-00000-74e02f0d-e727-46e5-8d74-779d2abd616e-c000.snappy.parquet new file mode 100644 index 00000000000..74f0c98a7b0 Binary files /dev/null and b/spark/src/test/resources/delta/dbr_8_0_non_generated_columns/part-00000-74e02f0d-e727-46e5-8d74-779d2abd616e-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/dbr_8_1_generated_columns/_delta_log/.00000000000000000000.json.crc 
b/spark/src/test/resources/delta/dbr_8_1_generated_columns/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 00000000000..d3853f56ebd Binary files /dev/null and b/spark/src/test/resources/delta/dbr_8_1_generated_columns/_delta_log/.00000000000000000000.json.crc differ diff --git a/spark/src/test/resources/delta/dbr_8_1_generated_columns/_delta_log/00000000000000000000.crc b/spark/src/test/resources/delta/dbr_8_1_generated_columns/_delta_log/00000000000000000000.crc new file mode 100644 index 00000000000..40da51e0926 --- /dev/null +++ b/spark/src/test/resources/delta/dbr_8_1_generated_columns/_delta_log/00000000000000000000.crc @@ -0,0 +1 @@ +{"tableSizeBytes":0,"numFiles":0,"numMetadata":1,"numProtocol":1,"numTransactions":0} diff --git a/spark/src/test/resources/delta/dbr_8_1_generated_columns/_delta_log/00000000000000000000.json b/spark/src/test/resources/delta/dbr_8_1_generated_columns/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..09d9aa3466e --- /dev/null +++ b/spark/src/test/resources/delta/dbr_8_1_generated_columns/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1617556462951,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[]","properties":"{}"},"isolationLevel":"SnapshotIsolation","isBlindAppend":true,"operationMetrics":{}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":4}} +{"metaData":{"id":"b406888a-3eb9-4dd5-a81a-ed0b0b535c00","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"delta.generationExpression\":\"c1 + 1\"}}]}","partitionColumns":[],"configuration":{},"createdTime":1617556462734}} diff --git a/spark/src/test/resources/delta/delta-0.1.0/.part-00000-348d7f43-38f6-4778-88c7-45f379471c49-c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-0.1.0/.part-00000-348d7f43-38f6-4778-88c7-45f379471c49-c000.snappy.parquet.crc new file mode 100644 index 00000000000..b1da5c742e3 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/.part-00000-348d7f43-38f6-4778-88c7-45f379471c49-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/.part-00000-f4aeebd0-a689-4e1b-bc7a-bbb0ec59dce5-c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-0.1.0/.part-00000-f4aeebd0-a689-4e1b-bc7a-bbb0ec59dce5-c000.snappy.parquet.crc new file mode 100644 index 00000000000..885bc618d41 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/.part-00000-f4aeebd0-a689-4e1b-bc7a-bbb0ec59dce5-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/.part-00001-6d252218-2632-416e-9e46-f32316ec314a-c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-0.1.0/.part-00001-6d252218-2632-416e-9e46-f32316ec314a-c000.snappy.parquet.crc new file mode 100644 index 00000000000..de219ac0bb7 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/.part-00001-6d252218-2632-416e-9e46-f32316ec314a-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/.part-00001-f1cb1cf9-7a73-439c-b0ea-dcba5c2280a6-c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-0.1.0/.part-00001-f1cb1cf9-7a73-439c-b0ea-dcba5c2280a6-c000.snappy.parquet.crc new file mode 100644 index 00000000000..5c3034a01b5 Binary files /dev/null and 
b/spark/src/test/resources/delta/delta-0.1.0/.part-00001-f1cb1cf9-7a73-439c-b0ea-dcba5c2280a6-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000000.json.crc b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 00000000000..e07330db312 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000000.json.crc differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000001.json.crc b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 00000000000..8440eb7b265 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000001.json.crc differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000002.json.crc b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 00000000000..f8df4d29ef7 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000002.json.crc differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000003.checkpoint.parquet.crc b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000003.checkpoint.parquet.crc new file mode 100644 index 00000000000..0a15dd1a255 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000003.checkpoint.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000003.json.crc b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 00000000000..019a96cc1a8 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/.00000000000000000003.json.crc differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000000.json b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..46287fd5baf --- /dev/null +++ b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"protocol":{"minReaderVersion":1,"minWriterVersion":1}} +{"metaData":{"id":"2edf2c02-bb63-44e9-a84c-517fad0db296","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{}}} +{"add":{"path":"part-00000-f4aeebd0-a689-4e1b-bc7a-bbb0ec59dce5-c000.snappy.parquet","partitionValues":{},"size":525,"modificationTime":1501109075000,"dataChange":true}} +{"add":{"path":"part-00001-f1cb1cf9-7a73-439c-b0ea-dcba5c2280a6-c000.snappy.parquet","partitionValues":{},"size":534,"modificationTime":1501109075000,"dataChange":true}} diff --git a/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000001.json b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..b8bbc7b5baa --- /dev/null +++ b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"remove":{"path":"part-00001-f1cb1cf9-7a73-439c-b0ea-dcba5c2280a6-c000.snappy.parquet","dataChange":true}} 
+{"remove":{"path":"part-00000-f4aeebd0-a689-4e1b-bc7a-bbb0ec59dce5-c000.snappy.parquet","dataChange":true}} diff --git a/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000002.json b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..d3d3491609d --- /dev/null +++ b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000002.json @@ -0,0 +1,3 @@ +{"txn":{"appId":"txnId","version":0}} +{"add":{"path":"part-00000-348d7f43-38f6-4778-88c7-45f379471c49-c000.snappy.parquet","partitionValues":{},"size":525,"modificationTime":1501109075000,"dataChange":true}} +{"add":{"path":"part-00001-6d252218-2632-416e-9e46-f32316ec314a-c000.snappy.parquet","partitionValues":{},"size":534,"modificationTime":1501109075000,"dataChange":true}} diff --git a/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000003.checkpoint.parquet b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000003.checkpoint.parquet new file mode 100644 index 00000000000..b6448445c4f Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000003.checkpoint.parquet differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000003.json b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..f5c0b2d3081 --- /dev/null +++ b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/00000000000000000003.json @@ -0,0 +1,6 @@ +{"metaData":{"id":"2edf2c02-bb63-44e9-a84c-517fad0db296","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["id"],"configuration":{}}} +{"remove":{"path":"part-00001-6d252218-2632-416e-9e46-f32316ec314a-c000.snappy.parquet","dataChange":true}} +{"remove":{"path":"part-00000-348d7f43-38f6-4778-88c7-45f379471c49-c000.snappy.parquet","dataChange":true}} +{"add":{"path":"id=5/part-00000-f1e0b560-ca00-409e-a274-f1ab264bc412.c000.snappy.parquet","partitionValues":{"id":"5"},"size":362,"modificationTime":1501109076000,"dataChange":true}} +{"add":{"path":"id=6/part-00000-adb59f54-6b8f-4bfd-9915-ae26bd0f0e2c.c000.snappy.parquet","partitionValues":{"id":"6"},"size":362,"modificationTime":1501109076000,"dataChange":true}} +{"add":{"path":"id=4/part-00001-36c738bf-7836-479b-9cc1-7a4934207856.c000.snappy.parquet","partitionValues":{"id":"4"},"size":362,"modificationTime":1501109076000,"dataChange":true}} diff --git a/spark/src/test/resources/delta/delta-0.1.0/_delta_log/_last_checkpoint b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..a801408710e --- /dev/null +++ b/spark/src/test/resources/delta/delta-0.1.0/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":3,"size":6} diff --git a/spark/src/test/resources/delta/delta-0.1.0/id=4/.part-00001-36c738bf-7836-479b-9cc1-7a4934207856.c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-0.1.0/id=4/.part-00001-36c738bf-7836-479b-9cc1-7a4934207856.c000.snappy.parquet.crc new file mode 100644 index 00000000000..9e4150199bc Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/id=4/.part-00001-36c738bf-7836-479b-9cc1-7a4934207856.c000.snappy.parquet.crc differ diff --git 
a/spark/src/test/resources/delta/delta-0.1.0/id=4/part-00001-36c738bf-7836-479b-9cc1-7a4934207856.c000.snappy.parquet b/spark/src/test/resources/delta/delta-0.1.0/id=4/part-00001-36c738bf-7836-479b-9cc1-7a4934207856.c000.snappy.parquet new file mode 100644 index 00000000000..ff0a5aa6049 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/id=4/part-00001-36c738bf-7836-479b-9cc1-7a4934207856.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/id=5/.part-00000-f1e0b560-ca00-409e-a274-f1ab264bc412.c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-0.1.0/id=5/.part-00000-f1e0b560-ca00-409e-a274-f1ab264bc412.c000.snappy.parquet.crc new file mode 100644 index 00000000000..1440ef2e1e1 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/id=5/.part-00000-f1e0b560-ca00-409e-a274-f1ab264bc412.c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/id=5/part-00000-f1e0b560-ca00-409e-a274-f1ab264bc412.c000.snappy.parquet b/spark/src/test/resources/delta/delta-0.1.0/id=5/part-00000-f1e0b560-ca00-409e-a274-f1ab264bc412.c000.snappy.parquet new file mode 100644 index 00000000000..5e447133cc5 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/id=5/part-00000-f1e0b560-ca00-409e-a274-f1ab264bc412.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/id=6/.part-00000-adb59f54-6b8f-4bfd-9915-ae26bd0f0e2c.c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-0.1.0/id=6/.part-00000-adb59f54-6b8f-4bfd-9915-ae26bd0f0e2c.c000.snappy.parquet.crc new file mode 100644 index 00000000000..3ef0ea32561 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/id=6/.part-00000-adb59f54-6b8f-4bfd-9915-ae26bd0f0e2c.c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/id=6/part-00000-adb59f54-6b8f-4bfd-9915-ae26bd0f0e2c.c000.snappy.parquet b/spark/src/test/resources/delta/delta-0.1.0/id=6/part-00000-adb59f54-6b8f-4bfd-9915-ae26bd0f0e2c.c000.snappy.parquet new file mode 100644 index 00000000000..2a0c6097ef3 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/id=6/part-00000-adb59f54-6b8f-4bfd-9915-ae26bd0f0e2c.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/part-00000-348d7f43-38f6-4778-88c7-45f379471c49-c000.snappy.parquet b/spark/src/test/resources/delta/delta-0.1.0/part-00000-348d7f43-38f6-4778-88c7-45f379471c49-c000.snappy.parquet new file mode 100644 index 00000000000..119aa1bb010 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/part-00000-348d7f43-38f6-4778-88c7-45f379471c49-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/part-00000-f4aeebd0-a689-4e1b-bc7a-bbb0ec59dce5-c000.snappy.parquet b/spark/src/test/resources/delta/delta-0.1.0/part-00000-f4aeebd0-a689-4e1b-bc7a-bbb0ec59dce5-c000.snappy.parquet new file mode 100644 index 00000000000..9905d167e8b Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/part-00000-f4aeebd0-a689-4e1b-bc7a-bbb0ec59dce5-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/part-00001-6d252218-2632-416e-9e46-f32316ec314a-c000.snappy.parquet b/spark/src/test/resources/delta/delta-0.1.0/part-00001-6d252218-2632-416e-9e46-f32316ec314a-c000.snappy.parquet new file mode 100644 index 00000000000..b164469c20f Binary files /dev/null and 
b/spark/src/test/resources/delta/delta-0.1.0/part-00001-6d252218-2632-416e-9e46-f32316ec314a-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-0.1.0/part-00001-f1cb1cf9-7a73-439c-b0ea-dcba5c2280a6-c000.snappy.parquet b/spark/src/test/resources/delta/delta-0.1.0/part-00001-f1cb1cf9-7a73-439c-b0ea-dcba5c2280a6-c000.snappy.parquet new file mode 100644 index 00000000000..2ca5f3d7805 Binary files /dev/null and b/spark/src/test/resources/delta/delta-0.1.0/part-00001-f1cb1cf9-7a73-439c-b0ea-dcba5c2280a6-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/.part-00000-59316e80-0f6c-491a-9716-5e0419434e46-c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-1.2.1/.part-00000-59316e80-0f6c-491a-9716-5e0419434e46-c000.snappy.parquet.crc new file mode 100644 index 00000000000..d7aaf296de9 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/.part-00000-59316e80-0f6c-491a-9716-5e0419434e46-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/.part-00000-635b7994-d3f9-4623-b032-8a9c8a7ca5b9-c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-1.2.1/.part-00000-635b7994-d3f9-4623-b032-8a9c8a7ca5b9-c000.snappy.parquet.crc new file mode 100644 index 00000000000..d7aaf296de9 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/.part-00000-635b7994-d3f9-4623-b032-8a9c8a7ca5b9-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/.part-00000-87624dd4-c6dc-4163-a4e6-0e50caa28760-c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-1.2.1/.part-00000-87624dd4-c6dc-4163-a4e6-0e50caa28760-c000.snappy.parquet.crc new file mode 100644 index 00000000000..d7aaf296de9 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/.part-00000-87624dd4-c6dc-4163-a4e6-0e50caa28760-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/.part-00000-e107d259-11d5-4e5b-b472-62daa676743b-c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-1.2.1/.part-00000-e107d259-11d5-4e5b-b472-62daa676743b-c000.snappy.parquet.crc new file mode 100644 index 00000000000..6abc1764fa1 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/.part-00000-e107d259-11d5-4e5b-b472-62daa676743b-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/.part-00001-91d10124-a73d-42c2-9ef0-75ed41ca73d8-c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-1.2.1/.part-00001-91d10124-a73d-42c2-9ef0-75ed41ca73d8-c000.snappy.parquet.crc new file mode 100644 index 00000000000..6abc1764fa1 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/.part-00001-91d10124-a73d-42c2-9ef0-75ed41ca73d8-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/.part-00002-dca394a5-9d0a-4630-a90a-a8f7f675e4e4-c000.snappy.parquet.crc b/spark/src/test/resources/delta/delta-1.2.1/.part-00002-dca394a5-9d0a-4630-a90a-a8f7f675e4e4-c000.snappy.parquet.crc new file mode 100644 index 00000000000..6abc1764fa1 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/.part-00002-dca394a5-9d0a-4630-a90a-a8f7f675e4e4-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000000.json.crc b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 00000000000..7072e161963 Binary files /dev/null and 
b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000000.json.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000001.json.crc b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 00000000000..814cd1b164f Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000001.json.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000002.checkpoint.parquet.crc b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000002.checkpoint.parquet.crc new file mode 100644 index 00000000000..24eb40e0fd1 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000002.checkpoint.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000002.json.crc b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 00000000000..f9aaa5e62f6 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000002.json.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000003.json.crc b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 00000000000..49e3fbeaf96 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000003.json.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000004.checkpoint.parquet.crc b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000004.checkpoint.parquet.crc new file mode 100644 index 00000000000..ca16c6dd2bb Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000004.checkpoint.parquet.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000004.json.crc b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 00000000000..78b693f5ea3 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/.00000000000000000004.json.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/._last_checkpoint.crc b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/._last_checkpoint.crc new file mode 100644 index 00000000000..4db654b4c47 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/._last_checkpoint.crc differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000000.json b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..306bf8ff922 --- /dev/null +++ b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"fbfd25ac-9401-4dac-a644-ae543f02cc0f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col1\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1657517977667}} 
+{"add":{"path":"part-00000-87624dd4-c6dc-4163-a4e6-0e50caa28760-c000.snappy.parquet","partitionValues":{},"size":1124,"modificationTime":1657517977000,"dataChange":true,"stats":"{\"numRecords\":11,\"minValues\":{\"value\":0,\"col1\":0,\"col2\":0},\"maxValues\":{\"value\":10,\"col1\":6,\"col2\":2},\"nullCount\":{\"value\":0,\"col1\":0,\"col2\":0}}"}} +{"commitInfo":{"timestamp":1657517977863,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"11","numOutputBytes":"1124"},"engineInfo":"Apache-Spark/3.2.1 Delta-Lake/1.2.1","txnId":"57be32c2-4b7d-415a-96a0-1499caf659e5"}} diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000001.json b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..d443d898dc0 --- /dev/null +++ b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"metaData":{"id":"fbfd25ac-9401-4dac-a644-ae543f02cc0f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col1\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointInterval":"2"},"createdTime":1657517977667}} +{"commitInfo":{"timestamp":1657517989647,"operation":"SET TBLPROPERTIES","operationParameters":{"properties":"{\"delta.checkpointInterval\":\"2\"}"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.2.1 Delta-Lake/1.2.1","txnId":"b53af69e-b0aa-423b-af05-c3bff1c35a11"}} diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000002.checkpoint.parquet b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000002.checkpoint.parquet new file mode 100644 index 00000000000..a0caa4767c9 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000002.checkpoint.parquet differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000002.json b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..a6bea0b6804 --- /dev/null +++ b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"add":{"path":"part-00000-59316e80-0f6c-491a-9716-5e0419434e46-c000.snappy.parquet","partitionValues":{},"size":1124,"modificationTime":1657517994000,"dataChange":true,"stats":"{\"numRecords\":11,\"minValues\":{\"value\":0,\"col1\":0,\"col2\":0},\"maxValues\":{\"value\":10,\"col1\":6,\"col2\":2},\"nullCount\":{\"value\":0,\"col1\":0,\"col2\":0}}"}} +{"commitInfo":{"timestamp":1657517994301,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"11","numOutputBytes":"1124"},"engineInfo":"Apache-Spark/3.2.1 Delta-Lake/1.2.1","txnId":"6e6280cc-b8af-4e60-b2e1-766690b9faee"}} diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000003.json b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..1310be109ad --- 
/dev/null +++ b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"add":{"path":"part-00000-635b7994-d3f9-4623-b032-8a9c8a7ca5b9-c000.snappy.parquet","partitionValues":{},"size":1124,"modificationTime":1657518013000,"dataChange":true,"stats":"{\"numRecords\":11,\"minValues\":{\"value\":0,\"col1\":0,\"col2\":0},\"maxValues\":{\"value\":10,\"col1\":6,\"col2\":2},\"nullCount\":{\"value\":0,\"col1\":0,\"col2\":0}}"}} +{"commitInfo":{"timestamp":1657518013762,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"11","numOutputBytes":"1124"},"engineInfo":"Apache-Spark/3.2.1 Delta-Lake/1.2.1","txnId":"8e95e72f-dee7-4e0b-abb6-a47b4bcc46d2"}} diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000004.checkpoint.parquet b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000004.checkpoint.parquet new file mode 100644 index 00000000000..4df6dd9d13c Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000004.checkpoint.parquet differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000004.json b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000004.json new file mode 100644 index 00000000000..db37a1325d8 --- /dev/null +++ b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/00000000000000000004.json @@ -0,0 +1,7 @@ +{"remove":{"path":"part-00000-635b7994-d3f9-4623-b032-8a9c8a7ca5b9-c000.snappy.parquet","deletionTimestamp":1657518515173,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1124}} +{"remove":{"path":"part-00000-87624dd4-c6dc-4163-a4e6-0e50caa28760-c000.snappy.parquet","deletionTimestamp":1657518515173,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1124}} +{"remove":{"path":"part-00000-59316e80-0f6c-491a-9716-5e0419434e46-c000.snappy.parquet","deletionTimestamp":1657518515173,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1124}} +{"add":{"path":"part-00000-e107d259-11d5-4e5b-b472-62daa676743b-c000.snappy.parquet","partitionValues":{},"size":1124,"modificationTime":1657518515000,"dataChange":true,"stats":"{\"numRecords\":11,\"minValues\":{\"value\":0,\"col1\":0,\"col2\":0},\"maxValues\":{\"value\":10,\"col1\":8,\"col2\":2},\"nullCount\":{\"value\":0,\"col1\":0,\"col2\":0}}"}} +{"add":{"path":"part-00001-91d10124-a73d-42c2-9ef0-75ed41ca73d8-c000.snappy.parquet","partitionValues":{},"size":1124,"modificationTime":1657518515000,"dataChange":true,"stats":"{\"numRecords\":11,\"minValues\":{\"value\":0,\"col1\":0,\"col2\":0},\"maxValues\":{\"value\":10,\"col1\":8,\"col2\":2},\"nullCount\":{\"value\":0,\"col1\":0,\"col2\":0}}"}} +{"add":{"path":"part-00002-dca394a5-9d0a-4630-a90a-a8f7f675e4e4-c000.snappy.parquet","partitionValues":{},"size":1124,"modificationTime":1657518515000,"dataChange":true,"stats":"{\"numRecords\":11,\"minValues\":{\"value\":0,\"col1\":0,\"col2\":0},\"maxValues\":{\"value\":10,\"col1\":8,\"col2\":2},\"nullCount\":{\"value\":0,\"col1\":0,\"col2\":0}}"}} +{"commitInfo":{"timestamp":1657518515749,"operation":"UPDATE","operationParameters":{"predicate":"(col2#477L = 
2)"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"3","numCopiedRows":"24","executionTimeMs":"2306","scanTimeMs":"1738","numAddedFiles":"3","numUpdatedRows":"9","rewriteTimeMs":"568"},"engineInfo":"Apache-Spark/3.2.1 Delta-Lake/1.2.1","txnId":"342d874b-a8e5-49a0-8641-7e5b2285d7cb"}} diff --git a/spark/src/test/resources/delta/delta-1.2.1/_delta_log/_last_checkpoint b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..8c78b2500fd --- /dev/null +++ b/spark/src/test/resources/delta/delta-1.2.1/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":4,"size":8} diff --git a/spark/src/test/resources/delta/delta-1.2.1/part-00000-59316e80-0f6c-491a-9716-5e0419434e46-c000.snappy.parquet b/spark/src/test/resources/delta/delta-1.2.1/part-00000-59316e80-0f6c-491a-9716-5e0419434e46-c000.snappy.parquet new file mode 100644 index 00000000000..12eeb266927 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/part-00000-59316e80-0f6c-491a-9716-5e0419434e46-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/part-00000-635b7994-d3f9-4623-b032-8a9c8a7ca5b9-c000.snappy.parquet b/spark/src/test/resources/delta/delta-1.2.1/part-00000-635b7994-d3f9-4623-b032-8a9c8a7ca5b9-c000.snappy.parquet new file mode 100644 index 00000000000..12eeb266927 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/part-00000-635b7994-d3f9-4623-b032-8a9c8a7ca5b9-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/part-00000-87624dd4-c6dc-4163-a4e6-0e50caa28760-c000.snappy.parquet b/spark/src/test/resources/delta/delta-1.2.1/part-00000-87624dd4-c6dc-4163-a4e6-0e50caa28760-c000.snappy.parquet new file mode 100644 index 00000000000..12eeb266927 Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/part-00000-87624dd4-c6dc-4163-a4e6-0e50caa28760-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/part-00000-e107d259-11d5-4e5b-b472-62daa676743b-c000.snappy.parquet b/spark/src/test/resources/delta/delta-1.2.1/part-00000-e107d259-11d5-4e5b-b472-62daa676743b-c000.snappy.parquet new file mode 100644 index 00000000000..f75b435c05a Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/part-00000-e107d259-11d5-4e5b-b472-62daa676743b-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/part-00001-91d10124-a73d-42c2-9ef0-75ed41ca73d8-c000.snappy.parquet b/spark/src/test/resources/delta/delta-1.2.1/part-00001-91d10124-a73d-42c2-9ef0-75ed41ca73d8-c000.snappy.parquet new file mode 100644 index 00000000000..f75b435c05a Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/part-00001-91d10124-a73d-42c2-9ef0-75ed41ca73d8-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/delta-1.2.1/part-00002-dca394a5-9d0a-4630-a90a-a8f7f675e4e4-c000.snappy.parquet b/spark/src/test/resources/delta/delta-1.2.1/part-00002-dca394a5-9d0a-4630-a90a-a8f7f675e4e4-c000.snappy.parquet new file mode 100644 index 00000000000..f75b435c05a Binary files /dev/null and b/spark/src/test/resources/delta/delta-1.2.1/part-00002-dca394a5-9d0a-4630-a90a-a8f7f675e4e4-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-512e1537-8aaa-4193-b8b4-bef3de0de409-c000.snappy.parquet.crc 
b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-512e1537-8aaa-4193-b8b4-bef3de0de409-c000.snappy.parquet.crc new file mode 100644 index 00000000000..c29a858708e Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-512e1537-8aaa-4193-b8b4-bef3de0de409-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-7c2deba3-1994-4fb8-bc07-d46c948aa415-c000.snappy.parquet.crc b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-7c2deba3-1994-4fb8-bc07-d46c948aa415-c000.snappy.parquet.crc new file mode 100644 index 00000000000..c29a858708e Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-7c2deba3-1994-4fb8-bc07-d46c948aa415-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-b44fcdb0-8b06-4f3a-8606-f8311a96f6dc-c000.snappy.parquet.crc b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-b44fcdb0-8b06-4f3a-8606-f8311a96f6dc-c000.snappy.parquet.crc new file mode 100644 index 00000000000..c29a858708e Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-b44fcdb0-8b06-4f3a-8606-f8311a96f6dc-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-cb6b150b-30b8-4662-ad28-ff32ddab96d2-c000.snappy.parquet.crc b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-cb6b150b-30b8-4662-ad28-ff32ddab96d2-c000.snappy.parquet.crc new file mode 100644 index 00000000000..ccbde4e02b3 Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00000-cb6b150b-30b8-4662-ad28-ff32ddab96d2-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/.part-00001-185eca06-e017-4dea-ae49-fc48b973e37e-c000.snappy.parquet.crc b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00001-185eca06-e017-4dea-ae49-fc48b973e37e-c000.snappy.parquet.crc new file mode 100644 index 00000000000..d67ea8a3c25 Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00001-185eca06-e017-4dea-ae49-fc48b973e37e-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/.part-00001-4327c977-2734-4477-9507-7ccf67924649-c000.snappy.parquet.crc b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00001-4327c977-2734-4477-9507-7ccf67924649-c000.snappy.parquet.crc new file mode 100644 index 00000000000..d67ea8a3c25 Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00001-4327c977-2734-4477-9507-7ccf67924649-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/.part-00001-c373a5bd-85f0-4758-815e-7eb62007a15c-c000.snappy.parquet.crc b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00001-c373a5bd-85f0-4758-815e-7eb62007a15c-c000.snappy.parquet.crc new file mode 100644 index 00000000000..d67ea8a3c25 Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/.part-00001-c373a5bd-85f0-4758-815e-7eb62007a15c-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000000.json.c6b312ca-665d-46ab-93a9-9f87ad2baa92.tmp.crc b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000000.json.c6b312ca-665d-46ab-93a9-9f87ad2baa92.tmp.crc new file mode 100644 index 00000000000..f8a02aefb06 Binary files /dev/null and 
b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000000.json.c6b312ca-665d-46ab-93a9-9f87ad2baa92.tmp.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000001.json.641a776e-6e56-4423-a9b0-7efc9e58826a.tmp.crc b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000001.json.641a776e-6e56-4423-a9b0-7efc9e58826a.tmp.crc new file mode 100644 index 00000000000..48472326a7b Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000001.json.641a776e-6e56-4423-a9b0-7efc9e58826a.tmp.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000002.json.e64807e6-437c-44c9-abd2-50e6514d236e.tmp.crc b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000002.json.e64807e6-437c-44c9-abd2-50e6514d236e.tmp.crc new file mode 100644 index 00000000000..ea20bde02bf Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000002.json.e64807e6-437c-44c9-abd2-50e6514d236e.tmp.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000003.json.b374eda7-fa09-48ce-b06c-56025163f6ae.tmp.crc b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000003.json.b374eda7-fa09-48ce-b06c-56025163f6ae.tmp.crc new file mode 100644 index 00000000000..9f4152a1b87 Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/..00000000000000000003.json.b374eda7-fa09-48ce-b06c-56025163f6ae.tmp.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/.._last_checkpoint.477ba875-7a14-4e57-9973-1349c21a152c.tmp.crc b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/.._last_checkpoint.477ba875-7a14-4e57-9973-1349c21a152c.tmp.crc new file mode 100644 index 00000000000..dff2ad22145 Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/.._last_checkpoint.477ba875-7a14-4e57-9973-1349c21a152c.tmp.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/.00000000000000000003.checkpoint.parquet.crc b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/.00000000000000000003.checkpoint.parquet.crc new file mode 100644 index 00000000000..a12b17b7058 Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/.00000000000000000003.checkpoint.parquet.crc differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000000.json b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..ae8a857b929 --- /dev/null +++ b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1564524295023,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"22ef18ba-191c-4c36-a606-3dad5cdf3830","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1564524294376}} 
+{"add":{"path":"part-00000-b44fcdb0-8b06-4f3a-8606-f8311a96f6dc-c000.snappy.parquet","partitionValues":{},"size":396,"modificationTime":1564524294000,"dataChange":true}} +{"add":{"path":"part-00001-185eca06-e017-4dea-ae49-fc48b973e37e-c000.snappy.parquet","partitionValues":{},"size":400,"modificationTime":1564524294000,"dataChange":true}} diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000001.json b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..e916b166652 --- /dev/null +++ b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1564524296741,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"part-00000-512e1537-8aaa-4193-b8b4-bef3de0de409-c000.snappy.parquet","partitionValues":{},"size":396,"modificationTime":1564524296000,"dataChange":true}} +{"add":{"path":"part-00001-4327c977-2734-4477-9507-7ccf67924649-c000.snappy.parquet","partitionValues":{},"size":400,"modificationTime":1564524296000,"dataChange":true}} diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000002.json b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..29920cb1a96 --- /dev/null +++ b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000002.json @@ -0,0 +1,7 @@ +{"commitInfo":{"timestamp":1564524298214,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":1,"isBlindAppend":false}} +{"add":{"path":"part-00000-7c2deba3-1994-4fb8-bc07-d46c948aa415-c000.snappy.parquet","partitionValues":{},"size":396,"modificationTime":1564524297000,"dataChange":true}} +{"add":{"path":"part-00001-c373a5bd-85f0-4758-815e-7eb62007a15c-c000.snappy.parquet","partitionValues":{},"size":400,"modificationTime":1564524297000,"dataChange":true}} +{"remove":{"path":"part-00000-512e1537-8aaa-4193-b8b4-bef3de0de409-c000.snappy.parquet","deletionTimestamp":1564524298213,"dataChange":true}} +{"remove":{"path":"part-00000-b44fcdb0-8b06-4f3a-8606-f8311a96f6dc-c000.snappy.parquet","deletionTimestamp":1564524298214,"dataChange":true}} +{"remove":{"path":"part-00001-185eca06-e017-4dea-ae49-fc48b973e37e-c000.snappy.parquet","deletionTimestamp":1564524298214,"dataChange":true}} +{"remove":{"path":"part-00001-4327c977-2734-4477-9507-7ccf67924649-c000.snappy.parquet","deletionTimestamp":1564524298214,"dataChange":true}} diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000003.checkpoint.parquet b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000003.checkpoint.parquet new file mode 100644 index 00000000000..1549dd3fd3a Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000003.checkpoint.parquet differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000003.json b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..25c112de1e0 --- /dev/null +++ b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/00000000000000000003.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1564524299648,"operation":"STREAMING 
UPDATE","operationParameters":{"outputMode":"Append","queryId":"e4a20b59-dd0e-4c50-b074-e8ae4786df30","epochId":"0"},"readVersion":2,"isBlindAppend":true}} +{"txn":{"appId":"e4a20b59-dd0e-4c50-b074-e8ae4786df30","version":0,"lastUpdated":1564524299648}} +{"add":{"path":"part-00000-cb6b150b-30b8-4662-ad28-ff32ddab96d2-c000.snappy.parquet","partitionValues":{},"size":404,"modificationTime":1564524299000,"dataChange":true}} diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/_last_checkpoint b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..d83040f8f83 --- /dev/null +++ b/spark/src/test/resources/delta/history/delta-0.2.0/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":3,"size":10} diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-512e1537-8aaa-4193-b8b4-bef3de0de409-c000.snappy.parquet b/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-512e1537-8aaa-4193-b8b4-bef3de0de409-c000.snappy.parquet new file mode 100644 index 00000000000..dbc814ce804 Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-512e1537-8aaa-4193-b8b4-bef3de0de409-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-7c2deba3-1994-4fb8-bc07-d46c948aa415-c000.snappy.parquet b/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-7c2deba3-1994-4fb8-bc07-d46c948aa415-c000.snappy.parquet new file mode 100644 index 00000000000..dbc814ce804 Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-7c2deba3-1994-4fb8-bc07-d46c948aa415-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-b44fcdb0-8b06-4f3a-8606-f8311a96f6dc-c000.snappy.parquet b/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-b44fcdb0-8b06-4f3a-8606-f8311a96f6dc-c000.snappy.parquet new file mode 100644 index 00000000000..dbc814ce804 Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-b44fcdb0-8b06-4f3a-8606-f8311a96f6dc-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-cb6b150b-30b8-4662-ad28-ff32ddab96d2-c000.snappy.parquet b/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-cb6b150b-30b8-4662-ad28-ff32ddab96d2-c000.snappy.parquet new file mode 100644 index 00000000000..ee09ff9a133 Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/part-00000-cb6b150b-30b8-4662-ad28-ff32ddab96d2-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/part-00001-185eca06-e017-4dea-ae49-fc48b973e37e-c000.snappy.parquet b/spark/src/test/resources/delta/history/delta-0.2.0/part-00001-185eca06-e017-4dea-ae49-fc48b973e37e-c000.snappy.parquet new file mode 100644 index 00000000000..c11b874f77e Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/part-00001-185eca06-e017-4dea-ae49-fc48b973e37e-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/history/delta-0.2.0/part-00001-4327c977-2734-4477-9507-7ccf67924649-c000.snappy.parquet b/spark/src/test/resources/delta/history/delta-0.2.0/part-00001-4327c977-2734-4477-9507-7ccf67924649-c000.snappy.parquet new file mode 100644 index 00000000000..c11b874f77e Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/part-00001-4327c977-2734-4477-9507-7ccf67924649-c000.snappy.parquet differ diff 
--git a/spark/src/test/resources/delta/history/delta-0.2.0/part-00001-c373a5bd-85f0-4758-815e-7eb62007a15c-c000.snappy.parquet b/spark/src/test/resources/delta/history/delta-0.2.0/part-00001-c373a5bd-85f0-4758-815e-7eb62007a15c-c000.snappy.parquet new file mode 100644 index 00000000000..c11b874f77e Binary files /dev/null and b/spark/src/test/resources/delta/history/delta-0.2.0/part-00001-c373a5bd-85f0-4758-815e-7eb62007a15c-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/_delta_log/00000000000000000000.json b/spark/src/test/resources/delta/partitioned-table-with-dv-large/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..589538bc732 --- /dev/null +++ b/spark/src/test/resources/delta/partitioned-table-with-dv-large/_delta_log/00000000000000000000.json @@ -0,0 +1,13 @@ +{"commitInfo":{"timestamp":1675465305121,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"partCol\"]"},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"10","numOutputRows":"2000","numOutputBytes":"13989"},"engineInfo":"","txnId":"ec179bfe-cc75-442f-bf1f-75a7a499d1ae"}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partCol\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["partCol"],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1675465301176}} +{"add":{"path":"partCol=0/part-00000-757a3870-38dd-41ac-86f1-e1e6826df6bc.c000.snappy.parquet","partitionValues":{"partCol":"0"},"size":1399,"modificationTime":1675465304390,"dataChange":true,"stats":"{\"numRecords\":200,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":1990},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465304390000","MIN_INSERTION_TIME":"1675465304390000","MAX_INSERTION_TIME":"1675465304390000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"partCol=1/part-00000-ffe81e1a-1a1f-4803-bc2a-e68f7b2ea122.c000.snappy.parquet","partitionValues":{"partCol":"1"},"size":1399,"modificationTime":1675465304503,"dataChange":true,"stats":"{\"numRecords\":200,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":1991},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465304390001","MIN_INSERTION_TIME":"1675465304390001","MAX_INSERTION_TIME":"1675465304390001","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"partCol=2/part-00000-5963000f-3e52-4c43-a106-d7e527f5722a.c000.snappy.parquet","partitionValues":{"partCol":"2"},"size":1399,"modificationTime":1675465304550,"dataChange":true,"stats":"{\"numRecords\":200,\"minValues\":{\"id\":2},\"maxValues\":{\"id\":1992},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465304390002","MIN_INSERTION_TIME":"1675465304390002","MAX_INSERTION_TIME":"1675465304390002","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"add":{"path":"partCol=3/part-00000-068d9a17-0362-43f9-ad68-6bfcbd27448d.c000.snappy.parquet","partitionValues":{"partCol":"3"},"size":1397,"modificationTime":1675465304596,"dataChange":true,"stats":"{\"numRecords\":200,\"minValues\":{\"id\":3},\"maxValues\":{\"id\":1993},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465304390003","MIN_INSERTION_TIME":"1675465304390003","MAX_INSERTION_TIME":"1675465304390003","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"partCol=4/part-00000-c66868e5-d1e0-4f22-ae89-9cc4d2a133fa.c000.snappy.parquet","partitionValues":{"partCol":"4"},"size":1400,"modificationTime":1675465304641,"dataChange":true,"stats":"{\"numRecords\":200,\"minValues\":{\"id\":4},\"maxValues\":{\"id\":1994},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465304390004","MIN_INSERTION_TIME":"1675465304390004","MAX_INSERTION_TIME":"1675465304390004","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"partCol=5/part-00000-70dbcf83-e5c0-4c91-8e1a-be86f08b98f4.c000.snappy.parquet","partitionValues":{"partCol":"5"},"size":1399,"modificationTime":1675465304685,"dataChange":true,"stats":"{\"numRecords\":200,\"minValues\":{\"id\":5},\"maxValues\":{\"id\":1995},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465304390005","MIN_INSERTION_TIME":"1675465304390005","MAX_INSERTION_TIME":"1675465304390005","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"partCol=6/part-00000-34e763ec-3291-4cd0-9b90-fd2d24c68098.c000.snappy.parquet","partitionValues":{"partCol":"6"},"size":1399,"modificationTime":1675465304728,"dataChange":true,"stats":"{\"numRecords\":200,\"minValues\":{\"id\":6},\"maxValues\":{\"id\":1996},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465304390006","MIN_INSERTION_TIME":"1675465304390006","MAX_INSERTION_TIME":"1675465304390006","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"partCol=7/part-00000-f43c32e8-3996-43ae-9b14-9b7f8fec6221.c000.snappy.parquet","partitionValues":{"partCol":"7"},"size":1399,"modificationTime":1675465304770,"dataChange":true,"stats":"{\"numRecords\":200,\"minValues\":{\"id\":7},\"maxValues\":{\"id\":1997},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465304390007","MIN_INSERTION_TIME":"1675465304390007","MAX_INSERTION_TIME":"1675465304390007","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"partCol=8/part-00000-a1137e9e-5425-4589-b039-84378f061fc4.c000.snappy.parquet","partitionValues":{"partCol":"8"},"size":1399,"modificationTime":1675465304879,"dataChange":true,"stats":"{\"numRecords\":200,\"minValues\":{\"id\":8},\"maxValues\":{\"id\":1998},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465304390008","MIN_INSERTION_TIME":"1675465304390008","MAX_INSERTION_TIME":"1675465304390008","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"partCol=9/part-00000-6bcf7302-8e23-4613-aec2-02856f8f1d05.c000.snappy.parquet","partitionValues":{"partCol":"9"},"size":1399,"modificationTime":1675465304928,"dataChange":true,"stats":"{\"numRecords\":200,\"minValues\":{\"id\":9},\"maxValues\":{\"id\":1999},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465304390009","MIN_INSERTION_TIME":"1675465304390009","MAX_INSERTION_TIME":"1675465304390009","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/_delta_log/00000000000000000001.json 
b/spark/src/test/resources/delta/partitioned-table-with-dv-large/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..95ac03718ba --- /dev/null +++ b/spark/src/test/resources/delta/partitioned-table-with-dv-large/_delta_log/00000000000000000001.json @@ -0,0 +1,13 @@ +{"commitInfo":{"timestamp":1675465322730,"operation":"DELETE","operationParameters":{"predicate":"[\"(spark_catalog.delta.`/private/var/folders/g3/hcd28y8s71s0yh7whh443wz00000gp/T/spark-2434260e-1ecd-45b0-b08a-62dd7928b9ae`.id IN (0, 180, 308, 225, 756, 1007, 1503))\"]"},"readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"0","numCopiedRows":"0","numDeletionVectorsAdded":"6","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"0","executionTimeMs":"11013","numDeletionVectorsUpdated":"0","numDeletedRows":"7","scanTimeMs":"10438","numAddedFiles":"0","rewriteTimeMs":"557"},"engineInfo":"","txnId":"bf3a73e8-ad42-4a6a-8c7f-4430e1891c36"}} +{"remove":{"path":"partCol=8/part-00000-a1137e9e-5425-4589-b039-84378f061fc4.c000.snappy.parquet","deletionTimestamp":1675465322727,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"partCol":"8"},"size":1399,"tags":{"INSERTION_TIME":"1675465304390008","MIN_INSERTION_TIME":"1675465304390008","MAX_INSERTION_TIME":"1675465304390008","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"partCol=5/part-00000-70dbcf83-e5c0-4c91-8e1a-be86f08b98f4.c000.snappy.parquet","deletionTimestamp":1675465322727,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"partCol":"5"},"size":1399,"tags":{"INSERTION_TIME":"1675465304390005","MIN_INSERTION_TIME":"1675465304390005","MAX_INSERTION_TIME":"1675465304390005","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"partCol=3/part-00000-068d9a17-0362-43f9-ad68-6bfcbd27448d.c000.snappy.parquet","deletionTimestamp":1675465322727,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"partCol":"3"},"size":1397,"tags":{"INSERTION_TIME":"1675465304390003","MIN_INSERTION_TIME":"1675465304390003","MAX_INSERTION_TIME":"1675465304390003","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"partCol=7/part-00000-f43c32e8-3996-43ae-9b14-9b7f8fec6221.c000.snappy.parquet","deletionTimestamp":1675465322727,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"partCol":"7"},"size":1399,"tags":{"INSERTION_TIME":"1675465304390007","MIN_INSERTION_TIME":"1675465304390007","MAX_INSERTION_TIME":"1675465304390007","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"partCol=6/part-00000-34e763ec-3291-4cd0-9b90-fd2d24c68098.c000.snappy.parquet","deletionTimestamp":1675465322727,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"partCol":"6"},"size":1399,"tags":{"INSERTION_TIME":"1675465304390006","MIN_INSERTION_TIME":"1675465304390006","MAX_INSERTION_TIME":"1675465304390006","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"partCol=0/part-00000-757a3870-38dd-41ac-86f1-e1e6826df6bc.c000.snappy.parquet","deletionTimestamp":1675465322727,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"partCol":"0"},"size":1399,"tags":{"INSERTION_TIME":"1675465304390000","MIN_INSERTION_TIME":"1675465304390000","MAX_INSERTION_TIME":"1675465304390000","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"add":{"path":"partCol=8/part-00000-a1137e9e-5425-4589-b039-84378f061fc4.c000.snappy.parquet","partitionValues":{"partCol":"8"},"size":1399,"modificationTime":1675465304879,"dataChange":true,"stats":"{\"numRecords\":200,\"minValues\":{\"id\":8},\"maxValues\":{\"id\":1998},\"nullCount\":{\"id\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1675465304390008","MIN_INSERTION_TIME":"1675465304390008","MAX_INSERTION_TIME":"1675465304390008","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"24t","txnId":"c72d2694-23fb-4adc-a315-8ee8c30853b0"}} +{"add":{"path":"partCol=6/part-00000-2dee959e-3d92-4c43-ac01-24d888ba82fd.c000.snappy.parquet","partitionValues":{"partCol":"6"},"size":586,"modificationTime":1675465324549,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":756},\"maxValues\":{\"id\":756},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465324549000","MIN_INSERTION_TIME":"1675465324549000","MAX_INSERTION_TIME":"1675465324549000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"partCol=8/part-00000-fe120a67-87dc-4997-8811-3ad9d8dc3743.c000.snappy.parquet","partitionValues":{"partCol":"8"},"size":586,"modificationTime":1675465324578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":308},\"maxValues\":{\"id\":308},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465324549001","MIN_INSERTION_TIME":"1675465324549001","MAX_INSERTION_TIME":"1675465324549001","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/_delta_log/00000000000000000003.json b/spark/src/test/resources/delta/partitioned-table-with-dv-large/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..191cc216435 --- /dev/null +++ b/spark/src/test/resources/delta/partitioned-table-with-dv-large/_delta_log/00000000000000000003.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1675465327086,"operation":"DELETE","operationParameters":{"predicate":"[\"(spark_catalog.delta.`/private/var/folders/g3/hcd28y8s71s0yh7whh443wz00000gp/T/spark-2434260e-1ecd-45b0-b08a-62dd7928b9ae`.id IN (300, 257, 399, 786, 1353, 1567, 1800))\"]"},"readVersion":2,"isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"0","numCopiedRows":"0","numDeletionVectorsAdded":"1","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"0","executionTimeMs":"1484","numDeletionVectorsUpdated":"4","numDeletedRows":"7","scanTimeMs":"779","numAddedFiles":"0","rewriteTimeMs":"703"},"engineInfo":"","txnId":"67b81203-e0e8-4eca-bb04-5806f4b1cad5"}} +{"remove":{"path":"partCol=0/part-00000-757a3870-38dd-41ac-86f1-e1e6826df6bc.c000.snappy.parquet","deletionTimestamp":1675465327084,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"partCol":"0"},"size":1399,"tags":{"INSERTION_TIME":"1675465304390000","MIN_INSERTION_TIME":"1675465304390000","MAX_INSERTION_TIME":"1675465304390000","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"24t","txnId":"14dd0cb9-4d96-487f-af5b-1e29a5c1fa70"}} 
+{"add":{"path":"partCol=3/part-00000-8775b518-3470-41d4-8d7e-27596c48053e.c000.snappy.parquet","partitionValues":{"partCol":"3"},"size":585,"modificationTime":1675465328471,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1353},\"maxValues\":{\"id\":1353},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465328471000","MIN_INSERTION_TIME":"1675465328471000","MAX_INSERTION_TIME":"1675465328471000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"partCol=7/part-00000-156df4a5-759c-4b9f-82b1-9727a62b7990.c000.snappy.parquet","partitionValues":{"partCol":"7"},"size":586,"modificationTime":1675465328500,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1567},\"maxValues\":{\"id\":1567},\"nullCount\":{\"id\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1675465328471001","MIN_INSERTION_TIME":"1675465328471001","MAX_INSERTION_TIME":"1675465328471001","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/deletion_vector_0661cff2-4d32-4f91-ba26-c77e4498683c.bin b/spark/src/test/resources/delta/partitioned-table-with-dv-large/deletion_vector_0661cff2-4d32-4f91-ba26-c77e4498683c.bin new file mode 100644 index 00000000000..c59597a17ef Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/deletion_vector_0661cff2-4d32-4f91-ba26-c77e4498683c.bin differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/deletion_vector_b86cba62-c87a-4399-80da-d5ffaa3746a8.bin b/spark/src/test/resources/delta/partitioned-table-with-dv-large/deletion_vector_b86cba62-c87a-4399-80da-d5ffaa3746a8.bin new file mode 100644 index 00000000000..c899e9cb035 Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/deletion_vector_b86cba62-c87a-4399-80da-d5ffaa3746a8.bin differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=0/part-00000-757a3870-38dd-41ac-86f1-e1e6826df6bc.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=0/part-00000-757a3870-38dd-41ac-86f1-e1e6826df6bc.c000.snappy.parquet new file mode 100644 index 00000000000..94cf758afdb Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=0/part-00000-757a3870-38dd-41ac-86f1-e1e6826df6bc.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=1/part-00000-ffe81e1a-1a1f-4803-bc2a-e68f7b2ea122.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=1/part-00000-ffe81e1a-1a1f-4803-bc2a-e68f7b2ea122.c000.snappy.parquet new file mode 100644 index 00000000000..47b5d3bc50d Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=1/part-00000-ffe81e1a-1a1f-4803-bc2a-e68f7b2ea122.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=2/part-00000-5963000f-3e52-4c43-a106-d7e527f5722a.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=2/part-00000-5963000f-3e52-4c43-a106-d7e527f5722a.c000.snappy.parquet new file mode 100644 index 00000000000..5e1eb795bb1 Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=2/part-00000-5963000f-3e52-4c43-a106-d7e527f5722a.c000.snappy.parquet differ diff --git 
a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=3/part-00000-068d9a17-0362-43f9-ad68-6bfcbd27448d.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=3/part-00000-068d9a17-0362-43f9-ad68-6bfcbd27448d.c000.snappy.parquet new file mode 100644 index 00000000000..d3e36f34c99 Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=3/part-00000-068d9a17-0362-43f9-ad68-6bfcbd27448d.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=3/part-00000-8775b518-3470-41d4-8d7e-27596c48053e.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=3/part-00000-8775b518-3470-41d4-8d7e-27596c48053e.c000.snappy.parquet new file mode 100644 index 00000000000..65d9abca330 Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=3/part-00000-8775b518-3470-41d4-8d7e-27596c48053e.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=4/part-00000-c66868e5-d1e0-4f22-ae89-9cc4d2a133fa.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=4/part-00000-c66868e5-d1e0-4f22-ae89-9cc4d2a133fa.c000.snappy.parquet new file mode 100644 index 00000000000..8e437d91734 Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=4/part-00000-c66868e5-d1e0-4f22-ae89-9cc4d2a133fa.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=5/part-00000-70dbcf83-e5c0-4c91-8e1a-be86f08b98f4.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=5/part-00000-70dbcf83-e5c0-4c91-8e1a-be86f08b98f4.c000.snappy.parquet new file mode 100644 index 00000000000..f2de9ee4d22 Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=5/part-00000-70dbcf83-e5c0-4c91-8e1a-be86f08b98f4.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=6/part-00000-2dee959e-3d92-4c43-ac01-24d888ba82fd.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=6/part-00000-2dee959e-3d92-4c43-ac01-24d888ba82fd.c000.snappy.parquet new file mode 100644 index 00000000000..cc79002589a Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=6/part-00000-2dee959e-3d92-4c43-ac01-24d888ba82fd.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=6/part-00000-34e763ec-3291-4cd0-9b90-fd2d24c68098.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=6/part-00000-34e763ec-3291-4cd0-9b90-fd2d24c68098.c000.snappy.parquet new file mode 100644 index 00000000000..cbf53bc3ed8 Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=6/part-00000-34e763ec-3291-4cd0-9b90-fd2d24c68098.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=7/part-00000-156df4a5-759c-4b9f-82b1-9727a62b7990.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=7/part-00000-156df4a5-759c-4b9f-82b1-9727a62b7990.c000.snappy.parquet new file mode 100644 index 00000000000..33afe0254c4 Binary files /dev/null and 
b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=7/part-00000-156df4a5-759c-4b9f-82b1-9727a62b7990.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=7/part-00000-f43c32e8-3996-43ae-9b14-9b7f8fec6221.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=7/part-00000-f43c32e8-3996-43ae-9b14-9b7f8fec6221.c000.snappy.parquet new file mode 100644 index 00000000000..36b447805a0 Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=7/part-00000-f43c32e8-3996-43ae-9b14-9b7f8fec6221.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=8/part-00000-a1137e9e-5425-4589-b039-84378f061fc4.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=8/part-00000-a1137e9e-5425-4589-b039-84378f061fc4.c000.snappy.parquet new file mode 100644 index 00000000000..1a2b7977487 Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=8/part-00000-a1137e9e-5425-4589-b039-84378f061fc4.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=8/part-00000-fe120a67-87dc-4997-8811-3ad9d8dc3743.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=8/part-00000-fe120a67-87dc-4997-8811-3ad9d8dc3743.c000.snappy.parquet new file mode 100644 index 00000000000..8a6beea397c Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=8/part-00000-fe120a67-87dc-4997-8811-3ad9d8dc3743.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=9/part-00000-6bcf7302-8e23-4613-aec2-02856f8f1d05.c000.snappy.parquet b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=9/part-00000-6bcf7302-8e23-4613-aec2-02856f8f1d05.c000.snappy.parquet new file mode 100644 index 00000000000..e835e350377 Binary files /dev/null and b/spark/src/test/resources/delta/partitioned-table-with-dv-large/partCol=9/part-00000-6bcf7302-8e23-4613-aec2-02856f8f1d05.c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-gigantic/_delta_log/00000000000000000000.json b/spark/src/test/resources/delta/table-with-dv-gigantic/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..4a95b7b6ff4 --- /dev/null +++ b/spark/src/test/resources/delta/table-with-dv-gigantic/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1682351914000}} +{"add":{"path":"part-00000-2bc940f0-dd3f-461d-8581-136026bf6f95-c000.snappy.parquet","partitionValues":{},"size":8473865,"modificationTime":1682351914339,"dataChange":true,"stats":"{\"numRecords\":2147483658,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":21},\"nullCount\":{\"value\":0},\"tightBounds\":false}","deletionVector":{"storageType":"u","pathOrInlineDv":"o6J(G4p@f*QZS+b{khvI","offset":1,"sizeInBytes":4557136,"cardinality":2147484}}} diff --git 
a/spark/src/test/resources/delta/table-with-dv-gigantic/deletion_vector_4ae9b93c-0d65-4b33-a40c-9c13f68a4763.bin b/spark/src/test/resources/delta/table-with-dv-gigantic/deletion_vector_4ae9b93c-0d65-4b33-a40c-9c13f68a4763.bin new file mode 100644 index 00000000000..21cecd90a2c Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-gigantic/deletion_vector_4ae9b93c-0d65-4b33-a40c-9c13f68a4763.bin differ diff --git a/spark/src/test/resources/delta/table-with-dv-gigantic/part-00000-2bc940f0-dd3f-461d-8581-136026bf6f95-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-gigantic/part-00000-2bc940f0-dd3f-461d-8581-136026bf6f95-c000.snappy.parquet new file mode 100644 index 00000000000..6e88e78c6c4 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-gigantic/part-00000-2bc940f0-dd3f-461d-8581-136026bf6f95-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000000.json b/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..48beb84f608 --- /dev/null +++ b/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000000.json @@ -0,0 +1,23 @@ +{"commitInfo":{"timestamp":1674064770682,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"20","numOutputRows":"2000","numOutputBytes":"20157"},"engineInfo":"","txnId":"f0ddc566-dfe6-4bd8-b264-ce100f9362ef"}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1674064767118}} +{"add":{"path":"part-00000-f5c18e7b-d1bf-4ba5-85dd-e63ddc5931bf-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064769860,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":4},\"maxValues\":{\"value\":1967},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860000","MIN_INSERTION_TIME":"1674064769860000","MAX_INSERTION_TIME":"1674064769860000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064769860,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":18},\"maxValues\":{\"value\":1988},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860001","MIN_INSERTION_TIME":"1674064769860001","MAX_INSERTION_TIME":"1674064769860001","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00002-5459a52f-3fd3-4b79-83a6-e7f57db28650-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770019,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":16},\"maxValues\":{\"value\":1977},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860002","MIN_INSERTION_TIME":"1674064769860002","MAX_INSERTION_TIME":"1674064769860002","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"add":{"path":"part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770019,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":5},\"maxValues\":{\"value\":1982},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860003","MIN_INSERTION_TIME":"1674064769860003","MAX_INSERTION_TIME":"1674064769860003","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770100,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":1},\"maxValues\":{\"value\":1999},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860004","MIN_INSERTION_TIME":"1674064769860004","MAX_INSERTION_TIME":"1674064769860004","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770100,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":8},\"maxValues\":{\"value\":1914},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860005","MIN_INSERTION_TIME":"1674064769860005","MAX_INSERTION_TIME":"1674064769860005","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770207,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":30},\"maxValues\":{\"value\":1992},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860006","MIN_INSERTION_TIME":"1674064769860006","MAX_INSERTION_TIME":"1674064769860006","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770207,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":40},\"maxValues\":{\"value\":1990},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860007","MIN_INSERTION_TIME":"1674064769860007","MAX_INSERTION_TIME":"1674064769860007","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770265,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":13},\"maxValues\":{\"value\":1897},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860008","MIN_INSERTION_TIME":"1674064769860008","MAX_INSERTION_TIME":"1674064769860008","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770265,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":12},\"maxValues\":{\"value\":1987},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860009","MIN_INSERTION_TIME":"1674064769860009","MAX_INSERTION_TIME":"1674064769860009","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"add":{"path":"part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770319,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":19},\"maxValues\":{\"value\":1993},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860010","MIN_INSERTION_TIME":"1674064769860010","MAX_INSERTION_TIME":"1674064769860010","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770319,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":11},\"maxValues\":{\"value\":1984},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860011","MIN_INSERTION_TIME":"1674064769860011","MAX_INSERTION_TIME":"1674064769860011","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770372,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":33},\"maxValues\":{\"value\":1995},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860012","MIN_INSERTION_TIME":"1674064769860012","MAX_INSERTION_TIME":"1674064769860012","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770372,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":20},\"maxValues\":{\"value\":1974},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860013","MIN_INSERTION_TIME":"1674064769860013","MAX_INSERTION_TIME":"1674064769860013","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770427,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":3},\"maxValues\":{\"value\":1996},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860014","MIN_INSERTION_TIME":"1674064769860014","MAX_INSERTION_TIME":"1674064769860014","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770427,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":1997},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860015","MIN_INSERTION_TIME":"1674064769860015","MAX_INSERTION_TIME":"1674064769860015","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770477,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":2},\"maxValues\":{\"value\":1986},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860016","MIN_INSERTION_TIME":"1674064769860016","MAX_INSERTION_TIME":"1674064769860016","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"add":{"path":"part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770476,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":22},\"maxValues\":{\"value\":1998},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860017","MIN_INSERTION_TIME":"1674064769860017","MAX_INSERTION_TIME":"1674064769860017","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770529,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":6},\"maxValues\":{\"value\":1983},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860018","MIN_INSERTION_TIME":"1674064769860018","MAX_INSERTION_TIME":"1674064769860018","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770528,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":36},\"maxValues\":{\"value\":1969},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860019","MIN_INSERTION_TIME":"1674064769860019","MAX_INSERTION_TIME":"1674064769860019","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.json b/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..b5a9043f06f --- /dev/null +++ b/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1674064789962,"operation":"DELETE","operationParameters":{"predicate":"[\"(spark_catalog.delta.`/private/var/folders/g3/hcd28y8s71s0yh7whh443wz00000gp/T/spark-f3dd4a29-dc57-42eb-b752-84179135f5b8`.value IN (0, 180, 300, 700, 1800))\"]"},"readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"0","numCopiedRows":"0","numDeletionVectorsAdded":"5","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"0","executionTimeMs":"12828","numDeletionVectorsUpdated":"0","numDeletedRows":"5","scanTimeMs":"12323","numAddedFiles":"0","rewriteTimeMs":"487"},"engineInfo":"","txnId":"5327cd46-c25b-4127-88fd-5b3c2402691b"}} +{"remove":{"path":"part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet","deletionTimestamp":1674064789957,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860001","MIN_INSERTION_TIME":"1674064769860001","MAX_INSERTION_TIME":"1674064769860001","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet","deletionTimestamp":1674064789957,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860003","MIN_INSERTION_TIME":"1674064769860003","MAX_INSERTION_TIME":"1674064769860003","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"remove":{"path":"part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet","deletionTimestamp":1674064789957,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860012","MIN_INSERTION_TIME":"1674064769860012","MAX_INSERTION_TIME":"1674064769860012","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet","deletionTimestamp":1674064789957,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860015","MIN_INSERTION_TIME":"1674064769860015","MAX_INSERTION_TIME":"1674064769860015","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet","deletionTimestamp":1674064789957,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1007,"tags":{"INSERTION_TIME":"1674064769860019","MIN_INSERTION_TIME":"1674064769860019","MAX_INSERTION_TIME":"1674064769860019","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064769860,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":18},\"maxValues\":{\"value\":1988},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860001","MIN_INSERTION_TIME":"1674064769860001","MAX_INSERTION_TIME":"1674064769860001","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":85,"sizeInBytes":34,"cardinality":1}}} +{"add":{"path":"part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770019,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":5},\"maxValues\":{\"value\":1982},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860003","MIN_INSERTION_TIME":"1674064769860003","MAX_INSERTION_TIME":"1674064769860003","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":169,"sizeInBytes":34,"cardinality":1}}} +{"add":{"path":"part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770372,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":33},\"maxValues\":{\"value\":1995},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860012","MIN_INSERTION_TIME":"1674064769860012","MAX_INSERTION_TIME":"1674064769860012","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":1,"sizeInBytes":34,"cardinality":1}}} +{"add":{"path":"part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770427,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":1997},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860015","MIN_INSERTION_TIME":"1674064769860015","MAX_INSERTION_TIME":"1674064769860015","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":43,"sizeInBytes":34,"cardinality":1}}} 
+{"add":{"path":"part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770528,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":36},\"maxValues\":{\"value\":1969},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860019","MIN_INSERTION_TIME":"1674064769860019","MAX_INSERTION_TIME":"1674064769860019","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":127,"sizeInBytes":34,"cardinality":1}}} diff --git a/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.json b/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..bc186d5e900 --- /dev/null +++ b/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1674064791599,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"2","numOutputBytes":"600"},"engineInfo":"","txnId":"fb0a7015-0096-4d74-821b-3507163c17fa"}} +{"add":{"path":"part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet","partitionValues":{},"size":600,"modificationTime":1674064791593,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"value\":300},\"maxValues\":{\"value\":700},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064791593000","MIN_INSERTION_TIME":"1674064791593000","MAX_INSERTION_TIME":"1674064791593000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000003.json b/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..fca7e5709a9 --- /dev/null +++ b/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000003.json @@ -0,0 +1,13 @@ +{"commitInfo":{"timestamp":1674064797400,"operation":"DELETE","operationParameters":{"predicate":"[\"(spark_catalog.delta.`/private/var/folders/g3/hcd28y8s71s0yh7whh443wz00000gp/T/spark-f3dd4a29-dc57-42eb-b752-84179135f5b8`.value IN (300, 250, 350, 900, 1353, 1567, 1800))\"]"},"readVersion":2,"isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"0","numCopiedRows":"0","numDeletionVectorsAdded":"3","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"0","executionTimeMs":"4726","numDeletionVectorsUpdated":"3","numDeletedRows":"6","scanTimeMs":"4057","numAddedFiles":"0","rewriteTimeMs":"667"},"engineInfo":"","txnId":"d50de74c-f8c8-4e68-b120-267504045e9d"}} +{"remove":{"path":"part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":600,"tags":{"INSERTION_TIME":"1674064791593000","MIN_INSERTION_TIME":"1674064791593000","MAX_INSERTION_TIME":"1674064791593000","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"remove":{"path":"part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860001","MIN_INSERTION_TIME":"1674064769860001","MAX_INSERTION_TIME":"1674064769860001","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":85,"sizeInBytes":34,"cardinality":1}}} +{"remove":{"path":"part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860012","MIN_INSERTION_TIME":"1674064769860012","MAX_INSERTION_TIME":"1674064769860012","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":1,"sizeInBytes":34,"cardinality":1}}} +{"remove":{"path":"part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1007,"tags":{"INSERTION_TIME":"1674064769860014","MIN_INSERTION_TIME":"1674064769860014","MAX_INSERTION_TIME":"1674064769860014","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860018","MIN_INSERTION_TIME":"1674064769860018","MAX_INSERTION_TIME":"1674064769860018","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1007,"tags":{"INSERTION_TIME":"1674064769860019","MIN_INSERTION_TIME":"1674064769860019","MAX_INSERTION_TIME":"1674064769860019","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":127,"sizeInBytes":34,"cardinality":1}}} +{"add":{"path":"part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet","partitionValues":{},"size":600,"modificationTime":1674064791593,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"value\":300},\"maxValues\":{\"value\":700},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064791593000","MIN_INSERTION_TIME":"1674064791593000","MAX_INSERTION_TIME":"1674064791593000","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"UGM+pBY.mtVeP","txnId":"4016704a-babb-44a8-ae8b-c53303465742"}} +{"add":{"path":"part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet","partitionValues":{},"size":600,"modificationTime":1674064798704,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"value\":900},\"maxValues\":{\"value\":1567},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064798704000","MIN_INSERTION_TIME":"1674064798704000","MAX_INSERTION_TIME":"1674064798704000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/spark/src/test/resources/delta/table-with-dv-large/deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin b/spark/src/test/resources/delta/table-with-dv-large/deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin new file mode 100644 index 00000000000..e729ea4e696 Binary files /dev/null and 
b/spark/src/test/resources/delta/table-with-dv-large/deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/deletion_vector_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin b/spark/src/test/resources/delta/table-with-dv-large/deletion_vector_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin new file mode 100644 index 00000000000..e45492fbf48 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/deletion_vector_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet new file mode 100644 index 00000000000..e3cb2ff9e2d Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet new file mode 100644 index 00000000000..8f856e0a336 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00000-f5c18e7b-d1bf-4ba5-85dd-e63ddc5931bf-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00000-f5c18e7b-d1bf-4ba5-85dd-e63ddc5931bf-c000.snappy.parquet new file mode 100644 index 00000000000..eb7f6909018 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00000-f5c18e7b-d1bf-4ba5-85dd-e63ddc5931bf-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet new file mode 100644 index 00000000000..c57f1d7871d Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00002-5459a52f-3fd3-4b79-83a6-e7f57db28650-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00002-5459a52f-3fd3-4b79-83a6-e7f57db28650-c000.snappy.parquet new file mode 100644 index 00000000000..fbafa84440a Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00002-5459a52f-3fd3-4b79-83a6-e7f57db28650-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet new file mode 100644 index 00000000000..55d4f847138 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet 
b/spark/src/test/resources/delta/table-with-dv-large/part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet new file mode 100644 index 00000000000..38b1af17dcd Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet new file mode 100644 index 00000000000..9391c846a62 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet new file mode 100644 index 00000000000..ea18948b1ad Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet new file mode 100644 index 00000000000..e640051c8e4 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet new file mode 100644 index 00000000000..c95daac1d18 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet new file mode 100644 index 00000000000..1d17a4cd184 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet new file mode 100644 index 00000000000..e011b569bc5 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet new file mode 100644 index 00000000000..83360c68c2a Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet differ diff 
--git a/spark/src/test/resources/delta/table-with-dv-large/part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet new file mode 100644 index 00000000000..1426316cc86 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet new file mode 100644 index 00000000000..680b7371166 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet new file mode 100644 index 00000000000..267e8de72c4 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet new file mode 100644 index 00000000000..65feba93d8b Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet new file mode 100644 index 00000000000..61ef5982f65 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet new file mode 100644 index 00000000000..0a6a0b69ed6 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet new file mode 100644 index 00000000000..52bd23e20c4 Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-large/part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-large/part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-large/part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet new file mode 100644 index 00000000000..9ba4278e875 Binary files /dev/null and 
b/spark/src/test/resources/delta/table-with-dv-large/part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000000.json b/spark/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..32b1f9c5467 --- /dev/null +++ b/spark/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1673461409137,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"10","numOutputBytes":"818"},"engineInfo":"","txnId":"d54c00f5-9500-4ed5-b1b5-9f463861f4d3"}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","columnMapping"],"writerFeatures":["deletionVectors","columnMapping"]}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\"}}]}","partitionColumns":[],"configuration":{"delta.columnMapping.mode":"name","delta.enableDeletionVectors":"true","delta.columnMapping.maxColumnId":"1"},"createdTime":1673461406485}} +{"add":{"path":"r4/part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet","partitionValues":{},"size":818,"modificationTime":1673461408778,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":0},\"maxValues\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":9},\"nullCount\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1673461408778000","MIN_INSERTION_TIME":"1673461408778000","MAX_INSERTION_TIME":"1673461408778000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/spark/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000001.json b/spark/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..c1b4a97cf8a --- /dev/null +++ b/spark/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1673461427387,"operation":"DELETE","operationParameters":{"predicate":"[\"(spark_catalog.delta.`/private/var/folders/g3/hcd28y8s71s0yh7whh443wz00000gp/T/spark-cb573b98-e75d-460f-9769-efd9e9bfeffc`.value IN (0, 9))\"]"},"readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"0","numCopiedRows":"0","numDeletionVectorsAdded":"1","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"0","executionTimeMs":"11114","numDeletionVectorsUpdated":"0","numDeletedRows":"2","scanTimeMs":"10589","numAddedFiles":"0","rewriteTimeMs":"508"},"engineInfo":"","txnId":"3943baa4-30a0-44a4-a4f4-e5e92d2ab08b"}} +{"remove":{"path":"r4/part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet","deletionTimestamp":1673461427383,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":818,"tags":{"INSERTION_TIME":"1673461408778000","MIN_INSERTION_TIME":"1673461408778000","MAX_INSERTION_TIME":"1673461408778000","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"add":{"path":"r4/part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet","partitionValues":{},"size":818,"modificationTime":1673461408778,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":0},\"maxValues\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":9},\"nullCount\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1673461408778000","MIN_INSERTION_TIME":"1673461408778000","MAX_INSERTION_TIME":"1673461408778000","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"WYbkwCTB$gH)J7t?$/sK","offset":1,"sizeInBytes":36,"cardinality":2}}} diff --git a/spark/src/test/resources/delta/table-with-dv-small/deletion_vector_b6a98cdd-7843-470d-8897-708cdffa38c5.bin b/spark/src/test/resources/delta/table-with-dv-small/deletion_vector_b6a98cdd-7843-470d-8897-708cdffa38c5.bin new file mode 100644 index 00000000000..f1a01e661cd Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-small/deletion_vector_b6a98cdd-7843-470d-8897-708cdffa38c5.bin differ diff --git a/spark/src/test/resources/delta/table-with-dv-small/r4/part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet b/spark/src/test/resources/delta/table-with-dv-small/r4/part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet new file mode 100644 index 00000000000..29adffe4f0e Binary files /dev/null and b/spark/src/test/resources/delta/table-with-dv-small/r4/part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00000-9f483b95-3ea3-44f0-b54d-73199574be15-c000.snappy.parquet.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00000-9f483b95-3ea3-44f0-b54d-73199574be15-c000.snappy.parquet.crc new file mode 100644 index 00000000000..58027d28f4b Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00000-9f483b95-3ea3-44f0-b54d-73199574be15-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00000-dfb1dd9a-0fe2-420e-81d5-a84004aebcee-c000.snappy.parquet.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00000-dfb1dd9a-0fe2-420e-81d5-a84004aebcee-c000.snappy.parquet.crc new file mode 100644 index 00000000000..58027d28f4b Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00000-dfb1dd9a-0fe2-420e-81d5-a84004aebcee-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00000-f654b1f4-e1ea-40e5-a8cd-452f7c3359d8-c000.snappy.parquet.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00000-f654b1f4-e1ea-40e5-a8cd-452f7c3359d8-c000.snappy.parquet.crc new file mode 100644 index 00000000000..58027d28f4b Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00000-f654b1f4-e1ea-40e5-a8cd-452f7c3359d8-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00001-bfb08fc5-c967-40e4-a646-c8178d8b5e21-c000.snappy.parquet.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00001-bfb08fc5-c967-40e4-a646-c8178d8b5e21-c000.snappy.parquet.crc new file mode 100644 index 00000000000..58301bb2fed Binary files /dev/null and 
b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00001-bfb08fc5-c967-40e4-a646-c8178d8b5e21-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00001-d1030238-b55d-48f8-a4d6-89ef12e9d501-c000.snappy.parquet.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00001-d1030238-b55d-48f8-a4d6-89ef12e9d501-c000.snappy.parquet.crc new file mode 100644 index 00000000000..58301bb2fed Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00001-d1030238-b55d-48f8-a4d6-89ef12e9d501-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00001-d5da9c60-a615-4065-a3cb-4796d86fc797-c000.snappy.parquet.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00001-d5da9c60-a615-4065-a3cb-4796d86fc797-c000.snappy.parquet.crc new file mode 100644 index 00000000000..58301bb2fed Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/.part-00001-d5da9c60-a615-4065-a3cb-4796d86fc797-c000.snappy.parquet.crc differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/.00000000000000000003.json.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 00000000000..f87275e2cb9 Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/.00000000000000000003.json.crc differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/.00000000000000000004.json.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 00000000000..9a759c2a9e2 Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/.00000000000000000004.json.crc differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/.00000000000000000005.json.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/.00000000000000000005.json.crc new file mode 100644 index 00000000000..4a399ece715 Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/.00000000000000000005.json.crc differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/._last_checkpoint.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/._last_checkpoint.crc new file mode 100644 index 00000000000..c796d344b3c Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/._last_checkpoint.crc differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000000.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000000.crc new file mode 100644 index 00000000000..a7d049774f4 --- /dev/null +++ b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000000.crc @@ -0,0 +1 @@ +{"tableSizeBytes":1594,"numFiles":2,"numMetadata":1,"numProtocol":1,"numTransactions":0} diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000000.json 
b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..21d1ae6bb22 --- /dev/null +++ b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1623255695348,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputBytes":"1594","numOutputRows":"9"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"key\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1623255692280}} +{"add":{"path":"part-00000-dfb1dd9a-0fe2-420e-81d5-a84004aebcee-c000.snappy.parquet","partitionValues":{},"size":793,"modificationTime":1623255695000,"dataChange":true,"stats":"{\"numRecords\":4,\"minValues\":{\"key\":1,\"value\":1},\"maxValues\":{\"key\":4,\"value\":4},\"nullCount\":{\"key\":0,\"value\":0}}"}} +{"add":{"path":"part-00001-d5da9c60-a615-4065-a3cb-4796d86fc797-c000.snappy.parquet","partitionValues":{},"size":801,"modificationTime":1623255695000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"key\":5,\"value\":5},\"maxValues\":{\"key\":9,\"value\":9},\"nullCount\":{\"key\":0,\"value\":0}}"}} diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000001.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000001.crc new file mode 100644 index 00000000000..a7d049774f4 --- /dev/null +++ b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000001.crc @@ -0,0 +1 @@ +{"tableSizeBytes":1594,"numFiles":2,"numMetadata":1,"numProtocol":1,"numTransactions":0} diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000001.json b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..a3a2498219e --- /dev/null +++ b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1623255703194,"operation":"SET TBLPROPERTIES","operationParameters":{"properties":"{\"delta.checkpoint.writeStatsAsStruct\":\"true\",\"delta.checkpoint.writeStatsAsJson\":\"false\"}"},"readVersion":0,"isolationLevel":"SnapshotIsolation","isBlindAppend":true,"operationMetrics":{}}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"key\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpoint.writeStatsAsStruct":"true","delta.checkpoint.writeStatsAsJson":"false"},"createdTime":1623255692280}} diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000002.checkpoint.parquet b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000002.checkpoint.parquet new file mode 100644 index 
00000000000..4642edaa68d Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000002.checkpoint.parquet differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000002.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000002.crc new file mode 100644 index 00000000000..a7d049774f4 --- /dev/null +++ b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000002.crc @@ -0,0 +1 @@ +{"tableSizeBytes":1594,"numFiles":2,"numMetadata":1,"numProtocol":1,"numTransactions":0} diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000002.json b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..8cb6348fa86 --- /dev/null +++ b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000002.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1623255706138,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numFiles":"2","numOutputBytes":"1594","numOutputRows":"9"}}} +{"add":{"path":"part-00000-f654b1f4-e1ea-40e5-a8cd-452f7c3359d8-c000.snappy.parquet","partitionValues":{},"size":793,"modificationTime":1623255705000,"dataChange":true,"stats":"{\"numRecords\":4,\"minValues\":{\"key\":1,\"value\":1},\"maxValues\":{\"key\":4,\"value\":4},\"nullCount\":{\"key\":0,\"value\":0}}"}} +{"add":{"path":"part-00001-bfb08fc5-c967-40e4-a646-c8178d8b5e21-c000.snappy.parquet","partitionValues":{},"size":801,"modificationTime":1623255705000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"key\":5,\"value\":5},\"maxValues\":{\"key\":9,\"value\":9},\"nullCount\":{\"key\":0,\"value\":0}}"}} +{"remove":{"path":"part-00000-dfb1dd9a-0fe2-420e-81d5-a84004aebcee-c000.snappy.parquet","deletionTimestamp":1623255706137,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":793}} +{"remove":{"path":"part-00001-d5da9c60-a615-4065-a3cb-4796d86fc797-c000.snappy.parquet","deletionTimestamp":1623255706138,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":801}} diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000003.crc b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000003.crc new file mode 100644 index 00000000000..aedbd3cae4a --- /dev/null +++ b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000003.crc @@ -0,0 +1 @@ +{"tableSizeBytes":3188,"numFiles":4,"numMetadata":1,"numProtocol":1,"numTransactions":0} diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000003.json b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..ccd704d23ee --- /dev/null +++ b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000003.json @@ -0,0 +1,3 @@ 
+{"commitInfo":{"timestamp":1623255724166,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputBytes":"1594","numOutputRows":"9"}}} +{"add":{"path":"part-00000-9f483b95-3ea3-44f0-b54d-73199574be15-c000.snappy.parquet","partitionValues":{},"size":793,"modificationTime":1623255724000,"dataChange":true,"stats":"{\"numRecords\":4,\"minValues\":{\"key\":1,\"value\":1},\"maxValues\":{\"key\":4,\"value\":4},\"nullCount\":{\"key\":0,\"value\":0}}"}} +{"add":{"path":"part-00001-d1030238-b55d-48f8-a4d6-89ef12e9d501-c000.snappy.parquet","partitionValues":{},"size":801,"modificationTime":1623255724000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"key\":5,\"value\":5},\"maxValues\":{\"key\":9,\"value\":9},\"nullCount\":{\"key\":0,\"value\":0}}"}} diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000004.json b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000004.json new file mode 100644 index 00000000000..fde7065801a --- /dev/null +++ b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000004.json @@ -0,0 +1,3 @@ +{"remove":{"path":"part-00000-9f483b95-3ea3-44f0-b54d-73199574be15-c000.snappy.parquet","deletionTimestamp":1623255727201,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":793}} +{"remove":{"path":"part-00001-d1030238-b55d-48f8-a4d6-89ef12e9d501-c000.snappy.parquet","deletionTimestamp":1623255727201,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":801}} +{"some_new_action":{"a":1}} diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000005.json b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000005.json new file mode 100644 index 00000000000..4710f2158a1 --- /dev/null +++ b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/00000000000000000005.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1623255724166,"operationMetrics":{"numFiles":"2","numOutputBytes":"1594","numOutputRows":"9"},"isolationLevel":"WriteSerializable","operationParameters":{"mode":"Append","partitionBy":"[]"},"operation":"WRITE","isBlindAppend":true,"readVersion":2},"some_new_action_alongside_add_action":["a","1"]} +{"add":{"stats":"{\"numRecords\":4,\"minValues\":{\"key\":1,\"value\":1},\"maxValues\":{\"key\":4,\"value\":4},\"nullCount\":{\"key\":0,\"value\":0}}","path":"part-00000-9f483b95-3ea3-44f0-b54d-73199574be15-c000.snappy.parquet","size":793,"modificationTime":1623255724000,"dataChange":true,"some_new_column_in_add_action":1,"partitionValues":{}},"some_new_action_alongside_add_action":["a","1"]} +{"add":{"stats":"{\"numRecords\":5,\"minValues\":{\"key\":5,\"value\":5},\"maxValues\":{\"key\":9,\"value\":9},\"nullCount\":{\"key\":0,\"value\":0}}","path":"part-00001-d1030238-b55d-48f8-a4d6-89ef12e9d501-c000.snappy.parquet","size":801,"modificationTime":1623255724000,"dataChange":true,"some_new_column_in_add_action":1,"partitionValues":{}},"some_new_action_alongside_add_action":["a","1"]} diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/_last_checkpoint b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..97fc976cdce --- 
/dev/null +++ b/spark/src/test/resources/delta/transaction_log_schema_evolvability/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":2,"size":6} diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00000-9f483b95-3ea3-44f0-b54d-73199574be15-c000.snappy.parquet b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00000-9f483b95-3ea3-44f0-b54d-73199574be15-c000.snappy.parquet new file mode 100644 index 00000000000..0cb04525fc9 Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00000-9f483b95-3ea3-44f0-b54d-73199574be15-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00000-dfb1dd9a-0fe2-420e-81d5-a84004aebcee-c000.snappy.parquet b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00000-dfb1dd9a-0fe2-420e-81d5-a84004aebcee-c000.snappy.parquet new file mode 100644 index 00000000000..0cb04525fc9 Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00000-dfb1dd9a-0fe2-420e-81d5-a84004aebcee-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00000-f654b1f4-e1ea-40e5-a8cd-452f7c3359d8-c000.snappy.parquet b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00000-f654b1f4-e1ea-40e5-a8cd-452f7c3359d8-c000.snappy.parquet new file mode 100644 index 00000000000..0cb04525fc9 Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00000-f654b1f4-e1ea-40e5-a8cd-452f7c3359d8-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00001-bfb08fc5-c967-40e4-a646-c8178d8b5e21-c000.snappy.parquet b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00001-bfb08fc5-c967-40e4-a646-c8178d8b5e21-c000.snappy.parquet new file mode 100644 index 00000000000..664e0cc8053 Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00001-bfb08fc5-c967-40e4-a646-c8178d8b5e21-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00001-d1030238-b55d-48f8-a4d6-89ef12e9d501-c000.snappy.parquet b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00001-d1030238-b55d-48f8-a4d6-89ef12e9d501-c000.snappy.parquet new file mode 100644 index 00000000000..664e0cc8053 Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00001-d1030238-b55d-48f8-a4d6-89ef12e9d501-c000.snappy.parquet differ diff --git a/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00001-d5da9c60-a615-4065-a3cb-4796d86fc797-c000.snappy.parquet b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00001-d5da9c60-a615-4065-a3cb-4796d86fc797-c000.snappy.parquet new file mode 100644 index 00000000000..664e0cc8053 Binary files /dev/null and b/spark/src/test/resources/delta/transaction_log_schema_evolvability/part-00001-d5da9c60-a615-4065-a3cb-4796d86fc797-c000.snappy.parquet differ diff --git a/spark/src/test/resources/hms/README.md b/spark/src/test/resources/hms/README.md new file mode 100644 index 00000000000..7c7e2e6cdb1 --- /dev/null +++ b/spark/src/test/resources/hms/README.md @@ -0,0 +1,3 @@ +The file `hive-schema-3.1.0.derby.sql` is copied from the Hive official repository. 
Hive MetaStore uses this file to create the database schema used by the metastore with Apache Derby. We use it for the same purpose, as the EmbeddedHMS is backed by Apache Derby. The original file can be found [here](https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/sql/derby/hive-schema-3.1.0.derby.sql). + +In the embedded HMS, we first create a derby instance, then load this script into derby to initialize the schema. See `org.apache.spark.sql.delta.uniform.ehms.EmbeddedHMS` for more details. diff --git a/spark/src/test/resources/hms/hive-schema-3.1.0.derby.sql b/spark/src/test/resources/hms/hive-schema-3.1.0.derby.sql new file mode 100644 index 00000000000..74582e1c632 --- /dev/null +++ b/spark/src/test/resources/hms/hive-schema-3.1.0.derby.sql @@ -0,0 +1,708 @@ +-- Timestamp: 2011-09-22 15:32:02.024 +-- Source database is: /home/carl/Work/repos/hive1/metastore/scripts/upgrade/derby/mdb +-- Connection URL is: jdbc:derby:/home/carl/Work/repos/hive1/metastore/scripts/upgrade/derby/mdb +-- Specified schema is: APP +-- appendLogs: false + +-- ---------------------------------------------- +-- DDL Statements for functions +-- ---------------------------------------------- + +CREATE FUNCTION "APP"."NUCLEUS_ASCII" (C CHAR(1)) RETURNS INTEGER LANGUAGE JAVA PARAMETER STYLE JAVA READS SQL DATA CALLED ON NULL INPUT EXTERNAL NAME 'org.datanucleus.store.rdbms.adapter.DerbySQLFunction.ascii' ; + +CREATE FUNCTION "APP"."NUCLEUS_MATCHES" (TEXT VARCHAR(8000),PATTERN VARCHAR(8000)) RETURNS INTEGER LANGUAGE JAVA PARAMETER STYLE JAVA READS SQL DATA CALLED ON NULL INPUT EXTERNAL NAME 'org.datanucleus.store.rdbms.adapter.DerbySQLFunction.matches' ; + +-- ---------------------------------------------- +-- DDL Statements for tables +-- ---------------------------------------------- +CREATE TABLE "APP"."DBS" ( + "DB_ID" BIGINT NOT NULL, + "DESC" VARCHAR(4000), + "DB_LOCATION_URI" VARCHAR(4000) NOT NULL, + "NAME" VARCHAR(128), + "OWNER_NAME" VARCHAR(128), + "OWNER_TYPE" VARCHAR(10), + "CTLG_NAME" VARCHAR(256) NOT NULL DEFAULT 'hive' +); + +CREATE TABLE "APP"."TBL_PRIVS" ("TBL_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_PRIV" VARCHAR(128), "TBL_ID" BIGINT, "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."DATABASE_PARAMS" ("DB_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(180) NOT NULL, "PARAM_VALUE" VARCHAR(4000)); + +CREATE TABLE "APP"."TBL_COL_PRIVS" ("TBL_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(767), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_COL_PRIV" VARCHAR(128), "TBL_ID" BIGINT, "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."SERDE_PARAMS" ("SERDE_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" CLOB); + +CREATE TABLE "APP"."COLUMNS_V2" ("CD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(4000), "COLUMN_NAME" VARCHAR(767) NOT NULL, "TYPE_NAME" CLOB, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(767), "ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."CDS" ("CD_ID" BIGINT NOT NULL); + +CREATE TABLE "APP"."PARTITION_KEY_VALS" ("PART_ID" BIGINT NOT NULL, "PART_KEY_VAL" VARCHAR(256), "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE
"APP"."DB_PRIVS" ("DB_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "DB_ID" BIGINT, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "DB_PRIV" VARCHAR(128), "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."IDXS" ("INDEX_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "DEFERRED_REBUILD" CHAR(1) NOT NULL, "INDEX_HANDLER_CLASS" VARCHAR(4000), "INDEX_NAME" VARCHAR(128), "INDEX_TBL_ID" BIGINT, "LAST_ACCESS_TIME" INTEGER NOT NULL, "ORIG_TBL_ID" BIGINT, "SD_ID" BIGINT); + +CREATE TABLE "APP"."INDEX_PARAMS" ("INDEX_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" VARCHAR(4000)); + +CREATE TABLE "APP"."PARTITIONS" ("PART_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "LAST_ACCESS_TIME" INTEGER NOT NULL, "PART_NAME" VARCHAR(767), "SD_ID" BIGINT, "TBL_ID" BIGINT); + +CREATE TABLE "APP"."SERDES" ("SERDE_ID" BIGINT NOT NULL, "NAME" VARCHAR(128), "SLIB" VARCHAR(4000), "DESCRIPTION" VARCHAR(4000), "SERIALIZER_CLASS" VARCHAR(4000), "DESERIALIZER_CLASS" VARCHAR(4000), SERDE_TYPE INTEGER); + +CREATE TABLE "APP"."PART_PRIVS" ("PART_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_PRIV" VARCHAR(128), "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."ROLE_MAP" ("ROLE_GRANT_ID" BIGINT NOT NULL, "ADD_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "ROLE_ID" BIGINT); + +CREATE TABLE "APP"."TYPES" ("TYPES_ID" BIGINT NOT NULL, "TYPE_NAME" VARCHAR(128), "TYPE1" VARCHAR(767), "TYPE2" VARCHAR(767)); + +CREATE TABLE "APP"."GLOBAL_PRIVS" ("USER_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "USER_PRIV" VARCHAR(128), "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."PARTITION_PARAMS" ("PART_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" VARCHAR(4000)); + +CREATE TABLE "APP"."PARTITION_EVENTS" ( + "PART_NAME_ID" BIGINT NOT NULL, + "CAT_NAME" VARCHAR(256), + "DB_NAME" VARCHAR(128), + "EVENT_TIME" BIGINT NOT NULL, + "EVENT_TYPE" INTEGER NOT NULL, + "PARTITION_NAME" VARCHAR(767), + "TBL_NAME" VARCHAR(256) +); + +CREATE TABLE "APP"."COLUMNS" ("SD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(256), "COLUMN_NAME" VARCHAR(128) NOT NULL, "TYPE_NAME" VARCHAR(4000) NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."ROLES" ("ROLE_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "OWNER_NAME" VARCHAR(128), "ROLE_NAME" VARCHAR(128)); + +CREATE TABLE "APP"."TBLS" ("TBL_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "DB_ID" BIGINT, "LAST_ACCESS_TIME" INTEGER NOT NULL, "OWNER" VARCHAR(767), "OWNER_TYPE" VARCHAR(10), "RETENTION" INTEGER NOT NULL, "SD_ID" BIGINT, "TBL_NAME" VARCHAR(256), "TBL_TYPE" VARCHAR(128), "VIEW_EXPANDED_TEXT" LONG VARCHAR, "VIEW_ORIGINAL_TEXT" LONG VARCHAR, "IS_REWRITE_ENABLED" CHAR(1) NOT NULL DEFAULT 'N'); + +CREATE TABLE "APP"."PARTITION_KEYS" ("TBL_ID" BIGINT NOT NULL, "PKEY_COMMENT" VARCHAR(4000), "PKEY_NAME" VARCHAR(128) NOT NULL, "PKEY_TYPE" VARCHAR(767) NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."PART_COL_PRIVS" ("PART_COLUMN_GRANT_ID" 
BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(767), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_COL_PRIV" VARCHAR(128), "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."SDS" ("SD_ID" BIGINT NOT NULL, "INPUT_FORMAT" VARCHAR(4000), "IS_COMPRESSED" CHAR(1) NOT NULL, "LOCATION" VARCHAR(4000), "NUM_BUCKETS" INTEGER NOT NULL, "OUTPUT_FORMAT" VARCHAR(4000), "SERDE_ID" BIGINT, "CD_ID" BIGINT, "IS_STOREDASSUBDIRECTORIES" CHAR(1) NOT NULL); + +CREATE TABLE "APP"."SEQUENCE_TABLE" ("SEQUENCE_NAME" VARCHAR(256) NOT NULL, "NEXT_VAL" BIGINT NOT NULL); + +CREATE TABLE "APP"."TAB_COL_STATS"( + "CAT_NAME" VARCHAR(256) NOT NULL, + "DB_NAME" VARCHAR(128) NOT NULL, + "TABLE_NAME" VARCHAR(256) NOT NULL, + "COLUMN_NAME" VARCHAR(767) NOT NULL, + "COLUMN_TYPE" VARCHAR(128) NOT NULL, + "LONG_LOW_VALUE" BIGINT, + "LONG_HIGH_VALUE" BIGINT, + "DOUBLE_LOW_VALUE" DOUBLE, + "DOUBLE_HIGH_VALUE" DOUBLE, + "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), + "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000), + "NUM_DISTINCTS" BIGINT, + "NUM_NULLS" BIGINT NOT NULL, + "AVG_COL_LEN" DOUBLE, + "MAX_COL_LEN" BIGINT, + "NUM_TRUES" BIGINT, + "NUM_FALSES" BIGINT, + "LAST_ANALYZED" BIGINT, + "CS_ID" BIGINT NOT NULL, + "TBL_ID" BIGINT NOT NULL, + "BIT_VECTOR" BLOB +); + +CREATE TABLE "APP"."TABLE_PARAMS" ("TBL_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" CLOB); + +CREATE TABLE "APP"."BUCKETING_COLS" ("SD_ID" BIGINT NOT NULL, "BUCKET_COL_NAME" VARCHAR(256), "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."TYPE_FIELDS" ("TYPE_NAME" BIGINT NOT NULL, "COMMENT" VARCHAR(256), "FIELD_NAME" VARCHAR(128) NOT NULL, "FIELD_TYPE" VARCHAR(767) NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."NUCLEUS_TABLES" ("CLASS_NAME" VARCHAR(128) NOT NULL, "TABLE_NAME" VARCHAR(128) NOT NULL, "TYPE" VARCHAR(4) NOT NULL, "OWNER" VARCHAR(2) NOT NULL, "VERSION" VARCHAR(20) NOT NULL, "INTERFACE_NAME" VARCHAR(256) DEFAULT NULL); + +CREATE TABLE "APP"."SD_PARAMS" ("SD_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" CLOB); + +CREATE TABLE "APP"."SKEWED_STRING_LIST" ("STRING_LIST_ID" BIGINT NOT NULL); + +CREATE TABLE "APP"."SKEWED_STRING_LIST_VALUES" ("STRING_LIST_ID" BIGINT NOT NULL, "STRING_LIST_VALUE" VARCHAR(256), "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."SKEWED_COL_NAMES" ("SD_ID" BIGINT NOT NULL, "SKEWED_COL_NAME" VARCHAR(256), "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."SKEWED_COL_VALUE_LOC_MAP" ("SD_ID" BIGINT NOT NULL, "STRING_LIST_ID_KID" BIGINT NOT NULL, "LOCATION" VARCHAR(4000)); + +CREATE TABLE "APP"."SKEWED_VALUES" ("SD_ID_OID" BIGINT NOT NULL, "STRING_LIST_ID_EID" BIGINT NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."MASTER_KEYS" ("KEY_ID" INTEGER NOT NULL generated always as identity (start with 1), "MASTER_KEY" VARCHAR(767)); + +CREATE TABLE "APP"."DELEGATION_TOKENS" ( "TOKEN_IDENT" VARCHAR(767) NOT NULL, "TOKEN" VARCHAR(767)); + +CREATE TABLE "APP"."PART_COL_STATS"( + "CAT_NAME" VARCHAR(256) NOT NULL, + "DB_NAME" VARCHAR(128) NOT NULL, + "TABLE_NAME" VARCHAR(256) NOT NULL, + "PARTITION_NAME" VARCHAR(767) NOT NULL, + "COLUMN_NAME" VARCHAR(767) NOT NULL, + "COLUMN_TYPE" VARCHAR(128) NOT NULL, + "LONG_LOW_VALUE" BIGINT, + "LONG_HIGH_VALUE" BIGINT, + "DOUBLE_LOW_VALUE" DOUBLE, + "DOUBLE_HIGH_VALUE" DOUBLE, + "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), + "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000), + "NUM_DISTINCTS" 
BIGINT, + "BIT_VECTOR" BLOB, + "NUM_NULLS" BIGINT NOT NULL, + "AVG_COL_LEN" DOUBLE, + "MAX_COL_LEN" BIGINT, + "NUM_TRUES" BIGINT, + "NUM_FALSES" BIGINT, + "LAST_ANALYZED" BIGINT, + "CS_ID" BIGINT NOT NULL, + "PART_ID" BIGINT NOT NULL +); + +CREATE TABLE "APP"."VERSION" ("VER_ID" BIGINT NOT NULL, "SCHEMA_VERSION" VARCHAR(127) NOT NULL, "VERSION_COMMENT" VARCHAR(255)); + +CREATE TABLE "APP"."FUNCS" ("FUNC_ID" BIGINT NOT NULL, "CLASS_NAME" VARCHAR(4000), "CREATE_TIME" INTEGER NOT NULL, "DB_ID" BIGINT, "FUNC_NAME" VARCHAR(128), "FUNC_TYPE" INTEGER NOT NULL, "OWNER_NAME" VARCHAR(128), "OWNER_TYPE" VARCHAR(10)); + +CREATE TABLE "APP"."FUNC_RU" ("FUNC_ID" BIGINT NOT NULL, "RESOURCE_TYPE" INTEGER NOT NULL, "RESOURCE_URI" VARCHAR(4000), "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."NOTIFICATION_LOG" ( + "NL_ID" BIGINT NOT NULL, + "CAT_NAME" VARCHAR(256), + "DB_NAME" VARCHAR(128), + "EVENT_ID" BIGINT NOT NULL, + "EVENT_TIME" INTEGER NOT NULL, + "EVENT_TYPE" VARCHAR(32) NOT NULL, + "MESSAGE" CLOB, + "TBL_NAME" VARCHAR(256), + "MESSAGE_FORMAT" VARCHAR(16) +); + +CREATE TABLE "APP"."NOTIFICATION_SEQUENCE" ("NNI_ID" BIGINT NOT NULL, "NEXT_EVENT_ID" BIGINT NOT NULL); + +CREATE TABLE "APP"."KEY_CONSTRAINTS" ("CHILD_CD_ID" BIGINT, "CHILD_INTEGER_IDX" INTEGER, "CHILD_TBL_ID" BIGINT, "PARENT_CD_ID" BIGINT , "PARENT_INTEGER_IDX" INTEGER, "PARENT_TBL_ID" BIGINT NOT NULL, "POSITION" BIGINT NOT NULL, "CONSTRAINT_NAME" VARCHAR(400) NOT NULL, "CONSTRAINT_TYPE" SMALLINT NOT NULL, "UPDATE_RULE" SMALLINT, "DELETE_RULE" SMALLINT, "ENABLE_VALIDATE_RELY" SMALLINT NOT NULL, "DEFAULT_VALUE" VARCHAR(400)); + +CREATE TABLE "APP"."METASTORE_DB_PROPERTIES" ("PROPERTY_KEY" VARCHAR(255) NOT NULL, "PROPERTY_VALUE" VARCHAR(1000) NOT NULL, "DESCRIPTION" VARCHAR(1000)); + +CREATE TABLE "APP"."WM_RESOURCEPLAN" (RP_ID BIGINT NOT NULL, NAME VARCHAR(128) NOT NULL, QUERY_PARALLELISM INTEGER, STATUS VARCHAR(20) NOT NULL, DEFAULT_POOL_ID BIGINT); + +CREATE TABLE "APP"."WM_POOL" (POOL_ID BIGINT NOT NULL, RP_ID BIGINT NOT NULL, PATH VARCHAR(1024) NOT NULL, ALLOC_FRACTION DOUBLE, QUERY_PARALLELISM INTEGER, SCHEDULING_POLICY VARCHAR(1024)); + +CREATE TABLE "APP"."WM_TRIGGER" (TRIGGER_ID BIGINT NOT NULL, RP_ID BIGINT NOT NULL, NAME VARCHAR(128) NOT NULL, TRIGGER_EXPRESSION VARCHAR(1024), ACTION_EXPRESSION VARCHAR(1024), IS_IN_UNMANAGED INTEGER NOT NULL DEFAULT 0); + +CREATE TABLE "APP"."WM_POOL_TO_TRIGGER" (POOL_ID BIGINT NOT NULL, TRIGGER_ID BIGINT NOT NULL); + +CREATE TABLE "APP"."WM_MAPPING" (MAPPING_ID BIGINT NOT NULL, RP_ID BIGINT NOT NULL, ENTITY_TYPE VARCHAR(128) NOT NULL, ENTITY_NAME VARCHAR(128) NOT NULL, POOL_ID BIGINT, ORDERING INTEGER); + +CREATE TABLE "APP"."MV_CREATION_METADATA" ( + "MV_CREATION_METADATA_ID" BIGINT NOT NULL, + "CAT_NAME" VARCHAR(256) NOT NULL, + "DB_NAME" VARCHAR(128) NOT NULL, + "TBL_NAME" VARCHAR(256) NOT NULL, + "TXN_LIST" CLOB, + "MATERIALIZATION_TIME" BIGINT NOT NULL +); + +CREATE TABLE "APP"."MV_TABLES_USED" ( + "MV_CREATION_METADATA_ID" BIGINT NOT NULL, + "TBL_ID" BIGINT NOT NULL +); + +CREATE TABLE "APP"."CTLGS" ( + "CTLG_ID" BIGINT NOT NULL, + "NAME" VARCHAR(256) UNIQUE, + "DESC" VARCHAR(4000), + "LOCATION_URI" VARCHAR(4000) NOT NULL); + +-- Insert a default value. The location is TBD. 
Hive will fix this when it starts +INSERT INTO "APP"."CTLGS" VALUES (1, 'hive', 'Default catalog for Hive', 'TBD'); + +-- ---------------------------------------------- +-- DML Statements +-- ---------------------------------------------- + +INSERT INTO "APP"."NOTIFICATION_SEQUENCE" ("NNI_ID", "NEXT_EVENT_ID") SELECT * FROM (VALUES (1,1)) tmp_table WHERE NOT EXISTS ( SELECT "NEXT_EVENT_ID" FROM "APP"."NOTIFICATION_SEQUENCE"); + +INSERT INTO "APP"."SEQUENCE_TABLE" ("SEQUENCE_NAME", "NEXT_VAL") SELECT * FROM (VALUES ('org.apache.hadoop.hive.metastore.model.MNotificationLog', 1)) tmp_table WHERE NOT EXISTS ( SELECT "NEXT_VAL" FROM "APP"."SEQUENCE_TABLE" WHERE "SEQUENCE_NAME" = 'org.apache.hadoop.hive.metastore.model.MNotificationLog'); + +-- ---------------------------------------------- +-- DDL Statements for indexes +-- ---------------------------------------------- + +CREATE UNIQUE INDEX "APP"."UNIQUEINDEX" ON "APP"."IDXS" ("INDEX_NAME", "ORIG_TBL_ID"); + +CREATE INDEX "APP"."TABLECOLUMNPRIVILEGEINDEX" ON "APP"."TBL_COL_PRIVS" ("AUTHORIZER", "TBL_ID", "COLUMN_NAME", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "TBL_COL_PRIV", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."DBPRIVILEGEINDEX" ON "APP"."DB_PRIVS" ("AUTHORIZER", "DB_ID", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "DB_PRIV", "GRANTOR", "GRANTOR_TYPE"); + +CREATE INDEX "APP"."PCS_STATS_IDX" ON "APP"."PART_COL_STATS" ("CAT_NAME", "DB_NAME","TABLE_NAME","COLUMN_NAME","PARTITION_NAME"); + +CREATE INDEX "APP"."TAB_COL_STATS_IDX" ON "APP"."TAB_COL_STATS" ("CAT_NAME", "DB_NAME", "TABLE_NAME", "COLUMN_NAME"); + +CREATE INDEX "APP"."PARTPRIVILEGEINDEX" ON "APP"."PART_PRIVS" ("AUTHORIZER", "PART_ID", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "PART_PRIV", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."ROLEENTITYINDEX" ON "APP"."ROLES" ("ROLE_NAME"); + +CREATE INDEX "APP"."TABLEPRIVILEGEINDEX" ON "APP"."TBL_PRIVS" ("AUTHORIZER", "TBL_ID", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "TBL_PRIV", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."UNIQUETABLE" ON "APP"."TBLS" ("TBL_NAME", "DB_ID"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_DATABASE" ON "APP"."DBS" ("NAME", "CTLG_NAME"); + +CREATE UNIQUE INDEX "APP"."USERROLEMAPINDEX" ON "APP"."ROLE_MAP" ("PRINCIPAL_NAME", "ROLE_ID", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."GLOBALPRIVILEGEINDEX" ON "APP"."GLOBAL_PRIVS" ("AUTHORIZER", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "USER_PRIV", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_TYPE" ON "APP"."TYPES" ("TYPE_NAME"); + +CREATE INDEX "APP"."PARTITIONCOLUMNPRIVILEGEINDEX" ON "APP"."PART_COL_PRIVS" ("AUTHORIZER", "PART_ID", "COLUMN_NAME", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "PART_COL_PRIV", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."UNIQUEPARTITION" ON "APP"."PARTITIONS" ("PART_NAME", "TBL_ID"); + +CREATE UNIQUE INDEX "APP"."UNIQUEFUNCTION" ON "APP"."FUNCS" ("FUNC_NAME", "DB_ID"); + +CREATE INDEX "APP"."FUNCS_N49" ON "APP"."FUNCS" ("DB_ID"); + +CREATE INDEX "APP"."FUNC_RU_N49" ON "APP"."FUNC_RU" ("FUNC_ID"); + +CREATE INDEX "APP"."CONSTRAINTS_PARENT_TBL_ID_INDEX" ON "APP"."KEY_CONSTRAINTS"("PARENT_TBL_ID"); + +CREATE INDEX "APP"."CONSTRAINTS_CONSTRAINT_TYPE_INDEX" ON "APP"."KEY_CONSTRAINTS"("CONSTRAINT_TYPE"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_WM_RESOURCEPLAN" ON "APP"."WM_RESOURCEPLAN" ("NAME"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_WM_POOL" ON "APP"."WM_POOL" ("RP_ID", "PATH"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_WM_TRIGGER" ON "APP"."WM_TRIGGER" ("RP_ID", "NAME"); + +CREATE UNIQUE INDEX 
"APP"."UNIQUE_WM_MAPPING" ON "APP"."WM_MAPPING" ("RP_ID", "ENTITY_TYPE", "ENTITY_NAME"); + +CREATE UNIQUE INDEX "APP"."MV_UNIQUE_TABLE" ON "APP"."MV_CREATION_METADATA" ("TBL_NAME", "DB_NAME"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_CATALOG" ON "APP"."CTLGS" ("NAME"); + + +-- ---------------------------------------------- +-- DDL Statements for keys +-- ---------------------------------------------- + +-- primary/unique +ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "IDXS_PK" PRIMARY KEY ("INDEX_ID"); + +ALTER TABLE "APP"."TBL_COL_PRIVS" ADD CONSTRAINT "TBL_COL_PRIVS_PK" PRIMARY KEY ("TBL_COLUMN_GRANT_ID"); + +ALTER TABLE "APP"."CDS" ADD CONSTRAINT "SQL110922153006460" PRIMARY KEY ("CD_ID"); + +ALTER TABLE "APP"."DB_PRIVS" ADD CONSTRAINT "DB_PRIVS_PK" PRIMARY KEY ("DB_GRANT_ID"); + +ALTER TABLE "APP"."INDEX_PARAMS" ADD CONSTRAINT "INDEX_PARAMS_PK" PRIMARY KEY ("INDEX_ID", "PARAM_KEY"); + +ALTER TABLE "APP"."PARTITION_KEYS" ADD CONSTRAINT "PARTITION_KEY_PK" PRIMARY KEY ("TBL_ID", "PKEY_NAME"); + +ALTER TABLE "APP"."SEQUENCE_TABLE" ADD CONSTRAINT "SEQUENCE_TABLE_PK" PRIMARY KEY ("SEQUENCE_NAME"); + +ALTER TABLE "APP"."PART_PRIVS" ADD CONSTRAINT "PART_PRIVS_PK" PRIMARY KEY ("PART_GRANT_ID"); + +ALTER TABLE "APP"."SDS" ADD CONSTRAINT "SDS_PK" PRIMARY KEY ("SD_ID"); + +ALTER TABLE "APP"."SERDES" ADD CONSTRAINT "SERDES_PK" PRIMARY KEY ("SERDE_ID"); + +ALTER TABLE "APP"."COLUMNS" ADD CONSTRAINT "COLUMNS_PK" PRIMARY KEY ("SD_ID", "COLUMN_NAME"); + +ALTER TABLE "APP"."PARTITION_EVENTS" ADD CONSTRAINT "PARTITION_EVENTS_PK" PRIMARY KEY ("PART_NAME_ID"); + +ALTER TABLE "APP"."TYPE_FIELDS" ADD CONSTRAINT "TYPE_FIELDS_PK" PRIMARY KEY ("TYPE_NAME", "FIELD_NAME"); + +ALTER TABLE "APP"."ROLES" ADD CONSTRAINT "ROLES_PK" PRIMARY KEY ("ROLE_ID"); + +ALTER TABLE "APP"."TBL_PRIVS" ADD CONSTRAINT "TBL_PRIVS_PK" PRIMARY KEY ("TBL_GRANT_ID"); + +ALTER TABLE "APP"."SERDE_PARAMS" ADD CONSTRAINT "SERDE_PARAMS_PK" PRIMARY KEY ("SERDE_ID", "PARAM_KEY"); + +ALTER TABLE "APP"."NUCLEUS_TABLES" ADD CONSTRAINT "NUCLEUS_TABLES_PK" PRIMARY KEY ("CLASS_NAME"); + +ALTER TABLE "APP"."TBLS" ADD CONSTRAINT "TBLS_PK" PRIMARY KEY ("TBL_ID"); + +ALTER TABLE "APP"."SD_PARAMS" ADD CONSTRAINT "SD_PARAMS_PK" PRIMARY KEY ("SD_ID", "PARAM_KEY"); + +ALTER TABLE "APP"."DATABASE_PARAMS" ADD CONSTRAINT "DATABASE_PARAMS_PK" PRIMARY KEY ("DB_ID", "PARAM_KEY"); + +ALTER TABLE "APP"."DBS" ADD CONSTRAINT "DBS_PK" PRIMARY KEY ("DB_ID"); + +ALTER TABLE "APP"."ROLE_MAP" ADD CONSTRAINT "ROLE_MAP_PK" PRIMARY KEY ("ROLE_GRANT_ID"); + +ALTER TABLE "APP"."GLOBAL_PRIVS" ADD CONSTRAINT "GLOBAL_PRIVS_PK" PRIMARY KEY ("USER_GRANT_ID"); + +ALTER TABLE "APP"."BUCKETING_COLS" ADD CONSTRAINT "BUCKETING_COLS_PK" PRIMARY KEY ("SD_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."SORT_COLS" ADD CONSTRAINT "SORT_COLS_PK" PRIMARY KEY ("SD_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."PARTITION_KEY_VALS" ADD CONSTRAINT "PARTITION_KEY_VALS_PK" PRIMARY KEY ("PART_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."TYPES" ADD CONSTRAINT "TYPES_PK" PRIMARY KEY ("TYPES_ID"); + +ALTER TABLE "APP"."COLUMNS_V2" ADD CONSTRAINT "SQL110922153006740" PRIMARY KEY ("CD_ID", "COLUMN_NAME"); + +ALTER TABLE "APP"."PART_COL_PRIVS" ADD CONSTRAINT "PART_COL_PRIVS_PK" PRIMARY KEY ("PART_COLUMN_GRANT_ID"); + +ALTER TABLE "APP"."PARTITION_PARAMS" ADD CONSTRAINT "PARTITION_PARAMS_PK" PRIMARY KEY ("PART_ID", "PARAM_KEY"); + +ALTER TABLE "APP"."PARTITIONS" ADD CONSTRAINT "PARTITIONS_PK" PRIMARY KEY ("PART_ID"); + +ALTER TABLE "APP"."TABLE_PARAMS" ADD CONSTRAINT "TABLE_PARAMS_PK" PRIMARY KEY ("TBL_ID", "PARAM_KEY"); + 
+ALTER TABLE "APP"."SKEWED_STRING_LIST" ADD CONSTRAINT "SKEWED_STRING_LIST_PK" PRIMARY KEY ("STRING_LIST_ID"); + +ALTER TABLE "APP"."SKEWED_STRING_LIST_VALUES" ADD CONSTRAINT "SKEWED_STRING_LIST_VALUES_PK" PRIMARY KEY ("STRING_LIST_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."SKEWED_COL_NAMES" ADD CONSTRAINT "SKEWED_COL_NAMES_PK" PRIMARY KEY ("SD_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."SKEWED_COL_VALUE_LOC_MAP" ADD CONSTRAINT "SKEWED_COL_VALUE_LOC_MAP_PK" PRIMARY KEY ("SD_ID", "STRING_LIST_ID_KID"); + +ALTER TABLE "APP"."SKEWED_VALUES" ADD CONSTRAINT "SKEWED_VALUES_PK" PRIMARY KEY ("SD_ID_OID", "INTEGER_IDX"); + +ALTER TABLE "APP"."TAB_COL_STATS" ADD CONSTRAINT "TAB_COL_STATS_PK" PRIMARY KEY ("CS_ID"); + +ALTER TABLE "APP"."PART_COL_STATS" ADD CONSTRAINT "PART_COL_STATS_PK" PRIMARY KEY ("CS_ID"); + +ALTER TABLE "APP"."FUNCS" ADD CONSTRAINT "FUNCS_PK" PRIMARY KEY ("FUNC_ID"); + +ALTER TABLE "APP"."FUNC_RU" ADD CONSTRAINT "FUNC_RU_PK" PRIMARY KEY ("FUNC_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."NOTIFICATION_LOG" ADD CONSTRAINT "NOTIFICATION_LOG_PK" PRIMARY KEY ("NL_ID"); + +ALTER TABLE "APP"."NOTIFICATION_SEQUENCE" ADD CONSTRAINT "NOTIFICATION_SEQUENCE_PK" PRIMARY KEY ("NNI_ID"); + +ALTER TABLE "APP"."KEY_CONSTRAINTS" ADD CONSTRAINT "CONSTRAINTS_PK" PRIMARY KEY ("CONSTRAINT_NAME", "POSITION"); + +ALTER TABLE "APP"."METASTORE_DB_PROPERTIES" ADD CONSTRAINT "PROPERTY_KEY_PK" PRIMARY KEY ("PROPERTY_KEY"); + +ALTER TABLE "APP"."MV_CREATION_METADATA" ADD CONSTRAINT "MV_CREATION_METADATA_PK" PRIMARY KEY ("MV_CREATION_METADATA_ID"); + +ALTER TABLE "APP"."CTLGS" ADD CONSTRAINT "CTLG_PK" PRIMARY KEY ("CTLG_ID"); + + +-- foreign +ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "IDXS_FK1" FOREIGN KEY ("ORIG_TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "IDXS_FK2" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "IDXS_FK3" FOREIGN KEY ("INDEX_TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TBL_COL_PRIVS" ADD CONSTRAINT "TBL_COL_PRIVS_FK1" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."DB_PRIVS" ADD CONSTRAINT "DB_PRIVS_FK1" FOREIGN KEY ("DB_ID") REFERENCES "APP"."DBS" ("DB_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."INDEX_PARAMS" ADD CONSTRAINT "INDEX_PARAMS_FK1" FOREIGN KEY ("INDEX_ID") REFERENCES "APP"."IDXS" ("INDEX_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PARTITION_KEYS" ADD CONSTRAINT "PARTITION_KEYS_FK1" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PART_PRIVS" ADD CONSTRAINT "PART_PRIVS_FK1" FOREIGN KEY ("PART_ID") REFERENCES "APP"."PARTITIONS" ("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SDS" ADD CONSTRAINT "SDS_FK1" FOREIGN KEY ("SERDE_ID") REFERENCES "APP"."SERDES" ("SERDE_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SDS" ADD CONSTRAINT "SDS_FK2" FOREIGN KEY ("CD_ID") REFERENCES "APP"."CDS" ("CD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."COLUMNS" ADD CONSTRAINT "COLUMNS_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TYPE_FIELDS" ADD CONSTRAINT "TYPE_FIELDS_FK1" FOREIGN KEY ("TYPE_NAME") REFERENCES "APP"."TYPES" ("TYPES_ID") ON 
DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TBL_PRIVS" ADD CONSTRAINT "TBL_PRIVS_FK1" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SERDE_PARAMS" ADD CONSTRAINT "SERDE_PARAMS_FK1" FOREIGN KEY ("SERDE_ID") REFERENCES "APP"."SERDES" ("SERDE_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TBLS" ADD CONSTRAINT "TBLS_FK2" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TBLS" ADD CONSTRAINT "TBLS_FK1" FOREIGN KEY ("DB_ID") REFERENCES "APP"."DBS" ("DB_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."DBS" ADD CONSTRAINT "DBS_FK1" FOREIGN KEY ("CTLG_NAME") REFERENCES "APP"."CTLGS" ("NAME") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SD_PARAMS" ADD CONSTRAINT "SD_PARAMS_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."DATABASE_PARAMS" ADD CONSTRAINT "DATABASE_PARAMS_FK1" FOREIGN KEY ("DB_ID") REFERENCES "APP"."DBS" ("DB_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."ROLE_MAP" ADD CONSTRAINT "ROLE_MAP_FK1" FOREIGN KEY ("ROLE_ID") REFERENCES "APP"."ROLES" ("ROLE_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."BUCKETING_COLS" ADD CONSTRAINT "BUCKETING_COLS_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SORT_COLS" ADD CONSTRAINT "SORT_COLS_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PARTITION_KEY_VALS" ADD CONSTRAINT "PARTITION_KEY_VALS_FK1" FOREIGN KEY ("PART_ID") REFERENCES "APP"."PARTITIONS" ("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."COLUMNS_V2" ADD CONSTRAINT "COLUMNS_V2_FK1" FOREIGN KEY ("CD_ID") REFERENCES "APP"."CDS" ("CD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PART_COL_PRIVS" ADD CONSTRAINT "PART_COL_PRIVS_FK1" FOREIGN KEY ("PART_ID") REFERENCES "APP"."PARTITIONS" ("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PARTITION_PARAMS" ADD CONSTRAINT "PARTITION_PARAMS_FK1" FOREIGN KEY ("PART_ID") REFERENCES "APP"."PARTITIONS" ("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PARTITIONS" ADD CONSTRAINT "PARTITIONS_FK1" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PARTITIONS" ADD CONSTRAINT "PARTITIONS_FK2" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TABLE_PARAMS" ADD CONSTRAINT "TABLE_PARAMS_FK1" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_STRING_LIST_VALUES" ADD CONSTRAINT "SKEWED_STRING_LIST_VALUES_FK1" FOREIGN KEY ("STRING_LIST_ID") REFERENCES "APP"."SKEWED_STRING_LIST" ("STRING_LIST_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_COL_NAMES" ADD CONSTRAINT "SKEWED_COL_NAMES_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_COL_VALUE_LOC_MAP" ADD CONSTRAINT "SKEWED_COL_VALUE_LOC_MAP_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_COL_VALUE_LOC_MAP" ADD CONSTRAINT "SKEWED_COL_VALUE_LOC_MAP_FK2" 
FOREIGN KEY ("STRING_LIST_ID_KID") REFERENCES "APP"."SKEWED_STRING_LIST" ("STRING_LIST_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_VALUES" ADD CONSTRAINT "SKEWED_VALUES_FK1" FOREIGN KEY ("SD_ID_OID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_VALUES" ADD CONSTRAINT "SKEWED_VALUES_FK2" FOREIGN KEY ("STRING_LIST_ID_EID") REFERENCES "APP"."SKEWED_STRING_LIST" ("STRING_LIST_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TAB_COL_STATS" ADD CONSTRAINT "TAB_COL_STATS_FK" FOREIGN KEY ("TBL_ID") REFERENCES TBLS("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PART_COL_STATS" ADD CONSTRAINT "PART_COL_STATS_FK" FOREIGN KEY ("PART_ID") REFERENCES PARTITIONS("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."VERSION" ADD CONSTRAINT "VERSION_PK" PRIMARY KEY ("VER_ID"); + +ALTER TABLE "APP"."FUNCS" ADD CONSTRAINT "FUNCS_FK1" FOREIGN KEY ("DB_ID") REFERENCES "APP"."DBS" ("DB_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."FUNC_RU" ADD CONSTRAINT "FUNC_RU_FK1" FOREIGN KEY ("FUNC_ID") REFERENCES "APP"."FUNCS" ("FUNC_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_RESOURCEPLAN" ADD CONSTRAINT "WM_RESOURCEPLAN_PK" PRIMARY KEY ("RP_ID"); + +ALTER TABLE "APP"."WM_POOL" ADD CONSTRAINT "WM_POOL_PK" PRIMARY KEY ("POOL_ID"); + +ALTER TABLE "APP"."WM_POOL" ADD CONSTRAINT "WM_POOL_FK1" FOREIGN KEY ("RP_ID") REFERENCES "APP"."WM_RESOURCEPLAN" ("RP_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_RESOURCEPLAN" ADD CONSTRAINT "WM_RESOURCEPLAN_FK1" FOREIGN KEY ("DEFAULT_POOL_ID") REFERENCES "APP"."WM_POOL" ("POOL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_TRIGGER" ADD CONSTRAINT "WM_TRIGGER_PK" PRIMARY KEY ("TRIGGER_ID"); + +ALTER TABLE "APP"."WM_TRIGGER" ADD CONSTRAINT "WM_TRIGGER_FK1" FOREIGN KEY ("RP_ID") REFERENCES "APP"."WM_RESOURCEPLAN" ("RP_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_POOL_TO_TRIGGER" ADD CONSTRAINT "WM_POOL_TO_TRIGGER_FK1" FOREIGN KEY ("POOL_ID") REFERENCES "APP"."WM_POOL" ("POOL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_POOL_TO_TRIGGER" ADD CONSTRAINT "WM_POOL_TO_TRIGGER_FK2" FOREIGN KEY ("TRIGGER_ID") REFERENCES "APP"."WM_TRIGGER" ("TRIGGER_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_MAPPING" ADD CONSTRAINT "WM_MAPPING_PK" PRIMARY KEY ("MAPPING_ID"); + +ALTER TABLE "APP"."WM_MAPPING" ADD CONSTRAINT "WM_MAPPING_FK1" FOREIGN KEY ("RP_ID") REFERENCES "APP"."WM_RESOURCEPLAN" ("RP_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_MAPPING" ADD CONSTRAINT "WM_MAPPING_FK2" FOREIGN KEY ("POOL_ID") REFERENCES "APP"."WM_POOL" ("POOL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."MV_TABLES_USED" ADD CONSTRAINT "MV_TABLES_USED_FK1" FOREIGN KEY ("MV_CREATION_METADATA_ID") REFERENCES "APP"."MV_CREATION_METADATA" ("MV_CREATION_METADATA_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."MV_TABLES_USED" ADD CONSTRAINT "MV_TABLES_USED_FK2" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."DBS" ADD CONSTRAINT "DBS_CTLG_FK" FOREIGN KEY ("CTLG_NAME") REFERENCES "APP"."CTLGS" ("NAME") ON DELETE NO ACTION ON UPDATE NO ACTION; + +-- ---------------------------------------------- +-- DDL Statements for checks +-- ---------------------------------------------- + 
+ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "SQL110318025504980" CHECK (DEFERRED_REBUILD IN ('Y','N')); + +ALTER TABLE "APP"."SDS" ADD CONSTRAINT "SQL110318025505550" CHECK (IS_COMPRESSED IN ('Y','N')); + +-- ---------------------------- +-- Transaction and Lock Tables +-- ---------------------------- +CREATE TABLE TXNS ( + TXN_ID bigint PRIMARY KEY, + TXN_STATE char(1) NOT NULL, + TXN_STARTED bigint NOT NULL, + TXN_LAST_HEARTBEAT bigint NOT NULL, + TXN_USER varchar(128) NOT NULL, + TXN_HOST varchar(128) NOT NULL, + TXN_AGENT_INFO varchar(128), + TXN_META_INFO varchar(128), + TXN_HEARTBEAT_COUNT integer, + TXN_TYPE integer +); + +CREATE TABLE TXN_COMPONENTS ( + TC_TXNID bigint NOT NULL REFERENCES TXNS (TXN_ID), + TC_DATABASE varchar(128) NOT NULL, + TC_TABLE varchar(128), + TC_PARTITION varchar(767), + TC_OPERATION_TYPE char(1) NOT NULL, + TC_WRITEID bigint +); + +CREATE INDEX TC_TXNID_INDEX ON TXN_COMPONENTS (TC_TXNID); + +CREATE TABLE COMPLETED_TXN_COMPONENTS ( + CTC_TXNID bigint NOT NULL, + CTC_DATABASE varchar(128) NOT NULL, + CTC_TABLE varchar(256), + CTC_PARTITION varchar(767), + CTC_TIMESTAMP timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, + CTC_WRITEID bigint, + CTC_UPDATE_DELETE char(1) NOT NULL +); + +CREATE INDEX COMPLETED_TXN_COMPONENTS_IDX ON COMPLETED_TXN_COMPONENTS (CTC_DATABASE, CTC_TABLE, CTC_PARTITION); + +CREATE TABLE NEXT_TXN_ID ( + NTXN_NEXT bigint NOT NULL +); +INSERT INTO NEXT_TXN_ID VALUES(1); + +CREATE TABLE HIVE_LOCKS ( + HL_LOCK_EXT_ID bigint NOT NULL, + HL_LOCK_INT_ID bigint NOT NULL, + HL_TXNID bigint NOT NULL, + HL_DB varchar(128) NOT NULL, + HL_TABLE varchar(128), + HL_PARTITION varchar(767), + HL_LOCK_STATE char(1) NOT NULL, + HL_LOCK_TYPE char(1) NOT NULL, + HL_LAST_HEARTBEAT bigint NOT NULL, + HL_ACQUIRED_AT bigint, + HL_USER varchar(128) NOT NULL, + HL_HOST varchar(128) NOT NULL, + HL_HEARTBEAT_COUNT integer, + HL_AGENT_INFO varchar(128), + HL_BLOCKEDBY_EXT_ID bigint, + HL_BLOCKEDBY_INT_ID bigint, + PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID) +); + +CREATE INDEX HL_TXNID_INDEX ON HIVE_LOCKS (HL_TXNID); + +CREATE TABLE NEXT_LOCK_ID ( + NL_NEXT bigint NOT NULL +); +INSERT INTO NEXT_LOCK_ID VALUES(1); + +CREATE TABLE COMPACTION_QUEUE ( + CQ_ID bigint PRIMARY KEY, + CQ_DATABASE varchar(128) NOT NULL, + CQ_TABLE varchar(128) NOT NULL, + CQ_PARTITION varchar(767), + CQ_STATE char(1) NOT NULL, + CQ_TYPE char(1) NOT NULL, + CQ_TBLPROPERTIES varchar(2048), + CQ_WORKER_ID varchar(128), + CQ_START bigint, + CQ_RUN_AS varchar(128), + CQ_HIGHEST_WRITE_ID bigint, + CQ_META_INFO varchar(2048) for bit data, + CQ_HADOOP_JOB_ID varchar(32) +); + +CREATE TABLE NEXT_COMPACTION_QUEUE_ID ( + NCQ_NEXT bigint NOT NULL +); +INSERT INTO NEXT_COMPACTION_QUEUE_ID VALUES(1); + +CREATE TABLE COMPLETED_COMPACTIONS ( + CC_ID bigint PRIMARY KEY, + CC_DATABASE varchar(128) NOT NULL, + CC_TABLE varchar(128) NOT NULL, + CC_PARTITION varchar(767), + CC_STATE char(1) NOT NULL, + CC_TYPE char(1) NOT NULL, + CC_TBLPROPERTIES varchar(2048), + CC_WORKER_ID varchar(128), + CC_START bigint, + CC_END bigint, + CC_RUN_AS varchar(128), + CC_HIGHEST_WRITE_ID bigint, + CC_META_INFO varchar(2048) for bit data, + CC_HADOOP_JOB_ID varchar(32) +); + +CREATE TABLE AUX_TABLE ( + MT_KEY1 varchar(128) NOT NULL, + MT_KEY2 bigint NOT NULL, + MT_COMMENT varchar(255), + PRIMARY KEY(MT_KEY1, MT_KEY2) +); + +--1st 4 cols make up a PK but since WS_PARTITION is nullable we can't declare such PK +--This is a good candidate for Index orgainzed table +CREATE TABLE WRITE_SET ( + WS_DATABASE varchar(128) NOT NULL, + WS_TABLE 
varchar(128) NOT NULL, + WS_PARTITION varchar(767), + WS_TXNID bigint NOT NULL, + WS_COMMIT_ID bigint NOT NULL, + WS_OPERATION_TYPE char(1) NOT NULL +); + +CREATE TABLE TXN_TO_WRITE_ID ( + T2W_TXNID bigint NOT NULL, + T2W_DATABASE varchar(128) NOT NULL, + T2W_TABLE varchar(256) NOT NULL, + T2W_WRITEID bigint NOT NULL +); + +CREATE UNIQUE INDEX TBL_TO_TXN_ID_IDX ON TXN_TO_WRITE_ID (T2W_DATABASE, T2W_TABLE, T2W_TXNID); +CREATE UNIQUE INDEX TBL_TO_WRITE_ID_IDX ON TXN_TO_WRITE_ID (T2W_DATABASE, T2W_TABLE, T2W_WRITEID); + +CREATE TABLE NEXT_WRITE_ID ( + NWI_DATABASE varchar(128) NOT NULL, + NWI_TABLE varchar(256) NOT NULL, + NWI_NEXT bigint NOT NULL +); + +CREATE UNIQUE INDEX NEXT_WRITE_ID_IDX ON NEXT_WRITE_ID (NWI_DATABASE, NWI_TABLE); + +CREATE TABLE MIN_HISTORY_LEVEL ( + MHL_TXNID bigint NOT NULL, + MHL_MIN_OPEN_TXNID bigint NOT NULL, + PRIMARY KEY(MHL_TXNID) +); + +CREATE INDEX MIN_HISTORY_LEVEL_IDX ON MIN_HISTORY_LEVEL (MHL_MIN_OPEN_TXNID); + +CREATE TABLE MATERIALIZATION_REBUILD_LOCKS ( + MRL_TXN_ID BIGINT NOT NULL, + MRL_DB_NAME VARCHAR(128) NOT NULL, + MRL_TBL_NAME VARCHAR(256) NOT NULL, + MRL_LAST_HEARTBEAT BIGINT NOT NULL, + PRIMARY KEY(MRL_TXN_ID) +); + +CREATE TABLE "APP"."I_SCHEMA" ( + "SCHEMA_ID" bigint primary key, + "SCHEMA_TYPE" integer not null, + "NAME" varchar(256) unique, + "DB_ID" bigint references "APP"."DBS" ("DB_ID"), + "COMPATIBILITY" integer not null, + "VALIDATION_LEVEL" integer not null, + "CAN_EVOLVE" char(1) not null, + "SCHEMA_GROUP" varchar(256), + "DESCRIPTION" varchar(4000) +); + +CREATE TABLE "APP"."SCHEMA_VERSION" ( + "SCHEMA_VERSION_ID" bigint primary key, + "SCHEMA_ID" bigint references "APP"."I_SCHEMA" ("SCHEMA_ID"), + "VERSION" integer not null, + "CREATED_AT" bigint not null, + "CD_ID" bigint references "APP"."CDS" ("CD_ID"), + "STATE" integer not null, + "DESCRIPTION" varchar(4000), + "SCHEMA_TEXT" clob, + "FINGERPRINT" varchar(256), + "SCHEMA_VERSION_NAME" varchar(256), + "SERDE_ID" bigint references "APP"."SERDES" ("SERDE_ID") +); + +CREATE UNIQUE INDEX "APP"."UNIQUE_SCHEMA_VERSION" ON "APP"."SCHEMA_VERSION" ("SCHEMA_ID", "VERSION"); + +CREATE TABLE REPL_TXN_MAP ( + RTM_REPL_POLICY varchar(256) NOT NULL, + RTM_SRC_TXN_ID bigint NOT NULL, + RTM_TARGET_TXN_ID bigint NOT NULL, + PRIMARY KEY (RTM_REPL_POLICY, RTM_SRC_TXN_ID) +); + +CREATE TABLE "APP"."RUNTIME_STATS" ( + "RS_ID" bigint primary key, + "CREATE_TIME" integer not null, + "WEIGHT" integer not null, + "PAYLOAD" BLOB +); + +CREATE INDEX IDX_RUNTIME_STATS_CREATE_TIME ON RUNTIME_STATS(CREATE_TIME); + +-- ----------------------------------------------------------------- +-- Record schema version. Should be the last step in the init script +-- ----------------------------------------------------------------- +INSERT INTO "APP"."VERSION" (VER_ID, SCHEMA_VERSION, VERSION_COMMENT) VALUES (1, '3.1.0', 'Hive release version 3.1.0'); diff --git a/spark/src/test/resources/log4j2.properties b/spark/src/test/resources/log4j2.properties new file mode 100644 index 00000000000..838cda46d9f --- /dev/null +++ b/spark/src/test/resources/log4j2.properties @@ -0,0 +1,53 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Set everything to be logged to the file target/unit-tests.log +rootLogger.level = info +rootLogger.appenderRef.file.ref = ${sys:test.appender:-File} + +appender.file.type = File +appender.file.name = File +appender.file.fileName = target/unit-tests.log +appender.file.append = true +appender.file.layout.type = PatternLayout +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n + +# Tests that launch java subprocesses can set the "test.appender" system property to +# "console" to avoid having the child process's logs overwrite the unit test's +# log file. +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n + +# Ignore messages below warning level from Jetty, because it's a bit verbose +logger.jetty.name = org.sparkproject.jetty +logger.jetty.level = warn + diff --git a/spark/src/test/scala/io/delta/exceptions/DeltaConcurrentExceptionsSuite.scala b/spark/src/test/scala/io/delta/exceptions/DeltaConcurrentExceptionsSuite.scala new file mode 100644 index 00000000000..2a970ea6508 --- /dev/null +++ b/spark/src/test/scala/io/delta/exceptions/DeltaConcurrentExceptionsSuite.scala @@ -0,0 +1,183 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.exceptions + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.test.SharedSparkSession + +class DeltaConcurrentExceptionsSuite extends SparkFunSuite with SharedSparkSession { + + test("test ConcurrentWriteException") { + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors.concurrentWriteException(None) + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors.concurrentWriteException(None) + } + + intercept[org.apache.spark.sql.delta.ConcurrentWriteException] { + throw org.apache.spark.sql.delta.DeltaErrors.concurrentWriteException(None) + } + + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ConcurrentWriteException(None) + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ConcurrentWriteException(None) + } + } + + test("test MetadataChangedException") { + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors.metadataChangedException(None) + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors.metadataChangedException(None) + } + + intercept[org.apache.spark.sql.delta.MetadataChangedException] { + throw org.apache.spark.sql.delta.DeltaErrors.metadataChangedException(None) + } + + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.MetadataChangedException(None) + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.MetadataChangedException(None) + } + } + + test("test ProtocolChangedException") { + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors.protocolChangedException(None) + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors.protocolChangedException(None) + } + + intercept[org.apache.spark.sql.delta.ProtocolChangedException] { + throw org.apache.spark.sql.delta.DeltaErrors.protocolChangedException(None) + } + + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ProtocolChangedException(None) + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ProtocolChangedException(None) + } + } + + test("test ConcurrentAppendException") { + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors.concurrentAppendException(None, "") + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors.concurrentAppendException(None, "") + } + + intercept[org.apache.spark.sql.delta.ConcurrentAppendException] { + throw org.apache.spark.sql.delta.DeltaErrors.concurrentAppendException(None, "") + } + + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ConcurrentAppendException(None, "") + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ConcurrentAppendException(None, "") + } + } + + test("test 
ConcurrentDeleteReadException") { + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors. + concurrentDeleteReadException(None, "") + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors + .concurrentDeleteReadException(None, "") + } + + intercept[org.apache.spark.sql.delta.ConcurrentDeleteReadException] { + throw org.apache.spark.sql.delta.DeltaErrors + .concurrentDeleteReadException(None, "") + } + + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ConcurrentDeleteReadException(None, "") + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ConcurrentDeleteReadException(None, "") + } + } + + test("test ConcurrentDeleteDeleteException") { + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors + .concurrentDeleteDeleteException(None, "") + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors + .concurrentDeleteDeleteException(None, "") + } + + intercept[org.apache.spark.sql.delta.ConcurrentDeleteDeleteException] { + throw org.apache.spark.sql.delta.DeltaErrors + .concurrentDeleteDeleteException(None, "") + } + + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ConcurrentDeleteDeleteException(None, "") + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ConcurrentDeleteDeleteException(None, "") + } + } + + test("test ConcurrentTransactionException") { + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors.concurrentTransactionException(None) + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw org.apache.spark.sql.delta.DeltaErrors.concurrentTransactionException(None) + } + + intercept[org.apache.spark.sql.delta.ConcurrentTransactionException] { + throw org.apache.spark.sql.delta.DeltaErrors.concurrentTransactionException(None) + } + + intercept[org.apache.spark.sql.delta.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ConcurrentTransactionException(None) + } + + intercept[io.delta.exceptions.DeltaConcurrentModificationException] { + throw new org.apache.spark.sql.delta.ConcurrentTransactionException(None) + } + } +} diff --git a/spark/src/test/scala/io/delta/sql/DeltaExtensionAndCatalogSuite.scala b/spark/src/test/scala/io/delta/sql/DeltaExtensionAndCatalogSuite.scala new file mode 100644 index 00000000000..4d57ec54623 --- /dev/null +++ b/spark/src/test/scala/io/delta/sql/DeltaExtensionAndCatalogSuite.scala @@ -0,0 +1,153 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sql + +import java.nio.file.Files + +import org.apache.spark.sql.delta.{DeltaAnalysisException, DeltaLog} +import org.apache.spark.sql.delta.catalog.DeltaCatalog +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import io.delta.tables.DeltaTable +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkFunSuite +import org.apache.spark.network.util.JavaUtils +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf + +class DeltaExtensionAndCatalogSuite extends SparkFunSuite { + + private def createTempDir(): String = { + val dir = Files.createTempDirectory("DeltaSparkSessionExtensionSuite").toFile + FileUtils.forceDeleteOnExit(dir) + dir.getCanonicalPath + } + + private def verifyDeltaSQLParserIsActivated(spark: SparkSession): Unit = { + val input = Files.createTempDirectory("DeltaSparkSessionExtensionSuite").toFile + try { + spark.range(1, 10).write.format("delta").save(input.getCanonicalPath) + spark.sql(s"vacuum delta.`${input.getCanonicalPath}`") + } finally { + JavaUtils.deleteRecursively(input) + } + } + + test("activate Delta SQL parser using SQL conf") { + val spark = SparkSession.builder() + .appName("DeltaSparkSessionExtensionSuiteUsingSQLConf") + .master("local[2]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .getOrCreate() + try { + verifyDeltaSQLParserIsActivated(spark) + } finally { + spark.close() + } + } + + test("activate Delta SQL parser using withExtensions") { + val spark = SparkSession.builder() + .appName("DeltaSparkSessionExtensionSuiteUsingWithExtensions") + .master("local[2]") + .withExtensions(new io.delta.sql.DeltaSparkSessionExtension) + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .getOrCreate() + try { + verifyDeltaSQLParserIsActivated(spark) + } finally { + spark.close() + } + } + + test("DeltaCatalog class should be initialized correctly") { + withSparkSession( + SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key -> + classOf[org.apache.spark.sql.delta.catalog.DeltaCatalog].getName + ) { spark => + val v2Catalog = spark.sessionState.analyzer.catalogManager.catalog("spark_catalog") + assert(v2Catalog.isInstanceOf[org.apache.spark.sql.delta.catalog.DeltaCatalog]) + } + } + + test("DeltaLog should not throw exception if spark.sql.catalog.spark_catalog is set") { + withTempDir { dir => + withSparkSession( + SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key -> + classOf[org.apache.spark.sql.delta.catalog.DeltaCatalog].getName + ) { spark => + val path = new Path(dir.getCanonicalPath) + assert(DeltaLog.forTable(spark, path).tableExists == false) + } + } + } + + test("DeltaLog should throw exception if spark.sql.catalog.spark_catalog " + + "config is not found") { + withTempDir { dir => + withSparkSession("" -> "") { spark => + val path = new Path(dir.getCanonicalPath) + val e = intercept[DeltaAnalysisException] { + DeltaLog.forTable(spark, path) + } + assert(e.isInstanceOf[DeltaAnalysisException]) + assert(e.getErrorClass() == "DELTA_CONFIGURE_SPARK_SESSION_WITH_EXTENSION_AND_CATALOG") + } + } + } + + test("DeltaLog should not throw exception if spark.sql.catalog.spark_catalog " + + "config is not found and the check is 
disabled") { + withTempDir { dir => + withSparkSession(DeltaSQLConf.DELTA_REQUIRED_SPARK_CONFS_CHECK.key -> "false") { spark => + val path = new Path(dir.getCanonicalPath) + DeltaLog.forTable(spark, path) + assert(DeltaLog.forTable(spark, path).tableExists == false) + } + } + } + + private def withSparkSession(configs: (String, String)*)(f: SparkSession => Unit): Unit = { + var builder = SparkSession.builder() + .appName("DeltaSparkSessionExtensionSuite") + .master("local[2]") + .config("spark.sql.warehouse.dir", createTempDir()) + + configs.foreach { c => builder = builder.config(c._1, c._2) } + val spark = builder.getOrCreate() + try { + f(spark) + } finally { + spark.close() + } + } + + private def checkErrorMessage(f: => Unit): Unit = { + val e = intercept[AnalysisException](f) + val expectedStrs = Seq( + "Delta operation requires the SparkSession to be configured", + "spark.sql.extensions", + s"${classOf[DeltaSparkSessionExtension].getName}", + SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, + s"${classOf[DeltaCatalog].getName}" + ) + expectedStrs.foreach { m => assert(e.getMessage().contains(m), "full exception: " + e) } + } +} diff --git a/spark/src/test/scala/io/delta/sql/parser/DeltaSqlParserSuite.scala b/spark/src/test/scala/io/delta/sql/parser/DeltaSqlParserSuite.scala new file mode 100644 index 00000000000..ecdbe0d1856 --- /dev/null +++ b/spark/src/test/scala/io/delta/sql/parser/DeltaSqlParserSuite.scala @@ -0,0 +1,517 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.sql.parser + +import io.delta.tables.execution.VacuumTableCommand + +import org.apache.spark.sql.delta.skipping.clustering.ClusteredTableUtils +import org.apache.spark.sql.delta.skipping.clustering.temp.ClusterByTransform + +import org.apache.spark.sql.delta.CloneTableSQLTestUtils +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.{UnresolvedPathBasedDeltaTable, UnresolvedPathBasedTable} +import org.apache.spark.sql.delta.commands.{DescribeDeltaDetailCommand, DescribeDeltaHistory, OptimizeTableCommand, DeltaReorgTable} +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.{TableIdentifier, TimeTravel} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedTable} +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.catalyst.plans.logical.{AlterTableDropFeature, CloneTableStatement, CreateTable, CreateTableAsSelect, LogicalPlan, ReplaceTable, ReplaceTableAsSelect, RestoreTableStatement} +import org.apache.spark.sql.execution.SparkSqlParser + +class DeltaSqlParserSuite extends SparkFunSuite with SQLHelper { + + test("isValidDecimal should recognize a table identifier and not treat them as a decimal") { + // Setting `delegate` to `null` is fine. 
The following tests don't need to touch `delegate`. + val parser = new DeltaSqlParser(null) + assert(parser.parsePlan("vacuum 123_") === + VacuumTableCommand(UnresolvedTable(Seq("123_"), "VACUUM", None), None, false)) + assert(parser.parsePlan("vacuum 1a.123_") === + VacuumTableCommand(UnresolvedTable(Seq("1a", "123_"), "VACUUM", None), None, false)) + assert(parser.parsePlan("vacuum a.123A") === + VacuumTableCommand(UnresolvedTable(Seq("a", "123A"), "VACUUM", None), None, false)) + assert(parser.parsePlan("vacuum a.123E3_column") === + VacuumTableCommand(UnresolvedTable(Seq("a", "123E3_column"), "VACUUM", None), None, false)) + assert(parser.parsePlan("vacuum a.123D_column") === + VacuumTableCommand(UnresolvedTable(Seq("a", "123D_column"), "VACUUM", None), + None, false)) + assert(parser.parsePlan("vacuum a.123BD_column") === + VacuumTableCommand(UnresolvedTable(Seq("a", "123BD_column"), "VACUUM", None), + None, false)) + + assert(parser.parsePlan("vacuum delta.`/tmp/table`") === + VacuumTableCommand(UnresolvedTable(Seq("delta", "/tmp/table"), "VACUUM", None), + None, false)) + + assert(parser.parsePlan("vacuum \"/tmp/table\"") === + VacuumTableCommand( + UnresolvedPathBasedDeltaTable("/tmp/table", Map.empty, "VACUUM"), None, false)) + } + + test("Restore command is parsed as expected") { + val parser = new DeltaSqlParser(null) + var parsedCmd = parser.parsePlan("RESTORE catalog_foo.db.tbl TO VERSION AS OF 1;") + assert(parsedCmd === + RestoreTableStatement(TimeTravel( + UnresolvedRelation(Seq("catalog_foo", "db", "tbl")), + None, + Some(1), + Some("sql")))) + + parsedCmd = parser.parsePlan("RESTORE delta.`/tmp` TO VERSION AS OF 1;") + assert(parsedCmd === + RestoreTableStatement(TimeTravel( + UnresolvedRelation(Seq("delta", "/tmp")), + None, + Some(1), + Some("sql")))) + } + + test("OPTIMIZE command is parsed as expected") { + val parser = new DeltaSqlParser(null) + var parsedCmd = parser.parsePlan("OPTIMIZE tbl") + assert(parsedCmd === + OptimizeTableCommand(None, Some(tblId("tbl")), Nil)(Nil)) + assert(parsedCmd.asInstanceOf[OptimizeTableCommand].child === + UnresolvedTable(Seq("tbl"), "OPTIMIZE", None)) + + parsedCmd = parser.parsePlan("OPTIMIZE db.tbl") + assert(parsedCmd === + OptimizeTableCommand(None, Some(tblId("tbl", "db")), Nil)(Nil)) + assert(parsedCmd.asInstanceOf[OptimizeTableCommand].child === + UnresolvedTable(Seq("db", "tbl"), "OPTIMIZE", None)) + + parsedCmd = parser.parsePlan("OPTIMIZE catalog_foo.db.tbl") + assert(parsedCmd === + OptimizeTableCommand(None, Some(tblId("tbl", "db", "catalog_foo")), Nil)(Nil)) + assert(parsedCmd.asInstanceOf[OptimizeTableCommand].child === + UnresolvedTable(Seq("catalog_foo", "db", "tbl"), "OPTIMIZE", None)) + + assert(parser.parsePlan("OPTIMIZE tbl_${system:spark.testing}") === + OptimizeTableCommand(None, Some(tblId("tbl_true")), Nil)(Nil)) + + withSQLConf("tbl_var" -> "tbl") { + assert(parser.parsePlan("OPTIMIZE ${tbl_var}") === + OptimizeTableCommand(None, Some(tblId("tbl")), Nil)(Nil)) + + assert(parser.parsePlan("OPTIMIZE ${spark:tbl_var}") === + OptimizeTableCommand(None, Some(tblId("tbl")), Nil)(Nil)) + + assert(parser.parsePlan("OPTIMIZE ${sparkconf:tbl_var}") === + OptimizeTableCommand(None, Some(tblId("tbl")), Nil)(Nil)) + + assert(parser.parsePlan("OPTIMIZE ${hiveconf:tbl_var}") === + OptimizeTableCommand(None, Some(tblId("tbl")), Nil)(Nil)) + + assert(parser.parsePlan("OPTIMIZE ${hivevar:tbl_var}") === + OptimizeTableCommand(None, Some(tblId("tbl")), Nil)(Nil)) + } + + parsedCmd = parser.parsePlan("OPTIMIZE '/path/to/tbl'") + 
assert(parsedCmd === + OptimizeTableCommand(Some("/path/to/tbl"), None, Nil)(Nil)) + assert(parsedCmd.asInstanceOf[OptimizeTableCommand].child === + UnresolvedPathBasedDeltaTable("/path/to/tbl", Map.empty, "OPTIMIZE")) + + parsedCmd = parser.parsePlan("OPTIMIZE delta.`/path/to/tbl`") + assert(parsedCmd === + OptimizeTableCommand(None, Some(tblId("/path/to/tbl", "delta")), Nil)(Nil)) + assert(parsedCmd.asInstanceOf[OptimizeTableCommand].child === + UnresolvedTable(Seq("delta", "/path/to/tbl"), "OPTIMIZE", None)) + + assert(parser.parsePlan("OPTIMIZE tbl WHERE part = 1") === + OptimizeTableCommand(None, Some(tblId("tbl")), Seq("part = 1"))(Nil)) + + assert(parser.parsePlan("OPTIMIZE tbl ZORDER BY (col1)") === + OptimizeTableCommand(None, Some(tblId("tbl")), Nil) + (Seq(unresolvedAttr("col1")))) + + assert(parser.parsePlan("OPTIMIZE tbl WHERE part = 1 ZORDER BY col1, col2.subcol") === + OptimizeTableCommand(None, Some(tblId("tbl")), Seq("part = 1"))( + Seq(unresolvedAttr("col1"), unresolvedAttr("col2", "subcol")))) + + assert(parser.parsePlan("OPTIMIZE tbl WHERE part = 1 ZORDER BY (col1, col2.subcol)") === + OptimizeTableCommand(None, Some(tblId("tbl")), Seq("part = 1"))( + Seq(unresolvedAttr("col1"), unresolvedAttr("col2", "subcol")))) + } + + test("OPTIMIZE command new tokens are non-reserved keywords") { + // new keywords: OPTIMIZE, ZORDER + val parser = new DeltaSqlParser(null) + + // Use the new keywords in table name + assert(parser.parsePlan("OPTIMIZE optimize") === + OptimizeTableCommand(None, Some(tblId("optimize")), Nil)(Nil)) + + assert(parser.parsePlan("OPTIMIZE zorder") === + OptimizeTableCommand(None, Some(tblId("zorder")), Nil)(Nil)) + + // Use the new keywords in column name + assert(parser.parsePlan("OPTIMIZE tbl WHERE zorder = 1 and optimize = 2") === + OptimizeTableCommand(None, + Some(tblId("tbl")) + , Seq("zorder = 1 and optimize = 2"))(Nil)) + + assert(parser.parsePlan("OPTIMIZE tbl ZORDER BY (optimize, zorder)") === + OptimizeTableCommand(None, Some(tblId("tbl")), Nil)( + Seq(unresolvedAttr("optimize"), unresolvedAttr("zorder")))) + } + + test("DESCRIBE DETAIL command is parsed as expected") { + val parser = new DeltaSqlParser(null) + + // Desc detail on a table + assert(parser.parsePlan("DESCRIBE DETAIL catalog_foo.db.tbl") === + DescribeDeltaDetailCommand( + UnresolvedTable(Seq("catalog_foo", "db", "tbl"), DescribeDeltaDetailCommand.CMD_NAME, None), + Map.empty)) + + // Desc detail on a raw path + assert(parser.parsePlan("DESCRIBE DETAIL \"/tmp/table\"") === + DescribeDeltaDetailCommand( + UnresolvedPathBasedTable("/tmp/table", Map.empty, DescribeDeltaDetailCommand.CMD_NAME), + Map.empty)) + + // Desc detail on a delta raw path + assert(parser.parsePlan("DESCRIBE DETAIL delta.`dummy_raw_path`") === + DescribeDeltaDetailCommand( + UnresolvedTable(Seq("delta", "dummy_raw_path"), DescribeDeltaDetailCommand.CMD_NAME, None), + Map.empty)) + } + + test("DESCRIBE HISTORY command is parsed as expected") { + val parser = new DeltaSqlParser(null) + var parsedCmd = parser.parsePlan("DESCRIBE HISTORY catalog_foo.db.tbl") + assert(parsedCmd.asInstanceOf[DescribeDeltaHistory].child === + UnresolvedTable(Seq("catalog_foo", "db", "tbl"), DescribeDeltaHistory.COMMAND_NAME, None)) + parsedCmd = parser.parsePlan("DESCRIBE HISTORY delta.`/path/to/tbl`") + assert(parsedCmd.asInstanceOf[DescribeDeltaHistory].child === + UnresolvedTable(Seq("delta", "/path/to/tbl"), DescribeDeltaHistory.COMMAND_NAME, None)) + parsedCmd = parser.parsePlan("DESCRIBE HISTORY '/path/to/tbl'") + 
assert(parsedCmd.asInstanceOf[DescribeDeltaHistory].child === + UnresolvedPathBasedDeltaTable("/path/to/tbl", Map.empty, DescribeDeltaHistory.COMMAND_NAME)) + } + + private def targetPlanForTable(tableParts: String*): UnresolvedTable = + UnresolvedTable(tableParts.toSeq, "REORG", relationTypeMismatchHint = None) + + test("REORG command is parsed as expected") { + val parser = new DeltaSqlParser(null) + + assert(parser.parsePlan("REORG TABLE tbl APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("tbl"))(Nil)) + + assert(parser.parsePlan("REORG TABLE tbl_${system:spark.testing} APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("tbl_true"))(Nil)) + + withSQLConf("tbl_var" -> "tbl") { + assert(parser.parsePlan("REORG TABLE ${tbl_var} APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("tbl"))(Nil)) + + assert(parser.parsePlan("REORG TABLE ${spark:tbl_var} APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("tbl"))(Nil)) + + assert(parser.parsePlan("REORG TABLE ${sparkconf:tbl_var} APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("tbl"))(Nil)) + + assert(parser.parsePlan("REORG TABLE ${hiveconf:tbl_var} APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("tbl"))(Nil)) + + assert(parser.parsePlan("REORG TABLE ${hivevar:tbl_var} APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("tbl"))(Nil)) + } + + assert(parser.parsePlan("REORG TABLE delta.`/path/to/tbl` APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("delta", "/path/to/tbl"))(Nil)) + + assert(parser.parsePlan("REORG TABLE tbl WHERE part = 1 APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("tbl"))(Seq("part = 1"))) + } + + test("REORG command new tokens are non-reserved keywords") { + // new keywords: REORG, APPLY, PURGE + val parser = new DeltaSqlParser(null) + + // Use the new keywords in table name + assert(parser.parsePlan("REORG TABLE reorg APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("reorg"))(Nil)) + assert(parser.parsePlan("REORG TABLE apply APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("apply"))(Nil)) + assert(parser.parsePlan("REORG TABLE purge APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("purge"))(Nil)) + + // Use the new keywords in column name + assert(parser.parsePlan( + "REORG TABLE tbl WHERE reorg = 1 AND apply = 2 AND purge = 3 APPLY (PURGE)") === + DeltaReorgTable(targetPlanForTable("tbl"))(Seq("reorg = 1 AND apply =2 AND purge = 3"))) + } + + // scalastyle:off argcount + private def checkCloneStmt( + parser: DeltaSqlParser, + source: String, + target: String, + sourceFormat: String = "delta", + sourceIsTable: Boolean = true, + sourceIs3LTable: Boolean = false, + targetIsTable: Boolean = true, + targetLocation: Option[String] = None, + versionAsOf: Option[Long] = None, + timestampAsOf: Option[String] = None, + isCreate: Boolean = true, + isReplace: Boolean = false, + tableProperties: Map[String, String] = Map.empty): Unit = { + assert { + parser.parsePlan(CloneTableSQLTestUtils.buildCloneSqlString( + source, + target, + sourceIsTable, + targetIsTable, + sourceFormat, + targetLocation = targetLocation, + versionAsOf = versionAsOf, + timestampAsOf = timestampAsOf, + isCreate = isCreate, + isReplace = isReplace, + tableProperties = tableProperties + )) == { + val sourceRelation = if (sourceIs3LTable) { + new UnresolvedRelation(source.split('.')) + } else { + UnresolvedRelation(tblId(source, if (sourceIsTable) null else sourceFormat)) + } + CloneTableStatement( + if (versionAsOf.isEmpty && timestampAsOf.isEmpty) { + sourceRelation + } 
else { + TimeTravel( + sourceRelation, + timestampAsOf.map(Literal(_)), + versionAsOf, + Some("sql")) + }, + new UnresolvedRelation(target.split('.')), + ifNotExists = false, + isReplaceCommand = isReplace, + isCreateCommand = isCreate, + tablePropertyOverrides = tableProperties, + targetLocation = targetLocation + ) + } + } + } + // scalastyle:on argcount + + test("CLONE command is parsed as expected") { + val parser = new DeltaSqlParser(null) + // Standard shallow clone + checkCloneStmt(parser, source = "t1", target = "t1") + // Path based source table + checkCloneStmt(parser, source = "/path/to/t1", target = "t1", sourceIsTable = false) + // REPLACE + checkCloneStmt(parser, source = "t1", target = "t1", isCreate = false, isReplace = true) + // CREATE OR REPLACE + checkCloneStmt(parser, source = "t1", target = "t1", isCreate = true, isReplace = true) + // Clone with table properties + checkCloneStmt(parser, source = "t1", target = "t1", tableProperties = Map("a" -> "a")) + // Clone with external location + checkCloneStmt(parser, source = "t1", target = "t1", targetLocation = Some("/new/path")) + // Clone with time travel + checkCloneStmt(parser, source = "t1", target = "t1", versionAsOf = Some(1L)) + // Clone with 3L table (only useful for Iceberg table now) + checkCloneStmt(parser, source = "local.iceberg.table", target = "t1", sourceIs3LTable = true) + checkCloneStmt(parser, source = "local.iceberg.table", target = "delta.table", + sourceIs3LTable = true) + // Custom source format with path + checkCloneStmt(parser, source = "/path/to/iceberg", target = "t1", sourceFormat = "iceberg", + sourceIsTable = false) + + // Target table with 3L name + checkCloneStmt(parser, source = "/path/to/iceberg", target = "a.b.t1", sourceFormat = "iceberg", + sourceIsTable = false) + checkCloneStmt( + parser, source = "spark_catalog.tmp.table", target = "a.b.t1", sourceIs3LTable = true) + checkCloneStmt(parser, source = "t2", target = "a.b.t1") + } + + for (truncateHistory <- Seq(true, false)) + test(s"DROP FEATURE command is parsed as expected - truncateHistory: $truncateHistory") { + val parser = new DeltaSqlParser(null) + val table = "tbl" + val featureName = "feature_name" + val sql = s"ALTER TABLE $table DROP FEATURE $featureName " + + (if (truncateHistory) "TRUNCATE HISTORY" else "") + val parsedCmd = parser.parsePlan(sql) + assert(parsedCmd === + AlterTableDropFeature( + UnresolvedTable(Seq(table), "ALTER TABLE ... 
DROP FEATURE", None), + featureName, + truncateHistory)) + } + + private def unresolvedAttr(colName: String*): UnresolvedAttribute = { + new UnresolvedAttribute(colName) + } + + private def tblId( + tblName: String, + schema: String = null, + catalog: String = null): TableIdentifier = { + if (catalog == null) { + if (schema == null) new TableIdentifier(tblName) + else new TableIdentifier(tblName, Some(schema)) + } else { + assert(schema != null) + new TableIdentifier(tblName, Some(schema), Some(catalog)) + } + } + + private def clusterByStatement( + createOrReplaceClause: String, + asSelect: Boolean, + schema: String, + clusterByClause: String): String = { + val tableSchema = if (asSelect) { + "" + } else { + s"($schema)" + } + val select = if (asSelect) { + "AS SELECT * FROM tbl2" + } else { + "" + } + s"$createOrReplaceClause TABLE tbl $tableSchema USING DELTA $clusterByClause $select" + } + + private def validateClusterByTransform( + clause: String, + asSelect: Boolean, + plan: LogicalPlan, + expectedColumns: Seq[Seq[String]]): Unit = { + val partitioning = if (clause == "CREATE") { + if (asSelect) { + plan.asInstanceOf[CreateTableAsSelect].partitioning + } else { + plan.asInstanceOf[CreateTable].partitioning + } + } else { + if (asSelect) { + plan.asInstanceOf[ReplaceTableAsSelect].partitioning + } else { + plan.asInstanceOf[ReplaceTable].partitioning + } + } + assert(partitioning.size === 1) + val transform = partitioning.head + val actualColumns = transform match { + case ClusterByTransform(columnNames) => columnNames.map(_.fieldNames.toSeq) + case _ => assert(false, "Should not reach here") + } + assert(actualColumns === expectedColumns) + } + + for (asSelect <- BOOLEAN_DOMAIN) { + Seq("CREATE", "REPLACE").foreach { clause => + test(s"CLUSTER BY - $clause TABLE asSelect = $asSelect") { + val parser = new DeltaSqlParser(new SparkSqlParser()) + val sql = clusterByStatement(clause, asSelect, "a int, b string", "CLUSTER BY (a)") + val parsedPlan = parser.parsePlan(sql) + validateClusterByTransform(clause, asSelect, parsedPlan, Seq(Seq("a"))) + } + + test(s"CLUSTER BY nested column - $clause TABLE asSelect = $asSelect") { + val parser = new DeltaSqlParser(new SparkSqlParser()) + val sql = + clusterByStatement(clause, asSelect, "a struct", "CLUSTER BY (a.b, a.c)") + val parsedPlan = parser.parsePlan(sql) + validateClusterByTransform(clause, asSelect, parsedPlan, Seq(Seq("a", "b"), Seq("a", "c"))) + } + + test(s"CLUSTER BY backquoted column - $clause TABLE asSelect = $asSelect") { + val parser = new DeltaSqlParser(new SparkSqlParser()) + val sql = + clusterByStatement(clause, asSelect, "`a.b.c` int", "CLUSTER BY (`a.b.c`)") + val parsedPlan = parser.parsePlan(sql) + validateClusterByTransform(clause, asSelect, parsedPlan, Seq(Seq("a.b.c"))) + } + + test(s"CLUSTER BY comma column - $clause TABLE asSelect = $asSelect") { + val parser = new DeltaSqlParser(new SparkSqlParser()) + val sql = + clusterByStatement(clause, asSelect, "`a,b` int", "CLUSTER BY (`a,b`)") + val parsedPlan = parser.parsePlan(sql) + validateClusterByTransform(clause, asSelect, parsedPlan, Seq(Seq("a,b"))) + } + + test(s"CLUSTER BY duplicated clauses - $clause TABLE asSelect = $asSelect") { + val parser = new DeltaSqlParser(new SparkSqlParser()) + val sql = + clusterByStatement(clause, asSelect, "a int, b string", "CLUSTER BY (a) CLUSTER BY (b)") + checkError(exception = intercept[ParseException] { + parser.parsePlan(sql) + }, errorClass = "DUPLICATE_CLAUSES", parameters = Map("clauseName" -> "CLUSTER BY")) + } + + 
test("CLUSTER BY set clustering column property is ignored - " + + s"$clause TABLE asSelect = $asSelect") { + val parser = new DeltaSqlParser(new SparkSqlParser()) + val sql = + clusterByStatement( + clause, + asSelect, + "a int, b string", + "CLUSTER BY (a) " + + s"TBLPROPERTIES ('${ClusteredTableUtils.PROP_CLUSTERING_COLUMNS}' = 'b')") + val parsedPlan = parser.parsePlan(sql) + validateClusterByTransform(clause, asSelect, parsedPlan, Seq(Seq("a"))) + } + + test(s"CLUSTER BY with PARTITIONED BY - $clause TABLE asSelect = $asSelect") { + val parser = new DeltaSqlParser(new SparkSqlParser()) + val sql = + clusterByStatement( + clause, + asSelect, + "a int, b string", + "CLUSTER BY (a) PARTITIONED BY (b)") + val errorMsg = "Clustering and partitioning cannot both be specified. " + + "Please remove PARTITIONED BY if you want to create a Delta table with clustering" + checkError(exception = intercept[ParseException] { + parser.parsePlan(sql) + }, errorClass = "_LEGACY_ERROR_TEMP_0035", parameters = Map("message" -> errorMsg)) + } + + test(s"CLUSTER BY with bucketing - $clause TABLE asSelect = $asSelect") { + val parser = new DeltaSqlParser(new SparkSqlParser()) + val sql = + clusterByStatement( + clause, + asSelect, + "a int, b string", + "CLUSTER BY (a) CLUSTERED BY (b) INTO 2 BUCKETS") + val errorMsg = "Clustering and bucketing cannot both be specified. " + + "Please remove CLUSTERED BY INTO BUCKETS if you " + + "want to create a Delta table with clustering" + checkError(exception = intercept[ParseException] { + parser.parsePlan(sql) + }, errorClass = "_LEGACY_ERROR_TEMP_0035", parameters = Map("message" -> errorMsg)) + } + } + } +} diff --git a/spark/src/test/scala/io/delta/tables/DeltaTableBuilderSuite.scala b/spark/src/test/scala/io/delta/tables/DeltaTableBuilderSuite.scala new file mode 100644 index 00000000000..4761757c0b2 --- /dev/null +++ b/spark/src/test/scala/io/delta/tables/DeltaTableBuilderSuite.scala @@ -0,0 +1,465 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.tables + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaLog +import org.apache.spark.sql.delta.sources.DeltaSourceUtils.GENERATION_EXPRESSION_METADATA_KEY +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchDatabaseException, TableAlreadyExistsException} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, LongType, MetadataBuilder, StringType, StructType} + +class DeltaTableBuilderSuite extends QueryTest with SharedSparkSession with DeltaSQLCommandTest + { + + // Define the information for a default test table used by many tests. + protected val defaultTestTableSchema = "c1 int, c2 int, c3 string" + protected val defaultTestTableGeneratedColumns = Map("c2" -> "c1 + 10") + protected val defaultTestTablePartitionColumns = Seq("c1") + protected val defaultTestTableColumnComments = Map("c1" -> "foo", "c3" -> "bar") + protected val defaultTestTableComment = "tbl comment" + protected val defaultTestTableNullableCols = Set("c1", "c3") + protected val defaultTestTableProperty = ("foo", "bar") + + + /** + * Verify if the table metadata matches the test table. We use this to verify DDLs + * write correct table metadata into the transaction logs. + */ + protected def verifyTestTableMetadata( + table: String, + schemaString: String, + generatedColumns: Map[String, String] = Map.empty, + colComments: Map[String, String] = Map.empty, + colNullables: Set[String] = Set.empty, + tableComment: Option[String] = None, + partitionCols: Seq[String] = Seq.empty, + tableProperty: Option[(String, String)] = None + ): Unit = { + val deltaLog = if (table.startsWith("delta.")) { + DeltaLog.forTable(spark, table.stripPrefix("delta.`").stripSuffix("`")) + } else { + DeltaLog.forTable(spark, TableIdentifier(table)) + } + val schema = StructType.fromDDL(schemaString) + val expectedSchema = StructType(schema.map { field => + val newMetadata = new MetadataBuilder() + .withMetadata(field.metadata) + if (generatedColumns.contains(field.name)) { + newMetadata.putString(GENERATION_EXPRESSION_METADATA_KEY, generatedColumns(field.name)) + } + if (colComments.contains(field.name)) { + newMetadata.putString("comment", colComments(field.name)) + } + field.copy( + nullable = colNullables.contains(field.name), + metadata = newMetadata.build) + }) + val metadata = deltaLog.snapshot.metadata + assert(metadata.schema == expectedSchema) + assert(metadata.partitionColumns == partitionCols) + if (tableProperty.nonEmpty) { + assert(metadata.configuration(tableProperty.get._1).contentEquals(tableProperty.get._2)) + } + if (tableComment.nonEmpty) { + assert(metadata.description.contentEquals(tableComment.get)) + } + } + + protected def testCreateTable(testName: String)(createFunc: String => Unit): Unit = { + test(testName) { + withTable(testName) { + createFunc(testName) + verifyTestTableMetadata( + testName, defaultTestTableSchema, defaultTestTableGeneratedColumns, + defaultTestTableColumnComments, defaultTestTableNullableCols, + Some(defaultTestTableComment), defaultTestTablePartitionColumns, + Some(defaultTestTableProperty) + ) + } + } + } + + protected def testCreateTableWithNameAndLocation( + 
testName: String)(createFunc: (String, String) => Unit): Unit = { + test(testName + ": external - with location and name") { + withTempPath { path => + withTable(testName) { + createFunc(testName, path.getCanonicalPath) + verifyTestTableMetadata( + testName, + defaultTestTableSchema, defaultTestTableGeneratedColumns, + defaultTestTableColumnComments, defaultTestTableNullableCols, + Some(defaultTestTableComment), defaultTestTablePartitionColumns, + Some(defaultTestTableProperty) + ) + } + } + } + } + + protected def testCreateTableWithLocationOnly( + testName: String)(createFunc: String => Unit): Unit = { + test(testName + ": external - location only") { + withTempPath { path => + withTable(testName) { + createFunc(path.getCanonicalPath) + verifyTestTableMetadata( + s"delta.`${path.getCanonicalPath}`", + defaultTestTableSchema, defaultTestTableGeneratedColumns, + defaultTestTableColumnComments, defaultTestTableNullableCols, + Some(defaultTestTableComment), defaultTestTablePartitionColumns, + Some(defaultTestTableProperty) + ) + } + } + } + } + + def defaultCreateTableBuilder( + ifNotExists: Boolean, + tableName: Option[String] = None, + location: Option[String] = None): DeltaTableBuilder = { + val tableBuilder = if (ifNotExists) { + io.delta.tables.DeltaTable.createIfNotExists() + } else { + io.delta.tables.DeltaTable.create() + } + defaultTableBuilder(tableBuilder, tableName, location) + } + + def defaultReplaceTableBuilder( + orCreate: Boolean, + tableName: Option[String] = None, + location: Option[String] = None): DeltaTableBuilder = { + var tableBuilder = if (orCreate) { + io.delta.tables.DeltaTable.createOrReplace() + } else { + io.delta.tables.DeltaTable.replace() + } + defaultTableBuilder(tableBuilder, tableName, location) + } + + private def defaultTableBuilder( + builder: DeltaTableBuilder, + tableName: Option[String], + location: Option[String] + ) = { + var tableBuilder = builder + if (tableName.nonEmpty) { + tableBuilder = tableBuilder.tableName(tableName.get) + } + if (location.nonEmpty) { + tableBuilder = tableBuilder.location(location.get) + } + tableBuilder.addColumn( + io.delta.tables.DeltaTable.columnBuilder("c1").dataType("int").nullable(true).comment("foo") + .build() + ) + tableBuilder.addColumn( + io.delta.tables.DeltaTable.columnBuilder("c2").dataType("int") + .nullable(false).generatedAlwaysAs("c1 + 10").build() + ) + tableBuilder.addColumn( + io.delta.tables.DeltaTable.columnBuilder("c3").dataType("string").comment("bar").build() + ) + tableBuilder.partitionedBy("c1") + tableBuilder.property("foo", "bar") + tableBuilder.comment("tbl comment") + tableBuilder + } + + test("create table with existing schema and extra column") { + withTable("table") { + withTempDir { dir => + spark.range(10).toDF("key").write.format("parquet").saveAsTable("table") + val existingSchema = spark.read.format("parquet").table("table").schema + io.delta.tables.DeltaTable.create() + .location(dir.getAbsolutePath) + .addColumns(existingSchema) + .addColumn("value", "string", false) + .execute() + verifyTestTableMetadata(s"delta.`${dir.getAbsolutePath}`", + "key bigint, value string", colNullables = Set("key")) + } + } + } + + test("create table with variation of addColumns - with spark session") { + withTable("test") { + io.delta.tables.DeltaTable.create(spark) + .tableName("test") + .addColumn("c1", "int") + .addColumn("c2", IntegerType) + .addColumn("c3", "string", false) + .addColumn("c4", StringType, true) + .addColumn( + io.delta.tables.DeltaTable.columnBuilder(spark, "c5") + 
.dataType("bigint") + .comment("foo") + .nullable(false) + .build + ) + .addColumn( + io.delta.tables.DeltaTable.columnBuilder(spark, "c6") + .dataType(LongType) + .generatedAlwaysAs("c5 + 10") + .build + ).execute() + verifyTestTableMetadata( + "test", "c1 int, c2 int, c3 string, c4 string, c5 bigint, c6 bigint", + generatedColumns = Map("c6" -> "c5 + 10"), + colComments = Map("c5" -> "foo"), + colNullables = Set("c1", "c2", "c4", "c6") + ) + } + } + + test("test addColumn using columnBuilder, without dataType") { + val e = intercept[AnalysisException] { + DeltaTable.columnBuilder("value") + .generatedAlwaysAs("true") + .nullable(true) + .build() + } + assert(e.getMessage == "The data type of the column value is not provided") + } + + testCreateTable("create_table") { table => + defaultCreateTableBuilder(ifNotExists = false, Some(table)).execute() + } + + testCreateTableWithNameAndLocation("create_table") { (name, path) => + defaultCreateTableBuilder(ifNotExists = false, Some(name), Some(path)).execute() + } + + testCreateTableWithLocationOnly("create_table") { path => + defaultCreateTableBuilder(ifNotExists = false, location = Some(path)).execute() + } + + test("create table - errors if already exists") { + withTable("testTable") { + sql(s"CREATE TABLE testTable (c1 int) USING DELTA") + intercept[TableAlreadyExistsException] { + defaultCreateTableBuilder(ifNotExists = false, Some("testTable")).execute() + } + } + } + + test("create table - ignore if already exists") { + withTable("testTable") { + sql(s"CREATE TABLE testTable (c1 int) USING DELTA") + defaultCreateTableBuilder(ifNotExists = true, Some("testTable")).execute() + verifyTestTableMetadata("testTable", "c1 int", colNullables = Set("c1")) + } + } + + testCreateTable("create_table_if_not_exists") { table => + defaultCreateTableBuilder(ifNotExists = true, Some(table)).execute() + } + + testCreateTableWithNameAndLocation("create_table_if_not_exists") { (name, path) => + defaultCreateTableBuilder(ifNotExists = true, Some(name), Some(path)).execute() + } + + testCreateTableWithLocationOnly("create_table_if_not_exists") { path => + defaultCreateTableBuilder(ifNotExists = true, location = Some(path)).execute() + } + + test("replace table - errors if not exists") { + intercept[AnalysisException] { + defaultReplaceTableBuilder(orCreate = false, Some("testTable")).execute() + } + } + + testCreateTable("replace_table") { table => + sql(s"CREATE TABLE replace_table(c1 int) USING DELTA") + defaultReplaceTableBuilder(orCreate = false, Some(table)).execute() + } + + testCreateTableWithNameAndLocation("replace_table") { (name, path) => + sql(s"CREATE TABLE $name (c1 int) USING DELTA LOCATION '$path'") + defaultReplaceTableBuilder(orCreate = false, Some(name), Some(path)).execute() + } + + testCreateTableWithLocationOnly("replace_table") { path => + sql(s"CREATE TABLE delta.`$path` (c1 int) USING DELTA") + defaultReplaceTableBuilder(orCreate = false, location = Some(path)).execute() + } + + testCreateTable("replace_or_create_table") { table => + defaultReplaceTableBuilder(orCreate = true, Some(table)).execute() + } + + testCreateTableWithNameAndLocation("replace_or_create_table") { (name, path) => + defaultReplaceTableBuilder(orCreate = true, Some(name), Some(path)).execute() + } + + testCreateTableWithLocationOnly("replace_or_create_table") { path => + defaultReplaceTableBuilder(orCreate = true, location = Some(path)).execute() + } + + test("test no identifier and no location") { + val e = intercept[AnalysisException] { + 
io.delta.tables.DeltaTable.create().addColumn("c1", "int").execute() + } + assert(e.getMessage.equals("Table name or location has to be specified")) + } + + test("partitionedBy only should contain columns in the schema") { + val e = intercept[AnalysisException] { + io.delta.tables.DeltaTable.create().tableName("testTable") + .addColumn("c1", "int") + .partitionedBy("c2") + .execute() + } + assert(e.getMessage.startsWith("Couldn't find column c2")) + } + + test("errors if table name and location are different paths") { + withTempDir { dir => + val path = dir.getAbsolutePath + val e = intercept[AnalysisException] { + io.delta.tables.DeltaTable.create().tableName(s"delta.`$path`") + .addColumn("c1", "int") + .location("src/test/resources/delta/dbr_8_0_non_generated_columns") + .execute() + } + assert(e.getMessage.startsWith( + "Creating path-based Delta table with a different location isn't supported.")) + } + } + + test("table name and location are the same") { + withTempDir { dir => + val path = dir.getAbsolutePath + io.delta.tables.DeltaTable.create().tableName(s"delta.`$path`") + .addColumn("c1", "int") + .location(path) + .execute() + } + } + + test("errors if use parquet path as identifier") { + withTempDir { dir => + val path = dir.getAbsolutePath + val e = intercept[AnalysisException] { + io.delta.tables.DeltaTable.create().tableName(s"parquet.`$path`") + .addColumn("c1", "int") + .location(path) + .execute() + } + assert(e.getMessage == "Database 'main.parquet' not found" || + e.getMessage == "Database 'parquet' not found" || + e.getMessage.contains("is not a valid name") || + e.getMessage.contains("schema `parquet` cannot be found") + ) + } + } + + test("delta table property case") { + val preservedCaseConfig = Map("delta.appendOnly" -> "true", "Foo" -> "Bar", "foo" -> "Bar") + val lowerCaseEnforcedConfig = Map("delta.appendOnly" -> "true", "foo" -> "Bar") + + sealed trait DeltaTablePropertySetOperation { + def setTableProperty(tablePath: String): Unit + + def expectedConfig: Map[String, String] + + def description: String + } + + trait CasePreservingTablePropertySetOperation extends DeltaTablePropertySetOperation { + + val expectedConfig = preservedCaseConfig + } + + case object SetPropertyThroughCreate extends CasePreservingTablePropertySetOperation { + def setTableProperty(tablePath: String): Unit = sql( + s"CREATE TABLE delta.`$tablePath`(id INT) " + + s"USING delta TBLPROPERTIES('delta.appendOnly'='true', 'Foo'='Bar', 'foo'='Bar' ) " + ) + + val description = "Setting Table Property at Table Creation" + } + + case object SetPropertyThroughAlter extends CasePreservingTablePropertySetOperation { + def setTableProperty(tablePath: String): Unit = { + spark.range(1, 10).write.format("delta").save(tablePath) + sql(s"ALTER TABLE delta.`$tablePath` " + + s"SET TBLPROPERTIES('delta.appendOnly'='true', 'Foo'='Bar', 'foo'='Bar')") + } + + val description = "Setting Table Property via Table Alter" + } + + case class SetPropertyThroughTableBuilder(backwardCompatible: Boolean) extends + DeltaTablePropertySetOperation { + + def setTableProperty(tablePath: String): Unit = { + withSQLConf(DeltaSQLConf.TABLE_BUILDER_FORCE_TABLEPROPERTY_LOWERCASE.key + -> backwardCompatible.toString) { + DeltaTable.create() + .location(tablePath) + .property("delta.appendOnly", "true") + .property("Foo", "Bar") + .property("foo", "Bar") + .execute() + } + } + + override def expectedConfig : Map[String, String] = { + if (backwardCompatible) { + lowerCaseEnforcedConfig + } + else { + preservedCaseConfig + } + } + + 
val description = s"Setting Table Property on DeltaTableBuilder." + + s" Backward compatible enabled = ${backwardCompatible}" + } + + val examples = Seq( + SetPropertyThroughCreate, + SetPropertyThroughAlter, + SetPropertyThroughTableBuilder(backwardCompatible = true), + SetPropertyThroughTableBuilder(backwardCompatible = false) + ) + + for (example <- examples) { + withClue(example.description) { + withTempDir { dir => + val path = dir.getCanonicalPath() + example.setTableProperty(path) + val config = DeltaLog.forTable(spark, path).snapshot.metadata.configuration + assert( + config == example.expectedConfig, + s"$example's result is not correct: $config") + } + } + } + } + +} diff --git a/spark/src/test/scala/io/delta/tables/DeltaTableSuite.scala b/spark/src/test/scala/io/delta/tables/DeltaTableSuite.scala new file mode 100644 index 00000000000..07e1cc28339 --- /dev/null +++ b/spark/src/test/scala/io/delta/tables/DeltaTableSuite.scala @@ -0,0 +1,605 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.tables + +import java.io.File +import java.util.Locale + +import scala.language.postfixOps + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.{DeltaIllegalArgumentException, DeltaLog, DeltaTableFeatureException, FakeFileSystem, TestReaderWriterFeature, TestWriterFeature} +import org.apache.spark.sql.delta.actions.{ Metadata, Protocol } +import org.apache.spark.sql.delta.storage.LocalLogStore +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.{Path, UnsupportedFileSystemException} + +import org.apache.spark.SparkException +import org.apache.spark.network.util.JavaUtils +import org.apache.spark.sql.{functions, AnalysisException, DataFrame, Dataset, QueryTest, Row} +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +class DeltaTableSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + + test("forPath") { + withTempDir { dir => + testData.write.format("delta").save(dir.getAbsolutePath) + checkAnswer( + DeltaTable.forPath(spark, dir.getAbsolutePath).toDF, + testData.collect().toSeq) + checkAnswer( + DeltaTable.forPath(dir.getAbsolutePath).toDF, + testData.collect().toSeq) + } + } + + test("forPath - with non-Delta table path") { + val msg = "not a delta table" + withTempDir { dir => + testData.write.format("parquet").mode("overwrite").save(dir.getAbsolutePath) + testError(msg) { DeltaTable.forPath(spark, dir.getAbsolutePath) } + testError(msg) { DeltaTable.forPath(dir.getAbsolutePath) } + } + } + + test("forName") { + withTempDir { dir => + withTable("deltaTable") { + testData.write.format("delta").saveAsTable("deltaTable") + + checkAnswer( + 
DeltaTable.forName(spark, "deltaTable").toDF, + testData.collect().toSeq) + checkAnswer( + DeltaTable.forName("deltaTable").toDF, + testData.collect().toSeq) + + } + } + } + + def testForNameOnNonDeltaName(tableName: String): Unit = { + val msg = "not a Delta table" + testError(msg) { DeltaTable.forName(spark, tableName) } + testError(msg) { DeltaTable.forName(tableName) } + } + + test("forName - with non-Delta table name") { + withTempDir { dir => + withTable("notADeltaTable") { + testData.write.format("parquet").mode("overwrite").saveAsTable("notADeltaTable") + testForNameOnNonDeltaName("notADeltaTable") + } + } + } + + test("forName - with temp view name") { + withTempDir { dir => + withTempView("viewOnDeltaTable") { + testData.write.format("delta").save(dir.getAbsolutePath) + spark.read.format("delta").load(dir.getAbsolutePath) + .createOrReplaceTempView("viewOnDeltaTable") + testForNameOnNonDeltaName("viewOnDeltaTable") + } + } + } + + test("forName - with delta.`path`") { + // for name should work on Delta table paths + withTempDir { dir => + testData.write.format("delta").save(dir.getAbsolutePath) + checkAnswer( + DeltaTable.forName(spark, s"delta.`$dir`").toDF, + testData.collect().toSeq) + checkAnswer( + DeltaTable.forName(s"delta.`$dir`").toDF, + testData.collect().toSeq) + } + + // using forName on non Delta Table paths should fail + withTempDir { dir => + testForNameOnNonDeltaName(s"delta.`$dir`") + + testData.write.format("parquet").mode("overwrite").save(dir.getAbsolutePath) + testForNameOnNonDeltaName(s"delta.`$dir`") + } + } + + test("as") { + withTempDir { dir => + testData.write.format("delta").save(dir.getAbsolutePath) + checkAnswer( + DeltaTable.forPath(dir.getAbsolutePath).as("tbl").toDF.select("tbl.value"), + testData.select("value").collect().toSeq) + } + } + + test("isDeltaTable - path - with _delta_log dir") { + withTempDir { dir => + testData.write.format("delta").save(dir.getAbsolutePath) + assert(DeltaTable.isDeltaTable(dir.getAbsolutePath)) + } + } + + test("isDeltaTable - path - with empty _delta_log dir") { + withTempDir { dir => + new File(dir, "_delta_log").mkdirs() + assert(!DeltaTable.isDeltaTable(dir.getAbsolutePath)) + } + } + + test("isDeltaTable - path - with no _delta_log dir") { + withTempDir { dir => + assert(!DeltaTable.isDeltaTable(dir.getAbsolutePath)) + } + } + + test("isDeltaTable - path - with non-existent dir") { + withTempDir { dir => + JavaUtils.deleteRecursively(dir) + assert(!DeltaTable.isDeltaTable(dir.getAbsolutePath)) + } + } + + test("isDeltaTable - with non-Delta table path") { + withTempDir { dir => + testData.write.format("parquet").mode("overwrite").save(dir.getAbsolutePath) + assert(!DeltaTable.isDeltaTable(dir.getAbsolutePath)) + } + } + + def testError(expectedMsg: String)(thunk: => Unit): Unit = { + val e = intercept[AnalysisException] { thunk } + assert(e.getMessage.toLowerCase(Locale.ROOT).contains(expectedMsg.toLowerCase(Locale.ROOT))) + } + + test("DeltaTable is Java Serializable but cannot be used in executors") { + import testImplicits._ + + // DeltaTable can be passed to executor without method calls. + withTempDir { dir => + testData.write.format("delta").mode("append").save(dir.getAbsolutePath) + val dt: DeltaTable = DeltaTable.forPath(dir.getAbsolutePath) + spark.range(5).as[Long].map{ row: Long => + val foo = dt + row + 3 + }.count() + } + + // DeltaTable can be passed to executor but method call causes exception. 
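+ // (DeltaTable operations need the driver-side SparkSession, which is not available on
+ // executors, so calling a method such as toDF inside the mapper below is expected to throw.)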
+ val e = intercept[SparkException] { + withTempDir { dir => + testData.write.format("delta").mode("append").save(dir.getAbsolutePath) + val dt: DeltaTable = DeltaTable.forPath(dir.getAbsolutePath) + spark.range(5).as[Long].map{ row: Long => + dt.toDF + row + 3 + }.count() + } + }.getMessage + assert(e.contains("DeltaTable cannot be used in executors")) + } +} + +class DeltaTableHadoopOptionsSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + + import testImplicits._ + + protected override def sparkConf = + super.sparkConf.set("spark.delta.logStore.fake.impl", classOf[LocalLogStore].getName) + + /** + * Create Hadoop file system options for `FakeFileSystem`. If Delta doesn't pick up them, + * it won't be able to read/write any files using `fake://`. + */ + private def fakeFileSystemOptions: Map[String, String] = { + Map( + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true" + ) + } + + /** Create a fake file system path to test from the dir path. */ + private def fakeFileSystemPath(dir: File): String = s"fake://${dir.getCanonicalPath}" + + private def readDeltaTableByPath(path: String): DataFrame = { + spark.read.options(fakeFileSystemOptions).format("delta").load(path) + } + + // Ensure any new API from [[DeltaTable]] has to verify it can work with custom file system + // options. + private val publicMethods = + scala.reflect.runtime.universe.typeTag[io.delta.tables.DeltaTable].tpe.decls + .filter(_.isPublic) + .map(_.name.toString).toSet + + private val ignoreMethods = Seq() + + private val testedMethods = Seq( + "addFeatureSupport", + "as", + "alias", + "delete", + "detail", + "generate", + "history", + "merge", + "optimize", + "restoreToVersion", + "restoreToTimestamp", + "toDF", + "update", + "updateExpr", + "upgradeTableProtocol", + "vacuum" + ) + + val untestedMethods = publicMethods -- ignoreMethods -- testedMethods + assert( + untestedMethods.isEmpty, + s"Found new methods added to DeltaTable: $untestedMethods. " + + "Please make sure you add a new test to verify it works with file system " + + "options in this file, and update the `testedMethods` list. 
" + + "If this new method doesn't need to support file system options, " + + "you can add it to the `ignoredMethods` list") + + test("forPath: as/alias/toDF with filesystem options.") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + testData.write.options(fsOptions).format("delta").save(path) + + checkAnswer( + DeltaTable.forPath(spark, path, fsOptions).as("tbl").toDF.select("tbl.value"), + testData.select("value").collect().toSeq) + + checkAnswer( + DeltaTable.forPath(spark, path, fsOptions).alias("tbl").toDF.select("tbl.value"), + testData.select("value").collect().toSeq) + } + } + + test("forPath with unsupported options") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + testData.write.options(fsOptions).format("delta").save(path) + + val finalOptions = fsOptions + ("otherKey" -> "otherVal") + assertThrows[DeltaIllegalArgumentException] { + io.delta.tables.DeltaTable.forPath(spark, path, finalOptions) + } + } + } + + test("forPath error out without filesystem options passed in.") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + testData.write.options(fsOptions).format("delta").save(path) + + val e = intercept[UnsupportedFileSystemException] { + io.delta.tables.DeltaTable.forPath(spark, path) + }.getMessage + + assert(e.contains("""No FileSystem for scheme "fake"""")) + } + } + + test("forPath - with filesystem options") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + testData.write.options(fsOptions).format("delta").save(path) + + val deltaTable = + io.delta.tables.DeltaTable.forPath(spark, path, fsOptions) + + val testDataSeq = testData.collect().toSeq + + // verify table can be read + checkAnswer(deltaTable.toDF, testDataSeq) + + // verify java friendly API. 
+ import scala.collection.JavaConverters._ + val deltaTable2 = io.delta.tables.DeltaTable.forPath( + spark, path, new java.util.HashMap[String, String](fsOptions.asJava)) + checkAnswer(deltaTable2.toDF, testDataSeq) + } + } + + test("updateExpr - with filesystem options") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + val df = Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value") + df.write.options(fsOptions).format("delta").save(path) + + val table = io.delta.tables.DeltaTable.forPath(spark, path, fsOptions) + + table.updateExpr(Map("key" -> "100")) + + checkAnswer(readDeltaTableByPath(path), + Row(100, 10) :: Row(100, 20) :: Row(100, 30) :: Row(100, 40) :: Nil) + } + } + + test("update - with filesystem options") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val df = Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value") + df.write.options(fakeFileSystemOptions).format("delta").save(path) + + val table = io.delta.tables.DeltaTable.forPath(spark, path, fakeFileSystemOptions) + + table.update(Map("key" -> functions.expr("100"))) + + checkAnswer(readDeltaTableByPath(path), + Row(100, 10) :: Row(100, 20) :: Row(100, 30) :: Row(100, 40) :: Nil) + } + } + + test("delete - with filesystem options") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val df = Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value") + df.write.options(fakeFileSystemOptions).format("delta").save(path) + + val table = io.delta.tables.DeltaTable.forPath(spark, path, fakeFileSystemOptions) + + table.delete(functions.expr("key = 1 or key = 2")) + + checkAnswer(readDeltaTableByPath(path), Row(3, 30) :: Row(4, 40) :: Nil) + } + } + + test("merge - with filesystem options") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val target = Seq((1, 10), (2, 20)).toDF("key1", "value1") + target.write.options(fakeFileSystemOptions).format("delta").save(path) + val source = Seq((1, 100), (3, 30)).toDF("key2", "value2") + + val table = io.delta.tables.DeltaTable.forPath(spark, path, fakeFileSystemOptions) + + table.merge(source, "key1 = key2") + .whenMatched().updateExpr(Map("key1" -> "key2", "value1" -> "value2")) + .whenNotMatched().insertExpr(Map("key1" -> "key2", "value1" -> "value2")) + .execute() + + checkAnswer(readDeltaTableByPath(path), Row(1, 100) :: Row(2, 20) :: Row(3, 30) :: Nil) + } + } + + test("vacuum - with filesystem options") { + // Note: verify that [DeltaTableUtils.findDeltaTableRoot] works when either + // DELTA_FORMAT_CHECK_CACHE_ENABLED is on or off. + Seq("true", "false").foreach{ deltaFormatCheckEnabled => + withSQLConf( + "spark.databricks.delta.formatCheck.cache.enabled" -> deltaFormatCheckEnabled) { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + testData.write.options(fakeFileSystemOptions).format("delta").save(path) + val table = io.delta.tables.DeltaTable.forPath(spark, path, fakeFileSystemOptions) + + // create a uncommitted file. + val notCommittedFile = "notCommittedFile.json" + val file = new File(dir, notCommittedFile) + FileUtils.write(file, "gibberish") + // set to ancient time so that the file is eligible to be vacuumed. 
+ file.setLastModified(0) + assert(file.exists()) + + table.vacuum() + + val file2 = new File(dir, notCommittedFile) + assert(!file2.exists()) + } + } + } + } + + + test("optimize - with filesystem options") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + + Seq(1, 2, 3).toDF().write.options(fsOptions).format("delta").save(path) + Seq(4, 5, 6) + .toDF().write.options(fsOptions).format("delta").mode("append").save(path) + + val origData: DataFrame = spark.read.options(fsOptions).format("delta").load(path) + + val deltaLog = DeltaLog.forTable(spark, new Path(path), fsOptions) + val table = io.delta.tables.DeltaTable.forPath(spark, path, fsOptions) + val versionBeforeOptimize = deltaLog.snapshot.version + + table.optimize().executeCompaction() + deltaLog.update() + assert(deltaLog.snapshot.version == versionBeforeOptimize + 1) + checkDatasetUnorderly(origData.as[Int], 1, 2, 3, 4, 5, 6) + } + } + + test("history - with filesystem options") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + + Seq(1, 2, 3).toDF().write.options(fsOptions).format("delta").save(path) + + val table = io.delta.tables.DeltaTable.forPath(spark, path, fsOptions) + table.history().collect() + } + } + + test("generate - with filesystem options") { + withSQLConf("spark.databricks.delta.symlinkFormatManifest.fileSystemCheck.enabled" -> "false") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + + Seq(1, 2, 3).toDF().write.options(fsOptions).format("delta").save(path) + + val table = io.delta.tables.DeltaTable.forPath(spark, path, fsOptions) + table.generate("symlink_format_manifest") + } + } + } + + test("restoreTable - with filesystem options") { + withSQLConf("spark.databricks.service.checkSerialization" -> "false") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + + val df1 = Seq(1, 2, 3).toDF("id") + val df2 = Seq(4, 5).toDF("id") + val df3 = Seq(6, 7).toDF("id") + + // version 0. + df1.write.format("delta").options(fsOptions).save(path) + val deltaLog = DeltaLog.forTable(spark, new Path(path), fsOptions) + assert(deltaLog.snapshot.version == 0) + + // version 1. + df2.write.format("delta").options(fsOptions).mode("append").save(path) + deltaLog.update() + assert(deltaLog.snapshot.version == 1) + + // version 2. + df3.write.format("delta").options(fsOptions).mode("append").save(path) + deltaLog.update() + assert(deltaLog.snapshot.version == 2) + + checkAnswer( + spark.read.format("delta").options(fsOptions).load(path), + df1.union(df2).union(df3)) + + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, path, fsOptions) + deltaTable.restoreToVersion(1) + + checkAnswer( + spark.read.format("delta").options(fsOptions).load(path), + df1.union(df2) + ) + + // set the time to first file with a early time and verify the delta table can be restored + // to it. 
+ val desiredTime = "1996-01-12" + val format = new java.text.SimpleDateFormat("yyyy-MM-dd") + val time = format.parse(desiredTime).getTime + + val logPath = new Path(dir.getCanonicalPath, "_delta_log") + val file = new File(FileNames.deltaFile(logPath, 0).toString) + assert(file.setLastModified(time)) + + val deltaTable2 = io.delta.tables.DeltaTable.forPath(spark, path, fsOptions) + deltaTable2.restoreToTimestamp(desiredTime) + + checkAnswer( + spark.read.format("delta").options(fsOptions).load(path), + df1 + ) + } + } + } + + test("upgradeTableProtocol - with filesystem options.") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + + // create a table with a default Protocol. + val testSchema = spark.range(1).schema + val log = DeltaLog.forTable(spark, new Path(path), fsOptions) + log.ensureLogDirectoryExist() + log.store.write( + FileNames.deltaFile(log.logPath, 0), + Iterator(Metadata(schemaString = testSchema.json).json, Protocol(0, 0).json), + overwrite = false, + log.newDeltaHadoopConf()) + log.update() + + // update the protocol. + val table = DeltaTable.forPath(spark, path, fsOptions) + table.upgradeTableProtocol(1, 2) + + val expectedProtocol = Protocol(1, 2) + assert(log.snapshot.protocol === expectedProtocol) + } + } + + test( + "addFeatureSupport - with filesystem options.") { + withTempDir { dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + + // create a table with a default Protocol. + val testSchema = spark.range(1).schema + val log = DeltaLog.forTable(spark, new Path(path), fsOptions) + log.ensureLogDirectoryExist() + log.store.write( + FileNames.deltaFile(log.logPath, 0), + Iterator(Metadata(schemaString = testSchema.json).json, Protocol(1, 2).json), + overwrite = false, + log.newDeltaHadoopConf()) + log.update() + + // update the protocol to support a writer feature. + val table = DeltaTable.forPath(spark, path, fsOptions) + table.addFeatureSupport(TestWriterFeature.name) + assert(log.update().protocol === Protocol(1, 7) + .merge(Protocol(1, 2)).withFeature(TestWriterFeature)) + table.addFeatureSupport(TestReaderWriterFeature.name) + assert( + log.update().protocol === Protocol(3, 7) + .merge(Protocol(1, 2)) + .withFeatures(Seq(TestWriterFeature, TestReaderWriterFeature))) + + // update the protocol again with invalid feature name. + assert(intercept[DeltaTableFeatureException] { + table.addFeatureSupport("__invalid_feature__") + }.getErrorClass === "DELTA_UNSUPPORTED_FEATURES_IN_CONFIG") + } + } + + test("details - with filesystem options.") { + withTempDir{ dir => + val path = fakeFileSystemPath(dir) + val fsOptions = fakeFileSystemOptions + Seq(1, 2, 3).toDF().write.format("delta").options(fsOptions).save(path) + + val deltaTable = DeltaTable.forPath(spark, path, fsOptions) + checkAnswer( + deltaTable.detail().select("format"), + Seq(Row("delta")) + ) + } + } +} diff --git a/spark/src/test/scala/io/delta/tables/DeltaTableTestUtils.scala b/spark/src/test/scala/io/delta/tables/DeltaTableTestUtils.scala new file mode 100644 index 00000000000..7169df35d92 --- /dev/null +++ b/spark/src/test/scala/io/delta/tables/DeltaTableTestUtils.scala @@ -0,0 +1,30 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.tables + +import org.apache.spark.sql.delta.DeltaLog +import org.apache.spark.sql.delta.catalog.DeltaTableV2 + +import org.apache.spark.sql.DataFrame + +object DeltaTableTestUtils { + + /** A utility method to access the private constructor of [[DeltaTable]] in tests. */ + def createTable(df: DataFrame, deltaLog: DeltaLog): DeltaTable = { + new DeltaTable(df, DeltaTableV2(df.sparkSession, deltaLog.dataPath)) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala new file mode 100644 index 00000000000..6bc37aa01dc --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala @@ -0,0 +1,621 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.util.UUID + +import org.apache.spark.sql.delta.DeltaOperations.ManualUpdate +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils.{TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION} +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.Utils + +// scalastyle:off: removeFile +class ActionSerializerSuite extends QueryTest with SharedSparkSession with DeltaSQLCommandTest { + + roundTripCompare("Add", + AddFile("test", Map.empty, 1, 1, dataChange = true)) + roundTripCompare("Add with partitions", + AddFile("test", Map("a" -> "1"), 1, 1, dataChange = true)) + roundTripCompare("Add with stats", + AddFile("test", Map.empty, 1, 1, dataChange = true, stats = "stats")) + roundTripCompare("Add with tags", + AddFile("test", Map.empty, 1, 1, dataChange = true, tags = Map("a" -> "1"))) + roundTripCompare("Add with empty tags", + AddFile("test", Map.empty, 1, 1, dataChange = true, tags = Map.empty)) + + roundTripCompare("Remove", + RemoveFile("test", Some(2))) + + test("AddFile tags") { + val action1 = + AddFile( + path = "a", + partitionValues = Map.empty, + size = 1, + modificationTime = 2, + dataChange = false, + stats = null, + tags = Map("key1" -> "val1", "key2" -> "val2")) + val json1 = + """{ + | "add": { + | "path": "a", + | "partitionValues": {}, + | "size": 1, + | "modificationTime": 2, + | "dataChange": false, + | "tags": { + | "key1": "val1", + | "key2": "val2" + | } + | } + |}""".stripMargin + assert(action1 === Action.fromJson(json1)) + assert(action1.json === json1.replaceAll("\\s", "")) + + val json2 = + """{ + | "add": { + | "path": "a", + | "partitionValues": {}, + | "size": 1, + | "modificationTime": 2, + | "dataChange": false, + | "tags": {} + | } + |}""".stripMargin + val action2 = + AddFile( + path = "a", + partitionValues = Map.empty, + size = 1, + modificationTime = 2, + dataChange = false, + stats = null, + tags = Map.empty) + assert(action2 === Action.fromJson(json2)) + assert(action2.json === json2.replaceAll("\\s", "")) + } + + // This is the same test as "removefile" in OSS, but due to a Jackson library upgrade the behavior + // has diverged between Spark 3.1 and Spark 3.2. + // We don't believe this is a practical issue because all extant versions of Delta explicitly + // write the dataChange field. 
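+ // For reference, the behavior pinned down below (taken from the assertions that follow): a
+ // remove action serialized without an explicit dataChange field, e.g.
+ //   {"remove":{"path":"a","deletionTimestamp":5}}
+ // deserializes with dataChange = true, the default declared on RemoveFile.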
+ test("remove file deserialization") { + val removeJson = RemoveFile("a", Some(2L)).json + assert(removeJson.contains(""""deletionTimestamp":2""")) + assert(!removeJson.contains("""delTimestamp""")) + val json1 = """{"remove":{"path":"a","deletionTimestamp":2,"dataChange":true}}""" + val json2 = """{"remove":{"path":"a","dataChange":false}}""" + val json4 = """{"remove":{"path":"a","deletionTimestamp":5}}""" + assert(Action.fromJson(json1) === RemoveFile("a", Some(2L), dataChange = true)) + assert(Action.fromJson(json2) === RemoveFile("a", None, dataChange = false)) + assert(Action.fromJson(json4) === RemoveFile("a", Some(5L), dataChange = true)) + } + + roundTripCompare("SetTransaction", + SetTransaction("a", 1, Some(1234L))) + + roundTripCompare("SetTransaction without lastUpdated", + SetTransaction("a", 1, None)) + + roundTripCompare("MetaData", + Metadata( + "id", + "table", + "testing", + Format("parquet", Map.empty), + new StructType().json, + Seq("a"))) + + test("extra fields") { + // TODO reading from checkpoint + Action.fromJson("""{"txn": {"test": 1}}""") + } + + test("deserialization of CommitInfo without tags") { + val expectedCommitInfo = CommitInfo( + time = 123L, + operation = "CONVERT", + operationParameters = Map.empty, + commandContext = Map.empty, + readVersion = Some(23), + isolationLevel = Some("SnapshotIsolation"), + isBlindAppend = Some(true), + operationMetrics = Some(Map("m1" -> "v1", "m2" -> "v2")), + userMetadata = Some("123"), + tags = None, + txnId = None).copy(engineInfo = None) + + // json of commit info actions without tag or engineInfo field + val json1 = + """{"commitInfo":{"timestamp":123,"operation":"CONVERT",""" + + """"operationParameters":{},"readVersion":23,""" + + """"isolationLevel":"SnapshotIsolation","isBlindAppend":true,""" + + """"operationMetrics":{"m1":"v1","m2":"v2"},"userMetadata":"123"}}""".stripMargin + assert(Action.fromJson(json1) === expectedCommitInfo) + } + + testActionSerDe( + "Protocol - json serialization/deserialization", + Protocol(minReaderVersion = 1, minWriterVersion = 2), + expectedJson = """{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}""") + + testActionSerDe( + "Protocol - json serialization/deserialization with writer features", + Protocol(minReaderVersion = 1, minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(AppendOnlyTableFeature), + expectedJson = """{"protocol":{"minReaderVersion":1,""" + + s""""minWriterVersion":$TABLE_FEATURES_MIN_WRITER_VERSION,""" + + """"writerFeatures":["appendOnly"]}}""") + + testActionSerDe( + "Protocol - json serialization/deserialization with reader and writer features", + Protocol( + minReaderVersion = TABLE_FEATURES_MIN_READER_VERSION, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyReaderWriterFeature), + expectedJson = + s"""{"protocol":{"minReaderVersion":$TABLE_FEATURES_MIN_READER_VERSION,""" + + s""""minWriterVersion":$TABLE_FEATURES_MIN_WRITER_VERSION,""" + + """"readerFeatures":["testLegacyReaderWriter"],""" + + """"writerFeatures":["testLegacyReaderWriter"]}}""") + + testActionSerDe( + "Protocol - json serialization/deserialization with empty reader and writer features", + Protocol( + minReaderVersion = TABLE_FEATURES_MIN_READER_VERSION, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION), + expectedJson = + s"""{"protocol":{"minReaderVersion":$TABLE_FEATURES_MIN_READER_VERSION,""" + + s""""minWriterVersion":$TABLE_FEATURES_MIN_WRITER_VERSION,""" + + """"readerFeatures":[],"writerFeatures":[]}}""") + + 
testActionSerDe( + "SetTransaction (lastUpdated is None) - json serialization/deserialization", + SetTransaction(appId = "app-1", version = 2L, lastUpdated = None), + expectedJson = """{"txn":{"appId":"app-1","version":2}}""".stripMargin) + + testActionSerDe( + "SetTransaction (lastUpdated is not None) - json serialization/deserialization", + SetTransaction(appId = "app-2", version = 3L, lastUpdated = Some(4L)), + expectedJson = """{"txn":{"appId":"app-2","version":3,"lastUpdated":4}}""".stripMargin) + + testActionSerDe( + "AddFile (without tags) - json serialization/deserialization", + AddFile("x=2/f1", partitionValues = Map("x" -> "2"), + size = 10, modificationTime = 1, dataChange = true, stats = "{\"numRecords\": 2}"), + expectedJson = """{"add":{"path":"x=2/f1","partitionValues":{"x":"2"},"size":10,""" + + """"modificationTime":1,"dataChange":true,"stats":"{\"numRecords\": 2}"}}""".stripMargin) + + testActionSerDe( + "AddFile (with tags) - json serialization/deserialization", + AddFile("part=p1/f1", partitionValues = Map("x" -> "2"), size = 10, modificationTime = 1, + dataChange = true, stats = "{\"numRecords\": 2}", tags = Map("TAG1" -> "23")), + expectedJson = """{"add":{"path":"part=p1/f1","partitionValues":{"x":"2"},"size":10""" + + ""","modificationTime":1,"dataChange":true,"stats":"{\"numRecords\": 2}",""" + + """"tags":{"TAG1":"23"}}}""" + ) + + testActionSerDe( + "AddFile (with clusteringProvider) - json serialization/deserialization", + AddFile( + "clusteredFile.part", + partitionValues = Map.empty[String, String], + size = 10, + modificationTime = 1, + dataChange = true, + stats = "{\"numRecords\": 2}", + tags = Map("TAG1" -> "23"), + clusteringProvider = Some("liquid")), + expectedJson = + """{"add":{"path":"clusteredFile.part","partitionValues":{},"size":10""" + + ""","modificationTime":1,"dataChange":true,"stats":"{\"numRecords\": 2}",""" + + """"tags":{"TAG1":"23"}""" + + ""","clusteringProvider":"liquid"}}""") + + testActionSerDe( + "RemoveFile (without tags) - json serialization/deserialization", + AddFile("part=p1/f1", partitionValues = Map("x" -> "2"), size = 10, modificationTime = 1, + dataChange = true, stats = "{\"numRecords\": 2}").removeWithTimestamp(timestamp = 11), + expectedJson = """{"remove":{"path":"part=p1/f1","deletionTimestamp":11,"dataChange":true,""" + + """"extendedFileMetadata":true,"partitionValues":{"x":"2"},"size":10,""" + + """"stats":"{\"numRecords\": 2}"}}""") + + testActionSerDe( + "RemoveFile (without tags and stats) - json serialization/deserialization", + AddFile("part=p1/f1", partitionValues = Map("x" -> "2"), size = 10, modificationTime = 1, + dataChange = true, stats = "{\"numRecords\": 2}") + .removeWithTimestamp(timestamp = 11) + .copy(stats = null), + expectedJson = """{"remove":{"path":"part=p1/f1","deletionTimestamp":11,"dataChange":true,""" + + """"extendedFileMetadata":true,"partitionValues":{"x":"2"},"size":10}}""") + + private def deletionVectorWithRelativePath: DeletionVectorDescriptor = + DeletionVectorDescriptor.onDiskWithRelativePath( + id = UUID.randomUUID(), + randomPrefix = "a1", + sizeInBytes = 10, + cardinality = 2, + offset = Some(10)) + + private def deletionVectorWithAbsolutePath: DeletionVectorDescriptor = + DeletionVectorDescriptor.onDiskWithAbsolutePath( + path = "/test.dv", + sizeInBytes = 10, + cardinality = 2, + offset = Some(10)) + + private def deletionVectorInline: DeletionVectorDescriptor = + DeletionVectorDescriptor.inlineInLog(Array(1, 2, 3, 4), 1) + + roundTripCompare("Add with deletion vector - 
relative path", + AddFile( + path = "test", + partitionValues = Map.empty, + size = 1, + modificationTime = 1, + dataChange = true, + tags = Map.empty, + deletionVector = deletionVectorWithRelativePath)) + roundTripCompare("Add with deletion vector - absolute path", + AddFile( + path = "test", + partitionValues = Map.empty, + size = 1, + modificationTime = 1, + dataChange = true, + tags = Map.empty, + deletionVector = deletionVectorWithAbsolutePath)) + roundTripCompare("Add with deletion vector - inline", + AddFile( + path = "test", + partitionValues = Map.empty, + size = 1, + modificationTime = 1, + dataChange = true, + tags = Map.empty, + deletionVector = deletionVectorInline)) + + roundTripCompare("Remove with deletion vector - relative path", + RemoveFile( + path = "test", + deletionTimestamp = Some(1L), + extendedFileMetadata = Some(true), + partitionValues = Map.empty, + dataChange = true, + size = Some(1L), + tags = Map.empty, + deletionVector = deletionVectorWithRelativePath)) + roundTripCompare("Remove with deletion vector - absolute path", + RemoveFile( + path = "test", + deletionTimestamp = Some(1L), + extendedFileMetadata = Some(true), + partitionValues = Map.empty, + dataChange = true, + size = Some(1L), + tags = Map.empty, + deletionVector = deletionVectorWithAbsolutePath)) + roundTripCompare("Remove with deletion vector - inline", + RemoveFile( + path = "test", + deletionTimestamp = Some(1L), + extendedFileMetadata = Some(true), + partitionValues = Map.empty, + dataChange = true, + size = Some(1L), + tags = Map.empty, + deletionVector = deletionVectorInline)) + + // These make sure we don't accidentally serialise something we didn't mean to. + testActionSerDe( + name = "AddFile (with deletion vector) - json serialization/deserialization", + action = AddFile( + path = "test", + partitionValues = Map.empty, + size = 1, + modificationTime = 1, + dataChange = true, + stats = """{"numRecords":3}""", + tags = Map.empty, + deletionVector = deletionVectorWithAbsolutePath), + expectedJson = + """ + |{"add":{ + |"path":"test", + |"partitionValues":{}, + |"size":1, + |"modificationTime":1, + |"dataChange":true, + |"stats":"{\"numRecords\":3}", + |"tags":{}, + |"deletionVector":{ + |"storageType":"p", + |"pathOrInlineDv":"/test.dv", + |"offset":10, + |"sizeInBytes":10, + |"cardinality":2}} + |}""".stripMargin.replaceAll("\n", ""), + extraSettings = Seq( + // Skip the table property check, so this write doesn't fail. 
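+ // (deletion vectors are normally only allowed on tables that enable them through their table
+ // properties, so skipping the commit check lets this bare AddFile-with-DV action be committed)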
+ DeltaSQLConf.DELETION_VECTORS_COMMIT_CHECK_ENABLED.key -> "false") + ) + + test("DomainMetadata action - json serialization/deserialization") { + val table = "testTable" + withTable(table) { + sql( + s""" + | CREATE TABLE $table(id int) USING delta + | tblproperties + | ('${TableFeatureProtocolUtils.propertyKey(DomainMetadataTableFeature)}' = 'enabled') + |""".stripMargin) + val deltaTable = DeltaTableV2(spark, TableIdentifier(table)) + val deltaLog = deltaTable.deltaLog + val domainMetadatas = DomainMetadata( + domain = "testDomain", + configuration = JsonUtils.toJson(Map("key1" -> "value1")), + removed = false) :: Nil + val version = deltaTable.startTransactionWithInitialSnapshot() + .commit(domainMetadatas, ManualUpdate) + val committedActions = deltaLog.store.read( + FileNames.deltaFile(deltaLog.logPath, version), + deltaLog.newDeltaHadoopConf()) + assert(committedActions.size == 2) + val serializedJson = committedActions.last + val expectedJson = + """ + |{"domainMetadata":{ + |"domain":"testDomain", + |"configuration": + |"{\"key1\":\"value1\"}", + |"removed":false} + |}""".stripMargin.replaceAll("\n", "") + assert(serializedJson === expectedJson) + val asObject = Action.fromJson(serializedJson) + assert(domainMetadatas.head === asObject) + } + } + + test("CheckpointMetadata - serialize/deserialize") { + val m1 = CheckpointMetadata(version = 1, tags = null) // tags are null + val m2 = m1.copy(tags = Map()) // tags are empty + val m3 = m1.copy( // tags are non empty + tags = Map("k1" -> "v1", "schema" -> """{"type":"struct","fields":[]}""") + ) + + assert(m1.json === """{"checkpointMetadata":{"version":1}}""") + assert(m2.json === """{"checkpointMetadata":{"version":1,"tags":{}}}""") + assert(m3.json === + """{"checkpointMetadata":{"version":1,""" + + """"tags":{"k1":"v1","schema":"{\"type\":\"struct\",\"fields\":[]}"}}}""") + + Seq(m1, m2, m3).foreach { metadata => + assert(metadata === JsonUtils.fromJson[SingleAction](metadata.json).unwrap) + } + } + + test("SidecarFile - serialize/deserialize") { + val f1 = // tags are null + SidecarFile(path = "/t1/p1", sizeInBytes = 1L, modificationTime = 3, tags = null) + val f2 = f1.copy(tags = Map()) // tags are empty + val f3 = f2.copy( // tags are non empty + tags = Map("k1" -> "v1", "schema" -> """{"type":"struct","fields":[]}""") + ) + + assert(f1.json === + """{"sidecar":{"path":"/t1/p1","sizeInBytes":1,"modificationTime":3}}""") + assert(f2.json === + """{"sidecar":{"path":"/t1/p1","sizeInBytes":1,""" + + """"modificationTime":3,"tags":{}}}""") + assert(f3.json === + """{"sidecar":{"path":"/t1/p1","sizeInBytes":1,"modificationTime":3,""" + + """"tags":{"k1":"v1","schema":"{\"type\":\"struct\",\"fields\":[]}"}}}""".stripMargin) + + Seq(f1, f2, f3).foreach { file => + assert(file === JsonUtils.fromJson[SingleAction](file.json).unwrap) + } + } + + testActionSerDe( + "AddCDCFile (without tags) - json serialization/deserialization", + AddCDCFile("part=p1/f1", partitionValues = Map("x" -> "2"), size = 10), + expectedJson = """{"cdc":{"path":"part=p1/f1","partitionValues":{"x":"2"},""" + + """"size":10,"dataChange":false}}""".stripMargin) + + testActionSerDe( + "AddCDCFile (with tags) - json serialization/deserialization", + AddCDCFile("part=p2/f1", partitionValues = Map("x" -> "2"), + size = 11, tags = Map("key1" -> "value1")), + expectedJson = """{"cdc":{"path":"part=p2/f1","partitionValues":{"x":"2"},""" + + """"size":11,"tags":{"key1":"value1"},"dataChange":false}}""".stripMargin) + + testActionSerDe( + "AddCDCFile (without null value in 
partitionValues) - json serialization/deserialization", + AddCDCFile("part=p1/f1", partitionValues = Map("x" -> null), size = 10), + expectedJson = """{"cdc":{"path":"part=p1/f1","partitionValues":{"x":null},""" + + """"size":10,"dataChange":false}}""".stripMargin) + + { + // We want this metadata to be lazy so it is instantiated after `SparkFunSuite::beforeAll`. + // This will ensure that `Utils.isTesting` returns true and that its id is set to 'testId'. + lazy val metadata = Metadata(createdTime = Some(2222)) + testActionSerDe( + "Metadata (with all defaults) - json serialization/deserialization", + metadata, + expectedJson = """{"metaData":{"id":"testId","format":{"provider":"parquet",""" + + """"options":{}},"partitionColumns":[],"configuration":{},"createdTime":2222}}""") + } + + { + val schemaStr = new StructType().add("a", "long").json + // We want this metadata to be lazy so it is instantiated after `SparkFunSuite::beforeAll`. + // This will ensure that `Utils.isTesting` returns true and that its id is set to 'testId'. + lazy val metadata = Metadata( + name = "t1", + description = "desc", + format = Format(provider = "parquet", options = Map("o1" -> "v1")), + partitionColumns = Seq("a"), + createdTime = Some(2222), + configuration = Map("delta.enableXyz" -> "true"), + schemaString = schemaStr) + testActionSerDe( + "Metadata - json serialization/deserialization", metadata, + expectedJson = """{"metaData":{"id":"testId","name":"t1","description":"desc",""" + + """"format":{"provider":"parquet","options":{"o1":"v1"}},""" + + s""""schemaString":${JsonUtils.toJson(schemaStr)},"partitionColumns":["a"],""" + + """"configuration":{"delta.enableXyz":"true"},"createdTime":2222}}""".stripMargin) + testActionSerDe( + "Metadata with empty createdTime- json serialization/deserialization", + metadata.copy(createdTime = None), + expectedJson = """{"metaData":{"id":"testId","name":"t1","description":"desc",""" + + """"format":{"provider":"parquet","options":{"o1":"v1"}},""" + + s""""schemaString":${JsonUtils.toJson(schemaStr)},"partitionColumns":["a"],""" + + """"configuration":{"delta.enableXyz":"true"}}}""".stripMargin) + } + + { + // Test for CommitInfo + val commitInfo = CommitInfo( + time = 123L, + operation = "CONVERT", + operationParameters = Map.empty, + commandContext = Map("clusterId" -> "23"), + readVersion = Some(23), + isolationLevel = Some("SnapshotIsolation"), + isBlindAppend = Some(true), + operationMetrics = Some(Map("m1" -> "v1", "m2" -> "v2")), + userMetadata = Some("123"), + tags = Some(Map("k1" -> "v1")), + txnId = Some("123") + ).copy(engineInfo = None) + + testActionSerDe( + "CommitInfo (without operationParameters) - json serialization/deserialization", + commitInfo, + expectedJson = """{"commitInfo":{"timestamp":123,"operation":"CONVERT",""" + + """"operationParameters":{},"clusterId":"23","readVersion":23,""" + + """"isolationLevel":"SnapshotIsolation","isBlindAppend":true,""" + + """"operationMetrics":{"m1":"v1","m2":"v2"},"userMetadata":"123",""" + + """"tags":{"k1":"v1"},"txnId":"123"}}""".stripMargin) + + test("CommitInfo (with operationParameters) - json serialization/deserialization") { + val operation = DeltaOperations.Convert( + numFiles = 23L, + partitionBy = Seq("a", "b"), + collectStats = false, + catalogTable = Some("t1"), + sourceFormat = Some("parquet")) + val commitInfo1 = commitInfo.copy(operationParameters = operation.jsonEncodedValues) + val expectedCommitInfoJson1 = // TODO JSON ordering differs between 2.12 and 2.13 + if 
(scala.util.Properties.versionNumberString.startsWith("2.13")) { + """{"commitInfo":{"timestamp":123,"operation":"CONVERT","operationParameters"""" + + """:{"catalogTable":"t1","numFiles":23,"partitionedBy":"[\"a\",\"b\"]",""" + + """"sourceFormat":"parquet","collectStats":false},"clusterId":"23","readVersion"""" + + """:23,"isolationLevel":"SnapshotIsolation","isBlindAppend":true,""" + + """"operationMetrics":{"m1":"v1","m2":"v2"},""" + + """"userMetadata":"123","tags":{"k1":"v1"},"txnId":"123"}}""" + } else { + """{"commitInfo":{"timestamp":123,"operation":"CONVERT","operationParameters"""" + + """:{"catalogTable":"t1","numFiles":23,"partitionedBy":"[\"a\",\"b\"]",""" + + """"sourceFormat":"parquet","collectStats":false},"clusterId":"23","readVersion""" + + """":23,"isolationLevel":"SnapshotIsolation","isBlindAppend":true,""" + + """"operationMetrics":{"m1":"v1","m2":"v2"},""" + + """"userMetadata":"123","tags":{"k1":"v1"},"txnId":"123"}}""" + } + assert(commitInfo1.json == expectedCommitInfoJson1) + val newCommitInfo1 = Action.fromJson(expectedCommitInfoJson1).asInstanceOf[CommitInfo] + // TODO: operationParameters serialization/deserialization is broken as it uses a custom + // serializer but a default deserializer and needs to be fixed. + assert(newCommitInfo1.copy(operationParameters = Map.empty) == + commitInfo.copy(operationParameters = Map.empty)) + } + + testActionSerDe( + "CommitInfo (with engineInfo) - json serialization/deserialization", + commitInfo.copy(engineInfo = Some("Apache-Spark/3.1.1 Delta-Lake/10.1.0")), + expectedJson = """{"commitInfo":{"timestamp":123,"operation":"CONVERT",""" + + """"operationParameters":{},"clusterId":"23","readVersion":23,""" + + """"isolationLevel":"SnapshotIsolation","isBlindAppend":true,""" + + """"operationMetrics":{"m1":"v1","m2":"v2"},"userMetadata":"123",""" + + """"tags":{"k1":"v1"},"engineInfo":"Apache-Spark/3.1.1 Delta-Lake/10.1.0",""" + + """"txnId":"123"}}""".stripMargin) + } + + private def roundTripCompare(name: String, actions: Action*) = { + test(name) { + val asJson = actions.map(_.json) + val asObjects = asJson.map(Action.fromJson) + + assert(actions === asObjects) + } + } + + /** Test serialization/deserialization of [[Action]] by doing an actual commit */ + private def testActionSerDe( + name: String, + action: => Action, + expectedJson: String, + extraSettings: Seq[(String, String)] = Seq.empty, + testTags: Seq[org.scalatest.Tag] = Seq.empty): Unit = { + import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + test(name, testTags: _*) { + withTempDir { tempDir => + val deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getAbsolutePath)) + // Disable different delta validations so that the passed action can be committed in + // all cases. + val settings = Seq( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1", + DeltaSQLConf.DELTA_COMMIT_VALIDATION_ENABLED.key -> "false") ++ extraSettings + withSQLConf(settings: _*) { + + // Do one empty commit so that protocol gets committed. + val protocol = Protocol( + minReaderVersion = spark.conf.get(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION), + minWriterVersion = spark.conf.get(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION)) + deltaLog.startTransaction().commitManually(protocol, Metadata()) + + // Commit the actual action. 
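+ // (a commit also records a commitInfo action, so the log file is expected to contain two
+ // entries: commitInfo first, then the action under test, hence `committedActions.last` below)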
+ val version = deltaLog.startTransaction().commit(Seq(action), ManualUpdate) + // Read the commit file and get the serialized committed actions + val committedActions = deltaLog.store.read( + FileNames.deltaFile(deltaLog.logPath, version), + deltaLog.newDeltaHadoopConf()) + + assert(committedActions.size == 2) + val serializedJson = committedActions.last + assert(serializedJson === expectedJson) + val asObject = Action.fromJson(serializedJson) + assert(action === asObject) + } + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/AutoCompactSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/AutoCompactSuite.scala new file mode 100644 index 00000000000..01467883054 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/AutoCompactSuite.scala @@ -0,0 +1,332 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File + + +// scalastyle:off import.ordering.noEmptyLine +import com.databricks.spark.util.{Log4jUsageLogger, UsageRecord} +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.commands.optimize._ +import org.apache.spark.sql.delta.hooks.{AutoCompact, AutoCompactType} +import org.apache.spark.sql.delta.optimize.CompactionTestHelperForAutoCompaction +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.AutoCompactPartitionStats +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.JsonUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StringType +import org.apache.spark.unsafe.types.UTF8String + +trait AutoCompactTestUtils { + def captureOptimizeLogs(metrics: String)(f: => Unit): Seq[UsageRecord] = { + val usageLogs = Log4jUsageLogger.track(f) + usageLogs.filter { usageLog => + usageLog.tags.get("opType") == Some(metrics) + } + } + +} + + +/** + * This class extends the [[CompactionSuiteBase]] and runs all the [[CompactionSuiteBase]] tests + * with AutoCompaction. + * + * It also tests any AutoCompaction specific behavior. 
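+ * For example, it verifies that the delta.autoOptimize.autoCompact table property and the
+ * corresponding SQL conf enable or disable the auto compaction post-commit hook.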
+ */ +class AutoCompactSuite extends + CompactionTestHelperForAutoCompaction + with DeltaSQLCommandTest + with SharedSparkSession + with AutoCompactTestUtils { + + test("auto-compact-type: test table properties") { + withTempDir { tempDir => + val dir = tempDir.getCanonicalPath + spark.range(0, 1).write.format("delta").mode("append").save(dir) + val deltaLog = DeltaLog.forTable(spark, dir) + val defaultAutoCompactType = AutoCompact.getAutoCompactType(conf, deltaLog.snapshot.metadata) + Map( + "true" -> Some(AutoCompactType.Enabled), + "tRue" -> Some(AutoCompactType.Enabled), + "'true'" -> Some(AutoCompactType.Enabled), + "false" -> None, + "fALse" -> None, + "'false'" -> None + ).foreach { case (propertyValue, expectedAutoCompactType) => + setTableProperty(deltaLog, "delta.autoOptimize.autoCompact", propertyValue) + assert(AutoCompact.getAutoCompactType(conf, deltaLog.snapshot.metadata) == + expectedAutoCompactType) + } + } + } + + test("auto-compact-type: test confs") { + withTempDir { tempDir => + val dir = tempDir.getCanonicalPath + spark.range(0, 1).write.format("delta").mode("append").save(dir) + val deltaLog = DeltaLog.forTable(spark, dir) + val defaultAutoCompactType = AutoCompact.getAutoCompactType(conf, deltaLog.snapshot.metadata) + + Map( + "true" -> Some(AutoCompactType.Enabled), + "TrUE" -> Some(AutoCompactType.Enabled), + "false" -> None, + "FalsE" -> None + ).foreach { case (confValue, expectedAutoCompactType) => + withSQLConf(DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED.key -> confValue) { + assert(AutoCompact.getAutoCompactType(conf, deltaLog.snapshot.metadata) == + expectedAutoCompactType) + } + } + } + } + + private def testBothModesViaProperty(testName: String)(f: String => Unit): Unit = { + def runTest(autoCompactConfValue: String): Unit = { + withTempDir { dir => + withSQLConf( + "spark.databricks.delta.properties.defaults.autoOptimize.autoCompact" -> + s"$autoCompactConfValue", + DeltaSQLConf.DELTA_AUTO_COMPACT_MIN_NUM_FILES.key -> "0", + DeltaSQLConf.DELTA_AUTO_COMPACT_MODIFIED_PARTITIONS_ONLY_ENABLED.key -> "false") { + f(dir.getCanonicalPath) + } + } + } + + test(s"auto-compact-enabled-property: $testName") { runTest(autoCompactConfValue = "true") } + } + + private def testBothModesViaConf(testName: String)(f: String => Unit): Unit = { + def runTest(autoCompactConfValue: String): Unit = { + withTempDir { dir => + withSQLConf( + DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED.key -> s"$autoCompactConfValue", + DeltaSQLConf.DELTA_AUTO_COMPACT_MIN_NUM_FILES.key -> "0") { + f(dir.getCanonicalPath) + } + } + } + + test(s"auto-compact-enabled-conf: $testName") { runTest(autoCompactConfValue = "true") } + } + + private def checkAutoOptimizeLogging(f: => Unit): Boolean = { + val logs = Log4jUsageLogger.track { + f + } + logs.exists(_.opType.map(_.typeName) === Some("delta.commit.hooks.autoOptimize")) + } + + import testImplicits._ + + test("auto compact event log: inline AC") { + withTempDir { dir => + withSQLConf( + DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED.key -> s"true", + DeltaSQLConf.DELTA_AUTO_COMPACT_MIN_NUM_FILES.key -> "30") { + val path = dir.getCanonicalPath + // Append 1 file to each partition: record runOnModifiedPartitions event, as is first write + var usageLogs = captureOptimizeLogs(AutoCompact.OP_TYPE) { + createFilesToPartitions(numFilePartitions = 3, numFilesPerPartition = 1, path) + } + var log = JsonUtils.mapper.readValue[Map[String, String]](usageLogs.head.blob) + assert(log("status") == "runOnModifiedPartitions" && log("partitions") == "3") + // Append 10 more file 
to each partition: record skipInsufficientFilesInModifiedPartitions + // event. + usageLogs = captureOptimizeLogs(AutoCompact.OP_TYPE) { + createFilesToPartitions(numFilePartitions = 3, numFilesPerPartition = 10, path) + } + log = JsonUtils.mapper.readValue[Map[String, String]](usageLogs.head.blob) + assert(log("status") == "skipInsufficientFilesInModifiedPartitions") + // Append 20 more files to each partition: record runOnModifiedPartitions on all 3 + // partitions. + usageLogs = captureOptimizeLogs(AutoCompact.OP_TYPE) { + createFilesToPartitions(numFilePartitions = 3, numFilesPerPartition = 20, path) + } + log = JsonUtils.mapper.readValue[Map[String, String]](usageLogs.head.blob) + assert(log("status") == "runOnModifiedPartitions" && log("partitions") == "3") + // Append 30 more file to each partition and check OptimizeMetrics. + usageLogs = captureOptimizeLogs(metrics = s"${AutoCompact.OP_TYPE}.execute.metrics") { + createFilesToPartitions(numFilePartitions = 3, numFilesPerPartition = 30, path) + } + val metricsLog = JsonUtils.mapper.readValue[OptimizeMetrics](usageLogs.head.blob) + assert(metricsLog.numBytesSkippedToReduceWriteAmplification === 0) + assert(metricsLog.numFilesSkippedToReduceWriteAmplification === 0) + assert(metricsLog.totalConsideredFiles === 93) + assert(metricsLog.numFilesAdded == 3) + assert(metricsLog.numFilesRemoved == 93) + assert(metricsLog.numBatches === 3) + } + } + } + + private def checkAutoCompactionWorks(dir: String): Unit = { + spark.range(10).write.format("delta").mode("append").save(dir) + val deltaLog = DeltaLog.forTable(spark, dir) + val newSnapshot = deltaLog.update() + assert(newSnapshot.version === 1) // 0 is the first commit, 1 is optimize + assert(deltaLog.update().numOfFiles === 1) + + val isLogged = checkAutoOptimizeLogging { + spark.range(10).write.format("delta").mode("append").save(dir) + } + + assert(isLogged) + val lastEvent = deltaLog.history.getHistory(Some(1)).head + assert(lastEvent.operation === "OPTIMIZE") + assert(lastEvent.operationParameters("auto") === "true") + + assert(deltaLog.update().numOfFiles === 1, "Files should be optimized into a single one") + checkAnswer( + spark.range(10).union(spark.range(10)).toDF(), + spark.read.format("delta").load(dir) + ) + } + + testBothModesViaProperty("auto compact should kick in when enabled - table config") { dir => + checkAutoCompactionWorks(dir) + } + + testBothModesViaConf("auto compact should kick in when enabled - session config") { dir => + checkAutoCompactionWorks(dir) + } + + testBothModesViaProperty("auto compact should not kick in when session config is off") { dir => + withSQLConf(DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED.key -> "false") { + val isLogged = checkAutoOptimizeLogging { + spark.range(10).write.format("delta").mode("append").save(dir) + } + + val deltaLog = DeltaLog.forTable(spark, dir) + val newSnapshot = deltaLog.update() + assert(newSnapshot.version === 0) // 0 is the first commit + assert(deltaLog.update().numOfFiles > 1) + assert(!isLogged) + } + } + + test("auto compact should not kick in after optimize") { + withTempDir { tempDir => + val dir = tempDir.getCanonicalPath + spark.range(0, 12, 1, 4).write.format("delta").mode("append").save(dir) + val deltaLog = DeltaLog.forTable(spark, dir) + val newSnapshot = deltaLog.update() + assert(newSnapshot.version === 0) + assert(deltaLog.update().numOfFiles === 4) + spark.sql(s"ALTER TABLE delta.`${tempDir.getCanonicalPath}` SET TBLPROPERTIES " + + "(delta.autoOptimize.autoCompact = true)") + + val isLogged = 
checkAutoOptimizeLogging { + sql(s"optimize delta.`$dir`") + } + + assert(!isLogged) + val lastEvent = deltaLog.history.getHistory(Some(1)).head + assert(lastEvent.operation === "OPTIMIZE") + assert(lastEvent.operationParameters("auto") === "false") + } + } + + testBothModesViaProperty("auto compact should not kick in when there aren't " + + "enough files") { dir => + withSQLConf(DeltaSQLConf.DELTA_AUTO_COMPACT_MIN_NUM_FILES.key -> "5") { + AutoCompactPartitionStats.instance(spark).resetTestOnly() + spark.range(10).repartition(4).write.format("delta").mode("append").save(dir) + + val deltaLog = DeltaLog.forTable(spark, dir) + val newSnapshot = deltaLog.update() + assert(newSnapshot.version === 0) + assert(deltaLog.update().numOfFiles === 4) + + val isLogged2 = checkAutoOptimizeLogging { + spark.range(10).repartition(4).write.format("delta").mode("append").save(dir) + } + + assert(isLogged2) + val lastEvent = deltaLog.history.getHistory(Some(1)).head + assert(lastEvent.operation === "OPTIMIZE") + assert(lastEvent.operationParameters("auto") === "true") + + assert(deltaLog.update().numOfFiles === 1, "Files should be optimized into a single one") + + checkAnswer( + spark.read.format("delta").load(dir), + spark.range(10).union(spark.range(10)).toDF() + ) + } + } + + testBothModesViaProperty("ensure no NPE in auto compact UDF with null " + + "partition values") { dir => + Seq(null, "", " ").map(UTF8String.fromString).zipWithIndex.foreach { case (partValue, i) => + val path = new File(dir, i.toString).getCanonicalPath + val df1 = spark.range(5).withColumn("part", new Column(Literal(partValue, StringType))) + val df2 = spark.range(5, 10).withColumn("part", new Column(Literal("1"))) + val isLogged = checkAutoOptimizeLogging { + // repartition to increase number of files written + df1.union(df2).repartition(4) + .write.format("delta").partitionBy("part").mode("append").save(path) + } + val deltaLog = DeltaLog.forTable(spark, path) + val newSnapshot = deltaLog.update() + assert(newSnapshot.version === 1) // 0 is the first commit, 1 and 2 are optimizes + assert(newSnapshot.numOfFiles === 2) + + assert(isLogged) + val lastEvent = deltaLog.history.getHistory(Some(1)).head + assert(lastEvent.operation === "OPTIMIZE") + assert(lastEvent.operationParameters("auto") === "true") + } + } + + testBothModesViaProperty("check auto compact recorded metrics") { dir => + val logs = Log4jUsageLogger.track { + spark.range(30).repartition(3).write.format("delta").save(dir) + } + val metrics = JsonUtils.mapper.readValue[OptimizeMetrics](logs.filter( + _.tags.get("opType") == Some(s"${AutoCompact.OP_TYPE}.execute.metrics")).head.blob) + + assert(metrics.numFilesRemoved == 3) + assert(metrics.numFilesAdded == 1) + } + + private def setTableProperty(log: DeltaLog, key: String, value: String): Unit = { + spark.sql(s"ALTER TABLE delta.`${log.dataPath}` SET TBLPROPERTIES " + + s"($key = $value)") + } +} + +class AutoCompactIdColumnMappingSuite extends AutoCompactSuite + with DeltaColumnMappingEnableIdMode { + override def runAllTests: Boolean = true +} + +class AutoCompactNameColumnMappingSuite extends AutoCompactSuite + with DeltaColumnMappingEnableNameMode { + override def runAllTests: Boolean = true +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/BlockWritesLocalFileSystem.scala b/spark/src/test/scala/org/apache/spark/sql/delta/BlockWritesLocalFileSystem.scala new file mode 100644 index 00000000000..b0e1de30d10 --- /dev/null +++ 
b/spark/src/test/scala/org/apache/spark/sql/delta/BlockWritesLocalFileSystem.scala @@ -0,0 +1,90 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.net.URI +import java.util.concurrent.CountDownLatch + +import org.apache.spark.sql.delta.BlockWritesLocalFileSystem.scheme +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{DelegateToFileSystem, FSDataOutputStream, Path, RawLocalFileSystem} +import org.apache.hadoop.util.Progressable + +/** + * This custom fs implementation is used for testing the execution multiple batches of Optimize. + */ +class BlockWritesLocalFileSystem extends RawLocalFileSystem { + + private var uri: URI = _ + + override def getScheme: String = scheme + + override def initialize(name: URI, conf: Configuration): Unit = { + uri = URI.create(name.getScheme + ":///") + super.initialize(name, conf) + } + + override def getUri(): URI = if (uri == null) { + // RawLocalFileSystem's constructor will call this one before `initialize` is called. + // Just return the super's URI to avoid NPE. + super.getUri + } else { + uri + } + + override def create( + f: Path, + overwrite: Boolean, + bufferSize: Int, + replication: Short, + blockSize: Long, + progress: Progressable): FSDataOutputStream = { + // called when data files and log files are written + BlockWritesLocalFileSystem.blockLatch.countDown() + BlockWritesLocalFileSystem.blockLatch.await() + super.create(f, overwrite, bufferSize, replication, blockSize, progress) + } +} + +/** + * An AbstractFileSystem implementation wrapper around [[BlockWritesLocalFileSystem]]. + */ +class BlockWritesAbstractFileSystem(uri: URI, conf: Configuration) + extends DelegateToFileSystem( + uri, + new BlockWritesLocalFileSystem, + conf, + BlockWritesLocalFileSystem.scheme, + false) + +/** + * Singleton for BlockWritesLocalFileSystem used to initialize the file system countdown latch. + */ +object BlockWritesLocalFileSystem { + val scheme = "block" + + /** latch that blocks writes */ + private var blockLatch: CountDownLatch = _ + + /** + * @param numWrites - writing is blocked until there are `numWrites` concurrent writes to + * the file system. + */ + def blockUntilConcurrentWrites(numWrites: Integer): Unit = { + blockLatch = new CountDownLatch(numWrites) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/CheckCDCAnswer.scala b/spark/src/test/scala/org/apache/spark/sql/delta/CheckCDCAnswer.scala new file mode 100644 index 00000000000..77e8730d524 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/CheckCDCAnswer.scala @@ -0,0 +1,70 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.sql.Timestamp + +import org.apache.spark.sql.delta.commands.cdc.CDCReader.{CDC_COMMIT_TIMESTAMP, CDC_COMMIT_VERSION} + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} + +trait CheckCDCAnswer extends QueryTest { + /** + * Check the result of a CDC operation. The expected answer should include only CDC type and + * log version - the timestamp is nondeterministic, so we'll check just that it matches the + * correct value in the Delta log. + * + * @param log The Delta log for the table CDC is being extracted from. + * @param df The computed dataframe, which should match the default CDC result schema. + * Callers doing projections on top should use checkAnswer directly. + * @param expectedAnswer The expected results for the CDC query, excluding the CDC_LOG_TIMESTAMP + * column which we handle inside this method. + */ + def checkCDCAnswer(log: DeltaLog, df: => DataFrame, expectedAnswer: Seq[Row]): Unit = { + checkAnswer(df.drop(CDC_COMMIT_TIMESTAMP), expectedAnswer) + + val timestampsByVersion = df.select(CDC_COMMIT_VERSION, CDC_COMMIT_TIMESTAMP).collect() + .map { row => + val version = row.getLong(0) + val ts = row.getTimestamp(1) + (version -> ts) + }.toMap + val correctTimestampsByVersion = { + // Results should match the fully monotonized commits. Note that this map will include + // all versions of the table but only the ones in timestampsByVersion are checked for + // correctness. + val commits = DeltaHistoryManager.getCommits( + log.store, + log.logPath, + start = 0, + end = None, + log.newDeltaHadoopConf()) + + // Note that the timestamps come from filesystem modification timestamps, so they're + // milliseconds since epoch and we don't need to deal with timezones. + commits.map(f => (f.version -> new Timestamp(f.timestamp))).toMap + } + + timestampsByVersion.keySet.foreach { version => + assert(timestampsByVersion(version) === correctTimestampsByVersion(version)) + } + } + + def checkCDCAnswer(log: DeltaLog, df: => DataFrame, expectedAnswer: DataFrame): Unit = { + checkCDCAnswer(log, df, expectedAnswer.collect()) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/CheckpointInstanceSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/CheckpointInstanceSuite.scala new file mode 100644 index 00000000000..38a384e403c --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/CheckpointInstanceSuite.scala @@ -0,0 +1,55 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.CheckpointInstance.Format + +import org.apache.spark.SparkFunSuite + +class CheckpointInstanceSuite extends SparkFunSuite { + + test("checkpoint instance comparisons") { + val ci1_single_1 = CheckpointInstance(1, Format.SINGLE, numParts = None) + val ci1_withparts_2 = CheckpointInstance(1, Format.WITH_PARTS, numParts = Some(2)) + val ci1_sentinel = CheckpointInstance.sentinelValue(Some(1)) + + val ci2_single_1 = CheckpointInstance(2, Format.SINGLE, numParts = None) + val ci2_withparts_4 = CheckpointInstance(2, Format.WITH_PARTS, numParts = Some(4)) + val ci2_sentinel = CheckpointInstance.sentinelValue(Some(2)) + + val ci3_single_1 = CheckpointInstance(3, Format.SINGLE, numParts = None) + val ci3_withparts_2 = CheckpointInstance(3, Format.WITH_PARTS, numParts = Some(2)) + + assert(ci1_single_1 < ci2_single_1) // version takes priority + assert(ci1_single_1 < ci1_withparts_2) // parts takes priority when versions are same + assert(ci2_withparts_4 < ci3_withparts_2) // version takes priority over parts + + // all checkpoint instances for version 1/2 are less than sentinel value for version 2. + Seq(ci1_single_1, ci1_withparts_2, ci1_sentinel, ci2_single_1, ci2_withparts_4) + .foreach(ci => assert(ci < ci2_sentinel)) + + // all checkpoint instances for version 3 are greater than sentinel value for version 2. + Seq(ci3_single_1, ci3_withparts_2).foreach(ci => assert(ci > ci2_sentinel)) + + // Everything is less than CheckpointInstance.MaxValue + Seq( + ci1_single_1, ci1_withparts_2, ci1_sentinel, + ci2_single_1, ci2_withparts_4, ci2_sentinel, + ci3_single_1, ci3_withparts_2 + ).foreach(ci => assert(ci < CheckpointInstance.MaxValue)) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/CheckpointProviderSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/CheckpointProviderSuite.scala new file mode 100644 index 00000000000..a2859169ecb --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/CheckpointProviderSuite.scala @@ -0,0 +1,86 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.actions.{Action} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.util.FileNames._ + +import org.apache.spark.sql.test.SharedSparkSession + +class CheckpointProviderSuite + extends SharedSparkSession + with DeltaSQLCommandTest { + + for (v2CheckpointFormat <- Seq("json", "parquet")) + test(s"V2 Checkpoint compat file equivalency to normal V2 Checkpoint" + + s" [v2CheckpointFormat: $v2CheckpointFormat]") { + withSQLConf( + DeltaConfigs.CHECKPOINT_POLICY.defaultTablePropertyKey -> CheckpointPolicy.V2.name, + DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key -> v2CheckpointFormat + ) { + withTempDir { tempDir => + spark.range(10).write.format("delta").save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + spark.range(10).write.mode("append").format("delta").save(tempDir.getAbsolutePath) + + deltaLog.checkpoint() // Checkpoint 1 + val snapshot = deltaLog.update() + + deltaLog.createSinglePartCheckpointForBackwardCompat( + snapshot, new deltaLog.V2CompatCheckpointMetrics) // Compatibility Checkpoint 1 + + val fs = deltaLog.logPath.getFileSystem(deltaLog.newDeltaHadoopConf()) + val v2CompatCheckpoint = fs.getFileStatus( + checkpointFileSingular(deltaLog.logPath, snapshot.checkpointProvider.version)) + + val origCheckpoint = snapshot.checkpointProvider + .asInstanceOf[LazyCompleteCheckpointProvider] + .underlyingCheckpointProvider + .asInstanceOf[V2CheckpointProvider] + val compatCheckpoint = CheckpointProvider( + spark, + deltaLog.snapshot, + None, + UninitializedV2CheckpointProvider( + 2L, + v2CompatCheckpoint, + deltaLog.logPath, + deltaLog.newDeltaHadoopConf(), + deltaLog.options, + deltaLog.store, + None)) + .asInstanceOf[LazyCompleteCheckpointProvider] + .underlyingCheckpointProvider + .asInstanceOf[V2CheckpointProvider] + + // Check whether these checkpoints are equivalent after being loaded + assert(compatCheckpoint.sidecarFiles.toSet === origCheckpoint.sidecarFiles.toSet) + assert(compatCheckpoint.checkpointMetadata === origCheckpoint.checkpointMetadata) + + val compatDf = + deltaLog.loadIndex(compatCheckpoint.topLevelFileIndex.get, Action.logSchema) + // Check whether the manifest content is same or not + val originalDf = + deltaLog.loadIndex(origCheckpoint.topLevelFileIndex.get, Action.logSchema) + assert(originalDf.sort().collect() === compatDf.sort().collect()) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/CheckpointsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/CheckpointsSuite.scala new file mode 100644 index 00000000000..5e5df69ae2d --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/CheckpointsSuite.scala @@ -0,0 +1,986 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.net.URI + +import scala.concurrent.duration._ + +// scalastyle:off import.ordering.noEmptyLine +import com.databricks.spark.util.{Log4jUsageLogger, MetricDefinitions, UsageRecord} +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.deletionvectors.DeletionVectorsSuite +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.storage.LocalLogStore +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FSDataOutputStream, Path, RawLocalFileSystem} +import org.apache.hadoop.fs.permission.FsPermission +import org.apache.hadoop.util.Progressable + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType + +class CheckpointsSuite + extends QueryTest + with SharedSparkSession + with DeltaCheckpointTestUtils + with DeltaSQLCommandTest { + + def testDifferentV2Checkpoints(testName: String)(f: => Unit): Unit = { + for (checkpointFormat <- Seq(V2Checkpoint.Format.JSON.name, V2Checkpoint.Format.PARQUET.name)) { + test(s"$testName [v2CheckpointFormat: $checkpointFormat]") { + withSQLConf( + DeltaConfigs.CHECKPOINT_POLICY.defaultTablePropertyKey -> CheckpointPolicy.V2.name, + DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key -> checkpointFormat + ) { + f + } + } + } + } + + /** Get V2 [[CheckpointProvider]] from the underlying deltalog snapshot */ + def getV2CheckpointProvider( + deltaLog: DeltaLog, + update: Boolean = true): V2CheckpointProvider = { + val snapshot = if (update) deltaLog.update() else deltaLog.unsafeVolatileSnapshot + snapshot.checkpointProvider match { + case v2CheckpointProvider: V2CheckpointProvider => + v2CheckpointProvider + case provider : LazyCompleteCheckpointProvider + if provider.underlyingCheckpointProvider.isInstanceOf[V2CheckpointProvider] => + provider.underlyingCheckpointProvider.asInstanceOf[V2CheckpointProvider] + case EmptyCheckpointProvider => + throw new IllegalStateException("underlying snapshot doesn't have a checkpoint") + case other => + throw new IllegalStateException(s"The underlying checkpoint is not a v2 checkpoint. " + + s"It is: ${other.getClass.getName}") + } + } + + protected override def sparkConf = { + // Set the gs LogStore impl to `LocalLogStore` so that it will work with `FakeGCSFileSystem`. + // The default one is `HDFSLogStore` which requires a `FileContext` but we don't have one. 
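+ // `FakeGCSFileSystem` (defined at the bottom of this file) extends RawLocalFileSystem and + // asserts that any `.json` or `.checkpoint` file is written from a thread whose name + // contains "delta-gcs-".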
+ super.sparkConf.set("spark.delta.logStore.gs.impl", classOf[LocalLogStore].getName) + } + + test("checkpoint metadata - checkpoint schema above the configured threshold are not" + + " written to LAST_CHECKPOINT") { + withTempDir { tempDir => + spark.range(10).write.format("delta").save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + deltaLog.checkpoint() + val lastCheckpointOpt = deltaLog.readLastCheckpointFile() + assert(lastCheckpointOpt.nonEmpty) + assert(lastCheckpointOpt.get.checkpointSchema.nonEmpty) + val expectedCheckpointSchema = + Seq("txn", "add", "remove", "metaData", "protocol", "domainMetadata") + assert(lastCheckpointOpt.get.checkpointSchema.get.fieldNames.toSeq === + expectedCheckpointSchema) + + spark.range(10).write.mode("append").format("delta").save(tempDir.getAbsolutePath) + withSQLConf(DeltaSQLConf.CHECKPOINT_SCHEMA_WRITE_THRESHOLD_LENGTH.key-> "10") { + deltaLog.checkpoint() + val lastCheckpointOpt = deltaLog.readLastCheckpointFile() + assert(lastCheckpointOpt.nonEmpty) + assert(lastCheckpointOpt.get.checkpointSchema.isEmpty) + } + } + } + + testDifferentV2Checkpoints("checkpoint metadata - checkpoint schema not persisted in" + + " json v2 checkpoints but persisted in parquet v2 checkpoints") { + withTempDir { tempDir => + spark.range(10).write.format("delta").save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + deltaLog.checkpoint() + val lastCheckpointOpt = deltaLog.readLastCheckpointFile() + assert(lastCheckpointOpt.nonEmpty) + val expectedFormat = + spark.conf.getOption(DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key) + assert(lastCheckpointOpt.get.checkpointSchema.isEmpty === + (expectedFormat.contains(V2Checkpoint.Format.JSON.name))) + } + } + + testDifferentCheckpoints("test empty checkpoints") { (checkpointPolicy, _) => + val tableName = "test_empty_table" + withTable(tableName) { + sql(s"CREATE TABLE `$tableName` (a INT) USING DELTA") + sql(s"ALTER TABLE `$tableName` SET TBLPROPERTIES('comment' = 'A table comment')") + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tableName)) + deltaLog.checkpoint() + def validateSnapshot(snapshot: Snapshot): Unit = { + assert(!snapshot.checkpointProvider.isEmpty) + assert(snapshot.checkpointProvider.version === 1) + val checkpointFile = snapshot.checkpointProvider.topLevelFiles.head.getPath + val fileActions = getCheckpointDfForFilesContainingFileActions(deltaLog, checkpointFile) + assert(fileActions.where("add is not null or remove is not null").collect().size === 0) + if (checkpointPolicy == CheckpointPolicy.V2) { + val v2CheckpointProvider = snapshot.checkpointProvider match { + case lazyCompleteCheckpointProvider: LazyCompleteCheckpointProvider => + lazyCompleteCheckpointProvider.underlyingCheckpointProvider + .asInstanceOf[V2CheckpointProvider] + case cp: V2CheckpointProvider => cp + case _ => throw new IllegalStateException("Unexpected checkpoint provider") + } + assert(v2CheckpointProvider.sidecarFiles.size === 1) + val sidecar = v2CheckpointProvider.sidecarFiles.head.toFileStatus(deltaLog.logPath) + assert(spark.read.parquet(sidecar.getPath.toString).count() === 0) + } + } + validateSnapshot(deltaLog.update()) + DeltaLog.clearCache() + validateSnapshot(DeltaLog.forTable(spark, TableIdentifier(tableName)).unsafeVolatileSnapshot) + } + } + + testDifferentV2Checkpoints(s"V2 Checkpoint write test" + + s" - metadata, protocol, sidecar, checkpoint metadata actions") { + withTempDir { tempDir => + 
spark.range(10).write.format("delta").save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + deltaLog.checkpoint() + val checkpointFiles = deltaLog.listFrom(0).filter(FileNames.isCheckpointFile).toList + assert(checkpointFiles.length == 1) + val checkpoint = checkpointFiles.head + val fileNameParts = checkpoint.getPath.getName.split("\\.") + // The file name should be .checkpoint..parquet. + assert(fileNameParts.length == 4) + fileNameParts match { + case Array(version, checkpointLiteral, _, format) => + val expectedFormat = + spark.conf.getOption(DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key).get + assert(format == expectedFormat) + assert(version.toLong == 0) + assert(checkpointLiteral == "checkpoint") + } + + def getCheckpointFileActions(checkpoint: FileStatus) : Seq[Action] = { + if (checkpoint.getPath.toString.endsWith("json")) { + deltaLog.store.read(checkpoint.getPath).map(Action.fromJson) + } else { + val fileIndex = + DeltaLogFileIndex(DeltaLogFileIndex.CHECKPOINT_FILE_FORMAT_PARQUET, Seq(checkpoint)).get + deltaLog.loadIndex(fileIndex, Action.logSchema) + .as[SingleAction].collect().map(_.unwrap).toSeq + } + } + val actions = getCheckpointFileActions(checkpoint) + // V2 Checkpoints should contain exactly one action each of types + // Metadata, CheckpointMetadata, and Protocol + // In this particular case, we should only have one sidecar file + val sidecarActions = actions.collect{ case s: SidecarFile => s} + assert(sidecarActions.length == 1) + val sidecarPath = sidecarActions.head.path + assert(sidecarPath.endsWith("parquet")) + + val metadataActions = actions.collect { case m: Metadata => m } + assert(metadataActions.length == 1) + + val checkpointMetadataActions = actions.collect { case cm: CheckpointMetadata => cm } + assert(checkpointMetadataActions.length == 1) + + assert( + DeltaConfigs.CHECKPOINT_POLICY.fromMetaData(metadataActions.head) + .needsV2CheckpointSupport + ) + + val protocolActions = actions.collect { case p: Protocol => p } + assert(protocolActions.length == 1) + assert(CheckpointProvider.isV2CheckpointEnabled(protocolActions.head)) + } + } + + test("SC-86940: isGCSPath") { + val conf = new Configuration() + assert(Checkpoints.isGCSPath(conf, new Path("gs://foo/bar"))) + // Scheme is case insensitive + assert(Checkpoints.isGCSPath(conf, new Path("Gs://foo/bar"))) + assert(Checkpoints.isGCSPath(conf, new Path("GS://foo/bar"))) + assert(Checkpoints.isGCSPath(conf, new Path("gS://foo/bar"))) + assert(!Checkpoints.isGCSPath(conf, new Path("non-gs://foo/bar"))) + assert(!Checkpoints.isGCSPath(conf, new Path("/foo"))) + // Set the default file system and verify we can detect it + conf.set("fs.defaultFS", "gs://foo/") + conf.set("fs.gs.impl", classOf[FakeGCSFileSystem].getName) + conf.set("fs.gs.impl.disable.cache", "true") + assert(Checkpoints.isGCSPath(conf, new Path("/foo"))) + } + + test("SC-86940: writing a GCS checkpoint should happen in a new thread") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + spark.range(1).write.format("delta").save(path) + + // Use `FakeGCSFileSystem` which will verify we write in a separate gcs thread. 
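+ // The checkpoint written through the gs:// path below must be produced from a + // "delta-gcs-" named thread; `FakeGCSFileSystem.assertGCSThread` fails the test otherwise.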
+ withSQLConf( + "fs.gs.impl" -> classOf[FakeGCSFileSystem].getName, + "fs.gs.impl.disable.cache" -> "true") { + DeltaLog.clearCache() + val gsPath = new Path(s"gs://${tempDir.getCanonicalPath}") + val deltaLog = DeltaLog.forTable(spark, gsPath) + deltaLog.checkpoint() + } + } + } + + private def verifyCheckpoint( + checkpoint: Option[LastCheckpointInfo], + version: Int, + parts: Option[Int]): Unit = { + assert(checkpoint.isDefined) + checkpoint.foreach { lastCheckpointInfo => + assert(lastCheckpointInfo.version == version) + assert(lastCheckpointInfo.parts == parts) + } + } + + test("multipart checkpoints") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + + withSQLConf( + DeltaSQLConf.DELTA_CHECKPOINT_PART_SIZE.key -> "10", + DeltaConfigs.CHECKPOINT_INTERVAL.defaultTablePropertyKey -> "1") { + // 1 file actions + spark.range(1).repartition(1).write.format("delta").save(path) + val deltaLog = DeltaLog.forTable(spark, path) + + // 2 file actions, 1 new file + spark.range(1).repartition(1).write.format("delta").mode("append").save(path) + + verifyCheckpoint(deltaLog.readLastCheckpointFile(), 1, None) + + val checkpointPath = + FileNames.checkpointFileSingular(deltaLog.logPath, deltaLog.snapshot.version).toUri + assert(new File(checkpointPath).exists()) + + // 11 total file actions, 9 new files + spark.range(30).repartition(9).write.format("delta").mode("append").save(path) + verifyCheckpoint(deltaLog.readLastCheckpointFile(), 2, Some(2)) + + var checkpointPaths = + FileNames.checkpointFileWithParts(deltaLog.logPath, deltaLog.snapshot.version, 2) + checkpointPaths.foreach(p => assert(new File(p.toUri).exists())) + + // 20 total actions, 9 new files + spark.range(100).repartition(9).write.format("delta").mode("append").save(path) + verifyCheckpoint(deltaLog.readLastCheckpointFile(), 3, Some(2)) + + assert(deltaLog.snapshot.version == 3) + checkpointPaths = + FileNames.checkpointFileWithParts(deltaLog.logPath, deltaLog.snapshot.version, 2) + checkpointPaths.foreach(p => assert(new File(p.toUri).exists())) + + // 31 total actions, 11 new files + spark.range(100).repartition(11).write.format("delta").mode("append").save(path) + verifyCheckpoint(deltaLog.readLastCheckpointFile(), 4, Some(4)) + + assert(deltaLog.snapshot.version == 4) + checkpointPaths = + FileNames.checkpointFileWithParts(deltaLog.logPath, deltaLog.snapshot.version, 4) + checkpointPaths.foreach(p => assert(new File(p.toUri).exists())) + } + + // Increase max actions + withSQLConf(DeltaSQLConf.DELTA_CHECKPOINT_PART_SIZE.key -> "100") { + val deltaLog = DeltaLog.forTable(spark, path) + // 100 total actions, 69 new files + spark.range(1000).repartition(69).write.format("delta").mode("append").save(path) + verifyCheckpoint(deltaLog.readLastCheckpointFile(), 5, None) + val checkpointPath = + FileNames.checkpointFileSingular(deltaLog.logPath, deltaLog.snapshot.version).toUri + assert(new File(checkpointPath).exists()) + + // 101 total actions, 1 new file + spark.range(1).repartition(1).write.format("delta").mode("append").save(path) + verifyCheckpoint(deltaLog.readLastCheckpointFile(), 6, Some(2)) + var checkpointPaths = + FileNames.checkpointFileWithParts(deltaLog.logPath, deltaLog.snapshot.version, 2) + checkpointPaths.foreach(p => assert(new File(p.toUri).exists())) + } + } + } + + testDifferentV2Checkpoints("multipart v2 checkpoint") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + + withSQLConf( + DeltaSQLConf.DELTA_CHECKPOINT_PART_SIZE.key -> "10", + 
DeltaConfigs.CHECKPOINT_POLICY.defaultTablePropertyKey -> CheckpointPolicy.V2.name, + DeltaConfigs.CHECKPOINT_INTERVAL.defaultTablePropertyKey -> "1") { + // 1 file actions + spark.range(1).repartition(1).write.format("delta").save(path) + val deltaLog = DeltaLog.forTable(spark, path) + + def getNumFilesInSidecarDirectory(): Int = { + val fs = deltaLog.sidecarDirPath.getFileSystem(deltaLog.newDeltaHadoopConf()) + fs.listStatus(deltaLog.sidecarDirPath).size + } + + // 2 file actions, 1 new file + spark.range(1).repartition(1).write.format("delta").mode("append").save(path) + assert(getV2CheckpointProvider(deltaLog).version == 1) + assert(getV2CheckpointProvider(deltaLog).sidecarFileStatuses.size == 1) + assert(getNumFilesInSidecarDirectory() == 1) + + // 11 total file actions, 9 new files + spark.range(30).repartition(9).write.format("delta").mode("append").save(path) + assert(getV2CheckpointProvider(deltaLog).version == 2) + assert(getV2CheckpointProvider(deltaLog).sidecarFileStatuses.size == 2) + assert(getNumFilesInSidecarDirectory() == 3) + + // 20 total actions, 9 new files + spark.range(100).repartition(9).write.format("delta").mode("append").save(path) + assert(getV2CheckpointProvider(deltaLog).version == 3) + assert(getV2CheckpointProvider(deltaLog).sidecarFileStatuses.size == 2) + assert(getNumFilesInSidecarDirectory() == 5) + + // 31 total actions, 11 new files + spark.range(100).repartition(11).write.format("delta").mode("append").save(path) + assert(getV2CheckpointProvider(deltaLog).version == 4) + assert(getV2CheckpointProvider(deltaLog).sidecarFileStatuses.size == 4) + assert(getNumFilesInSidecarDirectory() == 9) + + // Increase max actions + withSQLConf(DeltaSQLConf.DELTA_CHECKPOINT_PART_SIZE.key -> "100") { + // 100 total actions, 69 new files + spark.range(1000).repartition(69).write.format("delta").mode("append").save(path) + assert(getV2CheckpointProvider(deltaLog).version == 5) + assert(getV2CheckpointProvider(deltaLog).sidecarFileStatuses.size == 1) + assert(getNumFilesInSidecarDirectory() == 10) + + // 101 total actions, 1 new file + spark.range(1).repartition(1).write.format("delta").mode("append").save(path) + assert(getV2CheckpointProvider(deltaLog).version == 6) + assert(getV2CheckpointProvider(deltaLog).sidecarFileStatuses.size == 2) + assert(getNumFilesInSidecarDirectory() == 12) + } + } + } + } + + test("checkpoint does not contain CDC field") { + withSQLConf( + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true" + ) { + withTempDir { tempDir => + withTempView("src") { + spark.range(10).write.format("delta").save(tempDir.getAbsolutePath) + spark.range(5, 15).createOrReplaceTempView("src") + sql( + s""" + |MERGE INTO delta.`$tempDir` t USING src s ON t.id = s.id + |WHEN MATCHED THEN DELETE + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + checkAnswer( + spark.read.format("delta").load(tempDir.getAbsolutePath), + Seq(0, 1, 2, 3, 4, 10, 11, 12, 13, 14).map { i => Row(i) }) + + // CDC should exist in the log as seen through getChanges, but it shouldn't be in the + // snapshots and the checkpoint file shouldn't have a CDC column. 
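+ // (The MERGE above emits AddCDCFile actions because the change data feed table property + // is enabled for this test.)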
+ val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + assert(deltaLog.getChanges(1).next()._2.exists(_.isInstanceOf[AddCDCFile])) + assert(deltaLog.snapshot.stateDS.collect().forall { sa => sa.cdc == null }) + deltaLog.checkpoint() + val checkpointFile = FileNames.checkpointFileSingular(deltaLog.logPath, 1) + val checkpointSchema = spark.read.format("parquet").load(checkpointFile.toString).schema + val expectedCheckpointSchema = + Seq( + "txn", + "add", + "remove", + "metaData", + "protocol", + "domainMetadata") + assert(checkpointSchema.fieldNames.toSeq == expectedCheckpointSchema) + } + } + } + } + + testDifferentV2Checkpoints("v2 checkpoint contains only addfile and removefile and" + + " remove file does not contain remove.tags and remove.numRecords") { + withSQLConf( + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true" + ) { + val expectedCheckpointSchema = Seq("add", "remove") + val expectedRemoveFileSchema = Seq( + "path", + "deletionTimestamp", + "dataChange", + "extendedFileMetadata", + "partitionValues", + "size", + "deletionVector", + "baseRowId", + "defaultRowCommitVersion") + withTempDir { tempDir => + withTempView("src") { + val tablePath = tempDir.getAbsolutePath + // Append rows [0, 9] to table and merge tablePath. + spark.range(end = 10).write.format("delta").mode("overwrite").save(tablePath) + spark.range(5, 15).createOrReplaceTempView("src") + sql( + s""" + |MERGE INTO delta.`$tempDir` t USING src s ON t.id = s.id + |WHEN MATCHED THEN DELETE + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + checkAnswer( + spark.read.format("delta").load(tempDir.getAbsolutePath), + Seq(0, 1, 2, 3, 4, 10, 11, 12, 13, 14).map { i => Row(i) }) + + // CDC should exist in the log as seen through getChanges, but it shouldn't be in the + // snapshots and the checkpoint file shouldn't have a CDC column. + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + assert(deltaLog.getChanges(1).next()._2.exists(_.isInstanceOf[AddCDCFile])) + assert(deltaLog.snapshot.stateDS.collect().forall { sa => sa.cdc == null }) + deltaLog.checkpoint() + var sidecarCheckpointFiles = getV2CheckpointProvider(deltaLog).sidecarFileStatuses + assert(sidecarCheckpointFiles.size == 1) + var sidecarFile = sidecarCheckpointFiles.head.getPath.toString + var checkpointSchema = spark.read.format("parquet").load(sidecarFile).schema + var removeSchemaName = + checkpointSchema("remove").dataType.asInstanceOf[StructType].fieldNames + assert(checkpointSchema.fieldNames.toSeq == expectedCheckpointSchema) + assert(removeSchemaName.toSeq === expectedRemoveFileSchema) + + // Append rows [0, 9] to table and merge one more time. 
+ spark.range(end = 10).write.format("delta").mode("append").save(tablePath) + sql( + s""" + |MERGE INTO delta.`$tempDir` t USING src s ON t.id = s.id + |WHEN MATCHED THEN DELETE + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + deltaLog.checkpoint() + sidecarCheckpointFiles = getV2CheckpointProvider(deltaLog).sidecarFileStatuses + sidecarFile = sidecarCheckpointFiles.head.getPath.toString + checkpointSchema = spark.read.format(source = "parquet").load(sidecarFile).schema + removeSchemaName = checkpointSchema("remove").dataType.asInstanceOf[StructType].fieldNames + assert(removeSchemaName.toSeq === expectedRemoveFileSchema) + checkAnswer( + spark.sql(s"select * from delta.`$tablePath`"), + Seq(0, 0, 1, 1, 2, 2, 3, 3, 4, 4).map { i => Row(i) }) + } + } + } + } + + test("checkpoint does not contain remove.tags and remove.numRecords") { + withTempDir { tempDir => + val expectedRemoveFileSchema = Seq( + "path", + "deletionTimestamp", + "dataChange", + "extendedFileMetadata", + "partitionValues", + "size", + "deletionVector", + "baseRowId", + "defaultRowCommitVersion") + + val tablePath = tempDir.getAbsolutePath + // Append rows [0, 9] to table and merge tablePath. + spark.range(end = 10).write.format("delta").mode("overwrite").save(tablePath) + spark.range(5, 15).createOrReplaceTempView("src") + sql( + s""" + |MERGE INTO delta.`$tempDir` t USING src s ON t.id = s.id + |WHEN MATCHED THEN DELETE + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + val deltaLog = DeltaLog.forTable(spark, tablePath) + deltaLog.checkpoint() + var checkpointFile = FileNames.checkpointFileSingular(deltaLog.logPath, 1).toString + var checkpointSchema = spark.read.format(source = "parquet").load(checkpointFile).schema + var removeSchemaName = checkpointSchema("remove").dataType.asInstanceOf[StructType].fieldNames + assert(removeSchemaName.toSeq === expectedRemoveFileSchema) + checkAnswer( + spark.sql(s"select * from delta.`$tablePath`"), + Seq(0, 1, 2, 3, 4, 10, 11, 12, 13, 14).map { i => Row(i) }) + // Append rows [0, 9] to table and merge one more time. 
+ spark.range(end = 10).write.format("delta").mode("append").save(tablePath) + sql( + s""" + |MERGE INTO delta.`$tempDir` t USING src s ON t.id = s.id + |WHEN MATCHED THEN DELETE + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + deltaLog.checkpoint() + checkpointFile = FileNames.checkpointFileSingular(deltaLog.logPath, 1).toString + checkpointSchema = spark.read.format(source = "parquet").load(checkpointFile).schema + removeSchemaName = checkpointSchema("remove").dataType.asInstanceOf[StructType].fieldNames + assert(removeSchemaName.toSeq === expectedRemoveFileSchema) + checkAnswer( + spark.sql(s"select * from delta.`$tablePath`"), + Seq(0, 0, 1, 1, 2, 2, 3, 3, 4, 4).map { i => Row(i) }) + } + } + + test("checkpoint with DVs") { + for (v2Checkpoint <- Seq(true, false)) + withTempDir { tempDir => + val source = new File(DeletionVectorsSuite.table1Path) // this table has DVs in two versions + val target = new File(tempDir, "insertTest") + + // Copy the source2 DV table to a temporary directory, so that we do updates to it + FileUtils.copyDirectory(source, target) + + if (v2Checkpoint) { + spark.sql(s"ALTER TABLE delta.`${target.getAbsolutePath}` SET TBLPROPERTIES " + + s"('${DeltaConfigs.CHECKPOINT_POLICY.key}' = 'v2')") + } + + sql(s"ALTER TABLE delta.`${target.getAbsolutePath}` " + + s"SET TBLPROPERTIES (${DeltaConfigs.CHECKPOINT_INTERVAL.key} = 10)") + def insertData(data: String): Unit = { + spark.sql(s"INSERT INTO TABLE delta.`${target.getAbsolutePath}` $data") + } + val newData = Seq.range(3000, 3010) + newData.foreach { i => insertData(s"VALUES($i)") } + + // Check the target file has checkpoint generated + val deltaLog = DeltaLog.forTable(spark, target.getAbsolutePath) + verifyCheckpoint(deltaLog.readLastCheckpointFile(), version = 10, parts = None) + + // Delete the commit files 0-9, so that we are forced to read the checkpoint file + val logPath = new Path(new File(target, "_delta_log").getAbsolutePath) + for (i <- 0 to 10) { + val file = new File(FileNames.deltaFile(logPath, version = i).toString) + file.delete() + } + + // Make sure the contents are the same + import testImplicits._ + checkAnswer( + spark.sql(s"SELECT * FROM delta.`${target.getAbsolutePath}`"), + (DeletionVectorsSuite.expectedTable1DataV4 ++ newData).toSeq.toDF()) + } + } + + + + testDifferentV2Checkpoints(s"V2 Checkpoint compat file equivalency to normal V2 Checkpoint") { + withTempDir { tempDir => + spark.range(10).write.format("delta").save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + spark.range(10, 20).write.mode("append").format("delta").save(tempDir.getAbsolutePath) + + deltaLog.checkpoint() // Checkpoint 1 + val normalCheckpointSnapshot = deltaLog.update() + + deltaLog.createSinglePartCheckpointForBackwardCompat( // Compatibility Checkpoint 1 + normalCheckpointSnapshot, new deltaLog.V2CompatCheckpointMetrics) + + val allFiles = normalCheckpointSnapshot.allFiles.collect().sortBy(_.path).toList + val setTransactions = normalCheckpointSnapshot.setTransactions + val numOfFiles = normalCheckpointSnapshot.numOfFiles + val numOfRemoves = normalCheckpointSnapshot.numOfRemoves + val numOfMetadata = normalCheckpointSnapshot.numOfMetadata + val numOfProtocol = normalCheckpointSnapshot.numOfProtocol + val actions = normalCheckpointSnapshot.stateDS.collect().toSet + + val fs = deltaLog.logPath.getFileSystem(deltaLog.newDeltaHadoopConf()) + + // Delete the normal V2 Checkpoint so that the snapshot can be initialized + // using the compat checkpoint. 
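+ // Clearing the DeltaLog cache below forces DeltaLog.forTable to rebuild the snapshot from + // what remains in the _delta_log directory, i.e. from the single-part compatibility + // checkpoint written above.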
+ fs.delete(normalCheckpointSnapshot.checkpointProvider.topLevelFiles.head.getPath) + + DeltaLog.clearCache() + val deltaLog2 = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + val compatCheckpointSnapshot = deltaLog2.update() + assert(!compatCheckpointSnapshot.checkpointProvider.isEmpty) + assert(compatCheckpointSnapshot.checkpointProvider.version == + normalCheckpointSnapshot.checkpointProvider.version) + assert( + compatCheckpointSnapshot.checkpointProvider.topLevelFiles.head.getPath.getName + == + FileNames.checkpointFileSingular( + deltaLog2.logPath, + normalCheckpointSnapshot.checkpointProvider.version).getName + ) + + assert( + compatCheckpointSnapshot.allFiles.collect().sortBy(_.path).toList + == allFiles + ) + + assert(compatCheckpointSnapshot.setTransactions == setTransactions) + + assert(compatCheckpointSnapshot.stateDS.collect().toSet == actions) + + assert(compatCheckpointSnapshot.numOfFiles == numOfFiles) + + assert(compatCheckpointSnapshot.numOfRemoves == numOfRemoves) + + assert(compatCheckpointSnapshot.numOfMetadata == numOfMetadata) + + assert(compatCheckpointSnapshot.numOfProtocol == numOfProtocol) + + val tableData = + spark.sql(s"SELECT * FROM delta.`${deltaLog.dataPath}` ORDER BY id") + .collect() + .map(_.getLong(0)) + assert(tableData.toSeq == (0 to 19)) + } + } + + testDifferentCheckpoints("last checkpoint contains correct schema for v1/v2" + + " Checkpoints") { (checkpointPolicy, v2CheckpointFormatOpt) => + withTempDir { tempDir => + spark.range(10).write.format("delta").save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + deltaLog.checkpoint() + val lastCheckpointOpt = deltaLog.readLastCheckpointFile() + assert(lastCheckpointOpt.nonEmpty) + if (checkpointPolicy.needsV2CheckpointSupport) { + if (v2CheckpointFormatOpt.contains(V2Checkpoint.Format.JSON)) { + assert(lastCheckpointOpt.get.checkpointSchema.isEmpty) + } else { + assert(lastCheckpointOpt.get.checkpointSchema.nonEmpty) + assert(lastCheckpointOpt.get.checkpointSchema.get.fieldNames.toSeq === + Seq("txn", "add", "remove", "metaData", "protocol", + "domainMetadata", "checkpointMetadata", "sidecar")) + } + } else { + assert(lastCheckpointOpt.get.checkpointSchema.nonEmpty) + assert(lastCheckpointOpt.get.checkpointSchema.get.fieldNames.toSeq === + Seq("txn", "add", "remove", "metaData", "protocol", "domainMetadata")) + } + } + } + + test("last checkpoint - v2 checkpoint fields threshold") { + withTempDir { tempDir => + val tablePath = tempDir.getAbsolutePath + spark.range(1).write.format("delta").save(tablePath) + val deltaLog = DeltaLog.forTable(spark, tablePath) + // Enable v2Checkpoint table feature. 
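+ // With the 'v2' policy set, every checkpoint written by `writeCheckpoint` is a V2 + // checkpoint, and _last_checkpoint may inline its non-file actions and sidecar list, + // subject to the two thresholds overridden below. Illustrative shape only (not the exact + // serialized form): {"version": 2, "v2Checkpoint": {"nonFileActions": [...], "sidecarFiles": [...]}}.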
+ spark.sql(s"ALTER TABLE delta.`$tablePath` SET TBLPROPERTIES " + + s"('${DeltaConfigs.CHECKPOINT_POLICY.key}' = 'v2')") + + def writeCheckpoint( + adds: Int, + nonFileActionThreshold: Int, + sidecarActionThreshold: Int): LastCheckpointInfo = { + withSQLConf( + DeltaSQLConf.LAST_CHECKPOINT_NON_FILE_ACTIONS_THRESHOLD.key -> s"$nonFileActionThreshold", + DeltaSQLConf.LAST_CHECKPOINT_SIDECARS_THRESHOLD.key -> s"$sidecarActionThreshold" + ) { + val addFiles = (1 to adds).map(_ => + AddFile( + path = java.util.UUID.randomUUID.toString, + partitionValues = Map(), + size = 128L, + modificationTime = 1L, + dataChange = true + )) + deltaLog.startTransaction().commit(addFiles, DeltaOperations.ManualUpdate) + deltaLog.checkpoint() + } + val lastCheckpointInfoOpt = deltaLog.readLastCheckpointFile() + assert(lastCheckpointInfoOpt.nonEmpty) + lastCheckpointInfoOpt.get + } + + // Append 1 AddFile [AddFile-2] + val lc1 = writeCheckpoint(adds = 1, nonFileActionThreshold = 10, sidecarActionThreshold = 10) + assert(lc1.v2Checkpoint.nonEmpty) + // 3 non file actions - protocol/metadata/checkpointMetadata, 1 sidecar + assert(lc1.v2Checkpoint.get.nonFileActions.get.size === 3) + assert(lc1.v2Checkpoint.get.sidecarFiles.get.size === 1) + + // Append 1 SetTxn, 8 more AddFiles [SetTxn-1, AddFile-10] + deltaLog.startTransaction() + .commit(Seq(SetTransaction("app-1", 2, None)), DeltaOperations.ManualUpdate) + val lc2 = writeCheckpoint(adds = 8, nonFileActionThreshold = 4, sidecarActionThreshold = 10) + assert(lc2.v2Checkpoint.nonEmpty) + // 4 non file actions - protocol/metadata/checkpointMetadata/setTxn, 1 sidecar + assert(lc2.v2Checkpoint.get.nonFileActions.get.size === 4) + assert(lc2.v2Checkpoint.get.sidecarFiles.get.size === 1) + + // Append 10 more AddFiles [SetTxn-1, AddFile-20] + val lc3 = writeCheckpoint(adds = 10, nonFileActionThreshold = 3, sidecarActionThreshold = 10) + assert(lc3.v2Checkpoint.nonEmpty) + // non-file actions exceeded threshold, 1 sidecar + assert(lc3.v2Checkpoint.get.nonFileActions.isEmpty) + assert(lc3.v2Checkpoint.get.sidecarFiles.get.size === 1) + + withSQLConf(DeltaSQLConf.DELTA_CHECKPOINT_PART_SIZE.key -> "5") { + // Append 10 more AddFiles [SetTxn-1, AddFile-30] + val lc4 = + writeCheckpoint(adds = 10, nonFileActionThreshold = 3, sidecarActionThreshold = 10) + assert(lc4.v2Checkpoint.nonEmpty) + // non-file actions exceeded threshold + // total 30 file actions, across 6 sidecar files (5 actions per file) + assert(lc4.v2Checkpoint.get.nonFileActions.isEmpty) + assert(lc4.v2Checkpoint.get.sidecarFiles.get.size === 6) + } + + withSQLConf(DeltaSQLConf.DELTA_CHECKPOINT_PART_SIZE.key -> "2") { + // Append 0 AddFiles [SetTxn-1, AddFile-30] + val lc5 = + writeCheckpoint(adds = 0, nonFileActionThreshold = 10, sidecarActionThreshold = 10) + assert(lc5.v2Checkpoint.nonEmpty) + // 4 non file actions - protocol/metadata/checkpointMetadata/setTxn + // total 30 file actions, across 15 sidecar files (2 actions per file) + assert(lc5.v2Checkpoint.get.nonFileActions.get.size === 4) + assert(lc5.v2Checkpoint.get.sidecarFiles.isEmpty) + } + } + } + + def checkIntermittentError(tempDir: File, lastCheckpointMissing: Boolean): Unit = { + // Create a table with commit version 0, 1 and a checkpoint. 
+ val tablePath = tempDir.getAbsolutePath + spark.range(10).write.format("delta").save(tablePath) + spark.sql(s"INSERT INTO delta.`$tablePath`" + + s"SELECT * FROM delta.`$tablePath` WHERE id = 1").collect() + + val log = DeltaLog.forTable(spark, tablePath) + val conf = log.newDeltaHadoopConf() + log.checkpoint() + + // Delete _last_checkpoint based on test configuration. + val fs = log.logPath.getFileSystem(conf) + if (lastCheckpointMissing) { + fs.delete(log.LAST_CHECKPOINT) + } + + // In order to trigger an intermittent failure while reading checkpoint, this test corrupts + // the checkpoint temporarily so that json/parquet checkpoint reader fails. The corrupted + // file is written with same length so that when the file is uncorrupted in future, then we + // can test that delta is able to read that file and produce correct results. If the "bad" file + // is not of same length, then the read with "good" file will also fail as parquet reader will + // use the cache file status's getLen to find out where the footer is and will fail after not + // finding the magic bytes. + val checkpointFileStatus = + log.listFrom(0).filter(FileNames.isCheckpointFile).toSeq.head + // Rename the correct checkpoint to a temp path and create a checkpoint with character 'r' + // repeated. + val tempPath = checkpointFileStatus.getPath.suffix(".temp") + fs.rename(checkpointFileStatus.getPath, tempPath) + val randomContentToWrite = Seq("r" * (checkpointFileStatus.getLen.toInt - 1)) // + 1 (\n) + log.store.write( + checkpointFileStatus.getPath, randomContentToWrite.toIterator, overwrite = true, conf) + assert(log.store.read(checkpointFileStatus.getPath, conf) === randomContentToWrite) + assert(fs.getFileStatus(tempPath).getLen === checkpointFileStatus.getLen) + + DeltaLog.clearCache() + sql(s"SELECT * FROM delta.`$tablePath`").collect() + val snapshot = DeltaLog.forTable(spark, tablePath).unsafeVolatileSnapshot + snapshot.computeChecksum + assert(snapshot.checkpointProvider.isEmpty) + } + + + /** + * Writes all actions in the top-level file of a new V2 Checkpoint. No sidecar files are + * written. 
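+ * + * @param snapshot the snapshot whose state is written into the manifest + * @param v2CheckpointFormat whether the top-level manifest is written as JSON or parquet + * @return the path of the newly written V2 checkpoint manifest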
+ */ + private def writeAllActionsInV2Manifest( + snapshot: Snapshot, + v2CheckpointFormat: V2Checkpoint.Format): Path = { + val checkpointMetadata = CheckpointMetadata(version = snapshot.version) + val actionsDS = snapshot.stateDS + .where("checkpointMetadata is null and " + + "commitInfo is null and cdc is null and sidecar is null") + .union(spark.createDataset(Seq(checkpointMetadata.wrap))) + .toDF() + + val actionsToWrite = Checkpoints + .buildCheckpoint(actionsDS, snapshot) + .as[SingleAction] + .collect() + .toSeq + .map(_.unwrap) + + val deltaLog = snapshot.deltaLog + val (v2CheckpointPath, _) = + if (v2CheckpointFormat == V2Checkpoint.Format.JSON) { + val v2CheckpointPath = + FileNames.newV2CheckpointJsonFile(deltaLog.logPath, snapshot.version) + deltaLog.store.write( + v2CheckpointPath, + actionsToWrite.map(_.json).toIterator, + overwrite = true, + hadoopConf = deltaLog.newDeltaHadoopConf()) + (v2CheckpointPath, None) + } else if (v2CheckpointFormat == V2Checkpoint.Format.PARQUET) { + val sparkSession = spark + // scalastyle:off sparkimplicits + import sparkSession.implicits._ + // scalastyle:on sparkimplicits + val dfToWrite = actionsToWrite.map(_.wrap).toDF() + val v2CheckpointPath = + FileNames.newV2CheckpointParquetFile(deltaLog.logPath, snapshot.version) + val schemaOfDfWritten = + Checkpoints.createCheckpointV2ParquetFile( + spark, + dfToWrite, + v2CheckpointPath, + deltaLog.newDeltaHadoopConf(), + false) + (v2CheckpointPath, Some(schemaOfDfWritten)) + } else { + throw DeltaErrors.assertionFailedError( + s"Unrecognized checkpoint V2 format: $v2CheckpointFormat") + } + v2CheckpointPath + } + + for (checkpointFormat <- V2Checkpoint.Format.ALL) + test(s"All actions in V2 manifest [v2CheckpointFormat: ${checkpointFormat.name}]") { + withSQLConf( + DeltaConfigs.CHECKPOINT_POLICY.defaultTablePropertyKey -> CheckpointPolicy.V2.name) { + withTempDir { dir => + spark.range(10).write.format("delta").save(dir.getAbsolutePath) + val log = DeltaLog.forTable(spark, dir) + spark.sql(s"INSERT INTO delta.`${log.dataPath}` VALUES (2718);") + log + .startTransaction() + .commit(Seq(SetTransaction("app-1", 2, None)), DeltaOperations.ManualUpdate) + + val snapshot = log.update() + val allFiles = snapshot.allFiles.collect().toSet + val setTransactions = snapshot.setTransactions.toSet + val numOfFiles = snapshot.numOfFiles + val numOfRemoves = snapshot.numOfRemoves + val numOfMetadata = snapshot.numOfMetadata + val numOfProtocol = snapshot.numOfProtocol + val actions = snapshot.stateDS.collect().toSet + + assert(snapshot.version == 2) + + writeAllActionsInV2Manifest(snapshot, checkpointFormat) + + DeltaLog.clearCache() + val checkpointSnapshot = log.update() + + assert(!checkpointSnapshot.checkpointProvider.isEmpty) + + assert(checkpointSnapshot.checkpointProvider.version == 2) + + // Check the integrity of the data in the checkpoint-backed table. 
+ val data = spark + .sql(s"SELECT * FROM delta.`${log.dataPath}` ORDER BY ID;") + .collect() + .map(_.getLong(0)) + + val expectedData = ((0 to 9).toList :+ 2718).toArray + assert(data sameElements expectedData) + assert(checkpointSnapshot.setTransactions.toSet == setTransactions) + + assert(checkpointSnapshot.stateDS.collect().toSet == actions) + + assert(checkpointSnapshot.numOfFiles == numOfFiles) + + assert(checkpointSnapshot.numOfRemoves == numOfRemoves) + + assert(checkpointSnapshot.numOfMetadata == numOfMetadata) + + assert(checkpointSnapshot.numOfProtocol == numOfProtocol) + + assert(checkpointSnapshot.allFiles.collect().toSet == allFiles) + } + } + } + for (lastCheckpointMissing <- BOOLEAN_DOMAIN) + testDifferentCheckpoints("intermittent error while reading checkpoint should not" + + s" stick to snapshot [lastCheckpointMissing: $lastCheckpointMissing]") { (_, _) => + withTempDir { tempDir => checkIntermittentError(tempDir, lastCheckpointMissing) } + } + + test("validate metadata cleanup is not called with createCheckpointAtVersion API") { + withTempDir { dir => + val usageRecords1 = Log4jUsageLogger.track { + spark.range(10).write.format("delta").save(dir.getAbsolutePath) + val log = DeltaLog.forTable(spark, dir) + log.createCheckpointAtVersion(0) + } + assert(filterUsageRecords(usageRecords1, "delta.log.cleanup").size === 0L) + + val usageRecords2 = Log4jUsageLogger.track { + spark.range(10).write.mode("overwrite").format("delta").save(dir.getAbsolutePath) + val log = DeltaLog.forTable(spark, dir) + log.checkpoint() + + } + assert(filterUsageRecords(usageRecords2, "delta.log.cleanup").size > 0) + } + } + + protected def filterUsageRecords( + usageRecords: Seq[UsageRecord], opType: String): Seq[UsageRecord] = { + usageRecords.filter { r => + r.tags.get("opType").contains(opType) || r.opType.map(_.typeName).contains(opType) + } + } +} + +/** + * A fake GCS file system to verify delta commits are written in a separate gcs thread. + */ +class FakeGCSFileSystem extends RawLocalFileSystem { + override def getScheme: String = "gs" + override def getUri: URI = URI.create("gs:/") + + private def assertGCSThread(f: Path): Unit = { + if (f.getName.contains(".json") || f.getName.contains(".checkpoint")) { + assert( + Thread.currentThread().getName.contains("delta-gcs-"), + s"writing $f was happening in non gcs thread: ${Thread.currentThread()}") + } + } + + override def create( + f: Path, + permission: FsPermission, + overwrite: Boolean, + bufferSize: Int, + replication: Short, + blockSize: Long, + progress: Progressable): FSDataOutputStream = { + assertGCSThread(f) + super.create(f, permission, overwrite, bufferSize, replication, blockSize, progress) + } + + override def create( + f: Path, + overwrite: Boolean, + bufferSize: Int, + replication: Short, + blockSize: Long, + progress: Progressable): FSDataOutputStream = { + assertGCSThread(f) + super.create(f, overwrite, bufferSize, replication, blockSize, progress) + } +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/CloneParquetSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/CloneParquetSuite.scala new file mode 100644 index 00000000000..1a79551bde7 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/CloneParquetSuite.scala @@ -0,0 +1,94 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.SparkException +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.col + +class CloneParquetByPathSuite extends CloneParquetSuiteBase +{ + + protected def withParquetTable( + df: DataFrame, partCols: Seq[String] = Seq.empty[String])( + func: ParquetIdent => Unit): Unit = { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + if (partCols.nonEmpty) { + df.write.format("parquet").mode("overwrite").partitionBy(partCols: _*).save(tempDir) + } else { + df.write.format("parquet").mode("overwrite").save(tempDir) + } + + func(ParquetIdent(tempDir, isTable = false)) + } + } + + // CLONE doesn't support partitioned parquet table using path since it requires customer to + // provide the partition schema in the command like `CONVERT TO DELTA`, but such an option is not + // available in CLONE yet. + testClone("clone partitioned parquet to delta table") { mode => + val df = spark.range(100) + .withColumn("key1", col("id") % 4) + .withColumn("key2", col("id") % 7 cast "String") + + withParquetTable(df, Seq("key1", "key2")) { sourceIdent => + val tableName = "cloneTable" + withTable(tableName) { + val se = intercept[SparkException] { + sql(s"CREATE TABLE $tableName $mode CLONE $sourceIdent") + } + assert(se.getMessage.contains("Expecting 0 partition column(s)")) + } + } + } +} + +class CloneParquetByNameSuite extends CloneParquetSuiteBase +{ + + protected def withParquetTable( + df: DataFrame, partCols: Seq[String] = Seq.empty[String])( + func: ParquetIdent => Unit): Unit = { + val tableName = "parquet_table" + withTable(tableName) { + if (partCols.nonEmpty) { + df.write.format("parquet").partitionBy(partCols: _*).saveAsTable(tableName) + } else { + df.write.format("parquet").saveAsTable(tableName) + } + + func(ParquetIdent(tableName, isTable = true)) + } + } + + testClone("clone partitioned parquet to delta table") { mode => + val df = spark.range(100) + .withColumn("key1", col("id") % 4) + .withColumn("key2", col("id") % 7 cast "String") + + withParquetTable(df, Seq("key1", "key2")) { sourceIdent => + val tableName = "cloneTable" + withTable(tableName) { + sql(s"CREATE TABLE $tableName $mode CLONE $sourceIdent") + + checkAnswer(spark.table(tableName), df) + } + } + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/CloneParquetSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/CloneParquetSuiteBase.scala new file mode 100644 index 00000000000..08b3cb8029a --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/CloneParquetSuiteBase.scala @@ -0,0 +1,139 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import com.databricks.spark.util.Log4jUsageLogger +import org.apache.spark.sql.delta.commands.CloneParquetSource +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.util.JsonUtils + +import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.test.SharedSparkSession + +trait CloneParquetSuiteBase extends QueryTest + with DeltaSQLCommandTest + with SharedSparkSession { + + // Identifier to represent a Parquet source + protected case class ParquetIdent(name: String, isTable: Boolean) { + + override def toString: String = if (isTable) name else s"parquet.`$name`" + + def toTableIdent: TableIdentifier = + if (isTable) TableIdentifier(name) else TableIdentifier(name, Some("parquet")) + + def toCloneSource: CloneParquetSource = { + val catalogTableOpt = + if (isTable) Some(spark.sessionState.catalog.getTableMetadata(toTableIdent)) else None + CloneParquetSource(toTableIdent, catalogTableOpt, spark) + } + } + + protected def supportedModes: Seq[String] = Seq("SHALLOW") + + protected def testClone(testName: String)(f: String => Unit): Unit = + supportedModes.foreach { mode => test(s"$testName - $mode") { f(mode) } } + + protected def withParquetTable( + df: DataFrame, partCols: Seq[String] = Seq.empty[String])(func: ParquetIdent => Unit): Unit + + protected def validateBlob( + blob: Map[String, Any], + mode: String, + source: CloneParquetSource, + target: DeltaLog): Unit = { + // scalastyle:off deltahadoopconfiguration + val hadoopConf = spark.sessionState.newHadoopConf() + // scalastyle:on deltahadoopconfiguration + + val sourcePath = source.dataPath + val sourceFs = sourcePath.getFileSystem(hadoopConf) + val qualifiedSourcePath = sourceFs.makeQualified(sourcePath) + + val targetPath = target.dataPath + val targetFs = targetPath.getFileSystem(hadoopConf) + val qualifiedTargetPath = targetFs.makeQualified(targetPath) + + assert(blob("sourcePath") === qualifiedSourcePath.toString) + assert(blob("target") === qualifiedTargetPath.toString) + assert(blob("sourceTableSize") === source.sizeInBytes) + assert(blob("sourceNumOfFiles") === source.numOfFiles) + assert(blob("partitionBy") === source.metadata.partitionColumns) + } + + testClone("validate clone metrics") { mode => + val df = spark.range(100).withColumn("key", col("id") % 3) + withParquetTable(df) { sourceIdent => + val tableName = "cloneTable" + withTable(tableName) { + val allLogs = Log4jUsageLogger.track { + sql(s"CREATE TABLE $tableName $mode CLONE $sourceIdent") + } + + val source = sourceIdent.toCloneSource + val target = DeltaLog.forTable(spark, TableIdentifier(tableName)) + + val blob = JsonUtils.fromJson[Map[String, Any]](allLogs + .filter(_.metric == "tahoeEvent") + .filter(_.tags.get("opType").contains("delta.clone")) + .filter(_.blob.contains("source")) + .map(_.blob).last) + validateBlob(blob, mode, source, target) + + val sourceMetadata = source.metadata + val targetMetadata = target.update().metadata + + 
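+ // The clone is expected to carry over the source's schema, table properties and partition + // columns; the assertions below check each of these.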
assert(sourceMetadata.schema === targetMetadata.schema) + assert(sourceMetadata.configuration === targetMetadata.configuration) + assert(sourceMetadata.dataSchema === targetMetadata.dataSchema) + assert(sourceMetadata.partitionColumns === targetMetadata.partitionColumns) + } + } + } + + testClone("clone non-partitioned parquet to delta table") { mode => + val df = spark.range(100) + .withColumn("key1", col("id") % 4) + .withColumn("key2", col("id") % 7 cast "String") + + withParquetTable(df) { sourceIdent => + val tableName = "cloneTable" + withTable(tableName) { + sql(s"CREATE TABLE $tableName $mode CLONE $sourceIdent") + + checkAnswer(spark.table(tableName), df) + } + } + } + + testClone("clone non-partitioned parquet to delta path") { mode => + val df = spark.range(100) + .withColumn("key1", col("id") % 4) + .withColumn("key2", col("id") % 7 cast "String") + + withParquetTable(df) { sourceIdent => + withTempDir { dir => + val deltaDir = dir.getCanonicalPath + sql(s"CREATE TABLE delta.`$deltaDir` $mode CLONE $sourceIdent") + + checkAnswer(spark.read.format("delta").load(deltaDir), df) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/CloneTableSQLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/CloneTableSQLSuite.scala new file mode 100644 index 00000000000..548986984f0 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/CloneTableSQLSuite.scala @@ -0,0 +1,530 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import scala.collection.immutable.NumericRange + +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.actions.{AddFile, FileAction, RemoveFile} +import org.apache.spark.sql.delta.test.{DeltaExcludedTestMixin, DeltaSQLCommandTest} +import org.apache.spark.sql.delta.util.DeltaFileOperations +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.CatalogTableType +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.util.Utils + +class CloneTableSQLSuite extends CloneTableSuiteBase + with DeltaColumnMappingTestUtils +{ + // scalastyle:off argcount + override protected def cloneTable( + source: String, + target: String, + sourceIsTable: Boolean = false, + targetIsTable: Boolean = false, + targetLocation: Option[String] = None, + versionAsOf: Option[Long] = None, + timestampAsOf: Option[String] = None, + isCreate: Boolean = true, + isReplace: Boolean = false, + tableProperties: Map[String, String] = Map.empty): Unit = { + val commandSql = CloneTableSQLTestUtils.buildCloneSqlString( + source, target, + sourceIsTable, + targetIsTable, + "delta", + targetLocation, + versionAsOf, + timestampAsOf, + isCreate, + isReplace, + tableProperties) + sql(commandSql) + } + // scalastyle:on argcount + + testAllClones(s"table version as of syntax") { (_, target, isShallow) => + val tbl = "source" + testSyntax( + tbl, + target, + s"CREATE TABLE delta.`$target` ${cloneTypeStr(isShallow)} CLONE $tbl VERSION AS OF 0" + ) + } + + testAllClones("CREATE OR REPLACE syntax when there is no existing table") { + (_, clone, isShallow) => + val tbl = "source" + testSyntax( + tbl, + clone, + s"CREATE OR REPLACE TABLE delta.`$clone` ${cloneTypeStr(isShallow)} CLONE $tbl" + ) + } + + cloneTest("REPLACE cannot be used with IF NOT EXISTS") { (shallow, _) => + val tbl = "source" + intercept[ParseException] { + testSyntax(tbl, shallow, + s"CREATE OR REPLACE TABLE IF NOT EXISTS delta.`$shallow` SHALLOW CLONE $tbl") + } + intercept[ParseException] { + testSyntax(tbl, shallow, + s"REPLACE TABLE IF NOT EXISTS delta.`$shallow` SHALLOW CLONE $tbl") + } + } + + testAllClones( + "IF NOT EXISTS should not go through with CLONE if table exists") { (tblExt, _, isShallow) => + val sourceTable = "source" + val conflictingTable = "conflict" + withTable(sourceTable, conflictingTable) { + sql(s"CREATE TABLE $conflictingTable " + + s"USING PARQUET LOCATION '$tblExt' TBLPROPERTIES ('abc'='def', 'def'='ghi') AS SELECT 1") + spark.range(5).write.format("delta").saveAsTable(sourceTable) + + sql(s"CREATE TABLE IF NOT EXISTS " + + s"$conflictingTable ${cloneTypeStr(isShallow)} CLONE $sourceTable") + + checkAnswer(sql(s"SELECT COUNT(*) FROM $conflictingTable"), Row(1)) + } + } + + testAllClones("IF NOT EXISTS should throw an error if path exists") { (_, target, isShallow) => + spark.range(5).write.format("delta").save(target) + + val ex = intercept[AnalysisException] { + sql(s"CREATE TABLE IF NOT EXISTS " + + s"delta.`$target` ${cloneTypeStr(isShallow)} CLONE delta.`$target`") + } + + assert(ex.getMessage.contains("is not empty")) + } + + cloneTest("Negative test: REPLACE table where there is no existing table") { (shallow, _) => + val tbl = "source" + val ex = intercept[AnalysisException] { + testSyntax(tbl, shallow, s"REPLACE TABLE delta.`$shallow` SHALLOW CLONE $tbl") + } + + 
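+    // REPLACE (without CREATE OR) requires the target Delta table to already exist, so the
+    // statement above is expected to fail with an error about the missing table.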
assert(ex.getMessage.contains("cannot be replaced as it does not exist.")) + } + + cloneTest("cloning a table that doesn't exist") { (tblExt, _) => + val ex = intercept[AnalysisException] { + sql(s"CREATE TABLE delta.`$tblExt` SHALLOW CLONE not_exists") + } + assert(ex.getMessage.contains("Table not found") || + ex.getMessage.contains("The table or view `not_exists` cannot be found")) + + val ex2 = intercept[AnalysisException] { + sql(s"CREATE TABLE delta.`$tblExt` SHALLOW CLONE not_exists VERSION AS OF 0") + } + assert(ex2.getMessage.contains("Table not found") || + ex2.getMessage.contains("The table or view `not_exists` cannot be found")) + } + + cloneTest("cloning a view") { (tblExt, _) => + withTempView("tmp") { + sql("CREATE OR REPLACE TEMP VIEW tmp AS SELECT * FROM range(10)") + val ex = intercept[AnalysisException] { + sql(s"CREATE TABLE delta.`$tblExt` SHALLOW CLONE tmp") + } + assert(ex.errorClass === Some("DELTA_CLONE_UNSUPPORTED_SOURCE")) + assert(ex.getMessage.contains("clone source 'tmp', whose format is View.")) + } + } + + cloneTest("Clone on table with delta statistics columns") { (source, target) => + withTable("delta_table", "delta_table_shadow_clone", "delta_table_clone") { + sql( + "create table delta_table (c0 long, c1 long, c2 long) using delta " + + "TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c1,c2', " + + "'delta.columnMapping.mode' = 'name', " + + "'delta.minReaderVersion' = '2', " + + "'delta.minWriterVersion' = '5')" + ) + sql(s"CREATE TABLE delta_table_shadow_clone SHALLOW CLONE delta_table LOCATION '$source'") + var dataSkippingStatsColumns = sql("SHOW TBLPROPERTIES delta_table_shadow_clone") + .collect() + .map { row => row.getString(0) -> row.getString(1) } + .filter(_._1 == "delta.dataSkippingStatsColumns") + .toSeq + val result1 = Seq(("delta.dataSkippingStatsColumns", "c1,c2")) + assert(dataSkippingStatsColumns == result1) + } + } + + cloneTest("Clone on table with nested delta statistics columns") { (source, target) => + withTable("delta_table", "delta_table_shadow_clone", "delta_table_clone") { + sql( + "create table delta_table (c0 long, c1 long, c2 struct) using delta " + + "TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c1,c2.a,c2.b', " + + "'delta.columnMapping.mode' = 'name', " + + "'delta.minReaderVersion' = '2', " + + "'delta.minWriterVersion' = '5')" + ) + sql(s"CREATE TABLE delta_table_shadow_clone SHALLOW CLONE delta_table LOCATION '$source'") + var dataSkippingStatsColumns = sql("SHOW TBLPROPERTIES delta_table_shadow_clone") + .collect() + .map { row => row.getString(0) -> row.getString(1) } + .filter(_._1 == "delta.dataSkippingStatsColumns") + .toSeq + val result1 = Seq(("delta.dataSkippingStatsColumns", "c1,c2.a,c2.b")) + assert(dataSkippingStatsColumns == result1) + } + } + + cloneTest("cloning a view over a Delta table") { (tblExt, _) => + withTable("delta_table") { + withView("tmp") { + sql("CREATE TABLE delta_table USING delta AS SELECT * FROM range(10)") + sql("CREATE VIEW tmp AS SELECT * FROM delta_table") + val ex = intercept[AnalysisException] { + sql(s"CREATE TABLE delta.`$tblExt` SHALLOW CLONE tmp") + } + assert(ex.errorClass === Some("DELTA_CLONE_UNSUPPORTED_SOURCE")) + assert( + ex.getMessage.contains("clone source") && + ex.getMessage.contains("default.tmp', whose format is View.") + ) + } + } + } + + cloneTest("check metrics returned from shallow clone", TAG_HAS_SHALLOW_CLONE) { (_, _) => + val source = "source" + val target = "target" + withTable(source, target) { + 
spark.range(100).write.format("delta").saveAsTable(source) + + val res = sql(s"CREATE TABLE $target SHALLOW CLONE $source") + + // schema check + val expectedColumns = Seq( + "source_table_size", + "source_num_of_files", + "num_removed_files", + "num_copied_files", + "removed_files_size", + "copied_files_size" + ) + assert(expectedColumns == res.columns.toSeq) + + // logic check + assert(res.count() == 1) + val returnedMetrics = res.first() + assert(returnedMetrics.getAs[Long]("source_table_size") != 0L) + assert(returnedMetrics.getAs[Long]("source_num_of_files") != 0L) + // Delta-OSS doesn't support copied file metrics + assert(returnedMetrics.getAs[Long]("num_copied_files") == 0L) + assert(returnedMetrics.getAs[Long]("copied_files_size") == 0L) + } + } + + cloneTest("Negative test: Clone to target path and also have external location") { (deep, ext) => + val sourceTable = "source" + withTable(sourceTable) { + spark.range(5).write.format("delta").saveAsTable(sourceTable) + val ex = intercept[IllegalArgumentException] { + runAndValidateClone( + sourceTable, + deep, + sourceIsTable = true, + targetLocation = Some(ext))() + } + + assert(ex.getMessage.contains("Two paths were provided as the CLONE target")) + } + } +} + + +class CloneTableSQLIdColumnMappingSuite + extends CloneTableSQLSuite + with CloneTableColumnMappingSuiteBase + with DeltaColumnMappingEnableIdMode { +} + +class CloneTableSQLNameColumnMappingSuite + extends CloneTableSQLSuite + with CloneTableColumnMappingNameSuiteBase + with DeltaColumnMappingEnableNameMode { +} + +object CloneTableSQLTestUtils { + + // scalastyle:off argcount + def buildCloneSqlString( + source: String, + target: String, + sourceIsTable: Boolean = false, + targetIsTable: Boolean = false, + sourceFormat: String = "delta", + targetLocation: Option[String] = None, + versionAsOf: Option[Long] = None, + timestampAsOf: Option[String] = None, + isCreate: Boolean = true, + isReplace: Boolean = false, + tableProperties: Map[String, String] = Map.empty): String = { + val header = if (isCreate && isReplace) { + "CREATE OR REPLACE" + } else if (isReplace) { + "REPLACE" + } else { + "CREATE" + } + // e.g. CREATE TABLE targetTable + val createTbl = + if (targetIsTable) s"$header TABLE $target" else s"$header TABLE delta.`$target`" + // e.g. CREATE TABLE targetTable SHALLOW CLONE + val withMethod = + createTbl + " SHALLOW CLONE " + // e.g. CREATE TABLE targetTable SHALLOW CLONE delta.`/source/table` + val withSource = if (sourceIsTable) { + withMethod + s"$source " + } else { + withMethod + s"$sourceFormat.`$source` " + } + // e.g. CREATE TABLE targetTable SHALLOW CLONE delta.`/source/table` VERSION AS OF 0 + val withVersion = if (versionAsOf.isDefined) { + withSource + s"VERSION AS OF ${versionAsOf.get}" + } else if (timestampAsOf.isDefined) { + withSource + s"TIMESTAMP AS OF '${timestampAsOf.get}'" + } else { + withSource + } + // e.g. 
CREATE TABLE targetTable SHALLOW CLONE delta.`/source/table` VERSION AS OF 0 + // LOCATION '/desired/target/location' + val withLocation = if (targetLocation.isDefined) { + s" $withVersion LOCATION '${targetLocation.get}'" + } else { + withVersion + } + val withProperties = if (tableProperties.nonEmpty) { + val props = tableProperties.map(p => s"'${p._1}' = '${p._2}'").mkString(",") + s" $withLocation TBLPROPERTIES ($props)" + } else { + withLocation + } + withProperties + } + // scalastyle:on argcount +} + +class CloneTableScalaDeletionVectorSuite + extends CloneTableSQLSuite + with DeltaSQLCommandTest + with DeltaExcludedTestMixin + with DeletionVectorsTestUtils { + + override def excluded: Seq[String] = super.excluded ++ + Seq( + // These require the initial table protocol version to be low to work properly. + "Cloning a table with new table properties that force protocol version upgrade -" + + " delta.enableChangeDataFeed" + , "Cloning a table with new table properties that force protocol version upgrade -" + + " delta.enableDeletionVectors" + , "Cloning a table without DV property should not upgrade protocol version" + , "CLONE respects table features set by table property override, targetExists=true" + , "CLONE ignores reader/writer session defaults") + + override def beforeAll(): Unit = { + super.beforeAll() + enableDeletionVectors(spark.conf) + } + + override protected def uniqueFileActionGroupBy(action: FileAction): String = { + val filePath = action.pathAsUri.toString + val dvId = action match { + case add: AddFile => Option(add.deletionVector).map(_.uniqueId).getOrElse("") + case remove: RemoveFile => Option(remove.deletionVector).map(_.uniqueId).getOrElse("") + case _ => "" + } + filePath + dvId + } + + testAllClones("Cloning table with persistent DVs") { (source, target, isShallow) => + // Create source table + writeMultiFileSourceTable( + source, + fileRanges = Seq(0L until 30L, 30L until 60L, 60L until 90L)) + // Add DVs to 2 files, leave 1 file without DVs. + spark.sql(s"DELETE FROM delta.`$source` WHERE id IN (24, 42)") + runAndValidateCloneWithDVs( + source, + target, + expectedNumFilesWithDVs = 2) + } + + testAllClones("Cloning table with persistent DVs and absolute parquet paths" + ) { (source, target, isShallow) => + withTempDir { originalSourceDir => + val originalSource = originalSourceDir.getCanonicalPath + // Create source table, by writing to an upstream table and then shallow cloning before + // adding DVs. + writeMultiFileSourceTable( + source = originalSource, + fileRanges = Seq(0L until 30L, 30L until 60L, 60L until 90L)) + spark.sql(s"CREATE OR REPLACE TABLE delta.`$source` SHALLOW CLONE delta.`$originalSource`") + // Add DVs to 2 files, leave 1 file without DVs. + spark.sql(s"DELETE FROM delta.`$source` WHERE id IN (24, 42)") + runAndValidateCloneWithDVs( + source, + target, + expectedNumFilesWithDVs = 2) + } + } + + testAllClones("Cloning table with persistent DVs and absolute DV file paths" + ) { (source, target, isShallow) => + withTempDir { originalSourceDir => + val originalSource = originalSourceDir.getCanonicalPath + // Create source table, by writing to an upstream table, adding DVs and then shallow cloning. + writeMultiFileSourceTable( + source = originalSource, + fileRanges = Seq(0L until 30L, 30L until 60L, 60L until 90L)) + // Add DVs to 2 files, leave 1 file without DVs. 
+ spark.sql(s"DELETE FROM delta.`$originalSource` WHERE id IN (24, 42)") + val originalSourceTable = io.delta.tables.DeltaTable.forPath(spark, originalSource) + spark.sql(s"CREATE OR REPLACE TABLE delta.`$source` SHALLOW CLONE delta.`$originalSource`") + // Double check this clone was correct. + checkAnswer( + spark.read.format("delta").load(source), expectedAnswer = originalSourceTable.toDF) + runAndValidateCloneWithDVs( + source, + target, + expectedNumFilesWithDVs = 2) + } + } + + cloneTest("Shallow clone round-trip with DVs") { (source, target) => + // Create source table. + writeMultiFileSourceTable( + source = source, + fileRanges = Seq( + 0L until 30L, // file 1 + 30L until 60L, // file 2 + 60L until 90L, // file 3 + 90L until 120L)) // file 4 + // Add DVs to files 1 and 2 and then shallow clone. + spark.sql(s"DELETE FROM delta.`$source` WHERE id IN (24, 42)") + runAndValidateCloneWithDVs( + source = source, + target = target, + expectedNumFilesWithDVs = 2) + + // Add a new DV to file 3 and update the DV file 2, + // leaving file 4 without a DV and file 1 with the existing DV. + // Then shallow clone back into source. + spark.sql(s"DELETE FROM delta.`$target` WHERE id IN (43, 69)") + runAndValidateCloneWithDVs( + source = target, + target = source, + expectedNumFilesWithDVs = 3, + isReplaceOperation = true) + } + + /** Write one file per range in `fileRanges`. */ + private def writeMultiFileSourceTable( + source: String, + fileRanges: Seq[NumericRange.Exclusive[Long]]): Unit = { + for (range <- fileRanges) { + spark.range(start = range.start, end = range.end, step = 1L, numPartitions = 1).toDF("id") + .write.format("delta").mode("append").save(source) + } + } + + private def tagAllFilesWithUniqueId(deltaLog: DeltaLog, tagName: String): Unit = { + deltaLog.withNewTransaction { txn => + val allFiles = txn.snapshot.allFiles.collect() + val allFilesWithTags = allFiles.map { addFile => + addFile.copyWithTags(Map(tagName -> java.util.UUID.randomUUID().toString)) + } + txn.commit(allFilesWithTags, DeltaOperations.ManualUpdate) + } + // Double check that the result is as expected. + val snapshotWithTags = deltaLog.update() + val filesWithTags = snapshotWithTags.allFiles.collect() + assert(filesWithTags.forall(_.tags.get(tagName).isDefined)) + assert(filesWithTags.map(_.tags(tagName)).toSet.size === filesWithTags.size) + } + + private def runAndValidateCloneWithDVs( + source: String, + target: String, + expectedNumFilesWithDVs: Int, + isReplaceOperation: Boolean = false): Unit = { + val sourceDeltaLog = DeltaLog.forTable(spark, source) + // Add a unique tag to each file, so we can use this later to match up pre-/post-clone entries + // without having to resolve all the possible combinations of relative vs. absolute paths. + val uniqueIdTag = "unique-file-id" + tagAllFilesWithUniqueId(sourceDeltaLog, uniqueIdTag) + + val targetDeltaLog = DeltaLog.forTable(spark, target) + val filesWithDVsInSource = getFilesWithDeletionVectors(sourceDeltaLog) + assert(filesWithDVsInSource.size === expectedNumFilesWithDVs) + val numberOfUniqueDVFilesInSource = filesWithDVsInSource + .map(_.deletionVector.pathOrInlineDv) + .toSet + .size + + runAndValidateClone( + source, + target, + isReplaceOperation = isReplaceOperation)() + val filesWithDVsInTarget = getFilesWithDeletionVectors(targetDeltaLog) + val numberOfUniqueDVFilesInTarget = filesWithDVsInTarget + .map(_.deletionVector.pathOrInlineDv) + .toSet + .size + // Make sure we didn't accidentally copy some file multiple times. 
+ assert(numberOfUniqueDVFilesInSource === numberOfUniqueDVFilesInTarget) + // Check contents of the copied DV files. + val filesWithDVsInTargetByUniqueId = filesWithDVsInTarget + .map(addFile => addFile.tags(uniqueIdTag) -> addFile) + .toMap + // scalastyle:off deltahadoopconfiguration + val hadoopConf = spark.sessionState.newHadoopConf() + // scalastyle:on deltahadoopconfiguration + for (sourceFile <- filesWithDVsInSource) { + val targetFile = filesWithDVsInTargetByUniqueId(sourceFile.tags(uniqueIdTag)) + if (sourceFile.deletionVector.isInline) { + assert(targetFile.deletionVector.isInline) + assert(sourceFile.deletionVector.inlineData === targetFile.deletionVector.inlineData) + } else { + def readDVData(path: Path): Array[Byte] = { + val fs = path.getFileSystem(hadoopConf) + val size = fs.getFileStatus(path).getLen + val data = new Array[Byte](size.toInt) + Utils.tryWithResource(fs.open(path)) { reader => + reader.readFully(data) + } + data + } + val sourceDVPath = sourceFile.deletionVector.absolutePath(sourceDeltaLog.dataPath) + val targetDVPath = targetFile.deletionVector.absolutePath(targetDeltaLog.dataPath) + val sourceData = readDVData(sourceDVPath) + val targetData = readDVData(targetDVPath) + assert(sourceData === targetData) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/CloneTableSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/CloneTableSuiteBase.scala new file mode 100644 index 00000000000..6cab79522a7 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/CloneTableSuiteBase.scala @@ -0,0 +1,971 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.File +import java.net.URI +import java.util.Locale + +import com.databricks.spark.util.{Log4jUsageLogger, UsageRecord} +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.actions.{FileAction, Metadata, Protocol, SetTransaction, SingleAction, TableFeatureProtocolUtils} +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.{DeltaColumnMappingSelectedTestMixin, DeltaSQLCommandTest} +import org.apache.spark.sql.delta.util.FileNames.{checksumFile, deltaFile} +import org.apache.spark.sql.delta.util.JsonUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{Path, RawLocalFileSystem} +import org.scalatest.Tag + +import org.apache.spark.{DebugFilesystem, SparkException, TaskFailedReason} +import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} +import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.execution.streaming.{CheckpointFileManager, FileSystemBasedCheckpointFileManager, MemoryStream} +import org.apache.spark.sql.functions.{col, floor, from_json} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.OutputMode +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.util.Utils +// scalastyle:on import.ordering.noEmptyLine + +trait CloneTableSuiteBase extends QueryTest + with SharedSparkSession + with DeltaColumnMappingTestUtils + with DeltaSQLCommandTest { + + protected val TAG_HAS_SHALLOW_CLONE = new Tag("SHALLOW CLONE") + protected val TAG_MODIFY_PROTOCOL = new Tag("CHANGES PROTOCOL") + protected val TAG_CHANGE_COLUMN_MAPPING_MODE = new Tag("CHANGES COLUMN MAPPING MODE") + protected val TAG_USES_CONVERT_TO_DELTA = new Tag("USES CONVERT TO DELTA") + + protected def withSourceTargetDir(f: (String, String) => Unit): Unit = { + withTempDir { dir => + val firstDir = new File(dir, "source").getCanonicalPath + val secondDir = new File(dir, "clone").getCanonicalPath + f(firstDir, secondDir) + } + } + + protected def cloneTypeStr(isShallow: Boolean): String = { + "SHALLOW" + } + + /** + * Run the given test function for SHALLOW clone. 
+ */ + protected def testAllClones(testName: String, testTags: org.scalatest.Tag*) + (testFunc: (String, String, Boolean) => Unit): Unit = { + val tags = Seq(TAG_HAS_SHALLOW_CLONE) + cloneTest(s"$testName", testTags ++ tags: _*) { + (source, target) => testFunc(source, target, true) + } + } + + protected def cloneTest( + testName: String, testTags: org.scalatest.Tag*)(f: (String, String) => Unit): Unit = { + if (testTags.exists(_.name == TAG_CHANGE_COLUMN_MAPPING_MODE.name) && + columnMappingMode != "none") { + ignore(testName + " (not supporting changing column mapping mode)") { + withSourceTargetDir(f) + } + } else { + test(testName, testTags: _*) { + withSourceTargetDir(f) + } + } + } + + // Extracted function so it can be overriden in subclasses. + protected def uniqueFileActionGroupBy(action: FileAction): String = action.pathAsUri.toString + + import testImplicits._ + // scalastyle:off + protected def runAndValidateClone( + source: String, + target: String, + sourceIsTable: Boolean = false, + targetIsTable: Boolean = false, + targetLocation: Option[String] = None, + sourceVersion: Option[Long] = None, + sourceTimestamp: Option[String] = None, + isCreate: Boolean = true, + // If we are doing a replace on an existing table + isReplaceOperation: Boolean = false, + // If we are doing a replace, whether it is on a Delta table + isReplaceDelta: Boolean = true, + tableProperties: Map[String, String] = Map.empty, + commitLargeMetricsMap: Map[String, String] = Map.empty, + expectedDataframe: DataFrame = spark.emptyDataFrame) + (f: () => Unit = + () => cloneTable( + source, + target, + sourceIsTable, + targetIsTable, + targetLocation, + sourceVersion, + sourceTimestamp, + isCreate, + isReplaceOperation, + tableProperties)): Unit = { + // scalastyle:on + + // Truncate table before REPLACE + try { + if (isReplaceOperation) { + val targetTbl = if (targetIsTable) { + target + } else { + s"delta.`$target`" + } + sql(s"DELETE FROM $targetTbl") + } + } catch { + case _: Throwable => + // ignore all + } + + // Check logged blob for expected values + val allLogs = Log4jUsageLogger.track { + f() + } + verifyAllCloneOperationsEmitted(allLogs, + isReplaceOperation && isReplaceDelta, + commitLargeMetricsMap) + + val blob = JsonUtils.fromJson[Map[String, Any]](allLogs + .filter(_.metric == "tahoeEvent") + .filter(_.tags.get("opType").contains("delta.clone")) + .filter(_.blob.contains("source")) + .map(_.blob).last) + + val sourceIdent = resolveTableIdentifier(source, Some("delta"), sourceIsTable) + val (cloneSource: CloneSource, sourceDf: DataFrame) = { + val sourceLog = DeltaLog.forTable(spark, sourceIdent) + val timeTravelSpec: Option[DeltaTimeTravelSpec] = + if (sourceVersion.isDefined || sourceTimestamp.isDefined) { + Some(DeltaTimeTravelSpec(sourceTimestamp.map(Literal(_)), sourceVersion, None)) + } else { + None + } + val deltaTable = DeltaTableV2(spark, sourceLog.dataPath, None, None, timeTravelSpec) + val sourceData = Dataset.ofRows( + spark, + LogicalRelation(sourceLog.createRelation( + snapshotToUseOpt = Some(deltaTable.initialSnapshot), + isTimeTravelQuery = sourceVersion.isDefined || sourceTimestamp.isDefined))) + (new CloneDeltaSource(deltaTable), sourceData) + } + + val targetLog = if (targetIsTable) { + DeltaLog.forTable(spark, TableIdentifier(target)) + } else { + DeltaLog.forTable(spark, target) + } + + val sourceSnapshot = cloneSource.snapshot + + val sourcePath = cloneSource.dataPath + // scalastyle:off deltahadoopconfiguration + val fs = 
sourcePath.getFileSystem(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + val qualifiedSourcePath = fs.makeQualified(sourcePath) + val logSource = if (sourceIsTable) { + val catalog = CatalogManager.SESSION_CATALOG_NAME + s"$catalog.default.$source".toLowerCase(Locale.ROOT) + } else { + s"delta.`$qualifiedSourcePath`" + } + + val rawTarget = new Path(targetLocation.getOrElse(targetLog.dataPath.toString)) + // scalastyle:off deltahadoopconfiguration + val targetFs = rawTarget.getFileSystem(targetLog.newDeltaHadoopConf()) + // scalastyle:on deltahadoopconfiguration + val qualifiedTarget = targetFs.makeQualified(rawTarget) + + // Check whether recordEvent operation is of correct form + assert(blob("source") != null) + val actualLogSource = blob("source").toString + assert(actualLogSource === logSource) + if (source != target) { + assert(blob("sourceVersion") === sourceSnapshot.get.version) + } + val replacingDeltaTable = isReplaceOperation && isReplaceDelta + assert(blob("sourcePath") === qualifiedSourcePath.toString) + assert(blob("target") === qualifiedTarget.toString) + assert(blob("isReplaceDelta") === replacingDeltaTable) + assert(blob("sourceTableSize") === cloneSource.sizeInBytes) + assert(blob("sourceNumOfFiles") === cloneSource.numOfFiles) + assert(blob("partitionBy") === cloneSource.metadata.partitionColumns) + + + // Check whether resulting metadata of target and source at version is the same + compareMetadata( + cloneSource, + targetLog.unsafeVolatileSnapshot, + targetLocation.isEmpty && targetIsTable, + isReplaceOperation) + + val commit = deltaFile(targetLog.logPath, targetLog.unsafeVolatileSnapshot.version) + val hadoopConf = targetLog.newDeltaHadoopConf() + val filePaths: Seq[FileAction] = targetLog.store.read(commit, hadoopConf).flatMap { line => + JsonUtils.fromJson[SingleAction](line) match { + case a if a.add != null => Some(a.add) + case a if a.remove != null => Some(a.remove) + case _ => None + } + } + assert(filePaths.groupBy(uniqueFileActionGroupBy(_)).forall(_._2.length === 1), + "A file was added and removed in the same commit") + + // Check whether the resulting datasets are the same + val targetDf = Dataset.ofRows( + spark, + LogicalRelation(targetLog.createRelation())) + checkAnswer( + targetDf, + sourceDf) + } + + + protected def verifyAllCloneOperationsEmitted( + allLogs: Seq[UsageRecord], + emitHandleExistingTable: Boolean, + commitLargeMetricsMap: Map[String, String] = Map.empty): Unit = { + val cloneLogs = allLogs + .filter(_.metric === "sparkOperationDuration") + .filter(_.opType.isDefined) + .filter(_.opType.get.typeName.contains("delta.clone")) + + + assert(cloneLogs.count(_.opType.get.typeName.equals("delta.clone.makeAbsolute")) == 1) + + val commitStatsUsageRecords = allLogs + .filter(_.metric === "tahoeEvent") + .filter( + _.tags.get("opType") === Some("delta.commit.stats")) + assert(commitStatsUsageRecords.length === 1) + val commitStatsMap = JsonUtils.fromJson[Map[String, Any]](commitStatsUsageRecords.head.blob) + commitLargeMetricsMap.foreach { case (name, expectedValue) => + assert(commitStatsMap(name).toString == expectedValue, + s"Expected value for $name metrics did not match with the captured value") + } + + } + + private def compareMetadata( + cloneSource: CloneSource, + targetLog: Snapshot, + targetIsTable: Boolean, + isReplace: Boolean = false): Unit = { + val sourceMetadata = cloneSource.metadata + val targetMetadata = targetLog.metadata + + assert(sourceMetadata.schema === targetMetadata.schema && + 
sourceMetadata.configuration === targetMetadata.configuration &&
+      sourceMetadata.dataSchema === targetMetadata.dataSchema &&
+      sourceMetadata.partitionColumns === targetMetadata.partitionColumns &&
+      sourceMetadata.format === targetMetadata.format)
+
+    // Protocol should only change if source.protocol >= target.protocol; otherwise the target
+    // must retain its existing protocol version (i.e. no downgrades).
+    assert(cloneSource.protocol === targetLog.protocol || (
+      cloneSource.protocol.minReaderVersion <= targetLog.protocol.minReaderVersion &&
+      cloneSource.protocol.minWriterVersion <= targetLog.protocol.minWriterVersion))
+
+    assert(targetLog.setTransactions.isEmpty)
+
+    if (!isReplace) {
+      assert(sourceMetadata.id != targetMetadata.id &&
+        targetMetadata.name === null &&
+        targetMetadata.description === null)
+    }
+  }
+
+  protected def deleteSourceAndCompareData(
+      source: String,
+      actual: => DataFrame,
+      expected: DataFrame): Unit = {
+    Utils.deleteRecursively(new File(source))
+    checkAnswer(actual, expected)
+  }
+
+  // scalastyle:off argcount
+  protected def cloneTable(
+      source: String,
+      target: String,
+      sourceIsTable: Boolean = false,
+      targetIsTable: Boolean = false,
+      targetLocation: Option[String] = None,
+      versionAsOf: Option[Long] = None,
+      timestampAsOf: Option[String] = None,
+      isCreate: Boolean = true,
+      isReplace: Boolean = false,
+      tableProperties: Map[String, String] = Map.empty): Unit
+  // scalastyle:on argcount
+
+  protected def verifyAllFilePaths(
+      table: String,
+      targetIsTable: Boolean = false,
+      expectAbsolute: Boolean): Unit = {
+    val targetLog = if (targetIsTable) {
+      DeltaLog.forTable(spark, TableIdentifier(table))
+    } else {
+      DeltaLog.forTable(spark, table)
+    }
+    assert(targetLog.unsafeVolatileSnapshot.allFiles.collect()
+      .forall(p => new Path(p.pathAsUri).isAbsolute == expectAbsolute))
+  }
+
+  protected def customConvertToDelta(internal: String, external: String): Unit = {
+    ConvertToDeltaCommand(
+      TableIdentifier(external, Some("parquet")),
+      Option(new StructType().add("part", IntegerType)),
+      collectStats = true,
+      Some(internal)).run(spark)
+  }
+
+  protected def resolveTableIdentifier(
+      name: String, format: Option[String], isTable: Boolean): TableIdentifier = {
+    if (isTable) {
+      TableIdentifier(name)
+    } else {
+      TableIdentifier(name, format)
+    }
+  }
+
+  // Test a basic clone with different syntaxes
+  protected def testSyntax(
+      source: String,
+      target: String,
+      sqlString: String,
+      targetIsTable: Boolean = false): Unit = {
+    withTable(source) {
+      spark.range(5).write.format("delta").saveAsTable(source)
+      runAndValidateClone(
+        source,
+        target,
+        sourceIsTable = true,
+        targetIsTable = targetIsTable) {
+        () => sql(sqlString)
+      }
+    }
+  }
+
+  cloneTest("simple shallow clone", TAG_HAS_SHALLOW_CLONE) { (source, clone) =>
+    val df1 = Seq(1, 2, 3, 4, 5).toDF("id").withColumn("part", 'id % 2)
+    val df2 = Seq(8, 9, 10).toDF("id").withColumn("part", 'id % 2)
+    df1.write.format("delta").partitionBy("part").mode("append").save(source)
+    df2.write.format("delta").mode("append").save(source)
+
+    runAndValidateClone(
+      source,
+      clone
+    )()
+    // no files should be copied
+    val cloneDir = new File(clone).list()
+    assert(cloneDir.length === 1,
+      s"There should only be a _delta_log directory but found:\n${cloneDir.mkString("\n")}")
+
+    val cloneLog = DeltaLog.forTable(spark, clone)
+    assert(cloneLog.snapshot.version === 0)
+    assert(cloneLog.snapshot.metadata.partitionColumns === Seq("part"))
+    val files = cloneLog.snapshot.allFiles.collect()
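+    // A shallow clone commits AddFile entries that reference the source table's data files by
+    // absolute URI instead of copying them, which is why only _delta_log exists in the clone.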
assert(files.forall(_.pathAsUri.toString.startsWith("file:/")), "paths must be absolute") + + checkAnswer( + spark.read.format("delta").load(clone), + df1.union(df2) + ) + } + + cloneTest("shallow clone a shallow clone", TAG_HAS_SHALLOW_CLONE) { (source, clone) => + val shallow1 = new File(clone, "shallow1").getCanonicalPath + val shallow2 = new File(clone, "shallow2").getCanonicalPath + val df1 = Seq(1, 2, 3, 4, 5).toDF("id").withColumn("part", 'id % 2) + df1.write.format("delta").partitionBy("part").mode("append").save(source) + + runAndValidateClone( + source, + shallow1 + )() + + runAndValidateClone( + shallow1, + shallow2 + )() + + deleteSourceAndCompareData(shallow1, spark.read.format("delta").load(shallow2), df1) + } + + testAllClones(s"validate commitLarge usage metrics") { (source, clone, isShallow) => + val df1 = Seq(1, 2, 3, 4, 5).toDF("id").withColumn("part", 'id % 5) + df1.write.format("delta").partitionBy("part").mode("append").save(source) + val df2 = Seq(1, 2).toDF("id").withColumn("part", 'id % 5) + df2.write.format("delta").partitionBy("part").mode("append").save(source) + + val numAbsolutePathsInAdd = if (isShallow) 7 else 0 + val commitLargeMetricsMap = Map( + "numAdd" -> "7", + "numRemove" -> "0", + "numFilesTotal" -> "7", + "numCdcFiles" -> "0", + "commitVersion" -> "0", + "readVersion" -> "0", + "numAbsolutePathsInAdd" -> s"$numAbsolutePathsInAdd", + "startVersion" -> "-1", + "numDistinctPartitionsInAdd" -> "-1") // distinct Parts are not tracked in commitLarge flow + runAndValidateClone( + source, + clone, + commitLargeMetricsMap = commitLargeMetricsMap)() + + checkAnswer( + spark.read.format("delta").load(clone), + df1.union(df2) + ) + } + + cloneTest("shallow clone across file systems", TAG_HAS_SHALLOW_CLONE) { (source, clone) => + withSQLConf( + "fs.s3.impl" -> classOf[S3LikeLocalFileSystem].getName, + "fs.s3.impl.disable.cache" -> "true") { + val df1 = Seq(1, 2, 3, 4, 5).toDF("id") + df1.write.format("delta").mode("append").save(source) + + val baseS3 = new URI("s3", null, source, null, null).toString + + runAndValidateClone( + baseS3, + s"file:$clone" + )() + + checkAnswer( + spark.read.format("delta").load(clone), + df1 + ) + + val cloneLog = DeltaLog.forTable(spark, clone) + assert(cloneLog.snapshot.version === 0) + val files = cloneLog.snapshot.allFiles.collect() + assert(files.forall(_.pathAsUri.toString.startsWith("s3:/"))) + } + } + + testAllClones("Negative test: clone into a non-empty directory that has a path based " + + "delta table") { (source, clone, isShallow) => + // Create table to clone + spark.range(5).write.format("delta").mode("append").save(source) + + // Table already exists at destination directory + spark.range(5).write.format("delta").mode("append").save(clone) + + // Clone should fail since destination directory is non-empty + val ex = intercept[AnalysisException] { + runAndValidateClone( + source, + clone + )() + } + assert(ex.getMessage.contains("is not empty")) + } + + cloneTest("Negative test: cloning into a non-empty parquet directory", + TAG_HAS_SHALLOW_CLONE) { (source, clone) => + // Create table to clone + spark.range(5).write.format("delta").mode("append").save(source) + + // Table already exists at destination directory + spark.range(5).write.format("parquet").mode("overwrite").save(clone) + + // Clone should fail since destination directory is non-empty + val ex = intercept[AnalysisException] { + sql(s"CREATE TABLE delta.`$clone` SHALLOW CLONE delta.`$source`") + } + assert(ex.getMessage.contains("is not empty and also not a 
Delta table")) + } + + testAllClones( + "Changes to clones only affect the cloned directory") { (source, target, isShallow) => + // Create base directory + Seq(1, 2, 3, 4, 5).toDF("id").write.format("delta").save(source) + + // Create a clone + runAndValidateClone( + source, + target + )() + + // Write to clone should be visible + Seq(6, 7, 8).toDF("id").write.format("delta").mode("append").save(target) + assert(spark.read.format("delta").load(target).count() === 8) + + // Write to clone should not be visible in original table + assert(spark.read.format("delta").load(source).count() === 5) + } + + testAllClones("simple clone of source using table name") { (_, target, isShallow) => + val tableName = "source" + withTable(tableName) { + spark.range(5).write.format("delta").saveAsTable(tableName) + runAndValidateClone( + tableName, + target, + sourceIsTable = true)() + } + } + + testAllClones("Clone a time traveled source") { (_, target, isShallow) => + val tableName = "source" + withTable(tableName) { + spark.range(5).write.format("delta").saveAsTable(tableName) + spark.range(5).write.format("delta").mode("append").saveAsTable(tableName) + spark.range(5).write.format("delta").mode("append").saveAsTable(tableName) + spark.range(5).write.format("delta").mode("append").saveAsTable(tableName) + assert(DeltaLog.forTableWithSnapshot(spark, TableIdentifier(tableName))._2.version === 3) + + runAndValidateClone( + tableName, + target, + sourceIsTable = true, + sourceVersion = Some(2))() + assert(spark.read.format("delta").load(target).count() === 15) + } + } + + cloneTest("create or replace table - shallow", TAG_HAS_SHALLOW_CLONE) { (_, _) => + val tbl = "source" + val target = "target" + Seq(true, false).foreach { isCreate => + withTable(tbl, target) { + spark.range(5).write.format("delta").saveAsTable(tbl) + spark.range(25).write.format("delta").saveAsTable(target) + + runAndValidateClone( + tbl, + target, + sourceIsTable = true, + targetIsTable = true, + isCreate = isCreate, + isReplaceOperation = true)() + } + } + } + + cloneTest("create or replace non Delta table - shallow", TAG_HAS_SHALLOW_CLONE) { (_, _) => + val tbl = "source" + val target = "target" + Seq(true, false).foreach { isCreate => + Seq("parquet", "json").foreach { format => + withTable(tbl, target) { + spark.range(5).write.format("delta").saveAsTable(tbl) + spark.range(25).write.format(format).saveAsTable(target) + + runAndValidateClone( + tbl, + target, + sourceIsTable = true, + targetIsTable = true, + isCreate = isCreate, + isReplaceOperation = true, + isReplaceDelta = false)() + } + } + } + } + + cloneTest("shallow clone a table unto itself", TAG_HAS_SHALLOW_CLONE) { (_, _) => + val tbl = "source" + Seq(true, false).foreach { isCreate => + withTable(tbl) { + spark.range(5).write.format("delta").saveAsTable(tbl) + + runAndValidateClone( + tbl, + tbl, + sourceIsTable = true, + targetIsTable = true, + isCreate = isCreate, + isReplaceOperation = true)() + + val allFiles = + DeltaLog.forTableWithSnapshot(spark, TableIdentifier(tbl))._2.allFiles.collect() + allFiles.foreach { file => + assert(!file.pathAsUri.isAbsolute, "File paths should not be absolute") + } + } + } + } + + cloneTest("CLONE ignores reader/writer session defaults", TAG_HAS_SHALLOW_CLONE) { + (source, clone) => + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1") { + // Create table without a default property setting. 
+ spark.range(1L).write.format("delta").mode("overwrite").save(source) + val oldProtocol = DeltaLog.forTable(spark, source).update().protocol + assert(oldProtocol === Protocol(1, 1)) + // Just use something that can be default. + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "2", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "2", + TableFeatureProtocolUtils.defaultPropertyKey(TestWriterFeature) -> "enabled") { + // Clone in a session with default properties and check that they aren't merged + // (i.e. target properties are identical to source properties). + runAndValidateClone( + source, + clone + )() + } + + val log = DeltaLog.forTable(spark, clone) + val targetProtocol = log.update().protocol + assert(targetProtocol === oldProtocol) + } + } + + testAllClones("clone a time traveled source using timestamp") { (source, clone, isShallow) => + // Create source + spark.range(5).write.format("delta").save(source) + spark.range(5).write.format("delta").mode("append").save(source) + spark.range(5).write.format("delta").mode("append").save(source) + assert(spark.read.format("delta").load(source).count() === 15) + + // Get time corresponding to date + val desiredTime = "1996-01-12" + val format = new java.text.SimpleDateFormat("yyyy-MM-dd") + val time = format.parse(desiredTime).getTime + + // Change modification time of commit + val path = new Path(source + "/_delta_log/00000000000000000000.json") + // scalastyle:off deltahadoopconfiguration + val fs = path.getFileSystem(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + fs.setTimes(path, time, 0) + + runAndValidateClone( + source, + clone, + sourceTimestamp = Some(desiredTime))() + } + + cloneTest("clones take protocol from the source", + TAG_HAS_SHALLOW_CLONE, TAG_MODIFY_PROTOCOL, TAG_CHANGE_COLUMN_MAPPING_MODE) { (source, clone) => + // Change protocol versions of (read, write) = (2, 3). We cannot initialize this to (0, 0) + // because min reader and writer versions are at least 1. + val defaultNewTableProtocol = Protocol.forNewTable(spark, metadataOpt = None) + val sourceProtocol = Protocol(2, 3) + // Make sure this is actually an upgrade. Downgrades are not supported, and if it's the same + // version, we aren't testing anything there. 
+ assert(sourceProtocol.minWriterVersion > defaultNewTableProtocol.minWriterVersion && + sourceProtocol.minReaderVersion > defaultNewTableProtocol.minReaderVersion) + val log = DeltaLog.forTable(spark, source) + // make sure to have a dummy schema because we can't have empty schema table by default + val newSchema = new StructType().add("id", IntegerType, nullable = true) + log.ensureLogDirectoryExist() + log.store.write( + deltaFile(log.logPath, 0), + Iterator(Metadata(schemaString = newSchema.json).json, sourceProtocol.json), + overwrite = false, + log.newDeltaHadoopConf()) + log.update() + + // Validate that clone has the new protocol version + runAndValidateClone( + source, + clone + )() + } + + testAllClones("clones take the set transactions of the source") { (_, target, isShallow) => + withTempDir { dir => + // Create source + val path = dir.getCanonicalPath + spark.range(5).write.format("delta").save(path) + + // Add a Set Transaction + val log = DeltaLog.forTable(spark, path) + val txn = log.startTransaction() + val setTxn = SetTransaction("app-id", 0, Some(0L)) :: Nil + val op = DeltaOperations.StreamingUpdate(OutputMode.Complete(), "app-id", 0L) + txn.commit(setTxn, op) + log.update() + + runAndValidateClone( + path, + target + )() + } + } + + testAllClones("CLONE with table properties to disable DV") { (source, target, isShallow) => + withSQLConf(DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.defaultTablePropertyKey -> "true") { + spark.range(10).write.format("delta").save(source) + spark.sql(s"DELETE FROM delta.`$source` WHERE id = 1") + } + intercept[DeltaCommandUnsupportedWithDeletionVectorsException] { + runAndValidateClone( + source, + target, + tableProperties = Map(DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key -> "false"))() + }.getErrorClass === "DELTA_ADDING_DELETION_VECTORS_DISALLOWED" + } + + for(targetExists <- BOOLEAN_DOMAIN) + testAllClones(s"CLONE respects table features set by table property override, " + + s"targetExists=$targetExists", TAG_MODIFY_PROTOCOL) { + (source, target, isShallow) => + spark.range(10).write.format("delta").save(source) + + if (targetExists) { + spark.range(0).write.format("delta").save(target) + } + + val tblPropertyOverrides = + Seq( + s"delta.feature.${TestWriterFeature.name}" -> "enabled", + "delta.minWriterVersion" -> s"$TABLE_FEATURES_MIN_WRITER_VERSION").toMap + cloneTable( + source, + target, + isReplace = true, + tableProperties = tblPropertyOverrides) + + val targetLog = DeltaLog.forTable(spark, target) + assert(targetLog.update().protocol.isFeatureSupported(TestWriterFeature)) + } + + case class TableFeatureWithProperty( + feature: TableFeature, + property: DeltaConfig[Boolean]) + + // Delta properties that automatically cause a version upgrade when enabled via ALTER TABLE. + final val featuresWithAutomaticProtocolUpgrade: Seq[TableFeatureWithProperty] = Seq( + TableFeatureWithProperty(ChangeDataFeedTableFeature, DeltaConfigs.CHANGE_DATA_FEED), + TableFeatureWithProperty( + DeletionVectorsTableFeature, DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION)) + // This test ensures this upgrade also happens when enabled during a CLONE. 
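+  // Illustrative sketch only (the table paths here are hypothetical and not used by the test):
+  //   CREATE OR REPLACE TABLE delta.`/tmp/target` SHALLOW CLONE delta.`/tmp/source`
+  //   TBLPROPERTIES ('delta.enableChangeDataFeed' = 'true')
+  // is expected to leave the target with a protocol that supports the corresponding table
+  // feature, even if the source table was written with a lower protocol version.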
+ for (featureWithProperty <- featuresWithAutomaticProtocolUpgrade) + testAllClones("Cloning a table with new table properties" + + s" that force protocol version upgrade - ${featureWithProperty.property.key}" + ) { (source, target, isShallow) => + import DeltaTestUtils.StrictProtocolOrdering + + spark.range(5).write.format("delta").save(source) + val sourceDeltaLog = DeltaLog.forTable(spark, source) + val sourceSnapshot = sourceDeltaLog.update() + // This only works if the featureWithProperty is not enabled by default. + assert(!featureWithProperty.property.fromMetaData(sourceSnapshot.metadata)) + // Check that the original version is not already sufficient for the featureWithProperty. + assert(!StrictProtocolOrdering.fulfillsVersionRequirements( + actual = sourceSnapshot.protocol, + requirement = featureWithProperty.feature.minProtocolVersion + )) + + // Clone the table, enabling the featureWithProperty in an override. + val tblProperties = Map(featureWithProperty.property.key -> "true") + cloneTable( + source, + target, + isReplace = true, + tableProperties = tblProperties) + + val targetDeltaLog = DeltaLog.forTable(spark, target) + val targetSnapshot = targetDeltaLog.update() + assert(targetSnapshot.metadata.configuration === + tblProperties ++ sourceSnapshot.metadata.configuration) + // Check that the protocol has been upgraded. + assert(StrictProtocolOrdering.fulfillsVersionRequirements( + actual = targetSnapshot.protocol, + requirement = featureWithProperty.feature.minProtocolVersion + )) + } + + testAllClones("Cloning a table without DV property should not upgrade protocol version" + ) { (source, target, isShallow) => + import DeltaTestUtils.StrictProtocolOrdering + + spark.range(5).write.format("delta").save(source) + withSQLConf(DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.defaultTablePropertyKey -> "true") { + val sourceDeltaLog = DeltaLog.forTable(spark, source) + val sourceSnapshot = sourceDeltaLog.update() + // Should not be enabled, just because it's allowed. + assert(!DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.fromMetaData(sourceSnapshot.metadata)) + // Check that the original version is not already sufficient for the feature. + assert(!StrictProtocolOrdering.fulfillsVersionRequirements( + actual = sourceSnapshot.protocol, + requirement = DeletionVectorsTableFeature.minProtocolVersion + )) + + // Clone the table. + cloneTable( + source, + target, + isReplace = true) + + val targetDeltaLog = DeltaLog.forTable(spark, target) + val targetSnapshot = targetDeltaLog.update() + // Protocol should not have been upgraded. 
+ assert(sourceSnapshot.protocol === targetSnapshot.protocol) + } + } +} + + +trait CloneTableColumnMappingSuiteBase + extends CloneTableSuiteBase + with DeltaColumnMappingSelectedTestMixin +{ + + override protected def runOnlyTests: Seq[String] = Seq( + "simple shallow clone", + "shallow clone a shallow clone", + "create or replace table - shallow", + "shallow clone a table unto itself", + "Clone a time traveled source", + + "validate commitLarge usage metrics", + "clones take the set transactions of the source", + "block changing column mapping mode and modify max id modes under CLONE" + ) + + import testImplicits._ + + testAllClones("block changing column mapping mode and modify max id modes under CLONE") { + (_, _, isShallow) => + val df1 = Seq(1, 2, 3, 4, 5).toDF("id").withColumn("part", 'id % 2) + + // block setting max id + def validateModifyMaxIdError(f: => Any): Unit = { + val e = intercept[UnsupportedOperationException] { f } + assert(e.getMessage == DeltaErrors.cannotModifyTableProperty( + DeltaConfigs.COLUMN_MAPPING_MAX_ID.key + ).getMessage) + } + + withSourceTargetDir { (source, target) => + df1.write.format("delta").partitionBy("part").mode("append").save(source) + // change max id w/ table property should be blocked + validateModifyMaxIdError { + cloneTable( + source, + target, + tableProperties = Map( + DeltaConfigs.COLUMN_MAPPING_MAX_ID.key -> "123123" + )) + } + // change max id w/ SQLConf should be blocked by table property guard + validateModifyMaxIdError { + withMaxColumnIdConf("123123") { + cloneTable( + source, + target + ) + } + } + } + + // block changing column mapping mode + def validateChangeModeError(f: => Any): Unit = { + val e = intercept[ColumnMappingUnsupportedException] { f } + assert(e.getMessage.contains("Changing column mapping mode from")) + } + + val currentMode = columnMappingModeString + + // currentMode to otherMode + val otherMode = if (currentMode == "id") "name" else "id" + withSourceTargetDir { (source, target) => + df1.write.format("delta").partitionBy("part").mode("append").save(source) + // change mode w/ table property should be blocked + validateChangeModeError { + cloneTable( + source, + target, + tableProperties = Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> otherMode + )) + } + } + + withSourceTargetDir { (source, target) => + df1.write.format("delta").partitionBy("part").mode("append").save(source) + // change mode w/ SQLConf should have no effects + withColumnMappingConf(otherMode) { + cloneTable( + source, + target + ) + } + assert(DeltaLog.forTable(spark, target).snapshot.metadata.columnMappingMode.name == + currentMode) + } + + // currentMode to none + withSourceTargetDir { (source, target) => + df1.write.format("delta").partitionBy("part").mode("append").save(source) + // change mode w/ table property + validateChangeModeError { + cloneTable( + source, + target, + tableProperties = Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> "none" + )) + } + } + withSourceTargetDir { (source, target) => + df1.write.format("delta").partitionBy("part").mode("append").save(source) + // change mode w/ SQLConf should have no effects + withColumnMappingConf("none") { + cloneTable( + source, + target + ) + } + assert(DeltaLog.forTable(spark, target).snapshot.metadata.columnMappingMode.name == + currentMode) + } + } +} + +trait CloneTableColumnMappingNameSuiteBase extends CloneTableColumnMappingSuiteBase { + override protected def customConvertToDelta(internal: String, external: String): Unit = { + withColumnMappingConf("none") { + 
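+      // Convert the Parquet table with column mapping disabled first, then switch it to 'name'
+      // mode via the ALTER TABLE below; name mapping also requires reader/writer versions 2/5.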
super.customConvertToDelta(internal, external)
+      sql(
+        s"""ALTER TABLE delta.`$internal` SET TBLPROPERTIES (
+           |${DeltaConfigs.COLUMN_MAPPING_MODE.key} = 'name',
+           |${DeltaConfigs.MIN_READER_VERSION.key} = '2',
+           |${DeltaConfigs.MIN_WRITER_VERSION.key} = '5'
+           |)""".stripMargin)
+        .collect()
+    }
+  }
+}
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/ConflictCheckerPredicateEliminationUnitSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/ConflictCheckerPredicateEliminationUnitSuite.scala
new file mode 100644
index 00000000000..cc5c9a7f243
--- /dev/null
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/ConflictCheckerPredicateEliminationUnitSuite.scala
@@ -0,0 +1,176 @@
+/*
+ * Copyright (2021) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta
+
+import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN
+import org.apache.spark.sql.delta.util.DeltaSparkPlanUtils
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.catalyst.dsl.expressions.DslExpression
+import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
+import org.apache.spark.sql.catalyst.expressions.ScalarSubquery
+import org.apache.spark.sql.functions.{col, rand, udf}
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * A set of cheaper unit tests that behave the same regardless of whether DVs, CDF, etc. are
+ * enabled, and that do not need to be repeated in each conflict checker suite.
+ */ +class ConflictCheckerPredicateEliminationUnitSuite + extends QueryTest + with SharedSparkSession + with ConflictCheckerPredicateElimination { + + val simpleExpressionA: Expression = (col("a") === 1).expr + val simpleExpressionB: Expression = (col("b") === "test").expr + + val deterministicExpression: Expression = (col("c") > 5L).expr + val nonDeterministicExpression: Expression = (col("c") > rand()).expr + lazy val deterministicSubquery: Expression = { + val df = spark.sql("SELECT 5") + df.collect() + col("c").expr > ScalarSubquery(df.queryExecution.analyzed) + } + lazy val nonDeterministicSubquery: Expression = { + val df = spark.sql("SELECT rand()") + df.collect() + col("c").expr > ScalarSubquery(df.queryExecution.analyzed) + } + + private def defaultEliminationFunction(e: Seq[Expression]): PredicateElimination = { + val options = DeltaSparkPlanUtils.CheckDeterministicOptions(allowDeterministicUdf = false) + eliminateNonDeterministicPredicates(e, options) + } + + private def checkEliminationResult( + predicate: Expression, + expected: PredicateElimination, + eliminationFunction: Seq[Expression] => PredicateElimination = defaultEliminationFunction) + : Unit = { + require(expected.newPredicates.size === 1) + val actual = eliminationFunction(Seq(predicate)) + assert(actual.newPredicates.size === 1) + assert(actual.newPredicates.head.canonicalized == expected.newPredicates.head.canonicalized, + s"actual=$actual\nexpected=$expected") + assert(actual.eliminatedPredicates === expected.eliminatedPredicates) + } + + for { + deterministic <- BOOLEAN_DOMAIN + subquery <- BOOLEAN_DOMAIN + } { + lazy val exprUnderTest = if (deterministic) { + if (subquery) deterministicSubquery else deterministicExpression + } else { + if (subquery) nonDeterministicSubquery else nonDeterministicExpression + } + + val testSuffix = s"deterministic $deterministic - subquery $subquery" + + def newPredicates(exprF: Expression => Expression): PredicateElimination = PredicateElimination( + newPredicates = Seq(exprF(if (deterministic) exprUnderTest else Literal.TrueLiteral)), + eliminatedPredicates = if (deterministic) Seq.empty else Seq("rand")) + + test(s"and expression - $testSuffix") { + checkEliminationResult( + predicate = simpleExpressionA && exprUnderTest, + expected = newPredicates { eliminatedExprUnderTest => + if (deterministic) { + simpleExpressionA && eliminatedExprUnderTest + } else { + simpleExpressionA + } + } + ) + } + + test(s"or expression - $testSuffix") { + checkEliminationResult( + predicate = simpleExpressionA || exprUnderTest, + expected = newPredicates { _ => + if (deterministic) { + simpleExpressionA || exprUnderTest + } else { + Literal.TrueLiteral + } + } + ) + } + + test(s"and or expression - $testSuffix") { + checkEliminationResult( + predicate = simpleExpressionA && (simpleExpressionB || exprUnderTest), + expected = newPredicates { _ => + if (deterministic) { + simpleExpressionA && (simpleExpressionB || exprUnderTest) + } else { + simpleExpressionA + } + } + ) + } + + test(s"or and expression - $testSuffix") { + checkEliminationResult( + predicate = simpleExpressionA || (simpleExpressionB && exprUnderTest), + expected = newPredicates { _ => + if (deterministic) { + simpleExpressionA || (simpleExpressionB && exprUnderTest) + } else { + simpleExpressionA || simpleExpressionB + } + } + ) + } + + test(s"or not and expression - $testSuffix") { + checkEliminationResult( + predicate = simpleExpressionA || !(simpleExpressionB && exprUnderTest), + expected = newPredicates { _ => + if 
(deterministic) { + simpleExpressionA || !(simpleExpressionB && exprUnderTest) + } else { + Literal.TrueLiteral + } + } + ) + } + + test(s"and not or expression - $testSuffix") { + checkEliminationResult( + predicate = simpleExpressionA && !(simpleExpressionB || exprUnderTest), + expected = newPredicates { _ => + if (deterministic) { + simpleExpressionA && !(simpleExpressionB || exprUnderTest) + } else { + simpleExpressionA + } + }) + } + } + + test("udf name is not exposed") { + val random = udf(() => Math.random()) + .asNondeterministic() + .withName("sensitive_udf_name") + checkEliminationResult( + predicate = simpleExpressionA && (col("c") > random()).expr, + expected = PredicateElimination( + newPredicates = Seq(simpleExpressionA), + eliminatedPredicates = Seq("scalaudf"))) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/ConvertToDeltaSQLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/ConvertToDeltaSQLSuite.scala new file mode 100644 index 00000000000..37b2635e101 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/ConvertToDeltaSQLSuite.scala @@ -0,0 +1,64 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.functions.{col, from_json} + +trait ConvertToDeltaSQLSuiteBase extends ConvertToDeltaSuiteBaseCommons + with DeltaSQLCommandTest { + override protected def convertToDelta( + identifier: String, + partitionSchema: Option[String] = None, collectStats: Boolean = true): Unit = { + if (partitionSchema.isEmpty) { + sql(s"convert to delta $identifier ${collectStatisticsStringOption(collectStats)}") + } else { + val stringSchema = partitionSchema.get + sql(s"convert to delta $identifier ${collectStatisticsStringOption(collectStats)}" + + s" partitioned by ($stringSchema)") + } + } + + // TODO: Move to ConvertToDeltaSuiteBaseCommons when DeltaTable API contains collectStats option + test("convert with collectStats set to false") { + withTempDir { dir => + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "true") { + + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF) + convertToDelta(s"parquet.`$tempDir`", collectStats = false) + val deltaLog = DeltaLog.forTable(spark, tempDir) + val history = io.delta.tables.DeltaTable.forPath(tempDir).history() + checkAnswer( + spark.read.format("delta").load(tempDir), + simpleDF + ) + assert(history.count == 1) + val statsDf = deltaLog.unsafeVolatileSnapshot.allFiles + .select(from_json(col("stats"), deltaLog.unsafeVolatileSnapshot.statsSchema) + .as("stats")).select("stats.*") + assert(statsDf.filter(col("numRecords").isNotNull).count == 0) + } + } + } + +} + +class ConvertToDeltaSQLSuite extends ConvertToDeltaSQLSuiteBase + with ConvertToDeltaSuiteBase diff --git 
a/spark/src/test/scala/org/apache/spark/sql/delta/ConvertToDeltaScalaSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/ConvertToDeltaScalaSuite.scala new file mode 100644 index 00000000000..22c567e6a0d --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/ConvertToDeltaScalaSuite.scala @@ -0,0 +1,38 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.types.StructType + +class ConvertToDeltaScalaSuite extends ConvertToDeltaSuiteBase { + override protected def convertToDelta( + identifier: String, + partitionSchema: Option[String] = None, collectStats: Boolean = true): Unit = { + if (partitionSchema.isDefined) { + io.delta.tables.DeltaTable.convertToDelta( + spark, + identifier, + StructType.fromDDL(partitionSchema.get) + ) + } else { + io.delta.tables.DeltaTable.convertToDelta( + spark, + identifier + ) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/ConvertToDeltaSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/ConvertToDeltaSuiteBase.scala new file mode 100644 index 00000000000..ad20154d321 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/ConvertToDeltaSuiteBase.scala @@ -0,0 +1,1391 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.{File, FileNotFoundException} + +import org.apache.spark.sql.delta.files.TahoeLogFileIndex +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + +/** + * Common functions used across CONVERT TO DELTA test suites. We separate out these functions + * so that we can re-use them in tests using Hive support. Tests that leverage Hive support cannot + * extend the `SharedSparkSession`, therefore we keep this utility class as bare-bones as possible. 
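 + * + * The abstract `convertToDelta` hook below is what lets the same tests exercise both entry points, + * roughly (simplified sketch): + * {{{ + * // ConvertToDeltaSQLSuite: sql("CONVERT TO DELTA parquet.`/path`") + * // ConvertToDeltaScalaSuite: io.delta.tables.DeltaTable.convertToDelta(spark, "parquet.`/path`") + * }}}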
+ */ +trait ConvertToDeltaTestUtils extends QueryTest { self: SQLTestUtils => + + protected def collectStatisticsStringOption(collectStats: Boolean): String = Option(collectStats) + .filterNot(identity).map(_ => "NO STATISTICS").getOrElse("") + + protected def simpleDF = spark.range(100) + .withColumn("key1", col("id") % 2) + .withColumn("key2", col("id") % 3 cast "String") + + protected def convertToDelta(identifier: String, partitionSchema: Option[String] = None, + collectStats: Boolean = true): Unit + + protected val blockNonDeltaMsg = "A transaction log for Delta was found at" + protected val parquetOnlyMsg = "CONVERT TO DELTA only supports parquet tables" + protected val invalidParquetMsg = " not a Parquet file. Expected magic number at tail" + // scalastyle:off deltahadoopconfiguration + protected def sessionHadoopConf = spark.sessionState.newHadoopConf + // scalastyle:on deltahadoopconfiguration + + protected def deltaRead(df: => DataFrame): Boolean = { + val analyzed = df.queryExecution.analyzed + analyzed.find { + case DeltaTable(_: TahoeLogFileIndex) => true + case _ => false + }.isDefined + } + + protected def writeFiles( + dir: String, + df: DataFrame, + format: String = "parquet", + partCols: Seq[String] = Nil, + mode: String = "overwrite"): Unit = { + if (partCols.nonEmpty) { + df.write.partitionBy(partCols: _*).format(format).mode(mode).save(dir) + } else { + df.write.format(format).mode(mode).save(dir) + } + } +} + +trait ConvertToDeltaSuiteBaseCommons extends ConvertToDeltaTestUtils + with SharedSparkSession + with SQLTestUtils + with DeltaSQLCommandTest + with DeltaTestUtilsForTempViews + +/** Tests for CONVERT TO DELTA that can be leveraged across SQL and Scala APIs. */ +trait ConvertToDeltaSuiteBase extends ConvertToDeltaSuiteBaseCommons + with ConvertToDeltaHiveTableTests { + + import org.apache.spark.sql.functions._ + import testImplicits._ + + // Use different batch sizes to cover different merge schema code paths. 
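 + // (DELTA_IMPORT_BATCH_SIZE_SCHEMA_INFERENCE presumably bounds how many file footers are merged per + // batch during schema inference, so a batch size of 1 exercises inter-batch merging while 5 also + // covers intra-batch merging; see the schema-merging tests below.)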
+ protected def testSchemaMerging(testName: String)(block: => Unit): Unit = { + Seq("1", "5").foreach { batchSize => + test(s"$testName - batch size: $batchSize") { + withSQLConf( + DeltaSQLConf.DELTA_IMPORT_BATCH_SIZE_SCHEMA_INFERENCE.key -> batchSize) { + block + } + } + } + } + + test("convert with collectStats true") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF) + convertToDelta(s"parquet.`$tempDir`", collectStats = true) + val deltaLog = DeltaLog.forTable(spark, tempDir) + val history = io.delta.tables.DeltaTable.forPath(tempDir).history() + checkAnswer( + spark.read.format("delta").load(tempDir), + simpleDF + ) + assert(history.count == 1) + val statsDf = deltaLog.unsafeVolatileSnapshot.allFiles + .select(from_json($"stats", deltaLog.unsafeVolatileSnapshot.statsSchema) + .as("stats")).select("stats.*") + assert(statsDf.filter($"numRecords".isNull).count == 0) + assert(statsDf.agg(sum("numRecords")).as[Long].head() == simpleDF.count) + } + } + + test("convert with collectStats true but config set to false -> Do not collect stats") { + withTempDir { dir => + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF) + convertToDelta(s"parquet.`$tempDir`", collectStats = true) + val deltaLog = DeltaLog.forTable(spark, tempDir) + val history = io.delta.tables.DeltaTable.forPath(tempDir).history() + checkAnswer( + spark.read.format("delta").load(tempDir), + simpleDF + ) + assert(history.count == 1) + val statsDf = deltaLog.unsafeVolatileSnapshot.allFiles + .select(from_json($"stats", deltaLog.unsafeVolatileSnapshot.statsSchema) + .as("stats")).select("stats.*") + assert(statsDf.filter($"numRecords".isNotNull).count == 0) + } + } + } + + test("negative case: convert a non-delta path falsely claimed as parquet") { + Seq("orc", "json", "csv").foreach { format => + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF, format) + // exception from executor reading parquet footer + intercept[SparkException] { + convertToDelta(s"parquet.`$tempDir`") + } + } + } + } + + test("negative case: convert non-parquet path to delta") { + Seq("orc", "json", "csv").foreach { format => + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF, format) + val ae = intercept[AnalysisException] { + convertToDelta(s"$format.`$tempDir`") + } + assert(ae.getMessage.contains(parquetOnlyMsg)) + } + } + } + + test("negative case: convert non-parquet file to delta") { + Seq("orc", "json", "csv").foreach { format => + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF, format) + + val se = intercept[SparkException] { + convertToDelta(s"parquet.`$tempDir`") + } + assert(se.getMessage.contains(invalidParquetMsg)) + } + } + } + + test("filter non-parquet file for schema inference when not using catalog schema") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir + "/part=1/", Seq(1).toDF("corrupted_id"), format = "orc") + writeFiles(tempDir + "/part=2/", Seq(2).toDF("id")) + + val tableName = "pqtable" + withTable(tableName) { + // Create a catalog table on top of the parquet table with the wrong schema + // The schema should be picked from the parquet data files + sql(s"CREATE TABLE $tableName (key1 long, key2 string) " + + s"USING PARQUET PARTITIONED BY (part string) LOCATION '$dir'") + // Required for discovering partition of the table + sql(s"MSCK REPAIR TABLE 
$tableName") + + withSQLConf( + "spark.sql.files.ignoreCorruptFiles" -> "false", + DeltaSQLConf.DELTA_CONVERT_USE_CATALOG_SCHEMA.key -> "false") { + val se = intercept[SparkException] { + convertToDelta(tableName) + } + assert(se.getMessage.contains(invalidParquetMsg)) + } + + withSQLConf( + "spark.sql.files.ignoreCorruptFiles" -> "true", + DeltaSQLConf.DELTA_CONVERT_USE_CATALOG_SCHEMA.key -> "false") { + + convertToDelta(tableName) + + val tableId = TableIdentifier(tableName, Some("default")) + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, tableId) + val expectedSchema = StructType( + StructField("id", IntegerType, true) :: StructField("part", StringType, true) :: Nil) + // Schema is inferred from the data + assert(snapshot.schema.equals(expectedSchema)) + } + } + } + } + + test("filter non-parquet files during delta conversion") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir + "/part=1/", Seq(1).toDF("id"), format = "json") + writeFiles(tempDir + "/part=2/", Seq(2).toDF("id")) + withSQLConf("spark.sql.files.ignoreCorruptFiles" -> "true") { + convertToDelta(s"parquet.`$tempDir`", Some("part string")) + checkAnswer(spark.read.format("delta").load(tempDir), Row(2, "2") :: Nil) + } + } + } + + testQuietlyWithTempView("negative case: convert temp views to delta") { isSQLTempView => + val tableName = "pqtbl" + withTable(tableName) { + // Create view + simpleDF.write.format("parquet").saveAsTable(tableName) + createTempViewFromTable(tableName, isSQLTempView, format = Some("parquet")) + + // Attempt to convert to delta + val ae = intercept[AnalysisException] { + convertToDelta("v") + } + + assert(ae.getMessage.contains("Converting a view to a Delta table") || + ae.getMessage.contains("Table default.v not found") || + ae.getMessage.contains("Table or view 'v' not found in database 'default'") || + ae.getMessage.contains("table or view `default`.`v` cannot be found")) + } + } + + test("negative case: missing data source name") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF, "parquet", Seq("key1", "key2")) + val ae = intercept[AnalysisException] { + convertToDelta(s"`$tempDir`", None) + } + assert(ae.getMessage.contains(parquetOnlyMsg)) + } + } + + test("negative case: # partitions unmatched") { + withTempDir { dir => + val path = dir.getCanonicalPath + writeFiles(path, simpleDF, partCols = Seq("key1", "key2")) + + val ae = intercept[AnalysisException] { + convertToDelta(s"parquet.`$path`", Some("key1 long")) + } + assert(ae.getMessage.contains("Expecting 1 partition column(s)")) + } + } + + test("negative case: unmatched partition column names") { + withTempDir { dir => + val path = dir.getCanonicalPath + writeFiles(path, simpleDF, partCols = Seq("key1", "key2")) + + val ae = intercept[AnalysisException] { + convertToDelta(s"parquet.`$path`", Some("key1 long, key22 string")) + } + assert(ae.getMessage.contains("Expecting partition column ")) + } + } + + test("negative case: failed to cast partition value") { + withTempDir { dir => + val path = dir.getCanonicalPath + val df = simpleDF.withColumn("partKey", lit("randomstring")) + writeFiles(path, df, partCols = Seq("partKey")) + val ae = intercept[RuntimeException] { + convertToDelta(s"parquet.`$path`", Some("partKey int")) + } + assert(ae.getMessage.contains("Failed to cast partition value")) + } + } + + test("negative case: inconsistent directory structure") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF) + 
writeFiles(tempDir + "/key1=1/", simpleDF) + + var ae = intercept[AnalysisException] { + convertToDelta(s"parquet.`$tempDir`") + } + assert(ae.getMessage.contains("Expecting 0 partition column")) + + ae = intercept[AnalysisException] { + convertToDelta(s"parquet.`$tempDir`", Some("key1 string")) + } + assert(ae.getMessage.contains("Expecting 1 partition column")) + } + } + + test("negative case: empty and non-existent root dir") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + val re = intercept[FileNotFoundException] { + convertToDelta(s"parquet.`$tempDir`") + } + assert(re.getMessage.contains("No file found in the directory")) + Utils.deleteRecursively(dir) + + val ae = intercept[FileNotFoundException] { + convertToDelta(s"parquet.`$tempDir`") + } + assert(ae.getMessage.contains("doesn't exist")) + } + } + + testSchemaMerging("negative case: merge type conflict - string vs int") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir + "/part=1/", Seq(1).toDF("id")) + for (i <- 2 to 8 by 2) { + writeFiles(tempDir + s"/part=$i/", Seq(1).toDF("id")) + } + for (i <- 3 to 9 by 2) { + writeFiles(tempDir + s"/part=$i/", Seq("1").toDF("id")) + } + + val exception = intercept[Exception] { + convertToDelta(s"parquet.`$tempDir`", Some("part string")) + } + + val realCause = exception match { + case se: SparkException => se.getCause + case ae: AnalysisException => ae + } + assert(realCause.getMessage.contains("Failed to merge")) + assert(exception.isInstanceOf[AnalysisException] || + realCause.getMessage.contains("/part="), + "Error message should contain the file name") + } + } + + test("convert a streaming parquet path: use metadata") { + val stream = MemoryStream[Int] + val df = stream.toDS().toDF() + + withTempDir { outputDir => + val checkpoint = new File(outputDir, "_check").toString + val dataLocation = new File(outputDir, "data").toString + val options = Map("checkpointLocation" -> checkpoint) + + // Add initial data to parquet file sink + val q = df.writeStream.options(options).format("parquet").start(dataLocation) + stream.addData(1, 2, 3) + q.processAllAvailable() + q.stop() + + // Add non-streaming data: this should be ignored in conversion. + spark.range(10, 20).write.mode("append").parquet(dataLocation) + sql(s"CONVERT TO DELTA parquet.`$dataLocation`") + + // Write data to delta + val q2 = df.writeStream.options(options).format("delta").start(dataLocation) + + try { + stream.addData(4, 5, 6) + q2.processAllAvailable() + + // Should only read streaming data. + checkAnswer( + spark.read.format("delta").load(dataLocation), + (1 to 6).map { Row(_) } + ) + } finally { + q2.stop() + } + } + } + + test("convert a streaming parquet path: ignore metadata") { + val stream = MemoryStream[Int] + val df = stream.toDS().toDF("col1") + + withTempDir { outputDir => + val checkpoint = new File(outputDir, "_check").toString + val dataLocation = new File(outputDir, "data").toString + val options = Map( + "checkpointLocation" -> checkpoint + ) + + // Add initial data to parquet file sink + val q = df.writeStream.options(options).format("parquet").start(dataLocation) + stream.addData(1 to 5) + q.processAllAvailable() + q.stop() + + // Add non-streaming data: this should not be ignored in conversion. 
+ spark.range(11, 21).select('id.cast("int") as "col1") + .write.mode("append").parquet(dataLocation) + + withSQLConf(("spark.databricks.delta.convert.useMetadataLog", "false")) { + sql(s"CONVERT TO DELTA parquet.`$dataLocation`") + } + + // Write data to delta + val q2 = df.writeStream.options(options).format("delta").start(dataLocation) + + try { + stream.addData(6 to 10) + q2.processAllAvailable() + + // Should read all data not just streaming data + checkAnswer( + spark.read.format("delta").load(dataLocation), + (1 to 20).map { Row(_) } + ) + } finally { + q2.stop() + } + } + } + + test("convert a parquet path") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF, partCols = Seq("key1", "key2")) + convertToDelta(s"parquet.`$tempDir`", Some("key1 long, key2 string")) + + + // reads actually went through Delta + assert(deltaRead(spark.read.format("delta").load(tempDir).select("id"))) + + // query through Delta is correct + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 = 0").select("id"), + simpleDF.filter("id % 2 == 0").select("id")) + + + // delta writers went through + writeFiles( + tempDir, simpleDF, format = "delta", partCols = Seq("key1", "key2"), mode = "append") + + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 = 1").select("id"), + simpleDF.union(simpleDF).filter("id % 2 == 1").select("id")) + } + } + + private def testSpecialCharactersInDirectoryNames(c: String, expectFailure: Boolean): Unit = { + test(s"partition column names and values contain '$c'") { + withTempDir { dir => + val path = dir.getCanonicalPath + + val key1 = s"${c}key1${c}${c}" + val key2 = s"${c}key2${c}${c}" + + val valueA = s"${c}some${c}${c}value${c}A" + val valueB = s"${c}some${c}${c}value${c}B" + val valueC = s"${c}some${c}${c}value${c}C" + val valueD = s"${c}some${c}${c}value${c}D" + + val df1 = spark.range(3) + .withColumn(key1, lit(valueA)) + .withColumn(key2, lit(valueB)) + val df2 = spark.range(4, 7) + .withColumn(key1, lit(valueC)) + .withColumn(key2, lit(valueD)) + val df = df1.union(df2) + writeFiles(path, df, format = "parquet", partCols = Seq(key1, key2)) + + if (expectFailure) { + val e = intercept[AnalysisException] { + convertToDelta(s"parquet.`$path`", Some(s"`$key1` string, `$key2` string")) + } + assert(e.getMessage.contains("invalid character")) + } else { + convertToDelta(s"parquet.`$path`", Some(s"`$key1` string, `$key2` string")) + + // missing one char from valueA, so no match + checkAnswer( + spark.read.format("delta").load(path).where(s"`$key1` = '${c}some${c}value${c}A'") + .select("id"), Nil) + + checkAnswer( + spark.read.format("delta").load(path) + .where(s"`$key1` = '$valueA' and `$key2` = '$valueB'").select("id"), + Row(0) :: Row(1) :: Row(2) :: Nil) + + checkAnswer( + spark.read.format("delta").load(path).where(s"`$key2` = '$valueD' and id > 4") + .select("id"), + Row(5) :: Row(6) :: Nil) + } + } + } + } + + " ,;{}()\n\t=".foreach { char => + testSpecialCharactersInDirectoryNames(char.toString, expectFailure = true) + } + testSpecialCharactersInDirectoryNames("%!@#$%^&*-", expectFailure = false) + testSpecialCharactersInDirectoryNames("?.+<_>|/", expectFailure = false) + + test("can ignore empty sub-directories") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + val fs = new Path(tempDir).getFileSystem(sessionHadoopConf) + + writeFiles(tempDir + "/key1=1/", Seq(1).toDF) + assert(fs.mkdirs(new Path(tempDir + "/key1=2/"))) + assert(fs.mkdirs(new Path(tempDir + "/random_dir/"))) + 
convertToDelta(s"parquet.`$tempDir`", Some("key1 string")) + checkAnswer(spark.read.format("delta").load(tempDir), Row(1, "1")) + } + } + + test("allow file names to have = character") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir + "/part=1/", Seq(1).toDF("id")) + + val fs = new Path(tempDir).getFileSystem(sessionHadoopConf) + // Rename the parquet file in partition "part=1" with something containing "=" + val files = fs.listStatus(new Path(tempDir + "/part=1/")) + .map(_.getPath) + .filter(path => !path.getName.startsWith("_") && !path.getName.startsWith(".")) + + assert(files.length == 1) + fs.rename( + files.head, new Path(files.head.getParent.getName, "some-data-id=1.snappy.parquet")) + + convertToDelta(s"parquet.`$tempDir`", Some("part string")) + checkAnswer(spark.read.format("delta").load(tempDir), Row(1, "1")) + } + } + + test("allow file names to not have .parquet suffix") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir + "/part=1/", Seq(1).toDF("id")) + writeFiles(tempDir + "/part=2/", Seq(2).toDF("id")) + + val fs = new Path(tempDir).getFileSystem(sessionHadoopConf) + // Remove the suffix of the parquet file in partition "part=1" + val files = fs.listStatus(new Path(tempDir + "/part=1/")) + .map(_.getPath) + .filter(path => !path.getName.startsWith("_") && !path.getName.startsWith(".")) + + assert(files.length == 1) + fs.rename(files.head, new Path(files.head.getParent.toString, "unknown_suffix")) + + convertToDelta(s"parquet.`$tempDir`", Some("part string")) + checkAnswer(spark.read.format("delta").load(tempDir), Row(1, "1") :: Row(2, "2") :: Nil) + } + } + + test("backticks") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF) + + // wrap parquet with backticks should work + convertToDelta(s"`parquet`.`$tempDir`", None) + checkAnswer(spark.read.format("delta").load(tempDir), simpleDF) + + // path with no backticks should fail parsing + intercept[ParseException] { + convertToDelta(s"parquet.$tempDir") + } + } + } + + test("overlapping partition and data columns") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + val df = spark.range(1) + .withColumn("partKey1", lit("1")) + .withColumn("partKey2", lit("2")) + df.write.parquet(tempDir + "/partKey1=1") + convertToDelta(s"parquet.`$tempDir`", Some("partKey1 int")) + + // Same as in [[HadoopFsRelation]], for common columns, + // respecting the order of data schema but the type of partition schema + checkAnswer(spark.read.format("delta").load(tempDir), Row(0, 1, "2")) + } + } + + test("some partition value is null") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + val df1 = Seq(0).toDF("id") + .withColumn("key1", lit("A1")) + .withColumn("key2", lit(null)) + + val df2 = Seq(1).toDF("id") + .withColumn("key1", lit(null)) + .withColumn("key2", lit(100)) + + writeFiles(tempDir, df1.union(df2), partCols = Seq("key1", "key2")) + convertToDelta(s"parquet.`$tempDir`", Some("key1 string, key2 int")) + checkAnswer( + spark.read.format("delta").load(tempDir).where("key2 is null") + .select("id"), Row(0)) + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 is null") + .select("id"), Row(1)) + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 = 'A1'") + .select("id"), Row(0)) + checkAnswer( + spark.read.format("delta").load(tempDir).where("key2 = 100") + .select("id"), Row(1)) + } + } + + test("converting tables with dateType partition columns") { + withTempDir { dir 
=> + val tempDir = dir.getCanonicalPath + val df1 = Seq(0).toDF("id").withColumn("key1", lit("2019-11-22").cast("date")) + + val df2 = Seq(1).toDF("id").withColumn("key1", lit(null)) + + writeFiles(tempDir, df1.union(df2), partCols = Seq("key1")) + convertToDelta(s"parquet.`$tempDir`", Some("key1 date")) + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 is null").select("id"), + Row(1)) + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 = '2019-11-22'").select("id"), + Row(0)) + } + } + + test("empty string partition value will be read back as null") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + val df1 = Seq(0).toDF("id") + .withColumn("key1", lit("A1")) + .withColumn("key2", lit("")) + + val df2 = Seq(1).toDF("id") + .withColumn("key1", lit("")) + .withColumn("key2", lit("")) + + writeFiles(tempDir, df1.union(df2), partCols = Seq("key1", "key2")) + convertToDelta(s"parquet.`$tempDir`", Some("key1 string, key2 string")) + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 is null and key2 is null") + .select("id"), Row(1)) + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 = 'A1'") + .select("id"), Row(0)) + } + } + + testSchemaMerging("can merge schema with different columns") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir + "/part=1/", Seq(1).toDF("id1")) + writeFiles(tempDir + "/part=2/", Seq(2).toDF("id2")) + writeFiles(tempDir + "/part=3/", Seq(3).toDF("id3")) + + convertToDelta(s"parquet.`$tempDir`", Some("part string")) + + // spell out the columns as intra-batch and inter-batch merging logic may order + // the columns differently + val cols = Seq("id1", "id2", "id3", "part") + checkAnswer( + spark.read.format("delta").load(tempDir).where("id2 = 2") + .select(cols.head, cols.tail: _*), + Row(null, 2, null, "2") :: Nil) + } + } + + testSchemaMerging("can merge schema with different nullability") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir + "/part=1/", Seq(1).toDF("id")) + val schema = new StructType().add(StructField("id", IntegerType, false)) + val df = spark.createDataFrame(spark.sparkContext.parallelize(Seq(Row(1))), schema) + writeFiles(tempDir + "/part=2/", df) + + convertToDelta(s"parquet.`$tempDir`", Some("part string")) + val fields = spark.read.format("delta").load(tempDir).schema.fields.toSeq + assert(fields.map(_.name) === Seq("id", "part")) + assert(fields.map(_.nullable) === Seq(true, true)) + } + } + + testSchemaMerging("can upcast in schema merging: short vs int") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir + "/part=1/", Seq(1 << 20).toDF("id")) + writeFiles(tempDir + "/part=2/", + Seq(1).toDF("id").select(col("id") cast ShortType)) + + convertToDelta(s"parquet.`$tempDir`", Some("part string")) + checkAnswer( + spark.read.format("delta").load(tempDir), Row(1 << 20, "1") :: Row(1, "2") :: Nil) + + val expectedSchema = new StructType().add("id", IntegerType).add("part", StringType) + val deltaLog = DeltaLog.forTable(spark, tempDir) + assert(deltaLog.update().metadata.schema === expectedSchema) + } + } + + test("can fetch global configs") { + withTempDir { dir => + val path = dir.getCanonicalPath + val deltaLog = DeltaLog.forTable(spark, path) + withSQLConf("spark.databricks.delta.properties.defaults.appendOnly" -> "true") { + writeFiles(path, simpleDF.coalesce(1)) + convertToDelta(s"parquet.`$path`") + } + 
assert(deltaLog.snapshot.metadata.configuration("delta.appendOnly") === "true") + } + } + + test("convert to delta with string partition columns") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF, partCols = Seq("key1", "key2")) + convertToDelta(s"parquet.`$tempDir`", Some("key1 long, key2 string")) + + // reads actually went through Delta + assert(deltaRead(spark.read.format("delta").load(tempDir).select("id"))) + + // query through Delta is correct + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 = 0").select("id"), + simpleDF.filter("id % 2 == 0").select("id")) + + // delta writers went through + writeFiles( + tempDir, simpleDF, format = "delta", partCols = Seq("key1", "key2"), mode = "append") + + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 = 1").select("id"), + simpleDF.union(simpleDF).filter("id % 2 == 1").select("id")) + } + } + + test("convert a delta path falsely claimed as parquet") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF, "delta") + + // Convert to delta + convertToDelta(s"parquet.`$tempDir`") + + // Verify that table converted to delta + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 = 1").select("id"), + simpleDF.filter("id % 2 == 1").select("id")) + } + } + + test("converting a delta path should not error for idempotency") { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF, "delta") + convertToDelta(s"delta.`$tempDir`") + + checkAnswer( + spark.read.format("delta").load(tempDir).where("key1 = 1").select("id"), + simpleDF.filter("id % 2 == 1").select("id")) + } + } + + test("partition column name starting with underscore and dot") { + withTempDir { dir => + val df = spark.range(100) + .withColumn("_key1", col("id") % 2) + .withColumn(".key2", col("id") % 7 cast "String") + + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, df, partCols = Seq("_key1", ".key2")) + + convertToDelta(s"parquet.`$tempDir`", Some("_key1 long, `.key2` string")) + + checkAnswer(sql(s"SELECT * FROM delta.`$tempDir`"), df) + } + } +} + +/** + * Tests that involve tables defined in a Catalog such as Hive. We test in the sql as well as + * hive package, where the hive package uses a proper HiveExternalCatalog to alter table definitions + * in the HiveMetaStore. This test trait *should not* extend SharedSparkSession so that it can be + * mixed in with the Hive test utilities. + */ +trait ConvertToDeltaHiveTableTests extends ConvertToDeltaTestUtils with SQLTestUtils { + + // Test conversion with and without the new CatalogFileManifest. 
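 + // (With DELTA_CONVERT_USE_CATALOG_PARTITIONS enabled the file manifest is built from the partitions + // registered in the catalog instead of a directory listing, so partitions that exist only on disk + // are pruned; see the "catalog partitions" test below.)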
+ protected def testCatalogFileManifest(testName: String)(block: (Boolean) => Unit): Unit = { + Seq(true, false).foreach { useCatalogFileManifest => + test(s"$testName - $useCatalogFileManifest") { + withSQLConf( + DeltaSQLConf.DELTA_CONVERT_USE_CATALOG_PARTITIONS.key + -> useCatalogFileManifest.toString) { + block(useCatalogFileManifest) + } + } + } + } + + protected def testCatalogSchema(testName: String)(testFn: (Boolean) => Unit): Unit = { + Seq(true, false).foreach { + useCatalogSchema => + test(s"$testName - $useCatalogSchema") { + withSQLConf( + DeltaSQLConf.DELTA_CONVERT_USE_CATALOG_SCHEMA.key -> useCatalogSchema.toString) { + testFn(useCatalogSchema) + } + } + } + } + + protected def getPathForTableName(tableName: String): String = { + spark + .sessionState + .catalog + .getTableMetadata(TableIdentifier(tableName, Some("default"))).location.getPath + } + + protected def verifyExternalCatalogMetadata(tableName: String): Unit = { + val catalog = spark.sessionState.catalog.externalCatalog.getTable("default", tableName) + // Hive automatically adds some properties + val cleanProps = catalog.properties.filterKeys(_ != "transient_lastDdlTime") + assert(catalog.schema.isEmpty, + s"Schema wasn't empty in the catalog for table $tableName: ${catalog.schema}") + assert(catalog.partitionColumnNames.isEmpty, "Partition columns weren't empty in the " + + s"catalog for table $tableName: ${catalog.partitionColumnNames}") + assert(cleanProps.isEmpty, + s"Table properties weren't empty for table $tableName: $cleanProps") + } + + testQuietly("negative case: converting non-parquet table") { + val tableName = "csvtable" + withTable(tableName) { + // Create a csv table + simpleDF.write.partitionBy("key1", "key2").format("csv").saveAsTable(tableName) + + // Attempt to convert to delta + val ae = intercept[AnalysisException] { + convertToDelta(tableName, Some("key1 long, key2 string")) + } + + // Get error message + assert(ae.getMessage.contains(parquetOnlyMsg)) + } + } + + testQuietly("negative case: convert parquet path to delta when there is a database called " + + "parquet but no table or path exists") { + val dbName = "parquet" + withDatabase(dbName) { + withTempDir { dir => + sql(s"CREATE DATABASE $dbName") + + val tempDir = dir.getCanonicalPath + // Attempt to convert to delta + val ae = intercept[FileNotFoundException] { + convertToDelta(s"parquet.`$tempDir`") + } + + // Get error message + assert(ae.getMessage.contains("No file found in the directory")) + } + } + } + + testQuietly("negative case: convert views to delta") { + val viewName = "view" + val tableName = "pqtbl" + withTable(tableName) { + // Create view + simpleDF.write.format("parquet").saveAsTable(tableName) + sql(s"CREATE VIEW $viewName as SELECT * from $tableName") + + // Attempt to convert to delta + val ae = intercept[AnalysisException] { + convertToDelta(viewName) + } + + assert(ae.getMessage.contains("Converting a view to a Delta table")) + } + } + + testQuietly("negative case: converting a table that doesn't exist but the database does") { + val dbName = "db" + withDatabase(dbName) { + sql(s"CREATE DATABASE $dbName") + + // Attempt to convert to delta + val ae = intercept[AnalysisException] { + convertToDelta(s"$dbName.faketable", Some("key1 long, key2 string")) + } + + assert(ae.getMessage.contains("Table or view 'faketable' not found") || + ae.getMessage.contains(s"table or view `$dbName`.`faketable` cannot be found")) + } + } + + testQuietly("negative case: unmatched partition schema") { + val tableName = "pqtable" + 
withTable(tableName) { + // Create a partitioned parquet table + simpleDF.write.partitionBy("key1", "key2").format("parquet").saveAsTable(tableName) + + // Check the partition schema in the catalog, key1's data type is original Long. + assert(spark.sessionState.catalog.getTableMetadata( + TableIdentifier(tableName, Some("default"))).partitionSchema + .equals( + (new StructType) + .add(StructField("key1", LongType, true)) + .add(StructField("key2", StringType, true)) + )) + + // Convert to delta with partition schema mismatch on key1's data type, which is String. + val ae = intercept[AnalysisException] { + convertToDelta(tableName, Some("key1 string, key2 string")) + } + + assert(ae.getMessage.contains("CONVERT TO DELTA was called with a partition schema " + + "different from the partition schema inferred from the catalog")) + } + } + + testQuietly("convert two external tables pointing to same underlying files " + + "with differing table properties should error if conf enabled otherwise merge properties") { + val externalTblName = "extpqtbl" + val secondExternalTbl = "othertbl" + withTable(externalTblName, secondExternalTbl) { + withTempDir { dir => + val path = dir.getCanonicalPath + + // Create external table + sql(s"CREATE TABLE $externalTblName " + + s"USING PARQUET LOCATION '$path' TBLPROPERTIES ('abc'='def', 'def'='ghi') AS SELECT 1") + + // Create second external table with different table properties + sql(s"CREATE TABLE $secondExternalTbl " + + s"USING PARQUET LOCATION '$path' TBLPROPERTIES ('abc'='111', 'jkl'='mno')") + + // Convert first table to delta + convertToDelta(externalTblName) + + // Verify that files converted to delta + checkAnswer( + sql(s"select * from delta.`$path`"), Row(1)) + + // Verify first table converted to delta + assert(spark.sessionState.catalog.getTableMetadata( + TableIdentifier(externalTblName, Some("default"))).provider.contains("delta")) + + // Attempt to convert second external table to delta + val ae = intercept[AnalysisException] { + convertToDelta(secondExternalTbl) + } + + assert( + ae.getMessage.contains("You are trying to convert a table which already has a delta") && + ae.getMessage.contains("convert.metadataCheck.enabled")) + + // Disable convert metadata check + withSQLConf(DeltaSQLConf.DELTA_CONVERT_METADATA_CHECK_ENABLED.key -> "false") { + // Convert second external table to delta + convertToDelta(secondExternalTbl) + + // Check delta table configuration has updated properties + assert(DeltaLog.forTable(spark, path).startTransaction().metadata.configuration == + Map("abc" -> "111", "def" -> "ghi", "jkl" -> "mno")) + } + } + } + } + + testQuietly("convert two external tables pointing to the same underlying files") { + val externalTblName = "extpqtbl" + val secondExternalTbl = "othertbl" + withTable(externalTblName, secondExternalTbl) { + withTempDir { dir => + val path = dir.getCanonicalPath + writeFiles(path, simpleDF, "delta") + val deltaLog = DeltaLog.forTable(spark, path) + + // Create external table + sql(s"CREATE TABLE $externalTblName (key1 long, key2 string) " + + s"USING PARQUET LOCATION '$path'") + + // Create second external table + sql(s"CREATE TABLE $secondExternalTbl (key1 long, key2 string) " + + s"USING PARQUET LOCATION '$path'") + + assert(deltaLog.update().version == 0) + + // Convert first table to delta + convertToDelta(externalTblName) + + // Convert should not update version since delta log metadata is not changing + assert(deltaLog.update().version == 0) + // Check that the metadata in the catalog was emptied and 
pushed to the delta log + verifyExternalCatalogMetadata(externalTblName) + + // Convert second external table to delta + convertToDelta(secondExternalTbl) + verifyExternalCatalogMetadata(secondExternalTbl) + + // Verify that underlying files converted to delta + checkAnswer( + sql(s"select id from delta.`$path` where key1 = 1"), + simpleDF.filter("id % 2 == 1").select("id")) + + // Verify catalog table provider is 'delta' for both tables + assert(spark.sessionState.catalog.getTableMetadata( + TableIdentifier(externalTblName, Some("default"))).provider.contains("delta")) + + assert(spark.sessionState.catalog.getTableMetadata( + TableIdentifier(secondExternalTbl, Some("default"))).provider.contains("delta")) + + } + } + } + + testQuietly("convert an external parquet table") { + val tableName = "pqtbl" + val externalTblName = "extpqtbl" + withTable(tableName) { + simpleDF.write.format("parquet").saveAsTable(tableName) + + // Get where the table is stored and try to access it using parquet rather than delta + val path = getPathForTableName(tableName) + + // Create external table + sql(s"CREATE TABLE $externalTblName (key1 long, key2 string) " + + s"USING PARQUET LOCATION '$path'") + + // Convert to delta + sql(s"convert to delta $externalTblName") + + assert(spark.sessionState.catalog.getTableMetadata( + TableIdentifier(externalTblName, Some("default"))).provider.contains("delta")) + + // Verify that table converted to delta + checkAnswer( + sql(s"select key2 from delta.`$path` where key1 = 1"), + simpleDF.filter("id % 2 == 1").select("key2")) + + checkAnswer( + sql(s"select key2 from $externalTblName where key1 = 1"), + simpleDF.filter("id % 2 == 1").select("key2")) + } + } + + testCatalogSchema("convert a parquet table with catalog schema") { + useCatalogSchema => { + withTempDir { + dir => + // Create a parquet table with all 3 columns: id, key1 and key2 + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF) + + val tableName = "pqtable" + withTable(tableName) { + // Create a catalog table on top of the parquet table excluding column id + sql(s"CREATE TABLE $tableName (key1 long, key2 string) " + + s"USING PARQUET LOCATION '$dir'") + + convertToDelta(tableName) + + val tableId = TableIdentifier(tableName, Some("default")) + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, tableId) + val catalog_columns = Seq[StructField]( + StructField("key1", LongType, true), + StructField("key2", StringType, true) + ) + + if (useCatalogSchema) { + // Catalog schema is used, column id is excluded. + assert(snapshot.metadata.schema.equals(StructType(catalog_columns))) + } else { + // Schema is inferred from the data, all 3 columns are included. 
+ assert(snapshot.metadata.schema + .equals(StructType(StructField("id", LongType, true) +: catalog_columns))) + } + } + } + } + } + + testQuietly("converting a delta table should not error for idempotency") { + val tableName = "deltatbl" + val format = "delta" + withTable(tableName) { + simpleDF.write.partitionBy("key1", "key2").format(format).saveAsTable(tableName) + convertToDelta(tableName) + + // reads actually went through Delta + val path = getPathForTableName(tableName) + checkAnswer( + sql(s"select id from $format.`$path` where key1 = 1"), + simpleDF.filter("id % 2 == 1").select("id")) + } + } + + testQuietly("convert to delta using table name without database name") { + val tableName = "pqtable" + withTable(tableName) { + // Create a parquet table + simpleDF.write.partitionBy("key1", "key2").format("parquet").saveAsTable(tableName) + + // Convert to delta using only table name + convertToDelta(tableName, Some("key1 long, key2 string")) + + // reads actually went through Delta + val path = getPathForTableName(tableName) + checkAnswer( + sql(s"select id from delta.`$path` where key1 = 1"), + simpleDF.filter("id % 2 == 1").select("id")) + } + } + + testQuietly("convert a parquet table to delta with database name as parquet") { + val dbName = "parquet" + val tableName = "pqtbl" + withDatabase(dbName) { + withTable(dbName + "." + tableName) { + sql(s"CREATE DATABASE $dbName") + val table = TableIdentifier(tableName, Some(dbName)) + simpleDF.write.partitionBy("key1", "key2") + .format("parquet").saveAsTable(dbName + "." + tableName) + + convertToDelta(dbName + "." + tableName, Some("key1 long, key2 string")) + + // reads actually went through Delta + val path = spark + .sessionState + .catalog + .getTableMetadata(table).location.getPath + + checkAnswer( + sql(s"select id from delta.`$path` where key1 = 1"), + simpleDF.filter("id % 2 == 1").select("id")) + } + } + } + + testQuietly("convert a parquet path to delta while database called parquet exists") { + val dbName = "parquet" + withDatabase(dbName) { + withTempDir { dir => + // Create a database called parquet + sql(s"CREATE DATABASE $dbName") + + // Create a parquet table at given path + val tempDir = dir.getCanonicalPath + writeFiles(tempDir, simpleDF, partCols = Seq("key1", "key2")) + + // Convert should convert the path instead of trying to find a table in that database + convertToDelta(s"parquet.`$tempDir`", Some("key1 long, key2 string")) + + // reads actually went through Delta + checkAnswer( + sql(s"select id from delta.`$tempDir` where key1 = 1"), + simpleDF.filter("id % 2 == 1").select("id")) + } + } + } + + testQuietly("convert a delta table where metadata does not reflect that the table is " + + "already converted should update the metadata") { + val tableName = "deltatbl" + withTable(tableName) { + simpleDF.write.partitionBy("key1", "key2").format("parquet").saveAsTable(tableName) + + // Get where the table is stored and try to access it using parquet rather than delta + val path = getPathForTableName(tableName) + + // Convert using path so that metadata is not updated + convertToDelta(s"parquet.`$path`", Some("key1 long, key2 string")) + + // Call convert again + convertToDelta(s"default.$tableName", Some("key1 long, key2 string")) + + // Metadata should be updated so we can use table name + checkAnswer( + sql(s"select id from default.$tableName where key1 = 1"), + simpleDF.filter("id % 2 == 1").select("id")) + } + } + + testQuietly("convert a parquet table using table name") { + val tableName = "pqtable2" + 
withTable(tableName) { + // Create a parquet table + simpleDF.write.partitionBy("key1", "key2").format("parquet").saveAsTable(tableName) + + // Convert to delta + convertToDelta(s"default.$tableName", Some("key1 long, key2 string")) + + // Get where the table is stored and try to access it using parquet rather than delta + val path = getPathForTableName(tableName) + + + // reads actually went through Delta + assert(deltaRead(sql(s"select id from default.$tableName"))) + + // query through Delta is correct + checkAnswer( + sql(s"select id from default.$tableName where key1 = 0"), + simpleDF.filter("id % 2 == 0").select("id")) + + + // delta writers went through + writeFiles(path, simpleDF, format = "delta", partCols = Seq("key1", "key2"), mode = "append") + + checkAnswer( + sql(s"select id from default.$tableName where key1 = 1"), + simpleDF.union(simpleDF).filter("id % 2 == 1").select("id")) + } + } + + testQuietly("Convert a partitioned parquet table with partition schema autofill") { + val tableName = "ppqtable" + withTable(tableName) { + // Create a partitioned parquet table + simpleDF.write.partitionBy("key1", "key2").format("parquet").saveAsTable(tableName) + + // Convert to delta without partition schema, partition schema is autofill from catalog + convertToDelta(tableName) + + // Verify that table is converted to delta + assert(spark.sessionState.catalog.getTableMetadata( + TableIdentifier(tableName, Some("default"))).provider.contains("delta")) + + // Check the partition schema in the transaction log + val tableId = TableIdentifier(tableName, Some("default")) + assert(DeltaLog.forTableWithSnapshot(spark, tableId)._2.metadata.partitionSchema.equals( + (new StructType()) + .add(StructField("key1", LongType, true)) + .add(StructField("key2", StringType, true)) + )) + + // Check data in the converted delta table. + checkAnswer( + sql(s"SELECT id from default.$tableName where key2 = '2'"), + simpleDF.filter("id % 3 == 2").select("id")) + } + } + + testCatalogFileManifest("convert partitioned parquet table with catalog partitions") { + useCatalogFileManifest => { + val tableName = "ppqtable" + withTable(tableName) { + simpleDF.write.partitionBy("key1").format("parquet").saveAsTable(tableName) + val path = getPathForTableName(tableName) + + // Create an orphan partition + val df = spark.range(100, 200) + .withColumn("key1", lit(2)) + .withColumn("key2", col("id") % 4 cast "String") + + df.write.partitionBy("key1") + .format("parquet") + .mode("Append") + .save(path) + + // The path should contains 3 partitions. + val partitionDirs = new File(path).listFiles().filter(_.isDirectory) + assert(partitionDirs.map(_.getName).sorted + .sameElements(Array("key1=0", "key1=1", "key1=2"))) + + // Catalog only contains 2 partitions. + assert(spark.sessionState.catalog + .listPartitions(TableIdentifier(tableName, Some("default"))).size == 2) + + // Convert table to delta + convertToDelta(tableName) + + // Verify that table is converted to delta + assert(spark.sessionState.catalog.getTableMetadata( + TableIdentifier(tableName, Some("default"))).provider.contains("delta")) + + // Check data in the converted delta table. + if (useCatalogFileManifest) { + // Partition "key1=2" is pruned. + checkAnswer(sql(s"SELECT DISTINCT key1 from default.${tableName}"), spark.range(2).toDF()) + } else { + // All partitions are preserved. 
+ checkAnswer(sql(s"SELECT DISTINCT key1 from default.${tableName}"), spark.range(3).toDF()) + } + } + } + } + + test("external tables use correct path scheme") { + withTempDir { dir => + withTable("externalTable") { + withSQLConf(("fs.s3.impl", classOf[S3LikeLocalFileSystem].getCanonicalName)) { + sql(s"CREATE TABLE externalTable USING parquet LOCATION 's3://$dir' AS SELECT 1") + + // Ideally we would test a successful conversion with a remote filesystem, but there's + // no good way to set one up in unit tests. So instead we delete the data, and let the + // FileNotFoundException tell us which scheme it was using to look for it. + Utils.deleteRecursively(dir) + + val ex = intercept[FileNotFoundException] { + convertToDelta("default.externalTable", None) + } + + // If the path incorrectly used the default scheme, this would be file: at the end. + assert(ex.getMessage.contains(s"s3:$dir doesn't exist")) + } + } + } + } + + test("can convert a partition-like table path") { + withTempDir { dir => + val path = dir.getCanonicalPath + writeFiles(path, simpleDF, partCols = Seq("key1", "key2")) + + val basePath = s"$path/key1=1/" + convertToDelta(s"parquet.`$basePath`", Some("key2 string")) + + checkAnswer( + sql(s"select id from delta.`$basePath` where key2 = '1'"), + simpleDF.filter("id % 2 == 1").filter("id % 3 == 1").select("id")) + } + } + + test("can convert table with partition overwrite") { + val tableName = "ppqtable" + withTable(tableName) { + // Create table with original partitions of "key1=0" and "key1=1". + val df = spark.range(0, 100) + .withColumn("key1", col("id") % 2) + .withColumn("key2", col("id") % 3 cast "String") + df.write.format("parquet").partitionBy("key1").mode("append").saveAsTable(tableName) + checkAnswer(sql(s"SELECT id FROM $tableName"), df.select("id")) + + val dataDir = + spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).location.toString + + // Create orphan partition "key1=0;key2=3" with additional column. + val df1 = spark.range(100, 120, 2) + .withColumn("key1", col("id") % 2) + .withColumn("key2", lit("3")) + df1.write.format("parquet").partitionBy("key1", "key2").mode("append").save(dataDir) + + // Point table partition "key1=0" to the path of orphan partition "key1=0;key2=3" + sql(s"ALTER TABLE $tableName PARTITION (key1=0) SET LOCATION '$dataDir/key1=0/key2=3/'") + checkAnswer(sql(s"SELECT id FROM $tableName WHERE key1 = 0"), df1.select("id")) + + // ConvertToDelta should work without inferring the partition values from partition path. + convertToDelta(tableName) + + // Verify that table is converted to delta + assert(spark.sessionState.catalog.getTableMetadata( + TableIdentifier(tableName, Some("default"))).provider.contains("delta")) + + // Check data in the converted delta table. + checkAnswer(sql(s"SELECT id FROM $tableName WHERE key1 = 0"), df1.select("id")) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/CustomCatalogSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/CustomCatalogSuite.scala new file mode 100644 index 00000000000..93c22fa3882 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/CustomCatalogSuite.scala @@ -0,0 +1,368 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import scala.collection.JavaConverters._ + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands._ +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{QueryTest, Row, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.analysis.ResolvedTable +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, SetTableProperties, UnaryNode, UnsetTableProperties} +import org.apache.spark.sql.catalyst.trees.UnaryLike +import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog, TableChange} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.util.Utils + +class CustomCatalogSuite extends QueryTest with SharedSparkSession + with DeltaSQLCommandTest with DescribeDeltaDetailSuiteBase { + + override def sparkConf: SparkConf = + super.sparkConf.set("spark.sql.catalog.dummy", classOf[DummyCatalog].getName) + + test("CatalogTable exists in DeltaTableV2 if use table identifier") { + def catalogTableExists(sqlCmd: String): Unit = { + val plan = spark.sql(sqlCmd).queryExecution.analyzed + val catalogTable = plan match { + case cmd: UnaryNode with DeltaCommand => + cmd.getDeltaTable(cmd.child, "dummy").catalogTable + case AppendData(DataSourceV2Relation(table: DeltaTableV2, _, _, _, _), _, _, _, _, _) => + table.catalogTable + case cmd: DeleteCommand => + cmd.catalogTable + case cmd: DescribeDeltaHistoryCommand => + cmd.table.catalogTable + case cmd: MergeIntoCommand => + cmd.catalogTable + case cmd: RestoreTableCommand => + cmd.sourceTable.catalogTable + case SetTableProperties(ResolvedTable(_, _, table: DeltaTableV2, _), _) => + table.catalogTable + case UnsetTableProperties(ResolvedTable(_, _, table: DeltaTableV2, _), _, _) => + table.catalogTable + case cmd: UpdateCommand => + cmd.catalogTable + case cmd: WriteIntoDelta => + cmd.catalogTableOpt + } + assert(catalogTable.nonEmpty) + } + + val mergeSrcTable = "merge_src_table" + val tableName = "delta_commands_table" + + withTable(tableName, mergeSrcTable) { + sql(f"CREATE TABLE $tableName (c1 int, c2 int) USING delta PARTITIONED BY (c1)") + // DQL + catalogTableExists(s"DESCRIBE DETAIL $tableName") + catalogTableExists(s"DESCRIBE HISTORY $tableName") + + // DDL + catalogTableExists(s"ALTER TABLE $tableName SET TBLPROPERTIES ('a' = 'b') ") + catalogTableExists(s"ALTER TABLE $tableName UNSET TBLPROPERTIES 
('a') ") + + // DML insert + catalogTableExists(s"INSERT INTO $tableName VALUES (1, 1) ") + + // DML merge + sql(s"CREATE TABLE $mergeSrcTable (c1 int, c2 int) USING delta PARTITIONED BY (c1)") + sql(s"INSERT INTO $mergeSrcTable VALUES (1, 1) ") + catalogTableExists(s"MERGE INTO $tableName USING $mergeSrcTable " + + s"ON ${mergeSrcTable}.c1 = ${tableName}.c1 WHEN MATCHED THEN DELETE") + + // DML update + catalogTableExists(s"UPDATE $tableName SET c1 = 4 WHERE true ") + + // DML delete + catalogTableExists(s"DELETE FROM $tableName WHERE true ") + + // optimize + sql(s"INSERT INTO $tableName VALUES (1, 1) ") + sql(s"INSERT INTO $tableName VALUES (1, 1) ") + catalogTableExists(s"OPTIMIZE $tableName") + + // vacuum + catalogTableExists(s"VACUUM $tableName") + } + } + + test("DESC DETAIL a delta table from DummyCatalog") { + val tableName = "desc_detail_table" + withTable(tableName) { + val dummyCatalog = + spark.sessionState.catalogManager.catalog("dummy").asInstanceOf[DummyCatalog] + val tablePath = dummyCatalog.getTablePath(tableName) + sql("SET CATALOG dummy") + sql(f"CREATE TABLE $tableName (id bigint) USING delta") + sql("SET CATALOG spark_catalog") + // Insert some data into the table in the dummy catalog. + // To make it simple, here we insert data directly into the table path. + sql(f"INSERT INTO delta.`$tablePath` VALUES (0)") + sql("SET CATALOG dummy") + // Test simple desc detail command under the dummy catalog + checkResult( + sql(f"DESC DETAIL $tableName"), + Seq("delta", 1), + Seq("format", "numFiles")) + // Test 3-part identifier + checkResult( + sql(f"DESC DETAIL dummy.default.$tableName"), + Seq("delta", 1), + Seq("format", "numFiles")) + // Test table path + checkResult( + sql(f"DESC DETAIL delta.`$tablePath`"), + Seq("delta", 1), + Seq("format", "numFiles")) + // Test 3-part identifier when the current catalog is not dummy catalog + sql("SET CATALOG spark_catalog") + checkResult( + sql(f"DESC DETAIL dummy.default.$tableName"), + Seq("delta", 1), + Seq("format", "numFiles")) + } + } + + test("RESTORE a table from DummyCatalog") { + val dummyCatalog = + spark.sessionState.catalogManager.catalog("dummy").asInstanceOf[DummyCatalog] + val tableName = "restore_table" + val tablePath = dummyCatalog.getTablePath(tableName) + withTable(tableName) { + sql("SET CATALOG dummy") + sql(f"CREATE TABLE $tableName (id bigint) USING delta") + sql("SET CATALOG spark_catalog") + // Insert some data into the table in the dummy catalog. + // To make it simple, here we insert data directly into the table path. 
+ sql(f"INSERT INTO delta.`$tablePath` VALUES (0)") + sql(f"INSERT INTO delta.`$tablePath` VALUES (1)") + // Test 3-part identifier when the current catalog is the default catalog + sql(f"RESTORE TABLE dummy.default.$tableName VERSION AS OF 1") + checkAnswer(spark.table(f"dummy.default.$tableName"), spark.range(1).toDF()) + + sql("SET CATALOG dummy") + sql(f"RESTORE TABLE $tableName VERSION AS OF 0") + checkAnswer(spark.table(tableName), Nil) + sql(f"RESTORE TABLE $tableName VERSION AS OF 1") + checkAnswer(spark.table(tableName), spark.range(1).toDF()) + // Test 3-part identifier + sql(f"RESTORE TABLE dummy.default.$tableName VERSION AS OF 2") + checkAnswer(spark.table(tableName), spark.range(2).toDF()) + // Test file path table + sql(f"RESTORE TABLE delta.`$tablePath` VERSION AS OF 1") + checkAnswer(spark.table(tableName), spark.range(1).toDF()) + } + } + + test("Shallow Clone a table with time travel") { + val srcTable = "shallow_clone_src_table" + val destTable1 = "shallow_clone_dest_table_1" + val destTable2 = "shallow_clone_dest_table_2" + val destTable3 = "shallow_clone_dest_table_3" + val destTable4 = "shallow_clone_dest_table_4" + val dummyCatalog = + spark.sessionState.catalogManager.catalog("dummy").asInstanceOf[DummyCatalog] + val tablePath = dummyCatalog.getTablePath(srcTable) + withTable(srcTable) { + sql("SET CATALOG dummy") + sql(f"CREATE TABLE $srcTable (id bigint) USING delta") + sql("SET CATALOG spark_catalog") + // Insert some data into the table in the dummy catalog. + // To make it simple, here we insert data directly into the table path. + sql(f"INSERT INTO delta.`$tablePath` VALUES (0)") + sql(f"INSERT INTO delta.`$tablePath` VALUES (1)") + withTable(destTable1) { + // Test 3-part identifier when the current catalog is the default catalog + sql(f"CREATE TABLE $destTable1 SHALLOW CLONE dummy.default.$srcTable VERSION AS OF 1") + checkAnswer(spark.table(destTable1), spark.range(1).toDF()) + } + + sql("SET CATALOG dummy") + Seq(true, false).foreach { createTableInDummy => + val (dest2, dest3, dest4) = if (createTableInDummy) { + (destTable2, destTable3, destTable4) + } else { + val prefix = "spark_catalog.default" + (s"$prefix.$destTable2", s"$prefix.$destTable3", s"$prefix.$destTable4") + } + withTable(dest2, dest3, dest4) { + // Test simple shallow clone command under the dummy catalog + sql(f"CREATE TABLE $dest2 SHALLOW CLONE $srcTable") + checkAnswer(spark.table(dest2), spark.range(2).toDF()) + // Test time travel on the src table + sql(f"CREATE TABLE $dest3 SHALLOW CLONE dummy.default.$srcTable VERSION AS OF 1") + checkAnswer(spark.table(dest3), spark.range(1).toDF()) + // Test time travel on the src table delta path + sql(f"CREATE TABLE $dest4 SHALLOW CLONE delta.`$tablePath` VERSION AS OF 1") + checkAnswer(spark.table(dest4), spark.range(1).toDF()) + } + } + } + } + + test("DESCRIBE HISTORY a delta table from DummyCatalog") { + val tableName = "desc_history_table" + withTable(tableName) { + sql("SET CATALOG dummy") + val dummyCatalog = + spark.sessionState.catalogManager.catalog("dummy").asInstanceOf[DummyCatalog] + val tablePath = dummyCatalog.getTablePath(tableName) + sql(f"CREATE TABLE $tableName (column1 bigint) USING delta") + sql("SET CATALOG spark_catalog") + // Insert some data into the table in the dummy catalog. 
+ sql(f"INSERT INTO delta.`$tablePath` VALUES (0)") + + sql("SET CATALOG dummy") + // Test simple desc detail command under the dummy catalog + var result = sql(s"DESCRIBE HISTORY $tableName").collect() + assert(result.length == 2) + assert(result(0).getAs[Long]("version") == 1) + // Test 3-part identifier + result = sql(f"DESCRIBE HISTORY dummy.default.$tableName").collect() + assert(result.length == 2) + assert(result(0).getAs[Long]("version") == 1) + // Test table path + sql(f"DESC DETAIL delta.`$tablePath`").collect() + assert(result.length == 2) + assert(result(0).getAs[Long]("version") == 1) + // Test 3-part identifier when the current catalog is not dummy catalog + sql("SET CATALOG spark_catalog") + result = sql(s"DESCRIBE HISTORY dummy.default.$tableName").collect() + assert(result.length == 2) + assert(result(0).getAs[Long]("version") == 1) + } + } + + test("SELECT Table Changes from DummyCatalog") { + val dummyTableName = "dummy_table" + val sparkTableName = "spark_catalog.default.spark_table" + withTable(dummyTableName, sparkTableName) { + sql("SET CATALOG spark_catalog") + sql(f"CREATE TABLE $sparkTableName (id bigint, s string) USING delta" + + f" TBLPROPERTIES(delta.enableChangeDataFeed=true)") + sql(f"INSERT INTO $sparkTableName VALUES (0, 'a')") + sql(f"INSERT INTO $sparkTableName VALUES (1, 'b')") + sql("SET CATALOG dummy") + // Since the dummy catalog doesn't pass through the TBLPROPERTIES 'delta.enableChangeDataFeed' + // here we clone a table with the same schema as the spark table to test the table changes. + sql(f"CREATE TABLE $dummyTableName SHALLOW CLONE $sparkTableName") + // table_changes() should be able to read the table changes from the dummy catalog + Seq(dummyTableName, f"dummy.default.$dummyTableName").foreach { name => + val rows = sql(f"SELECT * from table_changes('$name', 1)").collect() + assert(rows.length == 2) + } + } + } + +} + +class DummyCatalog extends TableCatalog { + private val spark: SparkSession = SparkSession.active + private val tempDir: Path = new Path(Utils.createTempDir().getAbsolutePath) + // scalastyle:off deltahadoopconfiguration + private val fs: FileSystem = + tempDir.getFileSystem(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + + override def name: String = "dummy" + + def getTablePath(tableName: String): Path = { + new Path(tempDir.toString + "/" + tableName) + } + override def defaultNamespace(): Array[String] = Array("default") + + override def listTables(namespace: Array[String]): Array[Identifier] = { + val status = fs.listStatus(tempDir) + status.filter(_.isDirectory).map { dir => + Identifier.of(namespace, dir.getPath.getName) + } + } + + override def tableExists(ident: Identifier): Boolean = { + val tablePath = getTablePath(ident.name()) + fs.exists(tablePath) + } + override def loadTable(ident: Identifier): Table = { + if (!tableExists(ident)) { + throw new NoSuchTableException("") + } + val tablePath = getTablePath(ident.name()) + DeltaTableV2(spark = spark, path = tablePath, catalogTable = Some(createCatalogTable(ident))) + } + + override def createTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: java.util.Map[String, String]): Table = { + val tablePath = getTablePath(ident.name()) + // Create an empty Delta table on the tablePath + val part = partitions.map(_.arguments().head.toString) + spark.createDataFrame(List.empty[Row].asJava, schema) + .write.format("delta").partitionBy(part: _*).save(tablePath.toString) + DeltaTableV2(spark = spark, 
path = tablePath, catalogTable = Some(createCatalogTable(ident))) + } + + override def alterTable(ident: Identifier, changes: TableChange*): Table = { + // hack hack: no-op just for testing + loadTable(ident) + } + + override def dropTable(ident: Identifier): Boolean = { + val tablePath = getTablePath(ident.name()) + try { + fs.delete(tablePath, true) + true + } catch { + case _: Exception => false + } + } + + override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = { + throw new UnsupportedOperationException("Rename table operation is not supported.") + } + + override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { + // Initialize tempDir here + if (!fs.exists(tempDir)) { + fs.mkdirs(tempDir) + } + } + + private def createCatalogTable(ident: Identifier): CatalogTable = { + val tablePath = getTablePath(ident.name()) + CatalogTable( + identifier = TableIdentifier(ident.name(), defaultNamespace.headOption, Some(name)), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat(Some(tablePath.toUri), None, None, None, false, Map.empty), + schema = spark.range(0).schema + ) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DelegatingLogStoreSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DelegatingLogStoreSuite.scala new file mode 100644 index 00000000000..1410e33f877 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DelegatingLogStoreSuite.scala @@ -0,0 +1,193 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.storage.{DelegatingLogStore, LogStore, LogStoreAdaptor} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.sql.LocalSparkSession._ +import org.apache.spark.sql.SparkSession + +class DelegatingLogStoreSuite + extends SparkFunSuite { + + + private val customLogStoreClassName = classOf[CustomPublicLogStore].getName + private def fakeSchemeWithNoDefault = "fake" + + private def constructSparkConf(confs: Seq[(String, String)]): SparkConf = { + val sparkConf = new SparkConf(loadDefaults = false).setMaster("local") + confs.foreach { case (key, value) => sparkConf.set(key, value) } + sparkConf + } + + /** + * Test DelegatingLogStore by directly creating a DelegatingLogStore and test LogStore + * resolution based on input `scheme`. This is not an end-to-end test. + * + * @param scheme The scheme to be used for testing. + * @param sparkConf The spark configuration to use. + * @param expClassName Expected LogStore class name resolved by DelegatingLogStore. + * @param expAdaptor True if DelegatingLogStore is expected to resolve to LogStore adaptor, for + * which the actual implementation inside will be checked. This happens when + * LogStore is set to subclass of the new [[io.delta.storage.LogStore]] API. 
+ */ + private def testDelegatingLogStore( + scheme: String, + sparkConf: SparkConf, + expClassName: String, + expAdaptor: Boolean): Unit = { + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + val sc = spark.sparkContext + val delegatingLogStore = new DelegatingLogStore(sc.hadoopConfiguration) + val actualLogStore = delegatingLogStore.getDelegate( + new Path(s"${scheme}://dummy")) + if (expAdaptor) { + assert(actualLogStore.isInstanceOf[LogStoreAdaptor]) + assert(actualLogStore.asInstanceOf[LogStoreAdaptor] + .logStoreImpl.getClass.getName == expClassName) + } else { + assert(actualLogStore.getClass.getName == expClassName) + } + } + } + + /** Test the default LogStore resolution for `scheme` */ + private def testDefaultSchemeResolution(scheme: String, expClassName: String): Unit = { + testDelegatingLogStore( + scheme, + constructSparkConf(Seq.empty), // we set no custom LogStore confs + expClassName, + expAdaptor = true // all default implementations are from delta-storage + ) + } + + /** Test LogStore resolution with a customized scheme conf */ + private def testCustomSchemeResolution( + scheme: String, className: String, expAdaptor: Boolean): Unit = { + val sparkPrefixKey = LogStore.logStoreSchemeConfKey(scheme) + val nonSparkPrefixKey = sparkPrefixKey.stripPrefix("spark.") + // only set spark-prefixed key + testDelegatingLogStore( + scheme, + constructSparkConf(Seq((sparkPrefixKey, className))), + className, // we expect our custom-set LogStore class + expAdaptor + ) + // only set non-spark-prefixed key + testDelegatingLogStore( + scheme, + constructSparkConf(Seq((nonSparkPrefixKey, className))), + className, // we expect our custom-set LogStore class + expAdaptor + ) + // set both + testDelegatingLogStore( + scheme, + constructSparkConf(Seq((nonSparkPrefixKey, className), (sparkPrefixKey, className))), + className, // we expect our custom-set LogStore class + expAdaptor + ) + } + + test("DelegatingLogStore resolution using default scheme confs") { + for (scheme <- DelegatingLogStore.s3Schemes) { + testDefaultSchemeResolution( + scheme, + expClassName = DelegatingLogStore.defaultS3LogStoreClassName) + } + for (scheme <- DelegatingLogStore.azureSchemes) { + testDefaultSchemeResolution( + scheme, + expClassName = DelegatingLogStore.defaultAzureLogStoreClassName) + } + for (scheme <- DelegatingLogStore.gsSchemes) { + testDefaultSchemeResolution( + scheme, + expClassName = DelegatingLogStore.defaultGCSLogStoreClassName) + } + testDefaultSchemeResolution( + scheme = fakeSchemeWithNoDefault, + expClassName = DelegatingLogStore.defaultHDFSLogStoreClassName) + } + + test("DelegatingLogStore resolution using customized scheme confs") { + val allTestSchemes = DelegatingLogStore.s3Schemes ++ DelegatingLogStore.azureSchemes + + fakeSchemeWithNoDefault + for (scheme <- allTestSchemes) { + for (store <- Seq( + // default (java) classes (in io.delta.storage) + "io.delta.storage.S3SingleDriverLogStore", + "io.delta.storage.AzureLogStore", + "io.delta.storage.HDFSLogStore", + // deprecated (scala) classes + classOf[org.apache.spark.sql.delta.storage.S3SingleDriverLogStore].getName, + classOf[org.apache.spark.sql.delta.storage.AzureLogStore].getName, + classOf[org.apache.spark.sql.delta.storage.HDFSLogStore].getName, + customLogStoreClassName)) { + + // we set spark.delta.logStore.${scheme}.impl -> $store + testCustomSchemeResolution( + scheme, + store, + expAdaptor = store.contains("io.delta.storage") || store == customLogStoreClassName) + } + } + } +} + 
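+// Illustrative sketch (not exercised by this suite): a user application can pin a LogStore
+// for a scheme with the same spark-prefixed key the tests above set, e.g.
+//
+//   new SparkConf().set(
+//     "spark.delta.logStore.fake.impl",  // i.e. LogStore.logStoreSchemeConfKey("fake")
+//     classOf[CustomPublicLogStore].getName)
+//
+// DelegatingLogStore then resolves paths such as fake://bucket/table to that class, wrapped
+// in a LogStoreAdaptor because CustomPublicLogStore implements the io.delta.storage API.
+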
+////////////////// +// Helper Class // +////////////////// + +class CustomPublicLogStore(initHadoopConf: Configuration) + extends io.delta.storage.LogStore(initHadoopConf) { + + private val logStoreInternal = new io.delta.storage.HDFSLogStore(initHadoopConf) + + override def read( + path: Path, + hadoopConf: Configuration): io.delta.storage.CloseableIterator[String] = { + logStoreInternal.read(path, hadoopConf) + } + + override def write( + path: Path, + actions: java.util.Iterator[String], + overwrite: java.lang.Boolean, + hadoopConf: Configuration): Unit = { + logStoreInternal.write(path, actions, overwrite, hadoopConf) + } + + override def listFrom( + path: Path, + hadoopConf: Configuration): java.util.Iterator[FileStatus] = { + logStoreInternal.listFrom(path, hadoopConf) + } + + override def resolvePathOnPhysicalStorage( + path: Path, + hadoopConf: Configuration): Path = { + logStoreInternal.resolvePathOnPhysicalStorage(path, hadoopConf) + } + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): java.lang.Boolean = { + logStoreInternal.isPartialWriteVisible(path, hadoopConf) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeleteMetricsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteMetricsSuite.scala new file mode 100644 index 00000000000..8f397ab835d --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteMetricsSuite.scala @@ -0,0 +1,426 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import com.databricks.spark.util.DatabricksLogging +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.actions.{Action, AddFile, FileAction, RemoveFile} +import org.apache.spark.sql.delta.commands.DeleteMetric +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.{Dataset, QueryTest} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.expr +import org.apache.spark.sql.test.SharedSparkSession + +/** + * Tests for metrics of Delta DELETE command. + */ +class DeleteMetricsSuite extends QueryTest + with SharedSparkSession + with DatabricksLogging + with DeltaSQLCommandTest { + + + /* + * Case class to parameterize tests. + */ + case class TestConfiguration( + partitioned: Boolean, + cdfEnabled: Boolean + ) + + case class TestMetricResults( + operationMetrics: Map[String, Long], + numAffectedRows: Long + ) + + /* + * Helper to generate tests for all configuration parameters. 
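+ * For example, assuming BOOLEAN_DOMAIN covers true and false, a call such as
+ *   testDeleteMetrics("delete one file") { cfg => ... }
+ * registers four tests named like
+ *   "delete-metrics: delete one file - Partitioned = true, cdfEnabled = false".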
+ */ + protected def testDeleteMetrics(name: String)(testFn: TestConfiguration => Unit): Unit = { + for { + partitioned <- BOOLEAN_DOMAIN + cdfEnabled <- BOOLEAN_DOMAIN + } { + val testConfig = TestConfiguration( + partitioned = partitioned, + cdfEnabled = cdfEnabled + ) + var testName = + s"delete-metrics: $name - Partitioned = $partitioned, cdfEnabled = $cdfEnabled" + test(testName) { + testFn(testConfig) + } + } + } + + /* + * Create a table from the provided dataset. + * + * If an partitioned table is needed, then we create one data partition per Spark partition, + * i.e. every data partition will contain one file. + * + * Also an extra column is added to be used in non-partition filters. + */ + protected def createTempTable( + table: Dataset[_], + tableName: String, + testConfig: TestConfiguration): Unit = { + val numRows = table.count() + val numPartitions = table.rdd.getNumPartitions + val numRowsPerPart = if (numRows > 0 && numPartitions < numRows) numRows / numPartitions else 1 + val partitionBy = if (testConfig.partitioned) Seq("partCol") else Seq() + table.withColumn("partCol", expr(s"floor(id / $numRowsPerPart)")) + .withColumn("extraCol", expr(s"$numRows - id")) + .write + .partitionBy(partitionBy: _*) + .format("delta") + .saveAsTable(tableName) + } + + /* + * Run a delete command, and capture number of affected rows, operation metrics from Delta + * log and usage metrics. + */ + def runDeleteAndCaptureMetrics( + table: Dataset[_], + where: String, + testConfig: TestConfiguration): TestMetricResults = { + val tableName = "target" + val whereClause = Option(where).map(c => s"WHERE $c").getOrElse("") + var numAffectedRows = -1L + var operationMetrics: Map[String, Long] = null + withSQLConf( + DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true", + DeltaSQLConf.DELTA_SKIP_RECORDING_EMPTY_COMMITS.key -> "false", + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> + testConfig.cdfEnabled.toString) { + withTable(tableName) { + createTempTable(table, tableName, testConfig) + + val resultDf = spark.sql(s"DELETE FROM $tableName $whereClause") + assert(!resultDf.isEmpty) + numAffectedRows = resultDf.take(1).head(0).toString.toLong + + operationMetrics = DeltaMetricsUtils.getLastOperationMetrics(tableName) + + // Check operation metrics against commit actions. + val (deltaLog, snapshot) = DeltaLog.forTableWithSnapshot(spark, TableIdentifier(tableName)) + DeltaMetricsUtils.checkOperationMetricsAgainstCommitActions( + deltaLog, snapshot.version, operationMetrics) + + } + } + TestMetricResults( + operationMetrics, + numAffectedRows + ) + } + + /* + * Run a delete command and check all available metrics. + * We allow some metrics to be missing, by setting their value to -1. + */ + def runDeleteAndCheckMetrics( + table: Dataset[_], + where: String, + expectedNumAffectedRows: Long, + expectedOperationMetrics: Map[String, Long], + testConfig: TestConfiguration): Unit = { + // Run the delete capture and get all metrics. + val testMetricResults = runDeleteAndCaptureMetrics(table, where, testConfig) + val operationMetrics = testMetricResults.operationMetrics + + // Check the number of deleted rows. + assert(testMetricResults.numAffectedRows === expectedNumAffectedRows) + + // Check operation metrics schema. + val unknownKeys = operationMetrics.keySet -- DeltaOperationMetrics.DELETE -- + DeltaOperationMetrics.WRITE + assert(unknownKeys.isEmpty, + s"Unknown operation metrics for DELETE command: ${unknownKeys.mkString(", ")}") + + // Check values of expected operation metrics. 
For all unspecified deterministic metrics, + // we implicitly expect a zero value. + val requiredMetrics = Set( + "numCopiedRows", + "numDeletedRows", + "numAddedFiles", + "numRemovedFiles", + "numAddedChangeFiles") + val expectedMetricsWithDefaults = + requiredMetrics.map(k => k -> 0L).toMap ++ expectedOperationMetrics + val expectedMetricsFiltered = expectedMetricsWithDefaults.filter(_._2 >= 0) + DeltaMetricsUtils.checkOperationMetrics( + expectedMetrics = expectedMetricsFiltered, + operationMetrics = operationMetrics) + + + // Check time operation metrics. + val expectedTimeMetrics = + Set("scanTimeMs", "rewriteTimeMs", "executionTimeMs").filter( + k => expectedOperationMetrics.get(k).forall(_ >= 0) + ) + DeltaMetricsUtils.checkOperationTimeMetrics( + operationMetrics = operationMetrics, + expectedMetrics = expectedTimeMetrics) + } + + + val zeroDeleteMetrics: DeleteMetric = DeleteMetric( + condition = "", + numFilesTotal = 0, + numTouchedFiles = 0, + numRewrittenFiles = 0, + numRemovedFiles = 0, + numAddedFiles = 0, + numAddedChangeFiles = 0, + numFilesBeforeSkipping = 0, + numBytesBeforeSkipping = -1, // We don't want to assert equality on bytes + numFilesAfterSkipping = 0, + numBytesAfterSkipping = -1, // We don't want to assert equality on bytes + numPartitionsAfterSkipping = None, + numPartitionsAddedTo = None, + numPartitionsRemovedFrom = None, + numCopiedRows = None, + numDeletedRows = None, + numBytesAdded = -1, // We don't want to assert equality on bytes + numBytesRemoved = -1, // We don't want to assert equality on bytes + changeFileBytes = -1, // We don't want to assert equality on bytes + scanTimeMs = 0, + rewriteTimeMs = 0, + numDeletionVectorsAdded = 0, + numDeletionVectorsRemoved = 0, + numDeletionVectorsUpdated = 0 + ) + + + test("delete along partition boundary") { + import testImplicits._ + + Seq(true, false).foreach { cdfEnabled => + Seq(true, false).foreach { deltaCollectStatsEnabled => + Seq(true, false).foreach { deltaDmlMetricsFromMetadataEnabled => + withSQLConf( + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> cdfEnabled.toString, + DeltaSQLConf.DELTA_COLLECT_STATS.key -> deltaCollectStatsEnabled.toString, + DeltaSQLConf.DELTA_DML_METRICS_FROM_METADATA.key + -> deltaDmlMetricsFromMetadataEnabled.toString + ) { + withTable("t1") { + spark.range(100).withColumn("part", 'id % 10).toDF().write + .partitionBy("part").format("delta").saveAsTable("t1") + val result = spark.sql("DELETE FROM t1 WHERE part=1") + .take(1).head(0).toString.toLong + val opMetrics = DeltaMetricsUtils.getLastOperationMetrics("t1") + + assert(opMetrics("numRemovedFiles") > 0) + if (deltaCollectStatsEnabled && deltaDmlMetricsFromMetadataEnabled) { + assert(opMetrics("numDeletedRows") == 10) + assert(result == 10) + } else { + assert(!opMetrics.contains("numDeletedRows")) + assert(result == -1) + } + } + } + } + } + } + } + + testDeleteMetrics("delete from empty table") { testConfig => + for (where <- Seq("", "1 = 1", "1 != 1", "id > 50")) { + def executeTest: Unit = runDeleteAndCheckMetrics( + table = spark.range(0), + where = where, + expectedNumAffectedRows = 0, + expectedOperationMetrics = Map( + "numCopiedRows" -> 0, + "numDeletedRows" -> 0, + "numAddedFiles" -> 0, + "numRemovedFiles" -> 0, + "numAddedChangeFiles" -> 0, + "scanTimeMs" -> -1, + "rewriteTimeMs" -> -1, + "executionTimeMs" -> -1 + ), + testConfig = testConfig + ) + + executeTest + } + } + + for (whereClause <- Seq("", "1 = 1")) { + testDeleteMetrics(s"delete all with where = '$whereClause'") { testConfig => + 
runDeleteAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = 5), + where = whereClause, + expectedNumAffectedRows = 100, + expectedOperationMetrics = Map( + "numCopiedRows" -> -1, + "numDeletedRows" -> 100, + "numOutputRows" -> -1, + "numFiles" -> -1, + "numAddedFiles" -> -1, + "numRemovedFiles" -> 5, + "numAddedChangeFiles" -> 0 + ), + testConfig = testConfig + ) + } + } + + testDeleteMetrics("delete with false predicate") { testConfig => { + runDeleteAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = 5), + where = "1 != 1", + expectedNumAffectedRows = 0L, + expectedOperationMetrics = Map( + "numCopiedRows" -> 0, + "numDeletedRows" -> 0, + "numAddedFiles" -> 0, + "numRemovedFiles" -> 0, + "numAddedChangeFiles" -> 0, + "scanTimeMs" -> -1, + "rewriteTimeMs" -> -1, + "executionTimeMs" -> -1 + ), + testConfig = testConfig + ) + }} + + testDeleteMetrics("delete with unsatisfied static predicate") { testConfig => { + runDeleteAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = 5), + where = "id < 0 or id > 100", + expectedNumAffectedRows = 0L, + expectedOperationMetrics = Map( + "numCopiedRows" -> 0, + "numDeletedRows" -> 0, + "numAddedFiles" -> 0, + "numRemovedFiles" -> 0, + "numAddedChangeFiles" -> 0, + "scanTimeMs" -> -1, + "rewriteTimeMs" -> -1, + "executionTimeMs" -> -1 + ), + testConfig = testConfig + ) + }} + + testDeleteMetrics("delete with unsatisfied dynamic predicate") { testConfig => { + runDeleteAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = 5), + where = "id / 200 > 1 ", + expectedNumAffectedRows = 0L, + expectedOperationMetrics = Map( + "numCopiedRows" -> 0, + "numDeletedRows" -> 0, + "numAddedFiles" -> 0, + "numRemovedFiles" -> 0, + "numAddedChangeFiles" -> 0, + "scanTimeMs" -> -1, + "rewriteTimeMs" -> -1, + "executionTimeMs" -> -1 + ), + testConfig = testConfig + ) + }} + + for (whereClause <- Seq("id = 0", "id >= 49 and id < 50")) { + testDeleteMetrics(s"delete one row with where = `$whereClause`") { testConfig => + var numAddedFiles = 1 + var numRemovedFiles = 1 + val numRemovedRows = 1 + var numCopiedRows = 19 + runDeleteAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = 5), + where = whereClause, + expectedNumAffectedRows = 1L, + expectedOperationMetrics = Map( + "numCopiedRows" -> numCopiedRows, + "numDeletedRows" -> numRemovedRows, + "numAddedFiles" -> numAddedFiles, + "numRemovedFiles" -> numRemovedFiles, + "numAddedChangeFiles" -> { + if (testConfig.cdfEnabled + ) { + 1 + } else { + 0 + } + } + ), + testConfig = testConfig + ) + } + } + + testDeleteMetrics("delete one file") { testConfig => + val numRemovedFiles = 1 + val numRemovedRows = 20 + + def executeTest: Unit = runDeleteAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = 5), + where = "id < 20", + expectedNumAffectedRows = 20L, + expectedOperationMetrics = Map( + "numCopiedRows" -> 0, + "numDeletedRows" -> numRemovedRows, + "numAddedFiles" -> 0, + "numRemovedFiles" -> numRemovedFiles, + "numAddedChangeFiles" -> { + if (testConfig.cdfEnabled + ) { + 1 + } else { + 0 + } + } + ), + testConfig = testConfig + ) + + executeTest + } + + testDeleteMetrics("delete one row per file") { testConfig => + var numRemovedFiles = 5 + val numRemovedRows = 5 + var numCopiedRows = 95 + var numAddedFiles = if (testConfig.partitioned) 5 else 2 + runDeleteAndCheckMetrics( + table = spark.range(start = 0, end = 100, step 
= 1, numPartitions = 5), + where = "id in (5, 25, 45, 65, 85)", + expectedNumAffectedRows = 5L, + expectedOperationMetrics = Map( + "numCopiedRows" -> numCopiedRows, + "numDeletedRows" -> numRemovedRows, + "numAddedFiles" -> numAddedFiles, + "numRemovedFiles" -> numRemovedFiles, + "numAddedChangeFiles" -> { if (testConfig.cdfEnabled) numAddedFiles else 0 } + ), + testConfig = testConfig + ) + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSQLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSQLSuite.scala new file mode 100644 index 00000000000..ca1c76e555f --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSQLSuite.scala @@ -0,0 +1,130 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.test.{DeltaExcludedTestMixin, DeltaSQLCommandTest} + +import org.apache.spark.sql.Row + +class DeleteSQLSuite extends DeleteSuiteBase + with DeltaSQLCommandTest { + + import testImplicits._ + + override protected def executeDelete(target: String, where: String = null): Unit = { + val whereClause = Option(where).map(c => s"WHERE $c").getOrElse("") + sql(s"DELETE FROM $target $whereClause") + } + + // For EXPLAIN, which is not supported in OSS + test("explain") { + append(Seq((2, 2)).toDF("key", "value")) + val df = sql(s"EXPLAIN DELETE FROM delta.`$tempPath` WHERE key = 2") + val outputs = df.collect().map(_.mkString).mkString + assert(outputs.contains("Delta")) + assert(!outputs.contains("index") && !outputs.contains("ActionLog")) + // no change should be made by explain + checkAnswer(readDeltaTable(tempPath), Row(2, 2)) + } + + test("delete from a temp view") { + withTable("tab") { + withTempView("v") { + Seq((1, 1), (0, 3), (1, 5)).toDF("key", "value").write.format("delta").saveAsTable("tab") + spark.table("tab").as("name").createTempView("v") + sql("DELETE FROM v WHERE key = 1") + checkAnswer(spark.table("tab"), Row(0, 3)) + } + } + } + + test("delete from a SQL temp view") { + withTable("tab") { + withTempView("v") { + Seq((1, 1), (0, 3), (1, 5)).toDF("key", "value").write.format("delta").saveAsTable("tab") + sql("CREATE TEMP VIEW v AS SELECT * FROM tab") + sql("DELETE FROM v WHERE key = 1 AND VALUE = 5") + checkAnswer(spark.table("tab"), Seq(Row(1, 1), Row(0, 3))) + } + } + } + + Seq(true, false).foreach { partitioned => + test(s"User defined _change_type column doesn't get dropped - partitioned=$partitioned") { + withTable("tab") { + sql( + s"""CREATE TABLE tab USING DELTA + |${if (partitioned) "PARTITIONED BY (part) " else ""} + |TBLPROPERTIES (delta.enableChangeDataFeed = false) + |AS SELECT id, int(id / 10) AS part, 'foo' as _change_type + |FROM RANGE(1000) + |""".stripMargin) + val rowsToDelete = (1 to 1000 by 42).mkString("(", ", ", ")") + executeDelete("tab", s"id in $rowsToDelete") + sql("SELECT id, _change_type FROM tab").collect().foreach { row => + val _change_type = row.getString(1) 
+ assert(_change_type === "foo", s"Invalid _change_type for id=${row.get(0)}") + } + } + } + } +} + + +class DeleteSQLNameColumnMappingSuite extends DeleteSQLSuite + with DeltaColumnMappingEnableNameMode { + + + protected override def runOnlyTests: Seq[String] = Seq(true, false).map { isPartitioned => + s"basic case - delete from a Delta table by name - Partition=$isPartitioned" + } ++ Seq(true, false).flatMap { isPartitioned => + Seq( + s"where key columns - Partition=$isPartitioned", + s"where data columns - Partition=$isPartitioned") + } + +} + +class DeleteSQLWithDeletionVectorsSuite extends DeleteSQLSuite + with DeltaExcludedTestMixin + with DeletionVectorsTestUtils { + override def beforeAll(): Unit = { + super.beforeAll() + enableDeletionVectors(spark, delete = true) + } + + override def excluded: Seq[String] = super.excluded ++ + Seq( + // The following two tests must fail when DV is used. Covered by another test case: + // "throw error when non-pinned TahoeFileIndex snapshot is used". + "data and partition columns - Partition=true Skipping=false", + "data and partition columns - Partition=false Skipping=false", + // The scan schema contains additional row index filter columns. + "nested schema pruning on data condition", + // The number of records is not recomputed when using DVs + "delete throws error if number of records increases", + "delete logs error if number of records are missing in stats" + ) + + // This works correctly with DVs, but fails in classic DELETE. + override def testSuperSetColsTempView(): Unit = { + testComplexTempViews("superset cols")( + text = "SELECT key, value, 1 FROM tab", + expectResult = Row(0, 3, 1) :: Nil) + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeleteScalaSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteScalaSuite.scala new file mode 100644 index 00000000000..1e01dc1ca76 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteScalaSuite.scala @@ -0,0 +1,85 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.test.{DeltaExcludedTestMixin, DeltaSQLCommandTest} + +import org.apache.spark.sql.{functions, Row} + +class DeleteScalaSuite extends DeleteSuiteBase + with DeltaSQLCommandTest + with DeltaExcludedTestMixin { + + override def excluded: Seq[String] = super.excluded ++ Seq( + // Exclude tempViews, because DeltaTable.forName does not resolve them correctly, so no one can + // use them anyway with the Scala API. 
+ // scalastyle:off line.size.limit + "test delete on temp view - basic - Partition=true - SQL TempView", + "test delete on temp view - basic - Partition=true - Dataset TempView", + "test delete on temp view - basic - Partition=false - SQL TempView", + "test delete on temp view - basic - Partition=false - Dataset TempView", + "test delete on temp view - subset cols - SQL TempView", + "test delete on temp view - subset cols - Dataset TempView", + "test delete on temp view - superset cols - SQL TempView", + "test delete on temp view - superset cols - Dataset TempView", + "test delete on temp view - nontrivial projection - SQL TempView", + "test delete on temp view - nontrivial projection - Dataset TempView", + "test delete on temp view - view with too many internal aliases - SQL TempView", + "test delete on temp view - view with too many internal aliases - Dataset TempView", + "test delete on temp view - nontrivial projection with write amplification reduction - SQL TempView", + "test delete on temp view - nontrivial projection with write amplification reduction - Dataset TempView", + "test delete on temp view - view with too many internal aliases with write amplification reduction - SQL TempView", + "test delete on temp view - view with too many internal aliases with write amplification reduction - Dataset TempView" + // scalastyle:on line.size.limit + ) + + import testImplicits._ + + test("delete usage test - without condition") { + append(Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value")) + val table = io.delta.tables.DeltaTable.forPath(tempPath) + table.delete() + checkAnswer(readDeltaTable(tempPath), Nil) + } + + test("delete usage test - with condition") { + append(Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value")) + val table = io.delta.tables.DeltaTable.forPath(tempPath) + table.delete("key = 1 or key = 2") + checkAnswer(readDeltaTable(tempPath), Row(3, 30) :: Row(4, 40) :: Nil) + } + + test("delete usage test - with Column condition") { + append(Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value")) + val table = io.delta.tables.DeltaTable.forPath(tempPath) + table.delete(functions.expr("key = 1 or key = 2")) + checkAnswer(readDeltaTable(tempPath), Row(3, 30) :: Row(4, 40) :: Nil) + } + + override protected def executeDelete(target: String, where: String = null): Unit = { + val deltaTable: io.delta.tables.DeltaTable = + DeltaTestUtils.getDeltaTableForIdentifierOrPath( + spark, + DeltaTestUtils.getTableIdentifierOrPath(target)) + + if (where != null) { + deltaTable.delete(where) + } else { + deltaTable.delete() + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSuiteBase.scala new file mode 100644 index 00000000000..db1b02effa2 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSuiteBase.scala @@ -0,0 +1,531 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.functions.struct +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType + +abstract class DeleteSuiteBase extends QueryTest + with SharedSparkSession + with DeltaDMLTestUtils + with DeltaTestUtilsForTempViews { + + import testImplicits._ + + protected def executeDelete(target: String, where: String = null): Unit + + protected def checkDelete( + condition: Option[String], + expectedResults: Seq[Row], + tableName: Option[String] = None): Unit = { + executeDelete(target = tableName.getOrElse(s"delta.`$tempPath`"), where = condition.orNull) + checkAnswer(readDeltaTable(tempPath), expectedResults) + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic case - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkDelete(condition = None, Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic case - delete from a Delta table by path - Partition=$isPartitioned") { + withTable("deltaTable") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + val input = Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value") + append(input, partitions) + + checkDelete(Some("value = 4 and key = 3"), + Row(2, 2) :: Row(1, 4) :: Row(1, 1) :: Row(0, 3) :: Nil) + checkDelete(Some("value = 4 and key = 1"), + Row(2, 2) :: Row(1, 1) :: Row(0, 3) :: Nil) + checkDelete(Some("value = 2 or key = 1"), + Row(0, 3) :: Nil) + checkDelete(Some("key = 0 or value = 99"), Nil) + } + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic case - delete from a Delta table by name - Partition=$isPartitioned") { + withTable("delta_table") { + val partitionByClause = if (isPartitioned) "PARTITIONED BY (key)" else "" + sql( + s""" + |CREATE TABLE delta_table(key INT, value INT) + |USING delta + |OPTIONS('path'='$tempPath') + |$partitionByClause + """.stripMargin) + + val input = Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value") + append(input) + + checkDelete(Some("value = 4 and key = 3"), + Row(2, 2) :: Row(1, 4) :: Row(1, 1) :: Row(0, 3) :: Nil, + Some("delta_table")) + checkDelete(Some("value = 4 and key = 1"), + Row(2, 2) :: Row(1, 1) :: Row(0, 3) :: Nil, + Some("delta_table")) + checkDelete(Some("value = 2 or key = 1"), + Row(0, 3) :: Nil, + Some("delta_table")) + checkDelete(Some("key = 0 or value = 99"), + Nil, + Some("delta_table")) + } + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic key columns - Partition=$isPartitioned") { + val input = Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value") + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(input, partitions) + + checkDelete(Some("key > 2"), Row(2, 2) :: Row(1, 4) :: Row(1, 1) :: Row(0, 3) :: Nil) + checkDelete(Some("key < 2"), Row(2, 2) :: Nil) + checkDelete(Some("key = 2"), Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"where key columns - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkDelete(Some("key = 1"), Row(2, 2) :: Row(0, 3) :: Nil) + 
checkDelete(Some("key = 2"), Row(0, 3) :: Nil) + checkDelete(Some("key = 0"), Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"where data columns - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkDelete(Some("value <= 2"), Row(1, 4) :: Row(0, 3) :: Nil) + checkDelete(Some("value = 3"), Row(1, 4) :: Nil) + checkDelete(Some("value != 0"), Nil) + } + } + + test("where data columns and partition columns") { + val input = Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value") + append(input, Seq("key")) + + checkDelete(Some("value = 4 and key = 3"), + Row(2, 2) :: Row(1, 4) :: Row(1, 1) :: Row(0, 3) :: Nil) + checkDelete(Some("value = 4 and key = 1"), + Row(2, 2) :: Row(1, 1) :: Row(0, 3) :: Nil) + checkDelete(Some("value = 2 or key = 1"), + Row(0, 3) :: Nil) + checkDelete(Some("key = 0 or value = 99"), + Nil) + } + + Seq(true, false).foreach { skippingEnabled => + Seq(true, false).foreach { isPartitioned => + test(s"data and partition columns - Partition=$isPartitioned Skipping=$skippingEnabled") { + withSQLConf(DeltaSQLConf.DELTA_STATS_SKIPPING.key -> skippingEnabled.toString) { + val partitions = if (isPartitioned) "key" :: Nil else Nil + val input = Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value") + append(input, partitions) + + checkDelete(Some("value = 4 and key = 3"), + Row(2, 2) :: Row(1, 4) :: Row(1, 1) :: Row(0, 3) :: Nil) + checkDelete(Some("value = 4 and key = 1"), + Row(2, 2) :: Row(1, 1) :: Row(0, 3) :: Nil) + checkDelete(Some("value = 2 or key = 1"), + Row(0, 3) :: Nil) + checkDelete(Some("key = 0 or value = 99"), + Nil) + } + } + } + } + + test("Negative case - non-Delta target") { + Seq((1, 1), (0, 3), (1, 5)).toDF("key1", "value") + .write.format("parquet").mode("append").save(tempPath) + val e = intercept[DeltaAnalysisException] { + executeDelete(target = s"delta.`$tempPath`") + }.getMessage + assert(e.contains("DELETE destination only supports Delta sources") || + e.contains("is not a Delta table") || e.contains("doesn't exist") || + e.contains("Incompatible format")) + } + + test("Negative case - non-deterministic condition") { + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value")) + val e = intercept[AnalysisException] { + executeDelete(target = s"delta.`$tempPath`", where = "rand() > 0.5") + }.getMessage + assert(e.contains("nondeterministic expressions are only allowed in") || + e.contains("The operator expects a deterministic expression")) + } + + test("Negative case - DELETE the child directory") { + append(Seq((2, 2), (3, 2)).toDF("key", "value"), partitionBy = "key" :: Nil) + val e = intercept[AnalysisException] { + executeDelete(target = s"delta.`$tempPath/key=2`", where = "value = 2") + }.getMessage + assert(e.contains("Expect a full scan of Delta sources, but found a partial scan")) + } + + test("delete cached table by name") { + withTable("cached_delta_table") { + Seq((2, 2), (1, 4)).toDF("key", "value") + .write.format("delta").saveAsTable("cached_delta_table") + + spark.table("cached_delta_table").cache() + spark.table("cached_delta_table").collect() + executeDelete(target = "cached_delta_table", where = "key = 2") + checkAnswer(spark.table("cached_delta_table"), Row(1, 4) :: Nil) + } + } + + test("delete cached table by path") { + Seq((2, 2), (1, 4)).toDF("key", "value") + .write.mode("overwrite").format("delta").save(tempPath) + spark.read.format("delta").load(tempPath).cache() + 
spark.read.format("delta").load(tempPath).collect() + executeDelete(s"delta.`$tempPath`", where = "key = 2") + checkAnswer(spark.read.format("delta").load(tempPath), Row(1, 4) :: Nil) + } + + Seq(true, false).foreach { isPartitioned => + test(s"condition having current_date - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append( + Seq((java.sql.Date.valueOf("1969-12-31"), 2), + (java.sql.Date.valueOf("2099-12-31"), 4)) + .toDF("key", "value"), partitions) + + checkDelete(Some("CURRENT_DATE > key"), + Row(java.sql.Date.valueOf("2099-12-31"), 4) :: Nil) + checkDelete(Some("CURRENT_DATE <= key"), Nil) + } + } + + test("condition having current_timestamp - Partition by Timestamp") { + append( + Seq((java.sql.Timestamp.valueOf("2012-12-31 16:00:10.011"), 2), + (java.sql.Timestamp.valueOf("2099-12-31 16:00:10.011"), 4)) + .toDF("key", "value"), Seq("key")) + + checkDelete(Some("CURRENT_TIMESTAMP > key"), + Row(java.sql.Timestamp.valueOf("2099-12-31 16:00:10.011"), 4) :: Nil) + checkDelete(Some("CURRENT_TIMESTAMP <= key"), Nil) + } + + Seq(true, false).foreach { isPartitioned => + test(s"foldable condition - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + val allRows = Row(2, 2) :: Row(1, 4) :: Row(1, 1) :: Row(0, 3) :: Nil + + checkDelete(Some("false"), allRows) + checkDelete(Some("1 <> 1"), allRows) + checkDelete(Some("1 > null"), allRows) + checkDelete(Some("true"), Nil) + checkDelete(Some("1 = 1"), Nil) + } + } + + test("SC-12232: should not delete the rows where condition evaluates to null") { + append(Seq(("a", null), ("b", null), ("c", "v"), ("d", "vv")).toDF("key", "value").coalesce(1)) + + // "null = null" evaluates to null + checkDelete(Some("value = null"), + Row("a", null) :: Row("b", null) :: Row("c", "v") :: Row("d", "vv") :: Nil) + + // these expressions evaluate to null when value is null + checkDelete(Some("value = 'v'"), + Row("a", null) :: Row("b", null) :: Row("d", "vv") :: Nil) + checkDelete(Some("value <> 'v'"), + Row("a", null) :: Row("b", null) :: Nil) + } + + test("SC-12232: delete rows with null values using isNull") { + append(Seq(("a", null), ("b", null), ("c", "v"), ("d", "vv")).toDF("key", "value").coalesce(1)) + + // when value is null, this expression evaluates to true + checkDelete(Some("value is null"), + Row("c", "v") :: Row("d", "vv") :: Nil) + } + + test("SC-12232: delete rows with null values using EqualNullSafe") { + append(Seq(("a", null), ("b", null), ("c", "v"), ("d", "vv")).toDF("key", "value").coalesce(1)) + + // when value is null, this expression evaluates to true + checkDelete(Some("value <=> null"), + Row("c", "v") :: Row("d", "vv") :: Nil) + } + + test("do not support subquery test") { + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value")) + Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("c", "d").createOrReplaceTempView("source") + + // basic subquery + val e0 = intercept[AnalysisException] { + executeDelete(target = s"delta.`$tempPath`", "key < (SELECT max(c) FROM source)") + }.getMessage + assert(e0.contains("Subqueries are not supported")) + + // subquery with EXISTS + val e1 = intercept[AnalysisException] { + executeDelete(target = s"delta.`$tempPath`", "EXISTS (SELECT max(c) FROM source)") + }.getMessage + assert(e1.contains("Subqueries are not supported")) + + // subquery with NOT EXISTS + val e2 = intercept[AnalysisException] { + executeDelete(target = 
s"delta.`$tempPath`", "NOT EXISTS (SELECT max(c) FROM source)") + }.getMessage + assert(e2.contains("Subqueries are not supported")) + + // subquery with IN + val e3 = intercept[AnalysisException] { + executeDelete(target = s"delta.`$tempPath`", "key IN (SELECT max(c) FROM source)") + }.getMessage + assert(e3.contains("Subqueries are not supported")) + + // subquery with NOT IN + val e4 = intercept[AnalysisException] { + executeDelete(target = s"delta.`$tempPath`", "key NOT IN (SELECT max(c) FROM source)") + }.getMessage + assert(e4.contains("Subqueries are not supported")) + } + + test("schema pruning on data condition") { + val input = Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value") + append(input, Nil) + + val executedPlans = DeltaTestUtils.withPhysicalPlansCaptured(spark) { + checkDelete(Some("key = 2"), + Row(1, 4) :: Row(1, 1) :: Row(0, 3) :: Nil) + } + + val scans = executedPlans.flatMap(_.collect { + case f: FileSourceScanExec => f + }) + + // The first scan is for finding files to delete. We only are matching against the key + // so that should be the only field in the schema + assert(scans.head.schema.findNestedField(Seq("key")).nonEmpty) + assert(scans.head.schema.findNestedField(Seq("value")).isEmpty) + } + + + test("nested schema pruning on data condition") { + val input = Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value") + .select(struct("key", "value").alias("nested")) + append(input, Nil) + + val executedPlans = DeltaTestUtils.withPhysicalPlansCaptured(spark) { + checkDelete(Some("nested.key = 2"), + Row(Row(1, 4)) :: Row(Row(1, 1)) :: Row(Row(0, 3)) :: Nil) + } + + val scans = executedPlans.flatMap(_.collect { + case f: FileSourceScanExec => f + }) + + assert(scans.head.schema == StructType.fromDDL("nested STRUCT")) + } + + /** + * @param function the unsupported function. + * @param functionType The type of the unsupported expression to be tested. + * @param data the data in the table. + * @param where the where clause containing the unsupported expression. + * @param expectException whether an exception is expected to be thrown + * @param customErrorRegex customized error regex. 
+ */ + def testUnsupportedExpression( + function: String, + functionType: String, + data: => DataFrame, + where: String, + expectException: Boolean, + customErrorRegex: Option[String] = None) { + test(s"$functionType functions in delete - expect exception: $expectException") { + withTable("deltaTable") { + data.write.format("delta").saveAsTable("deltaTable") + + val expectedErrorRegex = "(?s).*(?i)unsupported.*(?i).*Invalid expressions.*" + + var catchException = true + + var errorRegex = if (functionType.equals("Generate")) { + ".*Subqueries are not supported in the DELETE.*" + } else customErrorRegex.getOrElse(expectedErrorRegex) + + + if (catchException) { + val dataBeforeException = spark.read.format("delta").table("deltaTable").collect() + val e = intercept[Exception] { + executeDelete(target = "deltaTable", where = where) + } + val message = if (e.getCause != null) { + e.getCause.getMessage + } else e.getMessage + assert(message.matches(errorRegex)) + checkAnswer(spark.read.format("delta").table("deltaTable"), dataBeforeException) + } else { + executeDelete(target = "deltaTable", where = where) + } + } + } + } + + testUnsupportedExpression( + function = "row_number", + functionType = "Window", + data = Seq((2, 2), (1, 4)).toDF("key", "value"), + where = "row_number() over (order by value) > 1", + expectException = true + ) + + testUnsupportedExpression( + function = "max", + functionType = "Aggregate", + data = Seq((2, 2), (1, 4)).toDF("key", "value"), + where = "key > max(value)", + expectException = true + ) + + // Explode functions are supported in where if only one row generated. + testUnsupportedExpression( + function = "explode", + functionType = "Generate", + data = Seq((2, List(2))).toDF("key", "value"), + where = "key = (select explode(value) from deltaTable)", + expectException = false // generate only one row, no exception. + ) + + // Explode functions are supported in where but if there's more than one row generated, + // it will throw an exception. + testUnsupportedExpression( + function = "explode", + functionType = "Generate", + data = Seq((2, List(2)), (1, List(4, 5))).toDF("key", "value"), + where = "key = (select explode(value) from deltaTable)", + expectException = true, // generate more than one row. Exception expected. 
+ customErrorRegex = + Some(".*More than one row returned by a subquery used as an expression(?s).*") + ) + + Seq(true, false).foreach { isPartitioned => + val name = s"test delete on temp view - basic - Partition=$isPartitioned" + testWithTempView(name) { isSQLTempView => + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + createTempViewFromTable(s"delta.`$tempPath`", isSQLTempView) + checkDelete( + condition = Some("key <= 1"), + expectedResults = Row(2, 2) :: Nil, + tableName = Some("v")) + } + } + + protected def testInvalidTempViews(name: String)( + text: String, + expectedErrorMsgForSQLTempView: String = null, + expectedErrorMsgForDataSetTempView: String = null, + expectedErrorClassForSQLTempView: String = null, + expectedErrorClassForDataSetTempView: String = null): Unit = { + testWithTempView(s"test delete on temp view - $name") { isSQLTempView => + withTable("tab") { + Seq((0, 3), (1, 2)).toDF("key", "value").write.format("delta").saveAsTable("tab") + if (isSQLTempView) { + sql(s"CREATE TEMP VIEW v AS $text") + } else { + sql(text).createOrReplaceTempView("v") + } + val ex = intercept[AnalysisException] { + executeDelete( + "v", + "key >= 1 and value < 3" + ) + } + testErrorMessageAndClass( + isSQLTempView, + ex, + expectedErrorMsgForSQLTempView, + expectedErrorMsgForDataSetTempView, + expectedErrorClassForSQLTempView, + expectedErrorClassForDataSetTempView) + } + } + } + testInvalidTempViews("subset cols")( + text = "SELECT key FROM tab", + expectedErrorClassForSQLTempView = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + expectedErrorClassForDataSetTempView = "UNRESOLVED_COLUMN.WITH_SUGGESTION" + ) + + // Need to be able to override this, because it works in some configurations. + protected def testSuperSetColsTempView(): Unit = { + testInvalidTempViews("superset cols")( + text = "SELECT key, value, 1 FROM tab", + // The analyzer can't tell whether the table originally had the extra column or not. + expectedErrorMsgForSQLTempView = "Can't resolve column 1 in root", + expectedErrorMsgForDataSetTempView = "Can't resolve column 1 in root" + ) + } + + testSuperSetColsTempView() + + protected def testComplexTempViews(name: String)( + text: String, + expectResult: Seq[Row]): Unit = { + testWithTempView(s"test delete on temp view - $name") { isSQLTempView => + withTable("tab") { + Seq((0, 3), (1, 2)).toDF("key", "value").write.format("delta").saveAsTable("tab") + createTempViewFromSelect(text, isSQLTempView) + executeDelete( + "v", + "key >= 1 and value < 3" + ) + checkAnswer(spark.read.format("delta").table("v"), expectResult) + } + } + } + + testComplexTempViews("nontrivial projection")( + text = "SELECT value as key, key as value FROM tab", + expectResult = Row(3, 0) :: Nil + ) + + testComplexTempViews("view with too many internal aliases")( + text = "SELECT * FROM (SELECT * FROM tab AS t1) AS t2", + expectResult = Row(0, 3) :: Nil + ) +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeletionVectorsTestUtils.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeletionVectorsTestUtils.scala new file mode 100644 index 00000000000..db1368ebfd0 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeletionVectorsTestUtils.scala @@ -0,0 +1,355 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.util.UUID + +import org.apache.spark.sql.delta.DeltaOperations.Truncate +import org.apache.spark.sql.delta.actions.{Action, AddFile, DeletionVectorDescriptor, RemoveFile} +import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArray, RoaringBitmapArrayFormat} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore +import org.apache.spark.sql.delta.util.PathWithFileSystem +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{DataFrame, QueryTest, RuntimeConfig, SparkSession} +import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.test.SharedSparkSession + +/** Collection of test utilities related with persistent Deletion Vectors. */ +trait DeletionVectorsTestUtils extends QueryTest with SharedSparkSession { + + def enableDeletionVectors( + spark: SparkSession, + delete: Boolean = false, + update: Boolean = false, + merge: Boolean = false): Unit = { + val global = delete || update || merge + spark.conf + .set(DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.defaultTablePropertyKey, global.toString) + spark.conf.set(DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS.key, delete.toString) + spark.conf.set(DeltaSQLConf.UPDATE_USE_PERSISTENT_DELETION_VECTORS.key, update.toString) + spark.conf.set(DeltaSQLConf.MERGE_USE_PERSISTENT_DELETION_VECTORS.key, merge.toString) + } + + def enableDeletionVectorsForAllSupportedOperations(spark: SparkSession): Unit = + enableDeletionVectors(spark, delete = true, update = true) + + def testWithDVs(testName: String, testTags: org.scalatest.Tag*)(thunk: => Unit): Unit = { + test(testName, testTags : _*) { + withDeletionVectorsEnabled() { + thunk + } + } + } + + /** Run a thunk with Deletion Vectors enabled/disabled. */ + def withDeletionVectorsEnabled(enabled: Boolean = true)(thunk: => Unit): Unit = { + val enabledStr = enabled.toString + withSQLConf( + DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.defaultTablePropertyKey -> enabledStr, + DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS.key -> enabledStr, + DeltaSQLConf.UPDATE_USE_PERSISTENT_DELETION_VECTORS.key -> enabledStr, + DeltaSQLConf.MERGE_USE_PERSISTENT_DELETION_VECTORS.key -> enabledStr) { + thunk + } + } + + /** Helper to run 'fn' with a temporary Delta table. */ + def withTempDeltaTable( + dataDF: DataFrame, + partitionBy: Seq[String] = Seq.empty, + enableDVs: Boolean = true, + conf: Seq[(String, String)] = Nil) + (fn: (() => io.delta.tables.DeltaTable, DeltaLog) => Unit): Unit = { + withTempPath { path => + val tablePath = new Path(path.getAbsolutePath) + withSQLConf(conf: _*) { + dataDF.write + .option(DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key, enableDVs.toString) + .partitionBy(partitionBy: _*) + .format("delta") + .save(tablePath.toString) + } + // DeltaTable hangs on to the DataFrame it is created with for the entire object lifetime. + // That means subsequent `targetTable.toDF` calls will return the same snapshot. 
+ // The DV tests are generally written assuming `targetTable.toDF` would return a new snapshot. + // So create a function here instead of an instance, so `targetTable().toDF` + // will actually provide a new snapshot. + val targetTable = + () => io.delta.tables.DeltaTable.forPath(tablePath.toString) + val targetLog = DeltaLog.forTable(spark, tablePath) + fn(targetTable, targetLog) + } + } + + /** Helper that verifies whether a defined number of DVs exist */ + def verifyDVsExist(targetLog: DeltaLog, filesWithDVsSize: Int): Unit = { + val filesWithDVs = getFilesWithDeletionVectors(targetLog) + assert(filesWithDVs.size === filesWithDVsSize) + assertDeletionVectorsExist(targetLog, filesWithDVs) + } + + /** Returns all [[AddFile]] actions of a Delta table that contain Deletion Vectors. */ + def getFilesWithDeletionVectors(log: DeltaLog): Seq[AddFile] = + log.update().allFiles.collect().filter(_.deletionVector != null).toSeq + + /** Lists the Deletion Vectors files of a table. */ + def listDeletionVectors(log: DeltaLog): Seq[File] = { + val dir = new File(log.dataPath.toUri.getPath) + dir.listFiles().filter(_.getName.startsWith( + DeletionVectorDescriptor.DELETION_VECTOR_FILE_NAME_CORE)) + } + + /** Helper to check that the Deletion Vectors of the provided file actions exist on disk. */ + def assertDeletionVectorsExist(log: DeltaLog, filesWithDVs: Seq[AddFile]): Unit = { + val tablePath = new Path(log.dataPath.toUri.getPath) + for (file <- filesWithDVs) { + val dv = file.deletionVector + assert(dv != null) + assert(dv.isOnDisk && !dv.isInline) + assert(dv.offset.isDefined) + + // Check that DV exists. + val dvPath = dv.absolutePath(tablePath) + val dvPathStr = DeletionVectorStore.pathToEscapedString(dvPath) + assert(new File(dvPathStr).exists(), s"DV not found $dvPath") + + // Check that cardinality is correct. + val bitmap = newDVStore.read(dvPath, dv.offset.get, dv.sizeInBytes) + assert(dv.cardinality === bitmap.cardinality) + } + } + + /** Enable persistent deletion vectors in new Delta tables. */ + def enableDeletionVectorsInNewTables(conf: RuntimeConfig): Unit = + conf.set(DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.defaultTablePropertyKey, "true") + + /** Enable persistent Deletion Vectors in a Delta table. */ + def enableDeletionVectorsInTable(tablePath: Path, enable: Boolean): Unit = + spark.sql( + s"""ALTER TABLE delta.`$tablePath` + |SET TBLPROPERTIES ('${DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key}' = '$enable') + |""".stripMargin) + + /** Enable persistent Deletion Vectors in a Delta table. */ + def enableDeletionVectorsInTable(deltaLog: DeltaLog, enable: Boolean = true): Unit = + enableDeletionVectorsInTable(deltaLog.dataPath, enable) + + /** Enable persistent deletion vectors in new tables and DELETE DML commands. 
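+   *
+   * Minimal sketch of intended use (hypothetical table path, not from this suite):
+   * {{{
+   *   enableDeletionVectors(spark.conf)
+   *   sql("DELETE FROM delta.`/tmp/dv_table` WHERE id < 10")
+   *   // with DVs enabled, the DELETE typically records a deletion vector
+   *   // instead of rewriting the touched data files
+   * }}}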
*/ + def enableDeletionVectors(conf: RuntimeConfig): Unit = { + enableDeletionVectorsInNewTables(conf) + conf.set(DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS.key, "true") + } + + // ======== HELPER METHODS TO WRITE DVs ========== + /** Helper method to remove the specified rows in the given file using DVs */ + protected def removeRowsFromFileUsingDV( + log: DeltaLog, + addFile: AddFile, + rowIds: Seq[Long]): Seq[Action] = { + val dv = RoaringBitmapArray(rowIds: _*) + writeFileWithDV(log, addFile, dv) + } + + /** Utility method to remove a ratio of rows from the given file */ + protected def deleteRows( + log: DeltaLog, file: AddFile, approxPhyRows: Long, ratioOfRowsToDelete: Double): Unit = { + val numRowsToDelete = + Math.ceil(ratioOfRowsToDelete * file.numPhysicalRecords.getOrElse(approxPhyRows)).toInt + removeRowsFromFile(log, file, Seq.range(0, numRowsToDelete)) + } + + /** Utility method to remove the given rows from the given file using DVs */ + protected def removeRowsFromFile( + log: DeltaLog, addFile: AddFile, rowIndexesToRemove: Seq[Long]): Unit = { + val txn = log.startTransaction() + val actions = removeRowsFromFileUsingDV(log, addFile, rowIndexesToRemove) + txn.commit(actions, Truncate()) + } + + protected def getFileActionsInLastVersion(log: DeltaLog): (Seq[AddFile], Seq[RemoveFile]) = { + val version = log.update().version + val allFiles = log.getChanges(version).toSeq.head._2 + val add = allFiles.collect { case a: AddFile => a } + val remove = allFiles.collect { case r: RemoveFile => r } + (add, remove) + } + + protected def serializeRoaringBitmapArrayWithDefaultFormat( + dv: RoaringBitmapArray): Array[Byte] = { + val serializationFormat = RoaringBitmapArrayFormat.Portable + dv.serializeAsByteArray(serializationFormat) + } + + /** + * Produce a new [[AddFile]] that will store `dv` in the log using default settings for choosing + * inline or on-disk storage. + * + * Also returns the corresponding [[RemoveFile]] action for `currentFile`. + * + * TODO: Always on-disk for now. Inline support comes later. + */ + protected def writeFileWithDV( + log: DeltaLog, + currentFile: AddFile, + dv: RoaringBitmapArray): Seq[Action] = { + writeFileWithDVOnDisk(log, currentFile, dv) + } + + /** Name of the partition column used by [[createTestDF()]]. */ + val PARTITION_COL = "partitionColumn" + + def createTestDF( + start: Long, + end: Long, + numFiles: Int, + partitionColumn: Option[Int] = None): DataFrame = { + val df = spark.range(start, end, 1, numFiles).withColumn("v", col("id")) + if (partitionColumn.isEmpty) { + df + } else { + df.withColumn(PARTITION_COL, lit(partitionColumn.get)) + } + } + + /** + * Produce a new [[AddFile]] that will reference the `dv` in the log while storing it on-disk. + * + * Also returns the corresponding [[RemoveFile]] action for `currentFile`. 
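+   *
+   * Rough usage sketch (assumed caller, cf. [[removeRowsFromFile]] above):
+   * {{{
+   *   val actions = writeFileWithDVOnDisk(log, addFile, RoaringBitmapArray(0L, 1L, 2L))
+   *   log.startTransaction().commit(actions, DeltaOperations.Delete(predicate = Seq.empty))
+   * }}}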
+ */ + protected def writeFileWithDVOnDisk( + log: DeltaLog, + currentFile: AddFile, + dv: RoaringBitmapArray): Seq[Action] = writeFilesWithDVsOnDisk(log, Seq((currentFile, dv))) + + protected def withDVWriter[T]( + log: DeltaLog, + dvFileID: UUID)(fn: DeletionVectorStore.Writer => T): T = { + val dvStore = newDVStore + // scalastyle:off deltahadoopconfiguration + val conf = spark.sessionState.newHadoopConf() + // scalastyle:on deltahadoopconfiguration + val tableWithFS = PathWithFileSystem.withConf(log.dataPath, conf) + val dvPath = + DeletionVectorStore.assembleDeletionVectorPathWithFileSystem(tableWithFS, dvFileID) + val writer = dvStore.createWriter(dvPath) + try { + fn(writer) + } finally { + writer.close() + } + } + + /** + * Produce new [[AddFile]] actions that will reference associated DVs in the log while storing + * all DVs in the same file on-disk. + * + * Also returns the corresponding [[RemoveFile]] actions for the original file entries. + */ + protected def writeFilesWithDVsOnDisk( + log: DeltaLog, + filesWithDVs: Seq[(AddFile, RoaringBitmapArray)]): Seq[Action] = { + val dvFileId = UUID.randomUUID() + withDVWriter(log, dvFileId) { writer => + filesWithDVs.flatMap { case (currentFile, dv) => + val range = writer.write(serializeRoaringBitmapArrayWithDefaultFormat(dv)) + val dvData = DeletionVectorDescriptor.onDiskWithRelativePath( + id = dvFileId, + sizeInBytes = range.length, + cardinality = dv.cardinality, + offset = Some(range.offset)) + val (add, remove) = currentFile.removeRows( + dvData, + updateStats = true + ) + Seq(add, remove) + } + } + } + + /** + * Removes the `numRowsToRemovePerFile` from each file via DV. + * Returns the total number of rows removed. + */ + protected def removeRowsFromAllFilesInLog( + log: DeltaLog, + numRowsToRemovePerFile: Long): Long = { + var numFiles: Option[Int] = None + // This is needed to make the manual commit work correctly, since we are not actually + // running a command that produces metrics. + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "false") { + val txn = log.startTransaction() + val allAddFiles = txn.snapshot.allFiles.collect() + numFiles = Some(allAddFiles.length) + val bitmap = RoaringBitmapArray(0L until numRowsToRemovePerFile: _*) + val actions = allAddFiles.flatMap { file => + if (file.numPhysicalRecords.isDefined) { + // Only when stats are enabled. Can't check when stats are disabled + assert(file.numPhysicalRecords.get > numRowsToRemovePerFile) + } + writeFileWithDV(log, file, bitmap) + } + txn.commit(actions, DeltaOperations.Delete(predicate = Seq.empty)) + } + numFiles.get * numRowsToRemovePerFile + } + + def newDVStore(): DeletionVectorStore = { + // scalastyle:off deltahadoopconfiguration + DeletionVectorStore.createInstance(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + } + + /** + * Updates an [[AddFile]] with a [[DeletionVectorDescriptor]]. + */ + protected def updateFileDV( + addFile: AddFile, + dvDescriptor: DeletionVectorDescriptor): (AddFile, RemoveFile) = { + addFile.removeRows( + dvDescriptor, + updateStats = true + ) + } + + /** Delete the DV file in the given [[AddFile]]. Assumes the [[AddFile]] has a valid DV. 
*/ + protected def deleteDVFile(tablePath: String, addFile: AddFile): Unit = { + assert(addFile.deletionVector != null) + val dvPath = addFile.deletionVector.absolutePath(new Path(tablePath)) + FileUtils.delete(new File(dvPath.toString)) + } + + /** + * Creates a [[DeletionVectorDescriptor]] from an [[RoaringBitmapArray]] + */ + protected def writeDV( + log: DeltaLog, + bitmapArray: RoaringBitmapArray): DeletionVectorDescriptor = { + val dvFileId = UUID.randomUUID() + withDVWriter(log, dvFileId) { writer => + val range = writer.write(serializeRoaringBitmapArrayWithDefaultFormat(bitmapArray)) + DeletionVectorDescriptor.onDiskWithRelativePath( + id = dvFileId, + sizeInBytes = range.length, + cardinality = bitmapArray.cardinality, + offset = Some(range.offset)) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaAlterTableReplaceTests.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaAlterTableReplaceTests.scala new file mode 100644 index 00000000000..6d9c082283e --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaAlterTableReplaceTests.scala @@ -0,0 +1,744 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.functions.{array, col, map, struct} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{ArrayType, MapType, StructType} + +trait DeltaAlterTableReplaceTests extends DeltaAlterTableTestBase { + + import testImplicits._ + + ddlTest("REPLACE COLUMNS - add a comment") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", array("s")) + .withColumn("m", map(col("s"), col("s"))) + + withDeltaTable(df) { tableName => + + sql(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int COMMENT 'a comment for v1', + | v2 string COMMENT 'a comment for v2', + | s STRUCT< + | v1:int COMMENT 'a comment for s.v1', + | v2:string COMMENT 'a comment for s.v2'> COMMENT 'a comment for s', + | a ARRAY> COMMENT 'a comment for a', + | m MAP, + | STRUCT< + | v1:int COMMENT 'a comment for m.value.v1', + | v2:string COMMENT 'a comment for m.value.v2'>> COMMENT 'a comment for m' + |)""".stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + val expectedSchema = new StructType() + .add("v1", "integer", true, "a comment for v1") + .add("v2", "string", true, "a comment for v2") + .add("s", new StructType() + .add("v1", "integer", true, "a comment for s.v1") + .add("v2", "string", true, "a comment for s.v2"), true, "a comment for s") + .add("a", ArrayType(new StructType() + .add("v1", "integer", true, "a comment for a.v1") + .add("v2", "string", true, "a comment for a.v2")), true, "a comment for a") + .add("m", MapType( + new StructType() + .add("v1", "integer", true, "a comment for m.key.v1") + .add("v2", "string", true, "a comment for m.key.v2"), + new StructType() + 
.add("v1", "integer", true, "a comment for m.value.v1") + .add("v2", "string", true, "a comment for m.value.v2")), true, "a comment for m") + assertEqual(snapshot.schema, expectedSchema) + + implicit val ordering = Ordering.by[ + (Int, String, (Int, String), Seq[(Int, String)], Map[(Int, String), (Int, String)]), Int] { + case (v1, _, _, _, _) => v1 + } + checkDatasetUnorderly( + spark.table(tableName) + .as[(Int, String, (Int, String), Seq[(Int, String)], Map[(Int, String), (Int, String)])], + (1, "a", (1, "a"), Seq((1, "a")), Map((1, "a") -> ((1, "a")))), + (2, "b", (2, "b"), Seq((2, "b")), Map((2, "b") -> ((2, "b"))))) + + // REPLACE COLUMNS doesn't remove metadata. + sql(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + assertEqual(deltaLog.snapshot.schema, expectedSchema) + } + } + + ddlTest("REPLACE COLUMNS - reorder") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", array("s")) + .withColumn("m", map(col("s"), col("s"))) + withDeltaTable(df) { tableName => + + sql(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | m MAP, STRUCT>, + | v2 string, + | a ARRAY>, + | v1 int, + | s STRUCT + |)""".stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("m", MapType( + new StructType().add("v2", "string").add("v1", "integer"), + new StructType().add("v2", "string").add("v1", "integer"))) + .add("v2", "string") + .add("a", ArrayType(new StructType().add("v2", "string").add("v1", "integer"))) + .add("v1", "integer") + .add("s", new StructType().add("v2", "string").add("v1", "integer"))) + + implicit val ordering = Ordering.by[ + (Map[(String, Int), (String, Int)], String, Seq[(String, Int)], Int, (String, Int)), Int] { + case (_, _, _, v1, _) => v1 + } + checkDatasetUnorderly( + spark.table(tableName) + .as[(Map[(String, Int), (String, Int)], String, Seq[(String, Int)], Int, (String, Int))], + (Map(("a", 1) -> (("a", 1))), "a", Seq(("a", 1)), 1, ("a", 1)), + (Map(("b", 2) -> (("b", 2))), "b", Seq(("b", 2)), 2, ("b", 2))) + } + } + + ddlTest("REPLACE COLUMNS - add columns") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", array("s")) + .withColumn("m", map(col("s"), col("s"))) + withDeltaTable(df) { tableName => + + sql(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | v3 long, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer") + .add("v2", "string") + .add("v3", "long") + .add("s", new StructType() + .add("v1", "integer").add("v2", "string").add("v3", "long")) + .add("a", ArrayType(new StructType() + .add("v1", "integer").add("v2", "string").add("v3", "long"))) + .add("m", MapType( + new StructType().add("v1", "integer").add("v2", "string").add("v3", "long"), + new StructType().add("v1", "integer").add("v2", "string").add("v3", "long")))) + + implicit val ordering = Ordering.by[ + (Int, String, Option[Long], + (Int, String, Option[Long]), + Seq[(Int, String, Option[Long])], + Map[(Int, String, Option[Long]), (Int, String, Option[Long])]), Int] { + case (v1, _, _, _, _, _) => v1 + } + checkDatasetUnorderly( + spark.table(tableName).as[ + (Int, String, Option[Long], + (Int, String, Option[Long]), + Seq[(Int, String, 
Option[Long])], + Map[(Int, String, Option[Long]), (Int, String, Option[Long])])], + (1, "a", None, (1, "a", None), + Seq((1, "a", None)), Map((1, "a", Option.empty[Long]) -> ((1, "a", None)))), + (2, "b", None, (2, "b", None), + Seq((2, "b", None)), Map((2, "b", Option.empty[Long]) -> ((2, "b", None))))) + } + } + + ddlTest("REPLACE COLUMNS - special column names") { + val df = Seq((1, "a"), (2, "b")).toDF("x.x", "y.y") + .withColumn("s.s", struct("`x.x`", "`y.y`")) + .withColumn("a.a", array("`s.s`")) + .withColumn("m.m", map(col("`s.s`"), col("`s.s`"))) + withDeltaTable(df) { tableName => + + sql(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | `m.m` MAP, STRUCT<`y.y`:string, `x.x`:int>>, + | `y.y` string, + | `a.a` ARRAY>, + | `x.x` int, + | `s.s` STRUCT<`y.y`:string, `x.x`:int> + |)""".stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("m.m", MapType( + new StructType().add("y.y", "string").add("x.x", "integer"), + new StructType().add("y.y", "string").add("x.x", "integer"))) + .add("y.y", "string") + .add("a.a", ArrayType(new StructType().add("y.y", "string").add("x.x", "integer"))) + .add("x.x", "integer") + .add("s.s", new StructType().add("y.y", "string").add("x.x", "integer"))) + } + } + + ddlTest("REPLACE COLUMNS - drop column") { + // Column Mapping allows columns to be dropped + def checkReplace( + text: String, + tableName: String, + columnDropped: Seq[String], + messages: String*): Unit = { + if (columnMappingEnabled) { + spark.sql(text) + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + val field = snapshot.schema.findNestedField(columnDropped, includeCollections = true) + assert(field.isEmpty, "Column was not deleted") + } else { + assertNotSupported(text, messages: _*) + } + } + + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", array("s")) + .withColumn("m", map(col("s"), col("s"))) + withDeltaTable(df) { tableName => + + // trying to drop v1 of each struct, but it should fail because dropping column is + // not supported unless column mapping is enabled + checkReplace( + s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + tableName, Seq("v1"), "dropping column(s)", "v1") + // s.v1 + checkReplace( + s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + tableName, Seq("s", "v1"), "dropping column(s)", "v1", "from s") + // a.v1 + checkReplace( + s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + tableName, Seq("a", "element", "v1"), "dropping column(s)", "v1", "from a") + // m.key.v1 + checkReplace( + s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + tableName, Seq("m", "key", "v1"), "dropping column(s)", "v1", "from m.key") + // m.value.v1 + checkReplace( + s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + tableName, Seq("m", "value", "v1"), "dropping column(s)", "v1", "from m.value") + } + } + + ddlTest("REPLACE COLUMNS - incompatible data type") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", 
array("s")) + .withColumn("m", map(col("s"), col("s"))) + withDeltaTable(df) { tableName => + + // trying to change the data type of v1 of each struct to long, but it should fail because + // changing data type is not supported. + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 long, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "changing data type", "v1", "from IntegerType to LongType") + // s.v1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "changing data type", "s.v1", "from IntegerType to LongType") + // a.element.v1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "changing data type", "a.element.v1", "from IntegerType to LongType") + // m.key.v1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "changing data type", "m.key.v1", "from IntegerType to LongType") + // m.value.v1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "changing data type", "m.value.v1", "from IntegerType to LongType") + } + } + + ddlTest("REPLACE COLUMNS - case insensitive") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", array("s")) + .withColumn("m", map(col("s"), col("s"))) + withDeltaTable(df) { tableName => + + val (deltaLog, _) = getDeltaLogWithSnapshot(tableName) + def checkSchema(command: String): Unit = { + sql(command) + + assertEqual(deltaLog.update().schema, new StructType() + .add("v1", "integer") + .add("v2", "string") + .add("s", new StructType().add("v1", "integer").add("v2", "string")) + .add("a", ArrayType(new StructType().add("v1", "integer").add("v2", "string"))) + .add("m", MapType( + new StructType().add("v1", "integer").add("v2", "string"), + new StructType().add("v1", "integer").add("v2", "string")))) + } + + // trying to use V1 instead of v1 of each struct. 
+ checkSchema(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | V1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + // s.V1 + checkSchema(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + // a.V1 + checkSchema(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + // m.key.V1 + checkSchema(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + // m.value.V1 + checkSchema(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + } + } + } + + ddlTest("REPLACE COLUMNS - case sensitive") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", array("s")) + .withColumn("m", map(col("s"), col("s"))) + withDeltaTable(df) { tableName => + + // trying to use V1 instead of v1 of each struct, but it should fail because case sensitive. + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | V1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "ambiguous", "v1") + // s.V1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "ambiguous", "data type of s") + // a.V1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "ambiguous", "data type of a.element") + // m.key.V1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "ambiguous", "data type of m.key") + // m.value.V1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "ambiguous", "data type of m.value") + } + } + } + + ddlTest("REPLACE COLUMNS - duplicate") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", array("s")) + .withColumn("m", map(col("s"), col("s"))) + withDeltaTable(df) { tableName => + def assertDuplicate(command: String): Unit = { + val ex = intercept[AnalysisException] { + sql(command) + } + assert(ex.getMessage.contains("duplicate column(s)")) + } + + // trying to add a V1 column, but it should fail because Delta doesn't allow columns + // at the same level of nesting that differ only by case. 
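+         // (Sketch of the rejected shape: REPLACE COLUMNS (v1 int, V1 int, v2 string, ...);
+         // even with spark.sql.caseSensitive=true, Delta's schema check treats v1/V1 as
+         // duplicates, which is what assertDuplicate verifies below.)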
+ assertDuplicate(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | V1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + // s.V1 + assertDuplicate(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + // a.V1 + assertDuplicate(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + // m.key.V1 + assertDuplicate(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + // m.value.V1 + assertDuplicate(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + } + } + } + + test("REPLACE COLUMNS - loosen nullability with unenforced allowed") { + withSQLConf(("spark.databricks.delta.constraints.allowUnenforcedNotNull.enabled", "true")) { + val schema = + """ + | v1 int NOT NULL, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + """.stripMargin + withDeltaTable(schema) { tableName => + + sql( + s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer") + .add("v2", "string") + .add("s", new StructType() + .add("v1", "integer").add("v2", "string")) + .add("a", ArrayType(new StructType() + .add("v1", "integer").add("v2", "string"))) + .add("m", MapType( + new StructType().add("v1", "integer").add("v2", "string"), + new StructType().add("v1", "integer").add("v2", "string")))) + } + } + } + + test("REPLACE COLUMNS - loosen nullability") { + val schema = + """ + | v1 int NOT NULL, + | v2 string, + | s STRUCT, + | a ARRAY> NOT NULL, + | m MAP, STRUCT> NOT NULL + """.stripMargin + withDeltaTable(schema) { tableName => + + sql(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer") + .add("v2", "string") + .add("s", new StructType() + .add("v1", "integer").add("v2", "string")) + .add("a", ArrayType(new StructType() + .add("v1", "integer").add("v2", "string"))) + .add("m", MapType( + new StructType().add("v1", "integer").add("v2", "string"), + new StructType().add("v1", "integer").add("v2", "string")))) + } + } + + test("REPLACE COLUMNS - add not-null column") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", array("s")) + .withColumn("m", map(col("s"), col("s"))) + withDeltaTable(df) { tableName => + // trying to add not-null column, but it should fail because adding not-null column is + // not supported. 
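+      // (A top-level `v3 long NOT NULL` is rejected with "NOT NULL is not supported in
+      // Hive-style REPLACE COLUMNS", while a new non-nullable field nested inside an existing
+      // struct, array, or map fails Delta's own check with "adding non-nullable column".)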
+ assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | v3 long NOT NULL, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "NOT NULL is not supported in Hive-style REPLACE COLUMNS") + // s.v3 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "adding non-nullable column", "s.v3") + // a.element.v3 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "adding non-nullable column", "a.element.v3") + // m.key.v3 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "adding non-nullable column", "m.key.v3") + // m.value.v3 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "adding non-nullable column", "m.value.v3") + } + } + + test("REPLACE COLUMNS - incompatible nullability") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", array("s")) + .withColumn("m", map(col("s"), col("s"))) + withDeltaTable(df) { tableName => + + // trying to change the data type of v1 of each struct to not null, but it should fail because + // tightening nullability is not supported. + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int NOT NULL, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "NOT NULL is not supported in Hive-style REPLACE COLUMNS") + // s.v1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "tightening nullability", "s.v1") + // a.element.v1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "tightening nullability", "a.element.v1") + // m.key.v1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "tightening nullability", "m.key.v1") + // m.value.v1 + assertNotSupported(s""" + |ALTER TABLE $tableName REPLACE COLUMNS ( + | v1 int, + | v2 string, + | s STRUCT, + | a ARRAY>, + | m MAP, STRUCT> + |)""".stripMargin, + "tightening nullability", "m.value.v1") + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaAlterTableTests.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaAlterTableTests.scala new file mode 100644 index 00000000000..37ebabcf75e --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaAlterTableTests.scala @@ -0,0 +1,1720 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.File + +import org.apache.spark.sql.delta.DeltaConfigs.CHECKPOINT_INTERVAL +import org.apache.spark.sql.delta.actions.Metadata +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.{DeltaColumnMappingSelectedTestMixin, DeltaSQLCommandTest} +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + +trait DeltaAlterTableTestBase + extends QueryTest + with SharedSparkSession + with DeltaColumnMappingTestUtils + with DeltaTestUtilsForTempViews { + + protected def createTable(schema: String, tblProperties: Map[String, String]): String + + protected def createTable(df: DataFrame, partitionedBy: Seq[String]): String + + protected def dropTable(identifier: String): Unit + + protected def getDeltaLogWithSnapshot(identifier: String): (DeltaLog, Snapshot) + + final protected def withDeltaTable(schema: String)(f: String => Unit): Unit = { + withDeltaTable(schema, Map.empty[String, String])(i => f(i)) + } + + final protected def withDeltaTable( + schema: String, + tblProperties: Map[String, String])(f: String => Unit): Unit = { + val identifier = createTable(schema, tblProperties) + try { + f(identifier) + } finally { + dropTable(identifier) + } + } + + final protected def withDeltaTable(df: DataFrame)(f: String => Unit): Unit = { + withDeltaTable(df, Seq.empty[String])(i => f(i)) + } + + final protected def withDeltaTable( + df: DataFrame, + partitionedBy: Seq[String])(f: String => Unit): Unit = { + val identifier = createTable(df, partitionedBy) + try { + f(identifier) + } finally { + dropTable(identifier) + } + } + + protected def ddlTest(testName: String)(f: => Unit): Unit = { + testQuietly(testName)(f) + } + + protected def assertNotSupported(command: String, messages: String*): Unit = { + val ex = intercept[Exception] { + sql(command) + }.getMessage + assert(ex.contains("not supported") || ex.contains("Unsupported") || ex.contains("Cannot")) + messages.foreach(msg => assert(ex.contains(msg))) + } +} + +trait DeltaAlterTableTests extends DeltaAlterTableTestBase { + + import testImplicits._ + + /////////////////////////////// + // SET/UNSET TBLPROPERTIES + /////////////////////////////// + + ddlTest("SET/UNSET TBLPROPERTIES - simple") { + withDeltaTable("v1 int, v2 string") { tableName => + + sql(s""" + |ALTER TABLE $tableName + |SET TBLPROPERTIES ( + | 'delta.logRetentionDuration' = '2 weeks', + | 'delta.checkpointInterval' = '20', + | 'key' = 'value' + |)""".stripMargin) + + val (deltaLog, snapshot1) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot1.metadata.configuration, Map( + "delta.logRetentionDuration" -> "2 weeks", + "delta.checkpointInterval" -> "20", + "key" -> "value")) + assert(deltaLog.deltaRetentionMillis(snapshot1.metadata) == 2 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot1.metadata) == 20) + + 
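+      // Unsetting a recognized Delta property reverts it to its default (checked below for
+      // checkpointInterval), while a plain user key such as 'key' is simply removed.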
sql(s"ALTER TABLE $tableName UNSET TBLPROPERTIES ('delta.checkpointInterval', 'key')") + + val snapshot2 = deltaLog.update() + assertEqual(snapshot2.metadata.configuration, + Map("delta.logRetentionDuration" -> "2 weeks")) + assert(deltaLog.deltaRetentionMillis(snapshot2.metadata) == 2 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot2.metadata) == + CHECKPOINT_INTERVAL.fromString(CHECKPOINT_INTERVAL.defaultValue)) + } + } + + testQuietlyWithTempView("negative case - not supported on temp views") { isSQLTempView => + withDeltaTable("v1 int, v2 string") { tableName => + createTempViewFromTable(tableName, isSQLTempView) + + val e = intercept[AnalysisException] { + sql( + """ + |ALTER TABLE v + |SET TBLPROPERTIES ( + | 'delta.logRetentionDuration' = '2 weeks', + | 'delta.checkpointInterval' = '20', + | 'key' = 'value' + |)""".stripMargin) + } + assert(e.getMessage.contains("expects a table. Please use ALTER VIEW instead.") || + e.getMessage.contains("EXPECT_TABLE_NOT_VIEW.USE_ALTER_VIEW")) + } + } + + ddlTest("SET/UNSET TBLPROPERTIES - case insensitivity") { + withDeltaTable("v1 int, v2 string") { tableName => + + sql(s""" + |ALTER TABLE $tableName + |SET TBLPROPERTIES ( + | 'dEltA.lOgrEteNtiOndURaTion' = '1 weeks', + | 'DelTa.ChEckPoiNtinTervAl' = '5', + | 'key' = 'value1' + |)""".stripMargin) + + val (deltaLog, snapshot1) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot1.metadata.configuration, Map( + "delta.logRetentionDuration" -> "1 weeks", + "delta.checkpointInterval" -> "5", + "key" -> "value1")) + assert(deltaLog.deltaRetentionMillis(snapshot1.metadata) == 1 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot1.metadata) == 5) + + sql(s""" + |ALTER TABLE $tableName + |SET TBLPROPERTIES ( + | 'dEltA.lOgrEteNtiOndURaTion' = '2 weeks', + | 'DelTa.ChEckPoiNtinTervAl' = '20', + | 'kEy' = 'value2' + |)""".stripMargin) + + val snapshot2 = deltaLog.update() + assertEqual(snapshot2.metadata.configuration, Map( + "delta.logRetentionDuration" -> "2 weeks", + "delta.checkpointInterval" -> "20", + "key" -> "value1", + "kEy" -> "value2")) + assert(deltaLog.deltaRetentionMillis(snapshot2.metadata) == 2 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot2.metadata) == 20) + + sql(s"ALTER TABLE $tableName UNSET TBLPROPERTIES ('DelTa.ChEckPoiNtinTervAl', 'kEy')") + + val snapshot3 = deltaLog.update() + assertEqual(snapshot3.metadata.configuration, + Map("delta.logRetentionDuration" -> "2 weeks", "key" -> "value1")) + assert(deltaLog.deltaRetentionMillis(snapshot3.metadata) == 2 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot3.metadata) == + CHECKPOINT_INTERVAL.fromString(CHECKPOINT_INTERVAL.defaultValue)) + } + } + + ddlTest("SET/UNSET TBLPROPERTIES - set unknown config") { + withDeltaTable("v1 int, v2 string") { tableName => + val ex = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName SET TBLPROPERTIES ('delta.key' = 'value')") + } + assert(ex.getMessage.contains("Unknown configuration was specified: delta.key")) + } + } + + ddlTest("SET/UNSET TBLPROPERTIES - set invalid value") { + withDeltaTable("v1 int, v2 string") { tableName => + + val ex1 = intercept[Exception] { + sql(s"ALTER TABLE $tableName SET TBLPROPERTIES ('delta.randomPrefixLength' = '-1')") + } + assert(ex1.getMessage.contains("randomPrefixLength needs to be greater than 0.")) + + val ex2 = intercept[Exception] { + sql(s"ALTER TABLE $tableName SET TBLPROPERTIES ('delta.randomPrefixLength' = 'value')") + } + 
assert(ex2.getMessage.contains("randomPrefixLength needs to be greater than 0.")) + } + } + + test("SET/UNSET comment by TBLPROPERTIES") { + withDeltaTable("v1 int, v2 string") { tableName => + def assertCommentEmpty(): Unit = { + val props = sql(s"DESC EXTENDED $tableName").collect() + assert(!props.exists(_.getString(0) === "Comment"), "Comment should be empty") + + val desc = sql(s"DESCRIBE DETAIL $tableName").head() + val fieldIndex = desc.fieldIndex("description") + assert(desc.isNullAt(fieldIndex)) + } + + assertCommentEmpty() + + sql(s"ALTER TABLE $tableName SET TBLPROPERTIES ('comment'='does it work?')") + + val props = sql(s"DESC EXTENDED $tableName").collect() + assert(props.exists(r => r.getString(0) === "Comment" && r.getString(1) === "does it work?"), + s"Comment not found in:\n${props.mkString("\n")}") + + val desc = sql(s"DESCRIBE DETAIL $tableName").head() + assert(desc.getAs[String]("description") === "does it work?") + + sql(s"ALTER TABLE $tableName UNSET TBLPROPERTIES ('comment')") + assertCommentEmpty() + } + } + + test("update comment by TBLPROPERTIES") { + val tableName = "comment_table" + + def checkComment(expected: String): Unit = { + val props = sql(s"DESC EXTENDED $tableName").collect() + assert(props.exists(r => r.getString(0) === "Comment" && r.getString(1) === expected), + s"Comment not found in:\n${props.mkString("\n")}") + + val desc = sql(s"DESCRIBE DETAIL $tableName").head() + assert(desc.getAs[String]("description") === expected) + } + + withTable(tableName) { + sql(s"CREATE TABLE $tableName (id bigint) USING delta COMMENT 'x'") + checkComment("x") + + sql(s"ALTER TABLE $tableName SET TBLPROPERTIES ('comment'='y')") + + checkComment("y") + } + } + + ddlTest("Invalid TBLPROPERTIES") { + withDeltaTable("v1 int, v2 string") { tableName => + // Handled by Spark + intercept[ParseException] { + sql(s"ALTER TABLE $tableName SET TBLPROPERTIES ('location'='/some/new/path')") + } + // Handled by Spark + intercept[ParseException] { + sql(s"ALTER TABLE $tableName SET TBLPROPERTIES ('provider'='json')") + } + // Illegal to add constraints + val e3 = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName SET TBLPROPERTIES ('delta.constraints.c1'='age >= 25')") + } + assert(e3.getMessage.contains("ALTER TABLE ADD CONSTRAINT")) + } + } + + /////////////////////////////// + // ADD COLUMNS + /////////////////////////////// + + ddlTest("ADD COLUMNS - simple") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2")) { tableName => + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String)], + (1, "a"), (2, "b")) + + sql(s"ALTER TABLE $tableName ADD COLUMNS (v3 long, v4 double)") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("v3", "long").add("v4", "double")) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, Option[Long], Option[Double])], + (1, "a", None, None), (2, "b", None, None)) + } + } + + ddlTest("ADD COLUMNS into complex types - Array") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("a", array(struct("v1")))) { tableName => + sql( + s""" + |ALTER TABLE $tableName ADD COLUMNS (a.element.v3 long) + """.stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("a", ArrayType(new StructType() + .add("v1", "integer") + .add("v3", "long")))) + + sql( + s""" + |ALTER TABLE 
$tableName ADD COLUMNS (a.element.v4 struct) + """.stripMargin) + + assertEqual(deltaLog.snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("a", ArrayType(new StructType() + .add("v1", "integer") + .add("v3", "long") + .add("v4", new StructType().add("f1", "long"))))) + + sql( + s""" + |ALTER TABLE $tableName ADD COLUMNS (a.element.v4.f2 string) + """.stripMargin) + + assertEqual(deltaLog.snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("a", ArrayType(new StructType() + .add("v1", "integer") + .add("v3", "long") + .add("v4", new StructType() + .add("f1", "long") + .add("f2", "string"))))) + } + } + + ddlTest("ADD COLUMNS into complex types - Map with simple key") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("m", map('v1, struct("v2")))) { tableName => + + sql( + s""" + |ALTER TABLE $tableName ADD COLUMNS (m.value.mvv3 long) + """.stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("m", MapType(IntegerType, + new StructType() + .add("v2", "string") + .add("mvv3", "long")))) + } + } + + ddlTest("ADD COLUMNS into complex types - Map with simple value") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("m", map(struct("v1"), 'v2))) { tableName => + + sql( + s""" + |ALTER TABLE $tableName ADD COLUMNS (m.key.mkv3 long) + """.stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("m", MapType( + new StructType() + .add("v1", "integer") + .add("mkv3", "long"), + StringType))) + } + } + + private def checkErrMsg(msg: String, field: Seq[String]): Unit = { + val fieldStr = field.map(f => s"`$f`").mkString(".") + val fieldParentStr = field.dropRight(1).map(f => s"`$f`").mkString(".") + assert(msg.contains( + s"Field name $fieldStr is invalid: $fieldParentStr is not a struct")) + } + + ddlTest("ADD COLUMNS should not be able to add column to basic type key/value of " + + "MapType") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("m", map('v1, 'v2))) { tableName => + var ex = intercept[AnalysisException] { + sql( + s""" + |ALTER TABLE $tableName ADD COLUMNS (m.key.mkv3 long) + """.stripMargin) + } + checkErrMsg(ex.getMessage, Seq("m", "key", "mkv3")) + + ex = intercept[AnalysisException] { + sql( + s""" + |ALTER TABLE $tableName ADD COLUMNS (m.value.mkv3 long) + """.stripMargin) + } + checkErrMsg(ex.getMessage, Seq("m", "value", "mkv3")) + } + } + + ddlTest("ADD COLUMNS into complex types - Map") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("m", map(struct("v1"), struct("v2")))) { tableName => + + sql( + s""" + |ALTER TABLE $tableName ADD COLUMNS (m.key.mkv3 long, m.value.mvv3 long) + """.stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("m", MapType( + new StructType() + .add("v1", "integer") + .add("mkv3", "long"), + new StructType() + .add("v2", "string") + .add("mvv3", "long")))) + } + } + + ddlTest("ADD COLUMNS into complex types - Map (nested)") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("m", map(struct("v1"), struct("v2")))) { tableName => + + sql( + s""" + |ALTER TABLE $tableName ADD COLUMNS + |(m.key.mkv3 long, 
m.value.mvv3 struct>>) + """.stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("m", MapType( + new StructType() + .add("v1", "integer") + .add("mkv3", "long"), + new StructType() + .add("v2", "string") + .add("mvv3", new StructType() + .add("f1", "long") + .add("f2", ArrayType(new StructType() + .add("n", "long"))))))) + + sql( + s""" + |ALTER TABLE $tableName ADD COLUMNS + |(m.value.mvv3.f2.element.p string) + """.stripMargin) + + assertEqual(deltaLog.snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("m", MapType( + new StructType() + .add("v1", "integer") + .add("mkv3", "long"), + new StructType() + .add("v2", "string") + .add("mvv3", new StructType() + .add("f1", "long") + .add("f2", ArrayType(new StructType() + .add("n", "long") + .add("p", "string"))))))) + } + } + + ddlTest("ADD COLUMNS into Map should fail if key or value not specified") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("m", map(struct("v1"), struct("v2")))) { tableName => + + val ex = intercept[AnalysisException] { + sql( + s""" + |ALTER TABLE $tableName ADD COLUMNS (m.mkv3 long) + """.stripMargin) + } + checkErrMsg(ex.getMessage, Seq("m", "mkv3")) + } + } + + ddlTest("ADD COLUMNS into Array should fail if element is not specified") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("a", array(struct("v1")))) { tableName => + + intercept[AnalysisException] { + sql( + s""" + |ALTER TABLE $tableName ADD COLUMNS (a.v3 long) + """.stripMargin) + } + } + } + + ddlTest("ADD COLUMNS - a partitioned table") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2"), Seq("v2")) { tableName => + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String)], + (1, "a"), (2, "b")) + + sql(s"ALTER TABLE $tableName ADD COLUMNS (v3 long, v4 double)") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("v3", "long").add("v4", "double")) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, Option[Long], Option[Double])], + (1, "a", None, None), (2, "b", None, None)) + } + } + + ddlTest("ADD COLUMNS - with a comment") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2")) { tableName => + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String)], + (1, "a"), (2, "b")) + + sql(s"ALTER TABLE $tableName ADD COLUMNS (v3 long COMMENT 'new column')") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("v3", "long", true, "new column")) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, Option[Long])], + (1, "a", None), (2, "b", None)) + } + } + + ddlTest("ADD COLUMNS - adding to a non-struct column") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2")) { tableName => + + val ex = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName ADD COLUMNS (v2.x long)") + } + checkErrMsg(ex.getMessage, Seq("v2", "x")) + } + } + + ddlTest("ADD COLUMNS - a duplicate name") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2")) { tableName => + intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName ADD COLUMNS (v2 long)") + } + } + } + + ddlTest("ADD COLUMNS - a duplicate name (nested)") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") 
+ .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName ADD COLUMNS (struct.v2 long)") + } + } + } + + ddlTest("ADD COLUMNS - column name with spaces") { + if (!columnMappingEnabled) { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2")) { tableName => + val ex = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName ADD COLUMNS (`a column name with spaces` long)") + } + assert(ex.getMessage.contains("invalid character(s)")) + } + } else { + // column mapping mode supports arbitrary column names + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2")) { tableName => + sql(s"ALTER TABLE $tableName ADD COLUMNS (`a column name with spaces` long)") + } + } + } + + ddlTest("ADD COLUMNS - column name with spaces (nested)") { + if (!columnMappingEnabled) { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + val ex = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName ADD COLUMNS (struct.`a column name with spaces` long)") + } + assert(ex.getMessage.contains("invalid character(s)")) + } + } else { + // column mapping mode supports arbitrary column names + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + sql(s"ALTER TABLE $tableName ADD COLUMNS (struct.`a column name with spaces` long)") + } + } + } + + ddlTest("ADD COLUMNS - special column names") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("z.z", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, (Int, String))], + (1, "a", (1, "a")), (2, "b", (2, "b"))) + + sql(s"ALTER TABLE $tableName ADD COLUMNS (`x.x` long, `z.z`.`y.y` double)") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("z.z", new StructType() + .add("v1", "integer").add("v2", "string").add("y.y", "double")) + .add("x.x", "long")) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, (Int, String, Option[Double]), Option[Long])], + (1, "a", (1, "a", None), None), (2, "b", (2, "b", None), None)) + } + } + + test("ADD COLUMNS - with positions") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + withDeltaTable(df) { tableName => + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String)], + (1, "a"), (2, "b")) + + sql(s"ALTER TABLE $tableName ADD COLUMNS (v3 long FIRST, v4 long AFTER v1, v5 long)") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v3", "long").add("v1", "integer") + .add("v4", "long").add("v2", "string").add("v5", "long")) + + checkDatasetUnorderly( + spark.table(tableName).as[(Option[Long], Int, Option[Long], String, Option[Long])], + (None, 1, None, "a", None), (None, 2, None, "b", None)) + } + } + + test("ADD COLUMNS - with positions using an added column") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + withDeltaTable(df) { tableName => + + checkDatasetUnorderly( + spark.table("delta_test").as[(Int, String)], + (1, "a"), (2, "b")) + + sql("ALTER TABLE delta_test ADD COLUMNS (v3 long FIRST, v4 long AFTER v3, v5 long AFTER v4)") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v3", "long").add("v4", 
"long").add("v5", "long") + .add("v1", "integer").add("v2", "string")) + + checkDatasetUnorderly( + spark.table("delta_test").as[(Option[Long], Option[Long], Option[Long], Int, String)], + (None, None, None, 1, "a"), (None, None, None, 2, "b")) + } + } + + test("ADD COLUMNS - nested columns") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + checkDatasetUnorderly( + spark.table("delta_test").as[(Int, String, (Int, String))], + (1, "a", (1, "a")), (2, "b", (2, "b"))) + + sql("ALTER TABLE delta_test ADD COLUMNS " + + "(struct.v3 long FIRST, struct.v4 long AFTER v1, struct.v5 long)") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("struct", new StructType() + .add("v3", "long").add("v1", "integer") + .add("v4", "long").add("v2", "string").add("v5", "long"))) + + checkDatasetUnorderly( + spark.table("delta_test") + .as[(Int, String, (Option[Long], Int, Option[Long], String, Option[Long]))], + (1, "a", (None, 1, None, "a", None)), (2, "b", (None, 2, None, "b", None))) + } + } + + test("ADD COLUMNS - special column names with positions") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("z.z", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, (Int, String))], + (1, "a", (1, "a")), (2, "b", (2, "b"))) + + sql(s"ALTER TABLE $tableName ADD COLUMNS (`x.x` long after v1, `z.z`.`y.y` double)") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("x.x", "long").add("v2", "string") + .add("z.z", new StructType() + .add("v1", "integer").add("v2", "string").add("y.y", "double")) + ) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, Option[Long], String, (Int, String, Option[Double]))], + (1, None, "a", (1, "a", None)), (2, None, "b", (2, "b", None))) + } + } + + test("ADD COLUMNS - adding after an unknown column") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + withDeltaTable(df) { tableName => + + val ex = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName ADD COLUMNS (v3 long AFTER unknown)") + } + assert( + ex.getMessage.contains("Couldn't find") || ex.getMessage.contains("No such struct field")) + } + } + + test("ADD COLUMNS - case insensitive") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + withDeltaTable(df) { tableName => + + sql(s"ALTER TABLE $tableName ADD COLUMNS (v3 long AFTER V1)") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v3", "long").add("v2", "string")) + } + } + } + + test("ADD COLUMNS - case sensitive") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + withDeltaTable(df) { tableName => + + val ex = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName ADD COLUMNS (v3 long AFTER V1)") + } + assert( + ex.getMessage.contains("Couldn't find") || ex.getMessage.contains("No such struct field")) + } + } + } + + test("ADD COLUMNS - adding after an Array column") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("v3", array(map(col("v1"), col("v2")))) + withDeltaTable(df) { tableName => + + sql(s"ALTER TABLE $tableName ADD COLUMNS (v4 string 
AFTER V3)") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", IntegerType) + .add("v2", StringType) + .add("v3", ArrayType( + MapType(IntegerType, StringType))) + .add("v4", StringType)) + } + } + + /////////////////////////////// + // CHANGE COLUMN + /////////////////////////////// + + ddlTest("CHANGE COLUMN - add a comment") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2")) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN v1 v1 integer COMMENT 'a comment'") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer", true, "a comment").add("v2", "string")) + } + } + + ddlTest("CHANGE COLUMN - add a comment to a partitioned table") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2"), Seq("v2")) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN v2 v2 string COMMENT 'a comment'") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string", true, "a comment")) + } + } + + ddlTest("CHANGE COLUMN - add a comment to special column names (nested)") { + val df = Seq((1, "a"), (2, "b")).toDF("x.x", "y.y") + .withColumn("z.z", struct("`x.x`", "`y.y`")) + withDeltaTable(df) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN `z.z`.`x.x` `x.x` integer COMMENT 'a comment'") + sql(s"ALTER TABLE $tableName CHANGE COLUMN `x.x` `x.x` integer COMMENT 'another comment'") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("x.x", "integer", true, "another comment") + .add("y.y", "string") + .add("z.z", new StructType() + .add("x.x", "integer", true, "a comment").add("y.y", "string"))) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, (Int, String))], + (1, "a", (1, "a")), (2, "b", (2, "b"))) + } + } + + ddlTest("CHANGE COLUMN - add a comment to a MapType (nested)") { + val table = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("a", array(struct(array(struct(map(struct("v1"), struct("v2"))))))) + withDeltaTable(table) { tableName => + sql( + s""" + |ALTER TABLE $tableName CHANGE COLUMN + |a.element.col1.element.col1 col1 MAP, + |STRUCT> COMMENT 'a comment' + """.stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("a", ArrayType(new StructType() + .add("col1", ArrayType(new StructType() + .add("col1", MapType( + new StructType() + .add("v1", "integer"), + new StructType() + .add("v2", "string")), nullable = true, "a comment")))))) + } + } + + ddlTest("CHANGE COLUMN - add a comment to an ArrayType (nested)") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("m", map(struct("v1"), struct(array(struct(struct("v1"))))))) { tableName => + + sql( + s""" + |ALTER TABLE $tableName CHANGE COLUMN + |m.value.col1.element.col1.v1 v1 integer COMMENT 'a comment' + """.stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("m", MapType( + new StructType() + .add("v1", "integer"), + new StructType() + .add("col1", ArrayType(new StructType() + .add("col1", new StructType() + .add("v1", "integer", nullable = true, "a comment"))))))) + } + } + + ddlTest("CHANGE COLUMN 
- add a comment to an ArrayType") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("a", array('v1))) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN a a ARRAY COMMENT 'a comment'") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("a", ArrayType(IntegerType), nullable = true, "a comment")) + } + } + + ddlTest("CHANGE COLUMN - add a comment to a MapType") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("a", map('v1, 'v2))) { tableName => + + sql( + s""" + |ALTER TABLE $tableName CHANGE COLUMN + |a a MAP COMMENT 'a comment' + """.stripMargin) + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("a", MapType(IntegerType, StringType), nullable = true, "a comment")) + } + } + + ddlTest("CHANGE COLUMN - change name") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2")) { tableName => + + assertNotSupported(s"ALTER TABLE $tableName CHANGE COLUMN v2 v3 string") + } + } + + ddlTest("CHANGE COLUMN - incompatible") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2")) { tableName => + + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN v1 v1 long", + "'v1' with type 'IntegerType (nullable = true)'", + "'v1' with type 'LongType (nullable = true)'") + } + } + + ddlTest("CHANGE COLUMN - incompatible (nested)") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN struct.v1 v1 long", + "'struct.v1' with type 'IntegerType (nullable = true)'", + "'v1' with type 'LongType (nullable = true)'") + } + } + + test("CHANGE COLUMN - move to first") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + withDeltaTable(df) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN v2 v2 string FIRST") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v2", "string").add("v1", "integer")) + + checkDatasetUnorderly( + spark.table(tableName).as[(String, Int)], + ("a", 1), ("b", 2)) + } + } + + test("CHANGE COLUMN - move to first (nested)") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN struct.v2 v2 string FIRST") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("struct", new StructType() + .add("v2", "string").add("v1", "integer"))) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, (String, Int))], + (1, "a", ("a", 1)), (2, "b", ("b", 2))) + + // Can't change the inner ordering + assertNotSupported(s"ALTER TABLE $tableName CHANGE COLUMN struct struct " + + "STRUCT FIRST") + + sql(s"ALTER TABLE $tableName CHANGE COLUMN struct struct " + + "STRUCT FIRST") + + assertEqual(deltaLog.update().schema, new StructType() + .add("struct", new StructType().add("v2", "string").add("v1", "integer")) + .add("v1", "integer").add("v2", "string")) + } + } + + test("CHANGE COLUMN - move a partitioned column to first") { + val df = Seq((1, "a", true), (2, "b", false)).toDF("v1", "v2", "v3") + withDeltaTable(df, Seq("v2", "v3")) { tableName => + + 
sql(s"ALTER TABLE $tableName CHANGE COLUMN v3 v3 boolean FIRST") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v3", "boolean").add("v1", "integer").add("v2", "string")) + + checkDatasetUnorderly( + spark.table(tableName).as[(Boolean, Int, String)], + (true, 1, "a"), (false, 2, "b")) + } + } + + test("CHANGE COLUMN - move to after some column") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + withDeltaTable(df) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN v1 v1 integer AFTER v2") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v2", "string").add("v1", "integer")) + + checkDatasetUnorderly( + spark.table(tableName).as[(String, Int)], + ("a", 1), ("b", 2)) + } + } + + test("CHANGE COLUMN - move to after some column (nested)") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN struct.v1 v1 integer AFTER v2") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("struct", new StructType() + .add("v2", "string").add("v1", "integer"))) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, (String, Int))], + (1, "a", ("a", 1)), (2, "b", ("b", 2))) + + // cannot change ordering within the struct + assertNotSupported(s"ALTER TABLE $tableName CHANGE COLUMN struct struct " + + "STRUCT AFTER v1") + + // can move the struct itself however + sql(s"ALTER TABLE $tableName CHANGE COLUMN struct struct " + + "STRUCT AFTER v1") + + assertEqual(deltaLog.update().schema, new StructType() + .add("v1", "integer") + .add("struct", new StructType().add("v2", "string").add("v1", "integer")) + .add("v2", "string")) + } + } + + test("CHANGE COLUMN - move to after the same column") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + withDeltaTable(df) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN v1 v1 integer AFTER v1") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string")) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String)], + (1, "a"), (2, "b")) + } + } + + test("CHANGE COLUMN - move to after the same column (nested)") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN struct.v1 v1 integer AFTER v1") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("struct", new StructType() + .add("v1", "integer").add("v2", "string"))) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, (Int, String))], + (1, "a", (1, "a")), (2, "b", (2, "b"))) + } + } + + test("CHANGE COLUMN - move a partitioned column to after some column") { + val df = Seq((1, "a", true), (2, "b", false)).toDF("v1", "v2", "v3") + withDeltaTable(df, Seq("v2", "v3")) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN v3 v3 boolean AFTER v1") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v3", "boolean").add("v2", "string")) + + 
checkDatasetUnorderly( + spark.table(tableName).as[(Int, Boolean, String)], + (1, true, "a"), (2, false, "b")) + } + } + + test("CHANGE COLUMN - move to after the last column") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + withDeltaTable(df) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN v1 v1 integer AFTER v2") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v2", "string").add("v1", "integer")) + } + } + + test("CHANGE COLUMN - special column names with positions") { + val df = Seq((1, "a"), (2, "b")).toDF("x.x", "y.y") + withDeltaTable(df) { tableName => + sql(s"ALTER TABLE $tableName CHANGE COLUMN `x.x` `x.x` integer AFTER `y.y`") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("y.y", "string").add("x.x", "integer")) + + checkDatasetUnorderly( + spark.table(tableName).as[(String, Int)], + ("a", 1), ("b", 2)) + } + } + + test("CHANGE COLUMN - special column names (nested) with positions") { + val df = Seq((1, "a"), (2, "b")).toDF("x.x", "y.y") + .withColumn("z.z", struct("`x.x`", "`y.y`")) + withDeltaTable(df) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN `z.z`.`x.x` `x.x` integer AFTER `y.y`") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("x.x", "integer").add("y.y", "string") + .add("z.z", new StructType() + .add("y.y", "string").add("x.x", "integer"))) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, (String, Int))], + (1, "a", ("a", 1)), (2, "b", ("b", 2))) + } + } + + test("CHANGE COLUMN - move to after an unknown column") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + withDeltaTable(df) { tableName => + + val ex = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName CHANGE COLUMN v1 v1 integer AFTER unknown") + } + assert(ex.getMessage.contains("Missing field unknown") || + ex.getMessage.contains("Couldn't resolve positional argument AFTER unknown")) + } + } + + test("CHANGE COLUMN - move to after an unknown column (nested)") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + val ex = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName CHANGE COLUMN struct.v1 v1 integer AFTER unknown") + } + assert(ex.getMessage.contains("Missing field struct.unknown") || + ex.getMessage.contains("Couldn't resolve positional argument AFTER unknown")) + } + } + + test("CHANGE COLUMN - complex types nullability tests") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", array("s")) + .withColumn("m", map(col("s"), col("s"))) + withDeltaTable(df) { tableName => + // not supported to tighten nullabilities. + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN s s STRUCT") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN a a " + + "ARRAY>") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN m m " + + "MAP, STRUCT>") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN m m " + + "MAP, STRUCT>") + + // not supported to add not-null columns. 
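+ // Adding a NOT NULL field through a type respecification (for example, a hypothetical
+ // STRUCT<v1:integer, v2:string, v3:long NOT NULL>) would leave the rows that already exist
+ // with no value for the new non-nullable field, so these commands are rejected as well.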
+ assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN s s " + + "STRUCT") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN a a " + + "ARRAY>") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN m m " + + "MAP, " + + "STRUCT>") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN m m " + + "MAP, " + + "STRUCT>") + } + } + + ddlTest("CHANGE COLUMN - change name (nested)") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN struct.v2 v3 string") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN struct struct " + + "STRUCT") + } + } + + ddlTest("CHANGE COLUMN - add a comment (nested)") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + sql(s"ALTER TABLE $tableName CHANGE COLUMN struct.v1 v1 integer COMMENT 'a comment'") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("struct", new StructType() + .add("v1", "integer", true, "a comment").add("v2", "string"))) + + assertNotSupported(s"ALTER TABLE $tableName CHANGE COLUMN struct struct " + + "STRUCT") + } + } + + ddlTest("CHANGE COLUMN - complex types not supported because behavior is ambiguous") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + .withColumn("a", array("s")) + .withColumn("m", map(col("s"), col("s"))) + withDeltaTable(df) { tableName => + // not supported to add columns + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN s s STRUCT") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN a a ARRAY>") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN m m " + + "MAP, STRUCT>") + + // not supported to remove columns. 
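+ // Re-declaring a struct/array/map with fewer fields (for example, a hypothetical
+ // STRUCT<v1:integer> that omits v2) is ambiguous about what should happen to the existing
+ // data for the omitted field, so ALTER TABLE ... CHANGE COLUMN rejects it too.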
+ assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN s s STRUCT") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN a a ARRAY>") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN m m " + + "MAP, STRUCT>") + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN m m " + + "MAP, STRUCT>") + } + } + + test("CHANGE COLUMN - move unknown column") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + withDeltaTable(df) { tableName => + + val ex = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName CHANGE COLUMN unknown unknown string FIRST") + } + assert(ex.getMessage.contains("Missing field unknown") || + ex.getMessage.contains("Cannot update missing field unknown")) + } + } + + test("CHANGE COLUMN - move unknown column (nested)") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + val ex = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName CHANGE COLUMN struct.unknown unknown string FIRST") + } + assert(ex.getMessage.contains("Missing field struct.unknown") || + ex.getMessage.contains("Cannot update missing field struct.unknown")) + } + } + + test("CHANGE COLUMN - case insensitive") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + val (deltaLog, _) = getDeltaLogWithSnapshot(tableName) + + sql(s"ALTER TABLE $tableName CHANGE COLUMN V1 v1 integer") + + assertEqual(deltaLog.update().schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("s", new StructType().add("v1", "integer").add("v2", "string"))) + + sql(s"ALTER TABLE $tableName CHANGE COLUMN v1 V1 integer") + + assertEqual(deltaLog.update().schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("s", new StructType().add("v1", "integer").add("v2", "string"))) + + sql(s"ALTER TABLE $tableName CHANGE COLUMN v1 v1 integer AFTER V2") + + assertEqual(deltaLog.update().schema, new StructType() + .add("v2", "string").add("v1", "integer") + .add("s", new StructType().add("v1", "integer").add("v2", "string"))) + + // Since the struct doesn't match the case this fails + assertNotSupported( + s"ALTER TABLE $tableName CHANGE COLUMN s s struct AFTER V2") + + sql( + s"ALTER TABLE $tableName CHANGE COLUMN s s struct AFTER V2") + + assertEqual(deltaLog.update().schema, new StructType() + .add("v2", "string") + .add("s", new StructType().add("v1", "integer").add("v2", "string")) + .add("v1", "integer")) + } + } + } + + test("CHANGE COLUMN - case sensitive") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val df = Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("s", struct("v1", "v2")) + withDeltaTable(df) { tableName => + + val ex1 = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName CHANGE COLUMN V1 V1 integer") + } + assert(ex1.getMessage.contains("Missing field V1") || + ex1.getMessage.contains("Cannot update missing field V1")) + + val ex2 = intercept[ParseException] { + sql(s"ALTER TABLE $tableName CHANGE COLUMN v1 V1 integer") + } + assert(ex2.getMessage.contains("Renaming column is not supported")) + + val ex3 = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName CHANGE COLUMN v1 v1 integer AFTER V2") + } + assert(ex3.getMessage.contains("Missing field V2") || + ex3.getMessage.contains("Couldn't resolve positional argument AFTER V2")) + + val ex4 = intercept[AnalysisException] { + 
sql(s"ALTER TABLE $tableName CHANGE COLUMN s s struct AFTER v2") + } + assert(ex4.getMessage.contains("Cannot update")) + } + } + } + + test("CHANGE COLUMN: allow to change change column from char to string type") { + withTable("t") { + sql("CREATE TABLE t(i STRING, c CHAR(4)) USING delta") + sql("ALTER TABLE t CHANGE COLUMN c TYPE STRING") + assert(spark.table("t").schema(1).dataType === StringType) + } + } + + private def checkColType(f: StructField, dt: DataType): Unit = { + assert(f.dataType == CharVarcharUtils.replaceCharVarcharWithString(dt)) + assert(CharVarcharUtils.getRawType(f.metadata).contains(dt)) + } + + test("CHANGE COLUMN: allow to change column from char(x) to varchar(y) type x <= y") { + withTable("t") { + sql("CREATE TABLE t(i STRING, c CHAR(4)) USING delta") + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(4)") + checkColType(spark.table("t").schema(1), VarcharType(4)) + } + withTable("t") { + sql("CREATE TABLE t(i STRING, c CHAR(4)) USING delta") + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(5)") + checkColType(spark.table("t").schema(1), VarcharType(5)) + } + } + + test("CHANGE COLUMN: allow to change column from varchar(x) to varchar(y) type x <= y") { + withTable("t") { + sql("CREATE TABLE t(i STRING, c VARCHAR(4)) USING delta") + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(4)") + checkColType(spark.table("t").schema(1), VarcharType(4)) + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(5)") + checkColType(spark.table("t").schema(1), VarcharType(5)) + } + } +} + +trait DeltaAlterTableByNameTests extends DeltaAlterTableTests { + import testImplicits._ + + override protected def createTable(schema: String, tblProperties: Map[String, String]): String = { + val props = tblProperties.map { case (key, value) => + s"'$key' = '$value'" + }.mkString(", ") + val propsString = if (tblProperties.isEmpty) "" else s" TBLPROPERTIES ($props)" + sql(s"CREATE TABLE delta_test ($schema) USING delta$propsString") + "delta_test" + } + + override protected def createTable(df: DataFrame, partitionedBy: Seq[String]): String = { + df.write.partitionBy(partitionedBy: _*).format("delta").saveAsTable("delta_test") + "delta_test" + } + + override protected def dropTable(identifier: String): Unit = { + sql(s"DROP TABLE IF EXISTS $identifier") + } + + override protected def getDeltaLogWithSnapshot(identifier: String): (DeltaLog, Snapshot) = { + DeltaLog.forTableWithSnapshot(spark, TableIdentifier(identifier)) + } + + test("ADD COLUMNS - external table") { + withTempDir { dir => + withTable("delta_test") { + val path = dir.getCanonicalPath + Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .write + .format("delta") + .option("path", path) + .saveAsTable("delta_test") + + checkDatasetUnorderly( + spark.table("delta_test").as[(Int, String)], + (1, "a"), (2, "b")) + + sql("ALTER TABLE delta_test ADD COLUMNS (v3 long, v4 double)") + + val (deltaLog, snapshot) = DeltaLog.forTableWithSnapshot(spark, path) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("v3", "long").add("v4", "double")) + + checkDatasetUnorderly( + spark.table("delta_test").as[(Int, String, Option[Long], Option[Double])], + (1, "a", None, None), (2, "b", None, None)) + checkDatasetUnorderly( + spark.read.format("delta").load(path).as[(Int, String, Option[Long], Option[Double])], + (1, "a", None, None), (2, "b", None, None)) + } + } + } + + // LOCATION tests do not make sense for by path access + testQuietly("SET LOCATION") { + withTable("delta_table") { + 
spark.range(1).write.format("delta").saveAsTable("delta_table") + val catalog = spark.sessionState.catalog + val table = catalog.getTableMetadata(TableIdentifier(tableName = "delta_table")) + val oldLocation = table.location.toString + withTempDir { dir => + val path = dir.getCanonicalPath + spark.range(1, 2).write.format("delta").save(path) + checkAnswer(spark.table("delta_table"), Seq(Row(0))) + sql(s"alter table delta_table set location '$path'") + checkAnswer(spark.table("delta_table"), Seq(Row(1))) + } + Utils.deleteRecursively(new File(oldLocation.stripPrefix("file:"))) + } + } + + testQuietly("SET LOCATION: external delta table") { + withTable("delta_table") { + withTempDir { oldDir => + spark.range(1).write.format("delta").save(oldDir.getCanonicalPath) + sql(s"CREATE TABLE delta_table USING delta LOCATION '${oldDir.getCanonicalPath}'") + withTempDir { dir => + val path = dir.getCanonicalPath + spark.range(1, 2).write.format("delta").save(path) + checkAnswer(spark.table("delta_table"), Seq(Row(0))) + sql(s"alter table delta_table set location '$path'") + checkAnswer(spark.table("delta_table"), Seq(Row(1))) + } + } + } + } + + test( + "SET LOCATION - negative cases") { + withTable("delta_table") { + spark.range(1).write.format("delta").saveAsTable("delta_table") + withTempDir { dir => + val path = dir.getCanonicalPath + val catalog = spark.sessionState.catalog + val table = catalog.getTableMetadata(TableIdentifier(tableName = "delta_table")) + val oldLocation = table.location.toString + + // new location is not a delta table + var e = intercept[AnalysisException] { + sql(s"alter table delta_table set location '$path'") + } + assert(e.getMessage.contains("not a Delta table")) + + Seq("1").toDF("id").write.format("delta").save(path) + + // set location on specific partitions + e = intercept[AnalysisException] { + sql(s"alter table delta_table partition (id = 1) set location '$path'") + } + assert(Seq("partition", "not support").forall(e.getMessage.contains)) + + // schema mismatch + e = intercept[AnalysisException] { + sql(s"alter table delta_table set location '$path'") + } + assert(e.getMessage.contains("different than the current table schema")) + + withSQLConf(DeltaSQLConf.DELTA_ALTER_LOCATION_BYPASS_SCHEMA_CHECK.key -> "true") { + checkAnswer(spark.table("delta_table"), Seq(Row(0))) + // now we can bypass the schema mismatch check + sql(s"alter table delta_table set location '$path'") + checkAnswer(spark.table("delta_table"), Seq(Row("1"))) + } + Utils.deleteRecursively(new File(oldLocation.stripPrefix("file:"))) + } + } + } +} + +/** + * For ByPath tests, we select a test case per ALTER TABLE command to simply test identifier + * resolution. 
+ */ +trait DeltaAlterTableByPathTests extends DeltaAlterTableTestBase { + override protected def createTable(schema: String, tblProperties: Map[String, String]): String = { + val tmpDir = Utils.createTempDir().getCanonicalPath + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tmpDir) + // This is a path-based table so we don't need to pass the catalogTable here + val txn = deltaLog.startTransaction(None, Some(snapshot)) + val metadata = Metadata( + schemaString = StructType.fromDDL(schema).json, + configuration = tblProperties) + txn.commit(metadata :: Nil, DeltaOperations.ManualUpdate) + s"delta.`$tmpDir`" + } + + override protected def createTable(df: DataFrame, partitionedBy: Seq[String]): String = { + val tmpDir = Utils.createTempDir().getCanonicalPath + df.write.format("delta").partitionBy(partitionedBy: _*).save(tmpDir) + s"delta.`$tmpDir`" + } + + override protected def dropTable(identifier: String): Unit = { + Utils.deleteRecursively(new File(identifier.stripPrefix("delta.`").stripSuffix("`"))) + } + + override protected def getDeltaLogWithSnapshot(identifier: String): (DeltaLog, Snapshot) = { + DeltaLog.forTableWithSnapshot(spark, identifier.stripPrefix("delta.`").stripSuffix("`")) + } + + override protected def ddlTest(testName: String)(f: => Unit): Unit = { + super.ddlTest(testName)(f) + + testQuietly(testName + " with delta database") { + withDatabase("delta") { + spark.sql("CREATE DATABASE delta") + f + } + } + } + + import testImplicits._ + + ddlTest("SET/UNSET TBLPROPERTIES - simple") { + withDeltaTable("v1 int, v2 string") { tableName => + + sql(s""" + |ALTER TABLE $tableName + |SET TBLPROPERTIES ( + | 'delta.logRetentionDuration' = '2 weeks', + | 'delta.checkpointInterval' = '20', + | 'key' = 'value' + |)""".stripMargin) + + val (deltaLog, snapshot1) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot1.metadata.configuration, Map( + "delta.logRetentionDuration" -> "2 weeks", + "delta.checkpointInterval" -> "20", + "key" -> "value")) + assert(deltaLog.deltaRetentionMillis(snapshot1.metadata) == 2 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot1.metadata) == 20) + + sql(s"ALTER TABLE $tableName UNSET TBLPROPERTIES ('delta.checkpointInterval', 'key')") + + val snapshot2 = deltaLog.update() + assertEqual(snapshot2.metadata.configuration, + Map("delta.logRetentionDuration" -> "2 weeks")) + assert(deltaLog.deltaRetentionMillis(snapshot2.metadata) == 2 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot2.metadata) == + CHECKPOINT_INTERVAL.fromString(CHECKPOINT_INTERVAL.defaultValue)) + } + } + + ddlTest("ADD COLUMNS - simple") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2")) { tableName => + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String)], + (1, "a"), (2, "b")) + + sql(s"ALTER TABLE $tableName ADD COLUMNS (v3 long, v4 double)") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + .add("v1", "integer").add("v2", "string") + .add("v3", "long").add("v4", "double")) + + checkDatasetUnorderly( + spark.table(tableName).as[(Int, String, Option[Long], Option[Double])], + (1, "a", None, None), (2, "b", None, None)) + } + } + + ddlTest("CHANGE COLUMN - add a comment") { + withDeltaTable(Seq((1, "a"), (2, "b")).toDF("v1", "v2")) { tableName => + + sql(s"ALTER TABLE $tableName CHANGE COLUMN v1 v1 integer COMMENT 'a comment'") + + val (deltaLog, snapshot) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot.schema, new StructType() + 
.add("v1", "integer", true, "a comment").add("v2", "string")) + } + } + + test("SET LOCATION is not supported for path based tables") { + val df = spark.range(1).toDF() + withDeltaTable(df) { identifier => + withTempDir { dir => + val path = dir.getCanonicalPath + val e = intercept[DeltaAnalysisException] { + sql(s"alter table $identifier set location '$path'") + } + assert(e.getErrorClass == "DELTA_CANNOT_SET_LOCATION_ON_PATH_IDENTIFIER") + assert(e.getSqlState == "42613") + assert(e.getMessage == "[DELTA_CANNOT_SET_LOCATION_ON_PATH_IDENTIFIER] " + + "Cannot change the location of a path based table.") + } + } + } +} + +class DeltaAlterTableByNameSuite + extends DeltaAlterTableByNameTests + with DeltaSQLCommandTest { + + ddlTest("SET/UNSET TBLPROPERTIES - unset non-existent config value should still" + + "unset the config if key matches") { + val props = Map( + "delta.randomizeFilePrefixes" -> "true", + "delta.randomPrefixLength" -> "5", + "key" -> "value" + ) + withDeltaTable("v1 int, v2 string", props) { tableName => + sql(s"ALTER TABLE $tableName UNSET TBLPROPERTIES ('delta.randomizeFilePrefixes', 'kEy')") + + val (deltaLog, snapshot1) = getDeltaLogWithSnapshot(tableName) + assertEqual(snapshot1.metadata.configuration, Map( + "delta.randomPrefixLength" -> "5", + "key" -> "value")) + + sql(s"ALTER TABLE $tableName UNSET TBLPROPERTIES IF EXISTS " + + "('delta.randomizeFilePrefixes', 'kEy')") + + val snapshot2 = deltaLog.update() + assertEqual(snapshot2.metadata.configuration, + Map("delta.randomPrefixLength" -> "5", "key" -> "value")) + } + } + +} + +class DeltaAlterTableByPathSuite extends DeltaAlterTableByPathTests with DeltaSQLCommandTest + with DeltaAlterTableReplaceTests + + +trait DeltaAlterTableColumnMappingSelectedTests extends DeltaColumnMappingSelectedTestMixin { + override protected def runOnlyTests = Seq( + "ADD COLUMNS into complex types - Array", + "CHANGE COLUMN - move to first (nested)", + "CHANGE COLUMN - case insensitive") +} + +class DeltaAlterTableByNameIdColumnMappingSuite extends DeltaAlterTableByNameSuite + with DeltaColumnMappingEnableIdMode + with DeltaAlterTableColumnMappingSelectedTests + +class DeltaAlterTableByPathIdColumnMappingSuite extends DeltaAlterTableByPathSuite + with DeltaColumnMappingEnableIdMode + with DeltaAlterTableColumnMappingSelectedTests + +class DeltaAlterTableByNameNameColumnMappingSuite extends DeltaAlterTableByNameSuite + with DeltaColumnMappingEnableNameMode + with DeltaAlterTableColumnMappingSelectedTests + +class DeltaAlterTableByPathNameColumnMappingSuite extends DeltaAlterTableByPathSuite + with DeltaColumnMappingEnableNameMode + with DeltaAlterTableColumnMappingSelectedTests diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaArbitraryColumnNameSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaArbitraryColumnNameSuite.scala new file mode 100644 index 00000000000..a45dbeb288b --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaArbitraryColumnNameSuite.scala @@ -0,0 +1,313 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import scala.collection.JavaConverters._ + +import org.scalatest.GivenWhenThen + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{ArrayType, IntegerType, MapType, StringType, StructType} + +trait DeltaArbitraryColumnNameSuiteBase extends DeltaColumnMappingSuiteUtils { + + protected val simpleNestedSchema = new StructType() + .add("a", StringType, true) + .add("b", + new StructType() + .add("c", StringType, true) + .add("d", IntegerType, true)) + .add("map", MapType(StringType, StringType), true) + .add("arr", ArrayType(IntegerType), true) + + protected val simpleNestedSchemaWithDuplicatedNestedColumnName = new StructType() + .add("a", + new StructType() + .add("c", StringType, true) + .add("d", IntegerType, true), true) + .add("b", + new StructType() + .add("c", StringType, true) + .add("d", IntegerType, true), true) + .add("map", MapType(StringType, StringType), true) + .add("arr", ArrayType(IntegerType), true) + + protected val nestedSchema = new StructType() + .add(colName("a"), StringType, true) + .add(colName("b"), + new StructType() + .add(colName("c"), StringType, true) + .add(colName("d"), IntegerType, true)) + .add(colName("map"), MapType(StringType, StringType), true) + .add(colName("arr"), ArrayType(IntegerType), true) + + protected def simpleNestedData = + spark.createDataFrame( + Seq( + Row("str1", Row("str1.1", 1), Map("k1" -> "v1"), Array(1, 11)), + Row("str2", Row("str1.2", 2), Map("k2" -> "v2"), Array(2, 22))).asJava, + simpleNestedSchema) + + protected def simpleNestedDataWithDuplicatedNestedColumnName = + spark.createDataFrame( + Seq( + Row(Row("str1", 1), Row("str1.1", 1), Map("k1" -> "v1"), Array(1, 11)), + Row(Row("str2", 2), Row("str1.2", 2), Map("k2" -> "v2"), Array(2, 22))).asJava, + simpleNestedSchemaWithDuplicatedNestedColumnName) + + protected def nestedData = + spark.createDataFrame( + Seq( + Row("str1", Row("str1.1", 1), Map("k1" -> "v1"), Array(1, 11)), + Row("str2", Row("str1.2", 2), Map("k2" -> "v2"), Array(2, 22))).asJava, + nestedSchema) + + // TODO: Refactor DeltaColumnMappingSuite and consolidate these table creation methods between + // the two suites. 
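+ // The helpers below create a table with the same data and properties through different code
+ // paths: plain SQL CREATE TABLE followed by an append, CREATE OR REPLACE TABLE ... AS SELECT,
+ // CTAS, the DataFrame writer, and the DataFrameWriterV2 API. The create-table test then
+ // exercises each of them against every supported column mapping mode (plus "none").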
+ protected def createTableWithSQLCreateOrReplaceAPI( + tableName: String, + data: DataFrame, + props: Map[String, String] = Map.empty, + partCols: Seq[String] = Nil): Unit = { + withTable("source") { + createTableWithDataFrameWriterV2API( + "source", + data, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode(props))) + + spark.sql( + s""" + |CREATE OR REPLACE TABLE $tableName + |USING DELTA + |${partitionStmt(partCols)} + |${propString(props)} + |AS SELECT * FROM source + |""".stripMargin) + } + } + + protected def createTableWithSQLAPI( + tableName: String, + data: DataFrame, + props: Map[String, String] = Map.empty, + partCols: Seq[String] = Nil): Unit = { + withTable("source") { + spark.sql( + s""" + |CREATE TABLE $tableName (${data.schema.toDDL}) + |USING DELTA + |${partitionStmt(partCols)} + |${propString(props)} + |""".stripMargin) + data.write.format("delta").mode("append").saveAsTable(tableName) + } + } + + protected def createTableWithCTAS( + tableName: String, + data: DataFrame, + props: Map[String, String] = Map.empty, + partCols: Seq[String] = Nil): Unit = { + withTable("source") { + createTableWithDataFrameWriterV2API( + "source", + data, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode(props))) + + spark.sql( + s""" + |CREATE TABLE $tableName + |USING DELTA + |${partitionStmt(partCols)} + |${propString(props)} + |AS SELECT * FROM source + |""".stripMargin) + } + } + + protected def createTableWithDataFrameAPI( + tableName: String, + data: DataFrame, + props: Map[String, String] = Map.empty, + partCols: Seq[String]): Unit = { + val sqlConfs = props.map { case (key, value) => + "spark.databricks.delta.properties.defaults." + key.stripPrefix("delta.") -> value + } + withSQLConf(sqlConfs.toList: _*) { + if (partCols.nonEmpty) { + data.write.format("delta") + .partitionBy(partCols.map(name => s"`$name`"): _*).saveAsTable(tableName) + } else { + data.write.format("delta").saveAsTable(tableName) + } + } + } + + protected def createTableWithDataFrameWriterV2API( + tableName: String, + data: DataFrame, + props: Map[String, String] = Map.empty, + partCols: Seq[String] = Seq.empty): Unit = { + + val writer = data.writeTo(tableName).using("delta") + props.foreach(prop => writer.tableProperty(prop._1, prop._2)) + val partColumns = partCols.map(name => expr(s"`$name`")) + if (partCols.nonEmpty) writer.partitionedBy(partColumns.head, partColumns.tail: _*) + writer.create() + } + + protected def assertException(message: String)(block: => Unit): Unit = { + val e = intercept[Exception](block) + + assert(e.getMessage.contains(message)) + } + + protected def assertExceptionOneOf(messages: Seq[String])(block: => Unit): Unit = { + val e = intercept[Exception](block) + assert(messages.exists(x => e.getMessage.contains(x))) + } +} + +class DeltaArbitraryColumnNameSuite extends QueryTest + with DeltaArbitraryColumnNameSuiteBase + with GivenWhenThen { + + private def testCreateTable(): Unit = { + val allProps = supportedModes + .map(mode => Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode)) ++ + // none mode + Seq(Map.empty[String, String]) + + def withProps(props: Map[String, String])(createFunc: => Unit) = { + withTable("t1") { + if (mode(props) != "none") { + createFunc + checkAnswer(spark.table("t1"), nestedData) + } else { + val e = intercept[AnalysisException] { + createFunc + } + assert(e.getMessage.contains("Found invalid character(s)")) + } + } + } + + allProps.foreach { props => + withProps(props) { + Given(s"with SQL CREATE TABLE API, mode ${mode(props)}") + createTableWithSQLAPI("t1", + 
nestedData, + props, + partCols = Seq(colName("a"))) + } + + withProps(props) { + Given(s"with SQL CTAS API, mode ${mode(props)}") + createTableWithCTAS("t1", + nestedData, + props, + partCols = Seq(colName("a")) + ) + } + + withProps(props) { + Given(s"with SQL CREATE OR REPLACE TABLE API, mode ${mode(props)}") + createTableWithSQLCreateOrReplaceAPI("t1", + nestedData, + props, + partCols = Seq(colName("a"))) + } + + withProps(props) { + Given(s"with DataFrame API, mode ${mode(props)}") + createTableWithDataFrameAPI("t1", + nestedData, + props, + partCols = Seq(colName("a"))) + } + + withProps(props) { + Given(s"with DataFrameWriterV2 API, mode ${mode(props)}") + createTableWithDataFrameWriterV2API("t1", + nestedData, + props, + // TODO: make DataFrameWriterV2 work with arbitrary partition column names + partCols = Seq.empty) + } + } + } + + test("create table") { + testCreateTable() + } + + testColumnMapping("schema evolution and simple query") { mode => + withTable("t1") { + createTableWithSQLAPI("t1", + nestedData, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + partCols = Seq(colName("a")) + ) + val newNestedData = + spark.createDataFrame( + Seq(Row("str3", Row("str1.3", 3), Map("k3" -> "v3"), Array(3, 33), "new value")).asJava, + nestedSchema.add(colName("e"), StringType)) + newNestedData.write.format("delta") + .option("mergeSchema", "true") + .mode("append").saveAsTable("t1") + checkAnswer( + spark.table("t1"), + Seq( + Row("str1", Row("str1.1", 1), Map("k1" -> "v1"), Array(1, 11), null), + Row("str2", Row("str1.2", 2), Map("k2" -> "v2"), Array(2, 22), null), + Row("str3", Row("str1.3", 3), Map("k3" -> "v3"), Array(3, 33), "new value"))) + + val colA = colName("a") + val colB = colName("b") + val colC = colName("c") + val colD = colName("d") + checkAnswer( + spark.table("t1") + .where(s"`$colA` > 'str1'") + .where(s"`$colB`.`$colD` < 3") + .select(s"`$colB`.`$colC`"), + Row("str1.2")) + } + } + + testColumnMapping("alter table add and replace columns") { mode => + withTable("t1") { + createTableWithSQLAPI("t1", + nestedData, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + partCols = Seq(colName("a")) + ) + spark.sql(s"alter table t1 add columns (`${colName("e")}` string)") + spark.sql("insert into t1 " + + "values ('str3', struct('str1.3', 3), map('k3', 'v3'), array(3, 33), 'new value')") + + checkAnswer( + spark.table("t1"), + Seq( + Row("str1", Row("str1.1", 1), Map("k1" -> "v1"), Array(1, 11), null), + Row("str2", Row("str1.2", 2), Map("k2" -> "v2"), Array(2, 22), null), + Row("str3", Row("str1.3", 3), Map("k3" -> "v3"), Array(3, 33), "new value"))) + + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCColumnMappingSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCColumnMappingSuite.scala new file mode 100644 index 00000000000..4eb5a8cba80 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCColumnMappingSuite.scala @@ -0,0 +1,795 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.DeltaOperations.ManualUpdate +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils +import org.apache.spark.sql.delta.commands.cdc.CDCReader._ +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaColumnMappingSelectedTestMixin +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types._ + +trait DeltaCDCColumnMappingSuiteBase extends DeltaCDCSuiteBase + with DeltaColumnMappingTestUtils + with DeltaColumnMappingSelectedTestMixin { + + import testImplicits._ + + implicit class DataFrameDropCDCFields(df: DataFrame) { + def dropCDCFields: DataFrame = + df.drop(CDC_COMMIT_TIMESTAMP) + .drop(CDC_TYPE_COLUMN_NAME) + .drop(CDC_COMMIT_VERSION) + } + + test("upgrade to column mapping not blocked") { + withTempDir { dir => + setupInitialDeltaTable(dir, upgradeInNameMode = true) + implicit val deltaLog: DeltaLog = DeltaLog.forTable(spark, dir.getCanonicalPath) + val v1 = deltaLog.update().version + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v1.toString), + Some(BatchCDFSchemaEndVersion)).dropCDCFields, + (0 until 10).map(_.toString).map(i => Row(i, i)) + ) + } + } + + test("add column batch cdc read not blocked") { + withTempDir { dir => + // Set up an initial table with 10 records in schema + setupInitialDeltaTable(dir) + implicit val deltaLog: DeltaLog = DeltaLog.forTable(spark, dir.getCanonicalPath) + + // add column should not be blocked + sql(s"ALTER TABLE delta.`${dir.getCanonicalPath}` ADD COLUMN (name string)") + + // write more data + writeDeltaData((10 until 15)) + + // None of the schema mode should block this use case + Seq(BatchCDFSchemaLegacy, BatchCDFSchemaLatest, BatchCDFSchemaEndVersion).foreach { mode => + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(deltaLog.update().version.toString), + Some(mode)).dropCDCFields, + (0 until 10).map(_.toString).toDF("id") + .withColumn("value", col("id")) + .withColumn("name", lit(null)) union + (10 until 15).map(_.toString).toDF("id") + .withColumn("value", col("id")) + .withColumn("name", col("id"))) + } + } + } + + test("data type and nullability change batch cdc read blocked") { + withTempDir { dir => + // Set up an initial table with 10 records in schema + setupInitialDeltaTable(dir) + implicit val deltaLog: DeltaLog = DeltaLog.forTable(spark, dir.getCanonicalPath) + val s1 = deltaLog.update() + val v1 = s1.version + + // Change the data type of column + deltaLog.withNewTransaction { txn => + // id was string + val updatedSchema = + SchemaMergingUtils.transformColumns( + StructType.fromDDL("id INT, value STRING")) { (_, field, _) => + val refField = s1.metadata.schema(field.name) + field.copy(metadata = refField.metadata) + } + txn.commit(s1.metadata.copy(schemaString = updatedSchema.json) :: Nil, ManualUpdate) + } + val v2 = deltaLog.update().version + + // write more data in updated schema + Seq((10, "10")).toDF("id", "value") + .write.format("delta").mode("append").save(dir.getCanonicalPath) + val v3 = deltaLog.update().version + + 
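+ // Rough version timeline at this point: v1 = the table after the initial ten rows written
+ // with (id string, value string), v2 = the commit that manually rewrites the schema so that
+ // id becomes an integer, v3 = an append of (10, "10") under the changed schema.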
// query all changes using latest schema blocked + assertBlocked( + expectedIncompatSchemaVersion = 0, + expectedReadSchemaVersion = v3, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v3.toString)).collect() + } + + // query using end version also blocked if cross schema change + assertBlocked( + expectedIncompatSchemaVersion = 0, + expectedReadSchemaVersion = v3, + schemaMode = BatchCDFSchemaEndVersion, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v3.toString), + Some(BatchCDFSchemaEndVersion)).collect() + } + + // query using end version NOT blocked if NOT cross schema change + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion(v3.toString), + EndingVersion(v3.toString), + Some(BatchCDFSchemaEndVersion)).dropCDCFields, + Row(10, "10") :: Nil + ) + + val s2 = deltaLog.update() + + // Change nullability unsafely + deltaLog.withNewTransaction { txn => + // the schema was nullable, but we want to make it non-nullable + val updatedSchema = + SchemaMergingUtils.transformColumns( + StructType.fromDDL("id INT, value string").asNullable) { (_, field, _) => + val refField = s1.metadata.schema(field.name) + field.copy(metadata = refField.metadata, nullable = false) + } + txn.commit(s2.metadata.copy(schemaString = updatedSchema.json) :: Nil, ManualUpdate) + } + val v4 = deltaLog.update().version + + // write more data in updated schema + Seq((11, "11")).toDF("id", "value") + .write.format("delta").mode("append").save(dir.getCanonicalPath) + + val v5 = deltaLog.update().version + + // query changes using latest schema blocked + // Note this is not detected as an illegal schema change, but a data violation, because + // we attempt to read using latest schema @ v5 (nullable=false) to read some past data @ v3 + // (nullable=true), which is unsafe. 
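+ // (Concretely, a null written while the columns were still nullable could not be represented
+ // under a read schema that declares them non-nullable, hence the data violation above.)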
+ assertBlocked( + expectedIncompatSchemaVersion = v3, + expectedReadSchemaVersion = v5, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + // v3 is the first version post the data type schema change + StartingVersion(v3.toString), + EndingVersion(v5.toString)).collect() + } + + // query using end version also blocked if cross schema change + assertBlocked( + expectedIncompatSchemaVersion = v3, + expectedReadSchemaVersion = v5, + schemaMode = BatchCDFSchemaEndVersion, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion(v3.toString), + EndingVersion(v5.toString), + Some(BatchCDFSchemaEndVersion)).collect() + } + + // query using end version NOT blocked if NOT cross schema change + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion(v5.toString), + EndingVersion(v5.toString), + Some(BatchCDFSchemaEndVersion)).dropCDCFields, + Row(11, "11") :: Nil + ) + } + } + + test("overwrite table with invalid schema change in non-column mapping table is blocked") { + withTempDir { dir => + withColumnMappingConf("none") { + // Create table action sequence + Seq((1, "a")).toDF("id", "name").write.format("delta").save(dir.getCanonicalPath) + implicit val log: DeltaLog = DeltaLog.forTable(spark, dir) + val v1 = log.update().version + + // Overwrite with dropped column + Seq(2).toDF("id") + .write + .format("delta") + .mode("overwrite") + .option("overwriteSchema", "true") + .save(dir.getCanonicalPath) + val v2 = log.update().version + + assertBlocked( + expectedIncompatSchemaVersion = v1, + expectedReadSchemaVersion = v2, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion(v1.toString), + EndingVersion(v2.toString), + schemaMode = Some(BatchCDFSchemaEndVersion)).collect() + } + + // Overwrite with a renamed column + Seq(3).toDF("id2") + .write + .format("delta") + .mode("overwrite") + .option("overwriteSchema", "true") + .save(dir.getCanonicalPath) + val v3 = log.update().version + + assertBlocked( + expectedIncompatSchemaVersion = v2, + expectedReadSchemaVersion = v3, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion(v2.toString), + EndingVersion(v3.toString)).collect() + } + } + } + } + + test("drop column batch cdc read blocked") { + withTempDir { dir => + // Set up an initial table with 10 records in schema + setupInitialDeltaTable(dir) + implicit val deltaLog: DeltaLog = DeltaLog.forTable(spark, dir.getCanonicalPath) + val v1 = deltaLog.update().version + + // drop column would cause CDC read to be blocked + sql(s"ALTER TABLE delta.`${dir.getCanonicalPath}` DROP COLUMN value") + val v2 = deltaLog.update().version + + // write more data + writeDeltaData(Seq(10)) + val v3 = deltaLog.update().version + + // query all changes using latest schema blocked + assertBlocked( + expectedIncompatSchemaVersion = 0, + expectedReadSchemaVersion = v3, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v3.toString)).collect() + } + + // query just first two versions which have more columns than latest schema is also blocked + assertBlocked( + expectedIncompatSchemaVersion = 0, + expectedReadSchemaVersion = v3, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion("1")).collect() + } + + // query unblocked if force enabled by user + withSQLConf( + 
DeltaSQLConf.DELTA_CDF_UNSAFE_BATCH_READ_ON_INCOMPATIBLE_SCHEMA_CHANGES.key -> "true") { + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v3.toString)).dropCDCFields, + // Note id is dropped because we are using latest schema + (0 until 11).map(i => Row(i.toString)) + ) + } + + // querying changes using endVersion schema blocked if crossing schema boundary + assertBlocked( + expectedIncompatSchemaVersion = 0, + expectedReadSchemaVersion = v3, + schemaMode = BatchCDFSchemaEndVersion, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v3.toString), + Some(BatchCDFSchemaEndVersion)).collect() + } + + assertBlocked( + expectedIncompatSchemaVersion = v1, + expectedReadSchemaVersion = v3, + schemaMode = BatchCDFSchemaEndVersion, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion(v1.toString), + EndingVersion(v3.toString), + Some(BatchCDFSchemaEndVersion)).collect() + } + + // querying changes using endVersion schema NOT blocked if NOT crossing schema boundary + // with schema + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v1.toString), + Some(BatchCDFSchemaEndVersion)).dropCDCFields, + (0 until 10).map(_.toString).map(i => Row(i, i))) + + // with schema + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion(v3.toString), + EndingVersion(v3.toString), + Some(BatchCDFSchemaEndVersion)).dropCDCFields, + Row("10") :: Nil + ) + + // let's add the column back... + sql(s"ALTER TABLE delta.`${dir.getCanonicalPath}` ADD COLUMN (value string)") + val v4 = deltaLog.update().version + + // write more data + writeDeltaData(Seq(11)) + val v5 = deltaLog.update().version + + // The read is still blocked, even schema @ 0 looks the "same" as the latest schema + // but the added column now maps to a different physical column. + // Note that this bypasses all the schema change actions in between because: + // 1. The schema after dropping @ v2 is a subset of the read schema -> this is fine + // 2. The schema after adding back @ v4 is the same as latest schema -> this is fine + // but our final check against the starting schema would catch it. + assertBlocked( + expectedIncompatSchemaVersion = 0, + expectedReadSchemaVersion = v5, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v5.toString)).collect() + } + + // In this case, tho there aren't any read-incompat schema changes in the querying range, + // the latest schema is not read-compat with the data files @ v0, so we still block. 
+ assertBlocked( + expectedIncompatSchemaVersion = 0, + expectedReadSchemaVersion = v5, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion("1")).collect() + } + } + } + + test("rename column batch cdc read blocked") { + withTempDir { dir => + // Set up an initial table with 10 records in schema + setupInitialDeltaTable(dir) + implicit val deltaLog: DeltaLog = DeltaLog.forTable(spark, dir.getCanonicalPath) + val v1 = deltaLog.update().version + + // Rename column + sql(s"ALTER TABLE delta.`${dir.getCanonicalPath}` RENAME COLUMN id TO id2") + val v2 = deltaLog.update().version + + // write more data + writeDeltaData(Seq(10)) + val v3 = deltaLog.update().version + + // query all versions using latest schema blocked + assertBlocked( + expectedIncompatSchemaVersion = 0, + expectedReadSchemaVersion = v3, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v3.toString)).collect() + } + + // query unblocked if force enabled by user + withSQLConf( + DeltaSQLConf.DELTA_CDF_UNSAFE_BATCH_READ_ON_INCOMPATIBLE_SCHEMA_CHANGES.key -> "true") { + val df = cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v3.toString)).dropCDCFields + checkAnswer(df, (0 until 11).map(i => Row(i.toString, i.toString))) + // Note we serve the batch using the renamed column in the latest schema. + assert(df.schema.fieldNames.sameElements(Array("id2", "value"))) + } + + // query just the first few versions using latest schema also blocked + assertBlocked( + expectedIncompatSchemaVersion = 0, + expectedReadSchemaVersion = v3, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion("1")).collect() + } + + // query using endVersion schema across schema boundary also blocked + assertBlocked( + expectedIncompatSchemaVersion = 0, + expectedReadSchemaVersion = v2, + schemaMode = BatchCDFSchemaEndVersion, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v2.toString), + Some(BatchCDFSchemaEndVersion)).collect() + } + + // query using endVersion schema NOT blocked if NOT crossing schema boundary + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v1.toString), + Some(BatchCDFSchemaEndVersion)).dropCDCFields, + (0 until 10).map(_.toString).map(i => Row(i, i)) + ) + + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion(v2.toString), + EndingVersion(v3.toString), + Some(BatchCDFSchemaEndVersion)).dropCDCFields, + Row("10", "10") :: Nil + ) + + // Let's rename the column back + sql(s"ALTER TABLE delta.`${dir.getCanonicalPath}` RENAME COLUMN id2 TO id") + val v4 = deltaLog.update().version + + // write more data + writeDeltaData(Seq(11)) + val v5 = deltaLog.update().version + + // query all changes using latest schema would still block because we crossed an + // intermediary action with a conflicting schema (the first rename). + assertBlocked(expectedIncompatSchemaVersion = v2, expectedReadSchemaVersion = v5) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v5.toString)).collect() + } + + // query all changes using LATEST schema would NOT block if we exclude the first + // rename back, because the data schemas before that are now consistent with the latest. 
+ checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v1.toString)).dropCDCFields, + (0 until 10).map(_.toString).map(i => Row(i, i))) + + // query using endVersion schema is blocked if we cross schema boundary + assertBlocked( + expectedIncompatSchemaVersion = v3, + expectedReadSchemaVersion = v5, + schemaMode = BatchCDFSchemaEndVersion, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + // v3 just pass the first schema change + StartingVersion(v3.toString), + EndingVersion(v5.toString), + Some(BatchCDFSchemaEndVersion)).collect() + } + + // Note how the conflictingVersion is v2 (the first rename), because v1 matches our end + // version schema due to renaming back. + assertBlocked( + expectedIncompatSchemaVersion = v2, + expectedReadSchemaVersion = v5, + schemaMode = BatchCDFSchemaEndVersion) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion(v1.toString), + EndingVersion(v5.toString), + Some(BatchCDFSchemaEndVersion)).collect() + } + } + } + + override def runOnlyTests: Seq[String] = Seq( + "changes from table by name", + "changes from table by path", + "batch write: append, dynamic partition overwrite + CDF", + // incompatible schema changes & schema mode tests + "add column batch cdc read not blocked", + "data type and nullability change batch cdc read blocked", + "drop column batch cdc read blocked", + "rename column batch cdc read blocked" + ) + + protected def assertBlocked( + expectedIncompatSchemaVersion: Long, + expectedReadSchemaVersion: Long, + schemaMode: DeltaBatchCDFSchemaMode = BatchCDFSchemaLegacy, + timeTravel: Boolean = false, + bySchemaChange: Boolean = true)(f: => Unit)(implicit log: DeltaLog): Unit = { + val e = intercept[DeltaUnsupportedOperationException] { + f + } + val (end, readSchemaJson) = if (bySchemaChange) { + assert(e.getErrorClass == "DELTA_CHANGE_DATA_FEED_INCOMPATIBLE_SCHEMA_CHANGE") + val Seq(_, end, readSchemaJson, readSchemaVersion, incompatibleVersion, _, _, _, _) = + e.getMessageParametersArray.toSeq + assert(incompatibleVersion.toLong == expectedIncompatSchemaVersion) + assert(readSchemaVersion.toLong == expectedReadSchemaVersion) + (end, readSchemaJson) + } else { + assert(e.getErrorClass == "DELTA_CHANGE_DATA_FEED_INCOMPATIBLE_DATA_SCHEMA") + val Seq(_, end, readSchemaJson, readSchemaVersion, incompatibleVersion, config) = + e.getMessageParametersArray.toSeq + assert(incompatibleVersion.toLong == expectedIncompatSchemaVersion) + assert(readSchemaVersion.toLong == expectedReadSchemaVersion) + assert(config == DeltaSQLConf.DELTA_CDF_DEFAULT_SCHEMA_MODE_FOR_COLUMN_MAPPING_TABLE.key) + (end, readSchemaJson) + } + + val latestSnapshot = log.update() + schemaMode match { + case BatchCDFSchemaLegacy if timeTravel => + // Read using time travelled schema, it can be arbitrary so nothing to check here + case BatchCDFSchemaEndVersion => + // Read using end version schema + assert(expectedReadSchemaVersion == end.toLong && + log.getSnapshotAt(expectedReadSchemaVersion).schema.json == readSchemaJson) + case _ => + // non time-travel legacy mode and latest mode should both read latest schema + assert(expectedReadSchemaVersion == latestSnapshot.version && + latestSnapshot.schema.json == readSchemaJson) + } + } + + /** + * Write test delta data to test blocking column mapping for CDC batch queries, it takes a + * sequence and write out as a row of strings, assuming the delta log's schema are all strings. 
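+ * For example, with the schema (id string, value string), writeDeltaData(Seq(10)) appends a
+ * single row ("10", "10"), one commit per element of the input sequence.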
+ */ + protected def writeDeltaData( + data: Seq[Int], + userSpecifiedSchema: Option[StructType] = None)(implicit log: DeltaLog): Unit = { + val schema = userSpecifiedSchema.getOrElse(log.update().schema) + data.foreach { i => + val data = Seq(Row(schema.map(_ => i.toString): _*)) + spark.createDataFrame(data.asJava, schema) + .write.format("delta").mode("append").save(log.dataPath.toString) + } + } + + /** + * Set up initial table data, considering current column mapping mode + * + * The table contains 10 rows, with schema both are string + */ + protected def setupInitialDeltaTable(dir: File, upgradeInNameMode: Boolean = false): Unit = { + require(columnMappingModeString != NoMapping.name) + val tablePath = dir.getCanonicalPath + implicit val deltaLog: DeltaLog = DeltaLog.forTable(spark, tablePath) + + if (upgradeInNameMode && columnMappingModeString == NameMapping.name) { + // For name mode, we do an upgrade then write to test that behavior as well + // init table with 5 versions without column mapping + withColumnMappingConf("none") { + writeDeltaData((0 until 5), userSpecifiedSchema = Some( + new StructType().add("id", StringType, true).add("value", StringType, true) + )) + } + // upgrade to name mode + val protocol = deltaLog.snapshot.protocol + val (r, w) = if (protocol.supportsReaderFeatures || protocol.supportsWriterFeatures) { + (TableFeatureProtocolUtils.TABLE_FEATURES_MIN_READER_VERSION, + TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION) + } else { + (ColumnMappingTableFeature.minReaderVersion, ColumnMappingTableFeature.minWriterVersion) + } + sql( + s""" + |ALTER TABLE delta.`${dir.getCanonicalPath}` + |SET TBLPROPERTIES ( + | ${DeltaConfigs.COLUMN_MAPPING_MODE.key} = "name", + | ${DeltaConfigs.MIN_READER_VERSION.key} = "$r", + | ${DeltaConfigs.MIN_WRITER_VERSION.key} = "$w")""".stripMargin) + // write more data + writeDeltaData((5 until 10)) + } else { + // For id mode and non-upgrade name mode, we could just create a table from scratch + withColumnMappingConf(columnMappingModeString) { + writeDeltaData((0 until 10), userSpecifiedSchema = Some( + new StructType().add("id", StringType, true).add("value", StringType, true) + )) + } + } + + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(deltaLog.update().version.toString)).dropCDCFields, + (0 until 10).map(_.toString).toDF("id").withColumn("value", col("id"))) + } +} + +trait DeltaCDCColumnMappingScalaSuiteBase extends DeltaCDCColumnMappingSuiteBase { + + import testImplicits._ + + test("time travel with batch cdf is disbaled by default") { + withTempDir { dir => + Seq(1).toDF("id").write.format("delta").save(dir.getCanonicalPath) + val e = intercept[DeltaAnalysisException] { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion("1"), + readerOptions = Map(DeltaOptions.VERSION_AS_OF -> "0")).collect() + } + assert(e.getErrorClass == "DELTA_UNSUPPORTED_TIME_TRAVEL_VIEWS") + } + } + + // NOTE: we do not support time travel option with SQL API, so we will just test Scala API suite + test("cannot specify both time travel options and schema mode") { + withSQLConf(DeltaSQLConf.DELTA_CDF_ALLOW_TIME_TRAVEL_OPTIONS.key -> "true") { + withTempDir { dir => + Seq(1).toDF("id").write.format("delta").save(dir.getCanonicalPath) + val e = intercept[DeltaIllegalArgumentException] { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion("1"), + Some(BatchCDFSchemaEndVersion), + readerOptions = 
Map(DeltaOptions.VERSION_AS_OF -> "0")).collect() + } + assert(e.getMessage.contains( + DeltaSQLConf.DELTA_CDF_DEFAULT_SCHEMA_MODE_FOR_COLUMN_MAPPING_TABLE.key)) + } + } + } + + test("time travel option is respected") { + withSQLConf(DeltaSQLConf.DELTA_CDF_ALLOW_TIME_TRAVEL_OPTIONS.key -> "true") { + withTempDir { dir => + // Set up an initial table with 10 records in schema + setupInitialDeltaTable(dir) + implicit val deltaLog: DeltaLog = DeltaLog.forTable(spark, dir.getCanonicalPath) + val v1 = deltaLog.update().version + + // Add a column + sql(s"ALTER TABLE delta.`${dir.getCanonicalPath}` ADD COLUMN (prop string)") + val v2 = deltaLog.update().version + + // write more data + writeDeltaData(Seq(10)) + val v3 = deltaLog.update().version + + // Rename a column + sql(s"ALTER TABLE delta.`${dir.getCanonicalPath}` RENAME COLUMN id TO id2") + val v4 = deltaLog.update().version + + // write more data + writeDeltaData(Seq(11)) + val v5 = deltaLog.update().version + + // query changes between version 0 - v1, not crossing schema boundary + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v1.toString), + readerOptions = Map(DeltaOptions.VERSION_AS_OF -> v1.toString)).dropCDCFields, + (0 until 10).map(_.toString).map(i => Row(i, i))) + + // query across add column, but not cross the rename, not blocked + checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + EndingVersion(v3.toString), + // v2 is the add column schema change + readerOptions = Map(DeltaOptions.VERSION_AS_OF -> v2.toString)).dropCDCFields, + // Note how the first 10 records now misses a column, but it's fine + (0 until 10).map(_.toString).map(i => Row(i, i, null)) ++ + Seq(Row("10", "10", "10"))) + + // query across rename is blocked, if we are still specifying an old version + // note it failed at v4, because the initial schema does not conflict with schema @ v2 + assertBlocked( + expectedIncompatSchemaVersion = v4, + expectedReadSchemaVersion = v2, + timeTravel = true) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + // v5 cross the v4 rename column + EndingVersion(v5.toString), + // v2 is the add column schema change + readerOptions = Map(DeltaOptions.VERSION_AS_OF -> v2.toString)).collect() + } + + // Even the querying range has no schema change, the data files are still not + // compatible with the read schema due to arbitrary time travel. + assertBlocked( + expectedIncompatSchemaVersion = 0, + expectedReadSchemaVersion = v4, + timeTravel = true, + bySchemaChange = false) { + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion("0"), + // v1 still uses the schema prior to the rename + EndingVersion(v1.toString), + // v4 is the rename column change + readerOptions = Map(DeltaOptions.VERSION_AS_OF -> v4.toString)).collect() + } + + // But without crossing schema change boundary (v4 - v5) using v4's renamed schema, + // we can load the batch. 
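+ // (The expected row below has three fields because the read uses v4's schema, which includes the
+ // `prop` column added at v2; the row appended at v5 has all three fields set to "11".)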
+ checkAnswer( + cdcRead( + new TablePath(dir.getCanonicalPath), + StartingVersion(v4.toString), + EndingVersion(v5.toString), + readerOptions = Map(DeltaOptions.VERSION_AS_OF -> v4.toString)).dropCDCFields, + Seq(Row("11", "11", "11"))) + } + } + } +} + +class DeltaCDCIdColumnMappingSuite extends DeltaCDCScalaSuite + with DeltaCDCColumnMappingScalaSuiteBase + with DeltaColumnMappingEnableIdMode + +class DeltaCDCNameColumnMappingSuite extends DeltaCDCScalaSuite + with DeltaCDCColumnMappingScalaSuiteBase + with DeltaColumnMappingEnableNameMode + +class DeltaCDCSQLIdColumnMappingSuite extends DeltaCDCSQLSuite + with DeltaCDCColumnMappingSuiteBase + with DeltaColumnMappingEnableIdMode + +class DeltaCDCSQLNameColumnMappingSuite extends DeltaCDCSQLSuite + with DeltaCDCColumnMappingSuiteBase + with DeltaColumnMappingEnableNameMode diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCSQLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCSQLSuite.scala new file mode 100644 index 00000000000..07b582c4f90 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCSQLSuite.scala @@ -0,0 +1,326 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.util.Date + +import org.apache.spark.sql.delta.actions.Protocol +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql.{AnalysisException, DataFrame} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.LongType + +class DeltaCDCSQLSuite extends DeltaCDCSuiteBase with DeltaColumnMappingTestUtils { + + /** Single method to do all kinds of CDC reads */ + def cdcRead( + tblId: TblId, + start: Boundary, + end: Boundary, + schemaMode: Option[DeltaBatchCDFSchemaMode] = Some(BatchCDFSchemaLegacy), + // SQL API does not support generic reader options, so it's a noop here + readerOptions: Map[String, String] = Map.empty): DataFrame = { + + // Set the batch CDF schema mode using SQL conf if we specified it + if (schemaMode.isDefined) { + var result: DataFrame = null + withSQLConf(DeltaSQLConf.DELTA_CDF_DEFAULT_SCHEMA_MODE_FOR_COLUMN_MAPPING_TABLE.key -> + schemaMode.get.name) { + result = cdcRead(tblId, start, end, None, readerOptions) + } + return result + } + + val startPrefix: String = start match { + case startingVersion: StartingVersion => + s"""${startingVersion.value}""" + + case startingTimestamp: StartingTimestamp => + s"""'${startingTimestamp.value}'""" + + case Unbounded => + "" + } + val endPrefix: String = end match { + case endingVersion: EndingVersion => + s"""${endingVersion.value}""" + + case endingTimestamp: EndingTimestamp => + s"""'${endingTimestamp.value}'""" + + case Unbounded => + "" + } + val fnName = tblId match { + case _: TablePath => + DeltaTableValueFunctions.CDC_PATH_BASED + case _: TableName => + 
DeltaTableValueFunctions.CDC_NAME_BASED + case _ => + throw new IllegalArgumentException("No table name or path provided") + } + + if (endPrefix === "") { + sql(s"SELECT * FROM $fnName('${tblId.id}', $startPrefix)") + } else { + sql(s"SELECT * FROM $fnName('${tblId.id}', $startPrefix, $endPrefix) ") + } + } + + override def ctas( + srcTbl: String, + dstTbl: String, + disableCDC: Boolean = false): Unit = { + + val prefix = s"CREATE TABLE ${dstTbl} USING DELTA" + val suffix = s" AS SELECT * FROM table_changes('${srcTbl}', 0, 1)" + + if (disableCDC) { + sql(prefix + s" TBLPROPERTIES (${DeltaConfigs.CHANGE_DATA_FEED.key} = false)" + suffix) + } else { + sql(prefix + suffix) + } + } + + test("select individual column should push down filters") { + val tblName = "tbl" + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + + val plans = DeltaTestUtils.withAllPlansCaptured(spark) { + val res = sql(s"SELECT id, _change_type FROM table_changes('$tblName', 0, 1)") + .where(col("id") < lit(5)) + + assert(res.columns === Seq("id", "_change_type")) + checkAnswer( + res, + spark.range(5) + .withColumn("_change_type", lit("insert"))) + } + assert(plans.map(_.executedPlan).toString + .contains("PushedFilters: [IsNotNull(id), LessThan(id,5)]")) + } + } + + test("use cdc query as a subquery") { + val tblName = "tbl" + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + + val res = sql(s""" + SELECT * FROM RANGE(30) WHERE id > ( + SELECT count(*) FROM table_changes('$tblName', 0, 1)) + """) + checkAnswer( + res, + spark.range(21, 30).toDF()) + } + } + + test("cdc table_changes is not case sensitive") { + val tblName = "tbl" + withTempDir { dir => + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + + checkAnswer( + spark.sql(s"SELECT * FROM tabLe_chAnges('$tblName', 0, 1)"), + spark.sql(s"SELECT * FROM taBle_cHanges('$tblName', 0, 1)") + ) + } + } + } + + test("cdc table_changes_by_path are not case sensitive") { + withTempDir { dir => + createTblWithThreeVersions(path = Some(dir.getAbsolutePath)) + + checkAnswer( + spark.sql(s"SELECT * FROM tabLe_chaNges_By_pAth('${dir.getAbsolutePath}', 0, 1)"), + spark.sql(s"SELECT * FROM taBle_cHanges_bY_paTh('${dir.getAbsolutePath}', 0, 1)") + ) + } + } + + + test("parse multi part table name") { + val tblName = "tbl" + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + + checkAnswer( + spark.sql(s"SELECT * FROM table_changes('$tblName', 0, 1)"), + spark.sql(s"SELECT * FROM table_changes('default.`${tblName}`', 0, 1)") + ) + } + } + + test("negative case - invalid number of args") { + val tbl = "tbl" + withTable(tbl) { + spark.range(10).write.format("delta").saveAsTable(tbl) + + val invalidQueries = Seq( + s"SELECT * FROM table_changes()", + s"SELECT * FROM table_changes('tbl', 1, 2, 3)", + s"SELECT * FROM table_changes('tbl')", + s"SELECT * FROM table_changes_by_path()", + s"SELECT * FROM table_changes_by_path('tbl', 1, 2, 3)", + s"SELECT * FROM table_changes_by_path('tbl')" + ) + invalidQueries.foreach { q => + val e = intercept[AnalysisException] { + sql(q) + } + assert(e.getMessage.contains("requires at least 2 arguments and at most 3 arguments"), + s"failed query: $q ") + } + } + } + + test("negative case - invalid type of args") { + val tbl = "tbl" + withTable(tbl) { + spark.range(10).write.format("delta").saveAsTable(tbl) + + val invalidQueries = Seq( + s"SELECT * FROM table_changes(1, 1)", + s"SELECT * FROM table_changes('$tbl', 1.0)", + s"SELECT * FROM 
table_changes_by_path(1, 1)", + s"SELECT * FROM table_changes_by_path('$tbl', 1.0)" + ) + + invalidQueries.foreach { q => + val e = intercept[AnalysisException] { + sql(q) + } + assert(e.getMessage.contains("Unsupported expression type"), s"failed query: $q") + } + } + } + + test("resolve expression for timestamp function - now") { + val tbl = "tbl" + withTable(tbl) { + createTblWithThreeVersions(tblName = Some(tbl)) + + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tbl)) + + val currentTime = new Date().getTime + modifyDeltaTimestamp(deltaLog, 0, currentTime - 100000) + modifyDeltaTimestamp(deltaLog, 1, currentTime) + modifyDeltaTimestamp(deltaLog, 2, currentTime + 100000) + + val readDf = sql(s"SELECT * FROM table_changes('$tbl', 0, now())") + checkCDCAnswer( + DeltaLog.forTable(spark, TableIdentifier("tbl")), + readDf, + spark.range(20) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType)) + ) + + // more complex expression + val readDf2 = sql(s"SELECT * FROM table_changes('$tbl', 0, now() + interval 5 seconds)") + checkCDCAnswer( + DeltaLog.forTable(spark, TableIdentifier("tbl")), + readDf2, + spark.range(20) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType)) + ) + } + } + + test("resolve invalid table name should throw error") { + var e = intercept[AnalysisException] { + sql(s"SELECT * FROM table_changes(now(), 1, 1)") + } + assert(e.getMessage.contains("Unsupported expression type(TimestampType) for table name." + + " The supported types are [StringType literal]")) + + e = intercept[AnalysisException] { + sql(s"SELECT * FROM table_changes('invalidtable', 1, 1)") + } + assert(e.getErrorClass === "TABLE_OR_VIEW_NOT_FOUND") + + withTable ("tbl") { + spark.range(1).write.format("delta").saveAsTable("tbl") + val e = intercept[AnalysisException] { + sql(s"SELECT * FROM table_changes(concat('tb', 'l'), 1, 1)") + } + assert(e.getMessage.contains("Unsupported expression type(StringType) for table name." + + " The supported types are [StringType literal]")) + } + } + + test("resolution of complex expression should throw an error") { + val tbl = "tbl" + withTable(tbl) { + spark.range(10).write.format("delta").saveAsTable(tbl) + checkError( + exception = intercept[AnalysisException] { + sql(s"SELECT * FROM table_changes('$tbl', 0, id)") + }, + errorClass = "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + parameters = Map("objectName" -> "`id`"), + queryContext = Array(ExpectedContext( + fragment = "id", + start = 38, + stop = 39))) + } + } + + test("protocol version") { + withTable("tbl") { + spark.range(10).write.format("delta").saveAsTable("tbl") + val log = DeltaLog.forTable(spark, TableIdentifier(tableName = "tbl")) + // We set CDC to be enabled by default, so this should automatically bump the writer protocol + // to the required version. 
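+ // Writer version 4 is the minimum protocol version that supports Change Data Feed; with column
+ // mapping also enabled the table needs reader version 2 and writer version 5 instead.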
+ if (columnMappingEnabled) { + assert(log.snapshot.protocol == Protocol(2, 5)) + } else { + assert(log.snapshot.protocol == Protocol(1, 4)) + } + } + } + + test("table_changes and table_changes_by_path with a non-delta table") { + withTempDir { dir => + withTable("tbl") { + spark.range(10).write.format("parquet") + .option("path", dir.getAbsolutePath) + .saveAsTable("tbl") + + var e = intercept[AnalysisException] { + spark.sql(s"SELECT * FROM table_changes('tbl', 0, 1)") + } + assert(e.getErrorClass == "DELTA_TABLE_ONLY_OPERATION") + assert(e.getMessage.contains("table_changes")) + + e = intercept[AnalysisException] { + spark.sql(s"SELECT * FROM table_changes_by_path('${dir.getAbsolutePath}', 0, 1)") + } + assert(e.getErrorClass == "DELTA_MISSING_DELTA_TABLE") + assert(e.getMessage.contains("not a Delta table")) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCStreamSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCStreamSuite.scala new file mode 100644 index 00000000000..fa262cd22bf --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCStreamSuite.scala @@ -0,0 +1,1095 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.sql.Timestamp +import java.text.SimpleDateFormat +import java.util.Date + +import scala.language.implicitConversions + +import org.apache.spark.sql.delta.actions.AddCDCFile +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.sources.{DeltaSourceOffset, DeltaSQLConf} +import org.apache.spark.sql.delta.test.{DeltaColumnMappingSelectedTestMixin, DeltaSQLCommandTest} +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} +import io.delta.tables._ +import org.apache.hadoop.fs.Path + +import org.apache.spark.{SparkConf, SparkThrowable} +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.streaming.{StreamingQuery, StreamingQueryException, StreamTest, Trigger} +import org.apache.spark.sql.types.StructType + +trait DeltaCDCStreamSuiteBase extends StreamTest with DeltaSQLCommandTest + with DeltaSourceSuiteBase + with DeltaColumnMappingTestUtils { + + import testImplicits._ + import io.delta.implicits._ + + override protected def sparkConf: SparkConf = super.sparkConf + .set(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true") + + /** Modify timestamp for a delta commit, used to test timestamp querying */ + def modifyDeltaTimestamp(deltaLog: DeltaLog, version: Long, time: Long): Unit = { + val file = new File(FileNames.deltaFile(deltaLog.logPath, version).toUri) + file.setLastModified(time) + val crc = new File(FileNames.checksumFile(deltaLog.logPath, version).toUri) + if (crc.exists()) { + crc.setLastModified(time) + } + } + + /** + * Create two tests for maxFilesPerTrigger and maxBytesPerTrigger + */ + protected def testRateLimit( + name: String, + maxFilesPerTrigger: String, + maxBytesPerTrigger: String)(f: (String, String) => Unit): Unit = { + Seq(("maxFilesPerTrigger", maxFilesPerTrigger), ("maxBytesPerTrigger", maxBytesPerTrigger)) + .foreach { case (key: String, value: String) => + test(s"rateLimit - $key - $name") { + f(key, value) + } + } + } + + testQuietly("no startingVersion should result fetch the entire snapshot") { + withTempDir { inputDir => + withSQLConf(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "false") { + // version 0 + Seq(1, 9).toDF("value").write.format("delta").save(inputDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + // version 1 + deltaTable.delete("value = 9") + + // version 2 + Seq(2).toDF("value").write.format("delta") + .mode("append") + .save(inputDir.getAbsolutePath) + } + // enable cdc - version 3 + sql(s"ALTER TABLE delta.`${inputDir.getAbsolutePath}` SET TBLPROPERTIES " + + s"(${DeltaConfigs.CHANGE_DATA_FEED.key}=true)") + + val df = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .format("delta") + .load(inputDir.getCanonicalPath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + testStream(df) ( + ProcessAllAvailable(), + CheckAnswer((1, "insert", 3), (2, "insert", 3)), + Execute { _ => + deltaTable.delete("value = 1") // version 4 + }, + ProcessAllAvailable(), + CheckAnswer((1, "insert", 3), (2, "insert", 3), (1, "delete", 4)) + ) + } + } + + testQuietly("CDC initial snapshot should end at base index of next version") { + withTempDir { inputDir => + withSQLConf(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true") { + // version 0 + Seq(5, 
6).toDF("value").write.format("delta").save(inputDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + + val df = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .format("delta") + .load(inputDir.getCanonicalPath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(df)( + ProcessAllAvailable(), + CheckAnswer((5, "insert", 0), (6, "insert", 0)), + AssertOnQuery { q => + val offset = q.committedOffsets.iterator.next()._2.asInstanceOf[DeltaSourceOffset] + // The initial snapshot (version 0) was completely processed, so we should now be at + // the start of version 1. + assert(offset.reservoirVersion === 1) + assert(offset.index === DeltaSourceOffset.BASE_INDEX) + true + }, + StopStream + ) + } + } + } + + test("startingVersion = latest") { + withTempDir { inputDir => + Seq(1, 2).toDF("value").write.format("delta").save(inputDir.getAbsolutePath) + + val df = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", "latest") + .format("delta") + .load(inputDir.getCanonicalPath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(df) ( + ProcessAllAvailable(), + CheckAnswer(), + AddToReservoir(inputDir, Seq(3).toDF("value")), + ProcessAllAvailable(), + CheckAnswer((3, "insert", 1)) + ) + } + } + + test("user provided startingVersion") { + withTempDir { inputDir => + // version 0 + Seq(1, 2, 3).toDF("id").write.delta(inputDir.toString) + + // version 1 + Seq(4, 5).toDF("id").write.mode("append").delta(inputDir.toString) + + // version 2 + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + + val df = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", "1") + .format("delta") + .load(inputDir.toString) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(df) ( + ProcessAllAvailable(), + CheckAnswer((4, "insert", 1), (5, "insert", 1)), + Execute { _ => + deltaTable.delete("id = 3") // version 2 + }, + ProcessAllAvailable(), + CheckAnswer((4, "insert", 1), (5, "insert", 1), (3, "delete", 2)) + ) + } + } + + test("user provided startingTimestamp") { + withTempDir { inputDir => + // version 0 + Seq(1, 2, 3).toDF("id").write.delta(inputDir.toString) + val deltaLog = DeltaLog.forTable(spark, inputDir.getAbsolutePath) + modifyDeltaTimestamp(deltaLog, 0, 1000) + + // version 1 + Seq(-1).toDF("id").write.mode("append").delta(inputDir.toString) + modifyDeltaTimestamp(deltaLog, 1, 2000) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + val startTs = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .format(new Date(2000)) + val df = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingTimestamp", startTs) + .format("delta") + .load(inputDir.toString) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(df) ( + ProcessAllAvailable(), + CheckAnswer((-1, "insert", 1)), + Execute { _ => + deltaTable.update(expr("id == -1"), Map("id" -> lit("4"))) + }, + ProcessAllAvailable(), + CheckAnswer((-1, "insert", 1), (-1, "update_preimage", 2), (4, "update_postimage", 2)) + ) + } + } + + testQuietly("starting[Version/Timestamp] > latest version") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + // version 0 + Seq(1, 2, 3, 4, 5, 6).toDF("id").write.delta(inputDir.toString) + val deltaLog = DeltaLog.forTable(spark, inputDir.getAbsolutePath) + modifyDeltaTimestamp(deltaLog, 0, 1000) + + val df1 = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + 
.option("startingVersion", 1) + .format("delta") + .load(inputDir.toString) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + val startTs = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .format(new Date(3000)) + val commitTs = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .format(new Date(1000)) + val df2 = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingTimestamp", startTs) + .format("delta") + .load(inputDir.toString) + + val e1 = VersionNotFoundException(1, 0, 0).getMessage + val e2 = DeltaErrors.timestampGreaterThanLatestCommit( + new Timestamp(3000), new Timestamp(1000), commitTs).getMessage + + Seq((df1, e1), (df2, e2)).foreach { pair => + val df = pair._1 + val stream = df.select("id").writeStream + .option("checkpointLocation", checkpointDir.toString) + .outputMode("append") + .format("delta") + .start(outputDir.getAbsolutePath) + val e = intercept[StreamingQueryException] { + stream.processAllAvailable() + } + stream.stop() + assert(e.cause.getMessage === pair._2) + } + } + } + + test("check starting[Version/Timestamp] > latest version without error") { + Seq("version", "timestamp").foreach { target => + withTempDir { inputDir => + withSQLConf(DeltaSQLConf.DELTA_CDF_ALLOW_OUT_OF_RANGE_TIMESTAMP.key -> "true") { + // version 0 + Seq(1, 2, 3).toDF("id").write.delta(inputDir.toString) + val inputPath = inputDir.getAbsolutePath + val deltaLog = DeltaLog.forTable(spark, inputPath) + modifyDeltaTimestamp(deltaLog, 0, 1000) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputPath) + + // Pick both the timestamp and version beyond latest commmit's version. + val df = if (target == "timestamp") { + // build dataframe with starting timestamp option. + val startTs = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .format(new Date(2000)) + spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingTimestamp", startTs) + .format("delta") + .load(inputDir.toString) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + } else { + assert(target == "version") + // build dataframe with starting version option. + spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", 1) + .format("delta") + .load(inputDir.toString) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + } + + testStream(df)( + ProcessAllAvailable(), + // Expect empty update from the read stream. + CheckAnswer(), + // Verify new updates after the start timestamp/version can be read. 
+ Execute { _ => + deltaTable.update(expr("id == 1"), Map("id" -> lit("4"))) + }, + ProcessAllAvailable(), + CheckAnswer((1, "update_preimage", 1), (4, "update_postimage", 1)) + ) + } + } + } + } + + testQuietly("startingVersion and startingTimestamp are both set") { + withTempDir { tableDir => + val tablePath = tableDir.getCanonicalPath + spark.range(10).write.format("delta").save(tableDir.getAbsolutePath) + val q = spark.readStream + .format("delta") + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", 0L) + .option("startingTimestamp", "2020-07-15") + .load(tablePath) + .writeStream + .format("console") + .start() + assert(intercept[StreamingQueryException] { + q.processAllAvailable() + }.getMessage.contains("Please either provide 'startingVersion' or 'startingTimestamp'")) + q.stop() + } + } + + test("cdc streams should respect checkpoint") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + // write 3 versions + Seq(1, 2, 3).toDF("id").write.format("delta").save(inputDir.getAbsolutePath) + Seq(4, 5, 6).toDF("id").write.format("delta") + .mode("append") + .save(inputDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + deltaTable.delete("id = 5") + + val checkpointDir1 = new Path(checkpointDir.getAbsolutePath, "ck1") + val checkpointDir2 = new Path(checkpointDir.getAbsolutePath, "ck2") + + def streamChanges( + startingVersion: Long, + checkpointLocation: String): Unit = { + val q = spark.readStream + .format("delta") + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", startingVersion) + .load(inputDir.getCanonicalPath) + .select("id") + .writeStream + .format("delta") + .option("checkpointLocation", checkpointLocation) + .start(outputDir.getCanonicalPath) + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + + streamChanges(1, checkpointDir1.toString) + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq(4, 5, 5, 6).map(_.toLong).toDF("id")) + + // Second time streaming should not write the rows again + streamChanges(1, checkpointDir1.toString) + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq(4, 5, 5, 6).map(_.toLong).toDF("id")) + + // new checkpoint location + streamChanges(1, checkpointDir2.toString) + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Seq(4, 4, 5, 5, 5, 5, 6, 6).map(_.toLong).toDF("id")) + } + } + + test("cdc streams with noop merge") { + withSQLConf( + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true" + ) { + withTempDirs { (srcDir, targetDir, checkpointDir) => + // write source table + Seq((1, "a"), (2, "b")) + .toDF("key1", "val1") + .write + .format("delta") + .save(srcDir.getCanonicalPath) + + // write target table + Seq((1, "t"), (2, "u")) + .toDF("key2", "val2") + .write + .format("delta") + .save(targetDir.getCanonicalPath) + + val srcDF = spark.read.format("delta").load(srcDir.getCanonicalPath) + val tgtTable = io.delta.tables.DeltaTable.forPath(targetDir.getCanonicalPath) + + // Perform the merge where all matching and non-matching conditions fail for + // target rows. + tgtTable + .merge(srcDF, + "key1 = key2") + .whenMatched("key1 = 10") + .updateExpr(Map("key2" -> "key1", "val2" -> "val1")) + .whenNotMatched("key1 = 11") + .insertExpr(Map("key2" -> "key1", "val2" -> "val1")) + .execute() + + // Read the target dir with cdc read option and ensure that + // data frame is empty. 
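+ // Neither the matched clause (key1 = 10) nor the not-matched clause (key1 = 11) applies to any
+ // source row, so the merge commits no row-level changes and CDF has nothing to emit.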
+ val q = spark.readStream + .format("delta") + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", "1") + .load(targetDir.getCanonicalPath) + .writeStream + .format("memory") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .queryName("testQuery") + .start() + try { + q.processAllAvailable() + } finally { + q.stop() + } + + assert(spark.table("testQuery").isEmpty) + } + } + } + + Seq(true, false).foreach { readChangeFeed => + test(s"streams updating latest offset with readChangeFeed=$readChangeFeed") { + withTempDirs { (inputDir, checkpointDir, outputDir) => + withSQLConf(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true") { + + sql(s"CREATE TABLE delta.`$inputDir` (id BIGINT, value STRING) USING DELTA") + // save some rows to input table. + spark.range(10).withColumn("value", lit("a")) + .write.format("delta").mode("overwrite") + .option("enableChangeDataFeed", "true").save(inputDir.getAbsolutePath) + + def runStreamingQuery(): StreamingQuery = { + // process the input table in a CDC manner + val df = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, readChangeFeed) + .format("delta") + .load(inputDir.getAbsolutePath) + val query = df + .select("id") + .writeStream + .format("delta") + .outputMode("append") + .option("checkpointLocation", checkpointDir.toString) + .start(outputDir.getAbsolutePath) + + query.processAllAvailable() + query.stop() + query.awaitTermination() + query + } + + var query = runStreamingQuery() + + val deltaLog = DeltaLog.forTable(spark, inputDir.toString) + // Do three no-op updates to the table. These are tricky because the commits have no + // changes, but the stream should still pick up the new versions and progress past them. + for (i <- 0 to 2) { + deltaLog.startTransaction().commit(Seq(), DeltaOperations.ManualUpdate) + } + + // Read again from input table and no new data should be generated + query = runStreamingQuery() + + // check that the last batch was committed and that the + // reservoirVersion for the table was updated to latest + // in both cdf and non-cdf cases. 
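+ // reservoirVersion 5 with BASE_INDEX means the source has fully consumed the latest commit
+ // (table creation, the initial write, then the three no-op commits = versions 0 through 4) and
+ // is parked at the base of the next, not-yet-written version.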
+ assert(query.lastProgress.batchId === 1) + val endOffset = + JsonUtils.fromJson[DeltaSourceOffset](query.lastProgress.sources.head.endOffset) + assert(endOffset.reservoirVersion === 5, + s"endOffset = $endOffset") + assert(endOffset.index === DeltaSourceOffset.BASE_INDEX, s"endOffset = $endOffset") + } + } + } + } + + test("cdc streams should be able to get offset when there only RemoveFiles") { + withTempDir { inputDir => + // version 0 + spark.range(2).withColumn("part", 'id % 2) + .write + .format("delta") + .partitionBy("part") + .save(inputDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + + val df = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", 0) + .format("delta") + .load(inputDir.toString) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(df) ( + ProcessAllAvailable(), + CheckAnswer((0, 0, "insert", 0), (1, 1, "insert", 0)), + Execute { _ => + deltaTable.delete("part = 0") // version 2 + }, + ProcessAllAvailable(), + CheckAnswer((0, 0, "insert", 0), (1, 1, "insert", 0), (0, 0, "delete", 1)) + ) + } + } + + test("cdc streams should work starting from RemoveFile") { + withTempDir { inputDir => + // version 0 + spark.range(2).withColumn("part", 'id % 2) + .write + .format("delta") + .partitionBy("part") + .save(inputDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + + deltaTable.delete("part = 0") + + val df = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", 1) + .format("delta") + .load(inputDir.toString) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(df) ( + ProcessAllAvailable(), + CheckAnswer((0, 0, "delete", 1)) + ) + } + } + + test("cdc streams should work starting from AddCDCFile") { + withTempDir { inputDir => + // version 0 + spark.range(2).withColumn("col2", 'id % 2) + .repartition(1) + .write + .format("delta") + .save(inputDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + + deltaTable.delete("col2 = 0") + + val df = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", 1) + .format("delta") + .load(inputDir.toString) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(df) ( + ProcessAllAvailable(), + CheckAnswer((0, 0, "delete", 1)), + AddToReservoir(inputDir, spark.range(2, 3).withColumn("col2", 'id % 2)), + ProcessAllAvailable(), + CheckAnswer((0, 0, "delete", 1), (2, 0, "insert", 2)) + ) + } + } + + testRateLimit(s"overall", "1", "1b") { (key, value) => + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + + // write - version 0 - 2 AddFiles - Adds 4 rows + spark.range(0, 4, 1, 1).toDF("id") + .withColumn("part", col("id") % 2) // 2 partitions + .write + .format("delta") + .partitionBy("part") + .save(inputDir.getAbsolutePath) + + assert(deltaLog.snapshot.version == 0) + assert(deltaLog.snapshot.numOfFiles == 2) + + // write - version 1 - 1 AddFile - Adds 1 row + Seq(4L).toDF("id").withColumn("part", lit(-1L)) + .write + .format("delta") + .mode("append") + .partitionBy("part") + .save(deltaLog.dataPath.toString) + assert(deltaLog.snapshot.version == 1) + assert(deltaLog.snapshot.numOfFiles == 3) + + // delete - version 2 - 1 RemoveFile - Removes 1 row + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + deltaTable.delete("part = -1") + assert(deltaLog.snapshot.version == 2) + 
assert(deltaLog.snapshot.numOfFiles == 2) + + // update the table - version 3 - 2 cdc files - Updates 2 rows + deltaTable.update(expr("id < 2"), Map("id" -> lit(0L))) + + // update the table - version 4 - 2 cdc files - Updates 2 rows + deltaTable.update(expr("id > 1"), Map("id" -> lit(0L))) + + val rowsPerBatch = Seq( + 2, // 2 rows from 1 AddFile + 2, // 2 rows from the 2nd AddFile + 1, // 1 row from the 3rd AddFile + 1, // 1 row from the RemoveFile + 4, // 4 rows(pre_image and post_image) from the 2 AddCDCFile + 4 // 4 rows(pre_image and post_image) from the 2 AddCDCFile + ) + val q = spark.readStream + .format("delta") + .option(key, value) + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", "0") + .load(inputDir.getCanonicalPath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(q) ( + ProcessAllAvailable(), + CheckProgress(rowsPerBatch), + CheckAnswer( + (0, 0, "insert", 0), + (1, 1, "insert", 0), + (2, 0, "insert", 0), + (3, 1, "insert", 0), + (4, -1, "insert", 1), + (4, -1, "delete", 2), + (0, 0, "update_preimage", 3), + (0, 0, "update_postimage", 3), + (1, 1, "update_preimage", 3), + (0, 1, "update_postimage", 3), + (2, 0, "update_preimage", 4), + (0, 0, "update_postimage", 4), + (3, 1, "update_preimage", 4), + (0, 1, "update_postimage", 4) + ) + ) + } + } + + testRateLimit(s"starting from initial snapshot", "1", "1b") { (key, value) => + withTempDir { inputDir => + // 3 commits - 3 AddFiles each + (0 until 3).foreach { i => + spark.range(i, i + 1, 1, 1) + .write + .mode("append") + .format("delta") + .save(inputDir.getAbsolutePath) + } + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + assert(deltaLog.snapshot.numOfFiles === 3) + + // 1 commit - 2 AddFiles + spark.range(3, 5, 1, 2) + .write + .mode("append") + .format("delta") + .save(inputDir.getAbsolutePath) + + assert(deltaLog.snapshot.numOfFiles === 5) + + val q = spark.readStream + .format("delta") + .option(key, value) + .option(DeltaOptions.CDC_READ_OPTION, "true") + .load(inputDir.getCanonicalPath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + // 5 batches for the 5 commits split across commits and index number. 
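+ // (More precisely: the stream starts from the initial snapshot of the latest version 3, which
+ // holds five single-row data files, so a one-file / one-byte limit yields five one-row batches,
+ // all reported as inserts at _commit_version 3.)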
+ val rowsPerBatch = Seq(1, 1, 1, 1, 1) + + testStream(q)( + ProcessAllAvailable(), + CheckProgress(rowsPerBatch), + CheckAnswer( + (0, "insert", 3), + (1, "insert", 3), + (2, "insert", 3), + (3, "insert", 3), + (4, "insert", 3) + ) + ) + } + } + + testRateLimit(s"should not deadlock", "1", "1b") { (key, value) => + withTempDir { inputDir => + // version 0 - 2 AddFiles + spark.range(2) + .withColumn("part", 'id % 2) + .withColumn("col3", lit(0)) + .repartition(1) + .write + .format("delta") + .partitionBy("part") + .save(inputDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + // version 1 - 2 AddCDCFiles + deltaTable.update(expr("col3 < 2"), Map("col3" -> lit("0"))) + + // version 2 - 2 AddCDCFiles + deltaTable.update(expr("col3 < 2"), Map("col3" -> lit("1"))) + + val df = spark.readStream + .format("delta") + .option(key, value) + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", "1") + .load(inputDir.getCanonicalPath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(df)( + ProcessAllAvailable(), + CheckProgress(Seq(4, 4)),// 4 rows(2 pre- and 2 post-images) for each version + CheckAnswer( + (0, 0, 0, "update_preimage", 1), + (0, 0, 0, "update_postimage", 1), + (0, 0, 0, "update_preimage", 2), + (0, 0, 1, "update_postimage", 2), + (1, 1, 0, "update_preimage", 1), + (1, 1, 0, "update_postimage", 1), + (1, 1, 0, "update_preimage", 2), + (1, 1, 1, "update_postimage", 2) + ) + ) + } + } + + test("maxFilesPerTrigger - 2 successive AddCDCFile commits") { + withTempDir { inputDir => + // version 0 - 2 AddFiles + spark.range(2) + .withColumn("part", 'id % 2) + .withColumn("col3", lit(0)) + .repartition(1) + .write + .format("delta") + .partitionBy("part") + .save(inputDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + // version 1 - 2 AddCDCFiles + deltaTable.update(expr("col3 < 2"), Map("col3" -> lit("0"))) + + // version 2 - 2 AddCDCFiles + deltaTable.update(expr("col3 < 2"), Map("col3" -> lit("1"))) + + val df = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "3") + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", "0") + .load(inputDir.getCanonicalPath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + // test whether the AddCDCFile commits do not get split up. 
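+ // With maxFilesPerTrigger = 3 the two AddFiles of version 0 form the first batch on their own,
+ // and each update commit's pair of AddCDCFiles is admitted as a whole in its own batch rather
+ // than being split to top up the remaining file budget.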
+ val rowsPerBatch = Seq( + 2, // 2 rows from the 2 AddFile + 4, // 4 rows(pre and post image) from the 2 AddCDCFiles + 4 // 4 rows(pre and post image) from 2 AddCDCFiles + ) + + testStream(df)( + ProcessAllAvailable(), + CheckProgress(rowsPerBatch), + CheckAnswer( + (0, 0, 0, "insert", 0), + (1, 1, 0, "insert", 0), + (0, 0, 0, "update_preimage", 1), + (0, 0, 0, "update_postimage", 1), + (1, 1, 0, "update_preimage", 1), + (1, 1, 0, "update_postimage", 1), + (0, 0, 0, "update_preimage", 2), + (0, 0, 1, "update_postimage", 2), + (1, 1, 0, "update_preimage", 2), + (1, 1, 1, "update_postimage", 2) + ) + ) + } + } + + test("maxFilesPerTrigger with Trigger.AvailableNow respects read limits") { + withTempDir { inputDir => + // version 0 - 2 AddFiles + spark.range(2) + .withColumn("part", 'id % 2) + .withColumn("col3", lit(0)) + .repartition(1) + .write + .format("delta") + .partitionBy("part") + .save(inputDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + // version 1 - 2 AddCDCFiles + deltaTable.update(expr("col3 < 2"), Map("col3" -> lit("0"))) + + // version 2 - 2 AddCDCFiles + deltaTable.update(expr("col3 < 2"), Map("col3" -> lit("1"))) + + val df = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "3") + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", "0") + .load(inputDir.getCanonicalPath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + // test whether the AddCDCFile commits do not get split up. + val rowsPerBatch = Seq( + 2, // 2 rows from the 2 AddFile + 4, // 4 rows(pre and post image) from the 2 AddCDCFiles + 4 // 4 rows(pre and post image) from 2 AddCDCFiles + ) + + testStream(df)( + StartStream(Trigger.AvailableNow), + Execute { query => + assert(query.awaitTermination(10000)) + }, + CheckProgress(rowsPerBatch), + CheckAnswer( + (0, 0, 0, "insert", 0), + (1, 1, 0, "insert", 0), + (0, 0, 0, "update_preimage", 1), + (0, 0, 0, "update_postimage", 1), + (1, 1, 0, "update_preimage", 1), + (1, 1, 0, "update_postimage", 1), + (0, 0, 0, "update_preimage", 2), + (0, 0, 1, "update_postimage", 2), + (1, 1, 0, "update_preimage", 2), + (1, 1, 1, "update_postimage", 2) + ) + ) + } + } + + test("excludeRegex works with cdc") { + withTempDir { inputDir => + spark.range(2) + .withColumn("part", 'id % 2) + .repartition(1) + .write + .format("delta") + .partitionBy("part") + .save(inputDir.getAbsolutePath) + + val df = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", "0") + .option(DeltaOptions.EXCLUDE_REGEX_OPTION, "part=0") + .format("delta") + .load(inputDir.getCanonicalPath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(df)( + ProcessAllAvailable(), + CheckAnswer((1, 1, "insert", 0)) // first file should get excluded + ) + } + } + + test("excludeRegex on cdcPath should not return Add/RemoveFiles") { + withTempDir { inputDir => + // version 0 - 1 AddFile + Seq(0).toDF("id") + .withColumn("col2", lit("0")) + .repartition(1) + .write + .format("delta") + .save(inputDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(inputDir.getAbsolutePath) + // version 1 - 1 ChangeFile + deltaTable.update(expr("col2 < 2"), Map("col2" -> lit("1"))) + + val deltaLog = DeltaLog.forTable(spark, inputDir.getAbsolutePath) + val excludePath = deltaLog.getChanges(1).next()._2 + .filter(_.isInstanceOf[AddCDCFile]) + .head + .asInstanceOf[AddCDCFile] + .path + + val df = spark.readStream + .option(DeltaOptions.CDC_READ_OPTION, "true") 
+ .option("startingVersion", "0") + .option(DeltaOptions.EXCLUDE_REGEX_OPTION, excludePath) + .format("delta") + .load(inputDir.getCanonicalPath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(df)( + ProcessAllAvailable(), + CheckAnswer((0, "0", "insert", 0)) // first file should get excluded + ) + } + } + + test("schema check for cdc stream") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 5).foreach { i => + Seq(i).toDF.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val df = spark.readStream + .format("delta") + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", 0) + .load(inputDir.getCanonicalPath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(df)( + AssertOnQuery { q => q.processAllAvailable(); true }, + CheckAnswer( + (0, "insert", 0), + (1, "insert", 1), + (2, "insert", 2), + (3, "insert", 3), + (4, "insert", 4) + ), + // no schema changed exception should be thrown. + AssertOnQuery { _ => + withMetadata(deltaLog, StructType.fromDDL("value int")) + true + }, + AssertOnQuery { _ => + withMetadata(deltaLog, StructType.fromDDL("id int, value string")) + true + }, + ExpectFailure[DeltaIllegalStateException](t => + assert(t.getMessage.contains("Detected schema change"))) + ) + } + } + + test("should not attempt to read a non exist version") { + withTempDirs { (inputDir1, inputDir2, checkpointDir) => + spark.range(1, 2).write.format("delta").save(inputDir1.getCanonicalPath) + spark.range(1, 2).write.format("delta").save(inputDir2.getCanonicalPath) + + def startQuery(): StreamingQuery = { + val df1 = spark.readStream + .format("delta") + .option("readChangeFeed", "true") + .load(inputDir1.getCanonicalPath) + val df2 = spark.readStream + .format("delta") + .option("readChangeFeed", "true") + .load(inputDir2.getCanonicalPath) + df1.union(df2).writeStream + .format("noop") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .start() + } + + var q = startQuery() + try { + q.processAllAvailable() + // current offsets: + // source1: DeltaSourceOffset(reservoirVersion=1,index=0,isInitialSnapshot=true) + // source2: DeltaSourceOffset(reservoirVersion=1,index=0,isInitialSnapshot=true) + + spark.range(1, 2).write.format("delta").mode("append").save(inputDir1.getCanonicalPath) + spark.range(1, 2).write.format("delta").mode("append").save(inputDir2.getCanonicalPath) + q.processAllAvailable() + // current offsets: + // source1: DeltaSourceOffset(reservoirVersion=2,index=-1,isInitialSnapshot=false) + // source2: DeltaSourceOffset(reservoirVersion=2,index=-1,isInitialSnapshot=false) + // Note: version 2 doesn't exist in source1 + + spark.range(1, 2).write.format("delta").mode("append").save(inputDir2.getCanonicalPath) + q.processAllAvailable() + // current offsets: + // source1: DeltaSourceOffset(reservoirVersion=2,index=-1,isInitialSnapshot=false) + // source2: DeltaSourceOffset(reservoirVersion=3,index=-1,isInitialSnapshot=false) + // Note: version 2 doesn't exist in source1 + + q.stop() + // Restart the query. It will call `getBatch` on the previous two offsets of `source1` which + // are both DeltaSourceOffset(reservoirVersion=2,index=-1,isInitialSnapshot=false) + // As version 2 doesn't exist, we should not try to load version 2 in this case. 
+ q = startQuery() + q.processAllAvailable() + } finally { + q.stop() + } + } + } + + // LC-1281: Ensure that when we would split batches into one file at a time, we still produce + // correct CDF even in cases where the CDF may need to compare multiple file actions from the + // same commit to be correct, such as with persistent deletion vectors. + test("double delete-only on the same file") { + withTempDir { tableDir => + val tablePath = tableDir.toString + spark.range(start = 0L, end = 10L, step = 1L, numPartitions = 1).toDF("id") + .write.format("delta").save(tablePath) + + spark.sql(s"DELETE FROM delta.`$tablePath` WHERE id IN (1, 3, 6)") + spark.sql(s"DELETE FROM delta.`$tablePath` WHERE id IN (2, 4, 7)") + + val stream = spark.readStream + .format("delta") + .option(DeltaOptions.CDC_READ_OPTION, true) + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, 1) + .option(DeltaOptions.STARTING_VERSION_OPTION, 1) + .load(tablePath) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + testStream(stream)( + ProcessAllAvailable(), + CheckAnswer( + (1L, "delete", 1L), + (3L, "delete", 1L), + (6L, "delete", 1L), + (2L, "delete", 2L), + (4L, "delete", 2L), + (7L, "delete", 2L) + ) + ) + } + } +} + +class DeltaCDCStreamDeletionVectorSuite extends DeltaCDCStreamSuite + with DeletionVectorsTestUtils { + override def beforeAll(): Unit = { + super.beforeAll() + enableDeletionVectorsForAllSupportedOperations(spark) + } +} + +class DeltaCDCStreamSuite extends DeltaCDCStreamSuiteBase +abstract class DeltaCDCStreamColumnMappingSuiteBase extends DeltaCDCStreamSuite + with ColumnMappingStreamingBlockedWorkflowSuiteBase with DeltaColumnMappingSelectedTestMixin { + + override protected def isCdcTest: Boolean = true + + + override def runOnlyTests: Seq[String] = Seq( + "no startingVersion should result fetch the entire snapshot", + "user provided startingVersion", + "maxFilesPerTrigger - 2 successive AddCDCFile commits", + + // streaming blocking semantics test + "deltaLog snapshot should not be updated outside of the stream", + "column mapping + streaming - allowed workflows - column addition", + "column mapping + streaming - allowed workflows - upgrade to name mode", + "column mapping + streaming: blocking workflow - drop column", + "column mapping + streaming: blocking workflow - rename column" + ) + +} + +class DeltaCDCStreamIdColumnMappingSuite extends DeltaCDCStreamColumnMappingSuiteBase + with DeltaColumnMappingEnableIdMode { +} + +class DeltaCDCStreamNameColumnMappingSuite extends DeltaCDCStreamColumnMappingSuiteBase + with DeltaColumnMappingEnableNameMode { +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCSuite.scala new file mode 100644 index 00000000000..758fa73ab7d --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCDCSuite.scala @@ -0,0 +1,1001 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.text.SimpleDateFormat +import java.util.Date + +import scala.collection.JavaConverters._ + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.commands.cdc.CDCReader._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaColumnMappingSelectedTestMixin +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.{col, current_timestamp, floor, lit} +import org.apache.spark.sql.streaming.StreamingQueryException +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{LongType, StringType, StructType} + +abstract class DeltaCDCSuiteBase + extends QueryTest + with SharedSparkSession + with CheckCDCAnswer + with DeltaSQLCommandTest { + + import testImplicits._ + + override protected def sparkConf: SparkConf = super.sparkConf + .set(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true") + + /** Represents path or metastore table name */ + abstract case class TblId(id: String) + class TablePath(path: String) extends TblId(path) + class TableName(name: String) extends TblId(name) + + /** Indicates either the starting or ending version/timestamp */ + trait Boundary + case class StartingVersion(value: String) extends Boundary + case class StartingTimestamp(value: String) extends Boundary + case class EndingVersion(value: String) extends Boundary + case class EndingTimestamp(value: String) extends Boundary + case object Unbounded extends Boundary // used to model situation when a boundary isn't provided + val dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + + def createTblWithThreeVersions( + tblName: Option[String] = None, + path: Option[String] = None): Unit = { + // version 0 + if (tblName.isDefined && path.isDefined) { + spark.range(10).write.format("delta") + .option("path", path.get) + .saveAsTable(tblName.get) + } else if (tblName.isDefined) { + spark.range(10).write.format("delta") + .saveAsTable(tblName.get) + } else if (path.isDefined) { + spark.range(10).write.format("delta") + .save(path.get) + } + + if (tblName.isDefined) { + // version 1 + spark.range(10, 20).write.format("delta").mode("append").saveAsTable(tblName.get) + + // version 2 + spark.range(20, 30).write.format("delta").mode("append").saveAsTable(tblName.get) + } else if (path.isDefined) { + // version 1 + spark.range(10, 20).write.format("delta").mode("append").save(path.get) + + // version 2 + spark.range(20, 30).write.format("delta").mode("append").save(path.get) + } + } + + /** Single method to do all kinds of CDC reads */ + // By default, we use the `legacy` batch CDF schema mode, in which either latest schema is used + // or the time-travelled schema is used. 
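+ // Concrete suites provide the actual read path: the SQL suite goes through the table_changes /
+ // table_changes_by_path functions, while DataFrame-based suites typically use the batch reader
+ // options, roughly (illustrative sketch only, not a helper defined here):
+ //   spark.read.format("delta")
+ //     .option("readChangeFeed", "true")
+ //     .option("startingVersion", start)
+ //     .option("endingVersion", end)
+ //     .load(path)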
+ def cdcRead( + tblId: TblId, + start: Boundary, + end: Boundary, + schemaMode: Option[DeltaBatchCDFSchemaMode] = Some(BatchCDFSchemaLegacy), + readerOptions: Map[String, String] = Map.empty): DataFrame + + /** Modify timestamp for a delta commit, used to test timestamp querying */ + def modifyDeltaTimestamp(deltaLog: DeltaLog, version: Long, time: Long): Unit = { + val file = new File(FileNames.deltaFile(deltaLog.logPath, version).toUri) + file.setLastModified(time) + val crc = new File(FileNames.checksumFile(deltaLog.logPath, version).toUri) + if (crc.exists()) { + crc.setLastModified(time) + } + } + + /** Create table utility method */ + def ctas(srcTbl: String, dstTbl: String, disableCDC: Boolean = false): Unit = { + val readDf = cdcRead(new TableName(srcTbl), StartingVersion("0"), EndingVersion("1")) + if (disableCDC) { + withSQLConf(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "false") { + readDf.write.format("delta") + .saveAsTable(dstTbl) + } + } else { + readDf.write.format("delta") + .saveAsTable(dstTbl) + } + } + + private val validTimestampFormats = + Seq("yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm:ss.SSS", "yyyy-MM-dd") + private val invalidTimestampFormats = + Seq("yyyyMMddHHmmssSSS") + + (validTimestampFormats ++ invalidTimestampFormats).foreach { formatStr => + val isValid = validTimestampFormats.contains(formatStr) + val isValidStr = if (isValid) "valid" else "invalid" + + test(s"CDF timestamp format - $formatStr is $isValidStr") { + withTable("src") { + createTblWithThreeVersions(tblName = Some("src")) + + val timestamp = new SimpleDateFormat(formatStr).format(new Date(1)) + + def doRead(): Unit = { + cdcRead(new TableName("src"), StartingTimestamp(timestamp), EndingVersion("1")) + } + + if (isValid) { + doRead() + } else { + val e = intercept[AnalysisException] { + doRead() + }.getMessage() + assert(e.contains("The provided timestamp")) + assert(e.contains("cannot be converted to a valid timestamp")) + } + } + } + } + + testQuietly("writes with metadata columns") { + withTable("src", "dst") { + + // populate src table with CDC data + createTblWithThreeVersions(tblName = Some("src")) + + // writing cdc data to a new table with cdc enabled should fail. the source table has columns + // that are reserved for CDC only, and shouldn't be allowed into the target table. + val e = intercept[IllegalStateException] { + ctas("src", "dst") + } + val writeContainsCDCColumnsError = DeltaErrors.cdcColumnsInData( + cdcReadSchema(new StructType()).fieldNames).getMessage + val enablingCDCOnTableWithCDCColumns = DeltaErrors.tableAlreadyContainsCDCColumns( + cdcReadSchema(new StructType()).fieldNames).getMessage + + assert(e.getMessage.contains(writeContainsCDCColumnsError)) + + // when cdc is disabled writes should work + ctas("src", "dst", disableCDC = true) + + // write some more data + withTable("more_data") { + spark.range(20, 30) + .withColumn(CDC_TYPE_COLUMN_NAME, lit("insert")) + .withColumn("_commit_version", lit(2L)) + .withColumn("_commit_timestamp", current_timestamp) + .write.saveAsTable("more_data") + + spark.table("more_data").write.format("delta") + .mode("append") + .saveAsTable("dst") + + checkAnswer( + spark.read.format("delta").table("dst"), + cdcRead(new TableName("src"), StartingVersion("0"), EndingVersion("1")) + .union(spark.table("more_data")) + ) + } + + // re-enabling cdc should be disallowed, since the dst table already contains column that are + // reserved for CDC only. 
+ val e2 = intercept[IllegalStateException] { + sql(s"ALTER TABLE dst SET TBLPROPERTIES " + + s"(${DeltaConfigs.CHANGE_DATA_FEED.key}=true)") + } + assert(e2.getMessage.contains(enablingCDCOnTableWithCDCColumns)) + } + } + + test("changes from table by name") { + withTable("tbl") { + createTblWithThreeVersions(tblName = Some("tbl")) + + val readDf = cdcRead(new TableName("tbl"), StartingVersion("0"), EndingVersion("1")) + checkCDCAnswer( + DeltaLog.forTable(spark, TableIdentifier("tbl")), + readDf, + spark.range(20) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType)) + ) + } + } + + test("changes from table by path") { + withTempDir { dir => + createTblWithThreeVersions(path = Some(dir.getAbsolutePath)) + + val readDf = cdcRead( + new TablePath(dir.getAbsolutePath), StartingVersion("0"), EndingVersion("1")) + checkCDCAnswer( + DeltaLog.forTable(spark, dir.getAbsolutePath), + readDf, + spark.range(20) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType)) + ) + } + } + + test("changes - start and end are timestamps") { + withTempDir { tempDir => + createTblWithThreeVersions(path = Some(tempDir.getAbsolutePath)) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + // modify timestamps + // version 0 + modifyDeltaTimestamp(deltaLog, 0, 0) + val tsAfterV0 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .format(new Date(1)) + + // version 1 + modifyDeltaTimestamp(deltaLog, 1, 1000) + val tsAfterV1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .format(new Date(1001)) + + modifyDeltaTimestamp(deltaLog, 2, 2000) + + val readDf = cdcRead( + new TablePath(tempDir.getAbsolutePath), + StartingTimestamp(tsAfterV0), EndingTimestamp(tsAfterV1)) + checkCDCAnswer( + DeltaLog.forTable(spark, tempDir), + readDf, + spark.range(20) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType))) + } + } + + test("changes - only start is a timestamp") { + withTempDir { tempDir => + createTblWithThreeVersions(path = Some(tempDir.getAbsolutePath)) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + modifyDeltaTimestamp(deltaLog, 0, 0) + modifyDeltaTimestamp(deltaLog, 1, 10000) + modifyDeltaTimestamp(deltaLog, 2, 20000) + + val ts0 = dateFormat.format(new Date(2000)) + val readDf = cdcRead( + new TablePath(tempDir.getAbsolutePath), StartingTimestamp(ts0), EndingVersion("1")) + checkCDCAnswer( + DeltaLog.forTable(spark, tempDir), + readDf, + spark.range(10, 20) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType))) + } + } + + test("changes - only start is a timestamp - inclusive behavior") { + withTempDir { tempDir => + createTblWithThreeVersions(path = Some(tempDir.getAbsolutePath)) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + modifyDeltaTimestamp(deltaLog, 0, 0) + modifyDeltaTimestamp(deltaLog, 1, 1000) + modifyDeltaTimestamp(deltaLog, 2, 2000) + + val ts0 = dateFormat.format(new Date(0)) + val readDf = cdcRead( + new TablePath(tempDir.getAbsolutePath), StartingTimestamp(ts0), EndingVersion("1")) + checkCDCAnswer( + DeltaLog.forTable(spark, tempDir), + readDf, + spark.range(20) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType))) + } + } + + test("version from timestamp - before the first version") { + withTempDir { tempDir => + createTblWithThreeVersions(path = 
Some(tempDir.getAbsolutePath)) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + modifyDeltaTimestamp(deltaLog, 0, 4000) + modifyDeltaTimestamp(deltaLog, 1, 8000) + modifyDeltaTimestamp(deltaLog, 2, 12000) + + val ts0 = dateFormat.format(new Date(1000)) + val ts1 = dateFormat.format(new Date(3000)) + intercept[AnalysisException] { + cdcRead( + new TablePath(tempDir.getAbsolutePath), + StartingTimestamp(ts0), + EndingTimestamp(ts1)) + .collect() + }.getMessage.contains("before the earliest version") + } + } + + test("version from timestamp - between two valid versions") { + withTempDir { tempDir => + createTblWithThreeVersions(path = Some(tempDir.getAbsolutePath)) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + modifyDeltaTimestamp(deltaLog, 0, 0) + modifyDeltaTimestamp(deltaLog, 1, 4000) + modifyDeltaTimestamp(deltaLog, 2, 8000) + + val ts0 = dateFormat.format(new Date(1000)) + val ts1 = dateFormat.format(new Date(3000)) + val readDf = cdcRead( + new TablePath(tempDir.getAbsolutePath), StartingTimestamp(ts0), EndingTimestamp(ts1)) + checkCDCAnswer( + DeltaLog.forTable(spark, tempDir), + readDf, + spark.range(0) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType))) + } + } + + test("version from timestamp - one version in between") { + withTempDir { tempDir => + createTblWithThreeVersions(path = Some(tempDir.getAbsolutePath)) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + modifyDeltaTimestamp(deltaLog, 0, 0) + modifyDeltaTimestamp(deltaLog, 1, 4000) + modifyDeltaTimestamp(deltaLog, 2, 8000) + + val ts0 = dateFormat.format(new Date(3000)) + val ts1 = dateFormat.format(new Date(5000)) + val readDf = cdcRead( + new TablePath(tempDir.getAbsolutePath), StartingTimestamp(ts0), EndingTimestamp(ts1)) + checkCDCAnswer( + DeltaLog.forTable(spark, tempDir), + readDf, + spark.range(10, 20) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType))) + } + } + + test("version from timestamp - end before start") { + withTempDir { tempDir => + createTblWithThreeVersions(path = Some(tempDir.getAbsolutePath)) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + modifyDeltaTimestamp(deltaLog, 0, 0) + modifyDeltaTimestamp(deltaLog, 1, 4000) + modifyDeltaTimestamp(deltaLog, 2, 8000) + + val ts0 = dateFormat.format(new Date(3000)) + val ts1 = dateFormat.format(new Date(1000)) + intercept[DeltaIllegalArgumentException] { + cdcRead( + new TablePath(tempDir.getAbsolutePath), + StartingTimestamp(ts0), + EndingTimestamp(ts1)) + .collect() + }.getErrorClass === "DELTA_INVALID_CDC_RANGE" + } + } + + test("version from timestamp - end before start with one version in between") { + withTempDir { tempDir => + createTblWithThreeVersions(path = Some(tempDir.getAbsolutePath)) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + modifyDeltaTimestamp(deltaLog, 0, 0) + modifyDeltaTimestamp(deltaLog, 1, 4000) + modifyDeltaTimestamp(deltaLog, 2, 8000) + + val ts0 = dateFormat.format(new Date(5000)) + val ts1 = dateFormat.format(new Date(3000)) + intercept[DeltaIllegalArgumentException] { + cdcRead( + new TablePath(tempDir.getAbsolutePath), + StartingTimestamp(ts0), + EndingTimestamp(ts1)) + .collect() + }.getErrorClass === "DELTA_INVALID_CDC_RANGE" + } + } + + test("start version and end version are the same") { + val tblName = "tbl" + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + + val 
readDf = cdcRead( + new TableName(tblName), StartingVersion("0"), EndingVersion("0")) + checkCDCAnswer( + DeltaLog.forTable(spark, TableIdentifier("tbl")), + readDf, + spark.range(10) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType))) + } + } + + for (readWithVersionNumber <- BOOLEAN_DOMAIN) + test(s"CDC read respects timezone and DST - readWithVersionNumber=$readWithVersionNumber") { + val tblName = "tbl" + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tblName)) + + // Set commit time during Daylight savings time change. + val restoreDate = "2022-11-06 01:42:44" + val format = new java.text.SimpleDateFormat("yyyy-MM-dd hh:mm:ss Z") + val timestamp = format.parse(s"$restoreDate -0800").getTime + modifyDeltaTimestamp(deltaLog, 0, timestamp) + + // Verify DST is respected. + val e = intercept[Exception] { + cdcRead(new TableName(tblName), + StartingTimestamp(s"$restoreDate -0700"), + EndingTimestamp(s"$restoreDate -0700")) + } + assert(e.getMessage.contains("is before the earliest version available")) + + val readDf = if (readWithVersionNumber) { + cdcRead(new TableName(tblName), StartingVersion("0"), EndingVersion("0")) + } else { + cdcRead( + new TableName(tblName), + StartingTimestamp(s"$restoreDate -0800"), + EndingTimestamp(s"$restoreDate -0800")) + } + + checkCDCAnswer( + DeltaLog.forTable(spark, TableIdentifier(tblName)), + readDf, + spark.range(10) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType))) + } + } + + test("start version is provided and no end version") { + val tblName = "tbl" + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + + val readDf = cdcRead( + new TableName(tblName), StartingVersion("0"), Unbounded) + checkCDCAnswer( + DeltaLog.forTable(spark, TableIdentifier("tbl")), + readDf, + spark.range(30) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType))) + } + } + + test("end timestamp < start timestamp") { + withTempDir { tempDir => + createTblWithThreeVersions(path = Some(tempDir.getAbsolutePath)) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + modifyDeltaTimestamp(deltaLog, 0, 0) + modifyDeltaTimestamp(deltaLog, 1, 1000) + modifyDeltaTimestamp(deltaLog, 2, 2000) + + val ts0 = dateFormat.format(new Date(2000)) + val ts1 = dateFormat.format(new Date(1)) + val e = intercept[IllegalArgumentException] { + cdcRead( + new TablePath(tempDir.getAbsolutePath), StartingTimestamp(ts0), EndingTimestamp(ts1)) + } + assert(e.getMessage.contains("End cannot be before start")) + } + } + + test("end version < start version") { + val tblName = "tbl" + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + val e = intercept[IllegalArgumentException] { + cdcRead(new TableName(tblName), StartingVersion("1"), EndingVersion("0")) + } + assert(e.getMessage.contains("End cannot be before start")) + } + } + + test("cdc result dataframe can be transformed further") { + val tblName = "tbl" + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + + val cdcResult = cdcRead(new TableName(tblName), StartingVersion("0"), EndingVersion("1")) + val transformedDf = cdcResult + .drop(CDC_COMMIT_TIMESTAMP) + .withColumn("col3", lit(0)) + .withColumn("still_there", col("_change_type")) + + checkAnswer( + transformedDf, + spark.range(20) + 
.withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType)) + .withColumn("col3", lit(0)) + .withColumn("still_there", col("_change_type")) + ) + } + } + + test("multiple references on same table") { + val tblName = "tbl" + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + + val cdcResult0_1 = cdcRead(new TableName(tblName), StartingVersion("0"), EndingVersion("1")) + val cdcResult0_2 = cdcRead(new TableName(tblName), StartingVersion("0"), EndingVersion("2")) + + val diff = cdcResult0_2.except(cdcResult0_1) + + checkCDCAnswer( + DeltaLog.forTable(spark, TableIdentifier("tbl")), + diff, + spark.range(20, 30) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType)) + ) + } + } + + test("filtering cdc metadata columns") { + val tblName = "tbl" + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + val deltaTable = io.delta.tables.DeltaTable.forName("tbl") + deltaTable.delete("id > 20") + + val cdcResult = cdcRead(new TableName(tblName), StartingVersion("0"), EndingVersion("3")) + + checkCDCAnswer( + DeltaLog.forTable(spark, TableIdentifier("tbl")), + cdcResult.filter("_change_type != 'insert'"), + spark.range(21, 30) + .withColumn("_change_type", lit("delete")) + .withColumn("_commit_version", lit(3)) + ) + + checkCDCAnswer( + DeltaLog.forTable(spark, TableIdentifier("tbl")), + cdcResult.filter("_commit_version = 1"), + spark.range(10, 20) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", lit(1)) + ) + } + } + + test("aggregating non-numeric cdc data columns") { + withTempDir { dir => + val path = dir.getAbsolutePath + spark.range(10).selectExpr("id", "'text' as text") + .write.format("delta").save(path) + val deltaTable = io.delta.tables.DeltaTable.forPath(path) + deltaTable.delete("id > 5") + + val cdcResult = cdcRead(new TablePath(path), StartingVersion("0"), EndingVersion("3")) + + checkAnswer( + cdcResult.selectExpr("count(distinct text)"), + Row(1) + ) + + checkAnswer( + cdcResult.selectExpr("first(text)"), + Row("text") + ) + } + } + + test("ending version not specified resolves to latest at execution time") { + withTempDir { dir => + val path = dir.getAbsolutePath + spark.range(5).selectExpr("id", "'text' as text") + .write.format("delta").save(path) + val cdcResult = cdcRead(new TablePath(path), StartingVersion("0"), Unbounded) + + checkAnswer( + cdcResult.selectExpr("id", "_change_type", "_commit_version"), + Row(0, "insert", 0) :: Row(1, "insert", 0) :: Row(2, "insert", 0) :: + Row(3, "insert", 0):: Row(4, "insert", 0) :: Nil + ) + + // The next scan of `cdcResult` should include this delete even though the DF was defined + // before it. 
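+      // (With an unbounded end, the ending version is resolved when the DataFrame is executed,
+      // not when it is defined, so the delete committed below shows up on the next scan.)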
+ val deltaTable = io.delta.tables.DeltaTable.forPath(path) + deltaTable.delete("id > 2") + + checkAnswer( + cdcResult.selectExpr("id", "_change_type", "_commit_version"), + Row(0, "insert", 0) :: Row(1, "insert", 0) :: Row(2, "insert", 0) :: + Row(3, "insert", 0):: Row(4, "insert", 0) :: + Row(3, "delete", 1):: Row(4, "delete", 1) :: Nil + ) + } + } + + test("table schema changed after dataframe with ending specified") { + withTempDir { dir => + val path = dir.getAbsolutePath + spark.range(5).selectExpr("id", "'text' as text") + .write.format("delta").save(path) + val cdcResult = cdcRead(new TablePath(path), StartingVersion("0"), EndingVersion("1")) + sql(s"ALTER TABLE delta.`$path` ADD COLUMN (newCol INT)") + + checkAnswer( + cdcResult.selectExpr("id", "_change_type", "_commit_version"), + Row(0, "insert", 0) :: Row(1, "insert", 0) :: Row(2, "insert", 0) :: + Row(3, "insert", 0) :: Row(4, "insert", 0) :: Nil + ) + } + } + + test("table schema changed after dataframe with ending not specified") { + withTempDir { dir => + val path = dir.getAbsolutePath + spark.range(5).selectExpr("id", "'text' as text") + .write.format("delta").save(path) + val cdcResult = cdcRead(new TablePath(path), StartingVersion("0"), Unbounded) + sql(s"ALTER TABLE delta.`$path` ADD COLUMN (newCol STRING)") + sql(s"INSERT INTO delta.`$path` VALUES (5, 'text', 'newColVal')") + + // Just ignoring the new column is pretty weird, but it's what we do for non-CDC dataframes, + // so we preserve the behavior rather than adding a special case. + checkAnswer( + cdcResult.selectExpr("id", "_change_type", "_commit_version"), + Row(0, "insert", 0) :: Row(1, "insert", 0) :: Row(2, "insert", 0) :: + Row(3, "insert", 0) :: Row(4, "insert", 0) :: Row(5, "insert", 2) :: Nil + ) + } + } + + test("An error should be thrown when CDC is not enabled") { + val tblName = "tbl" + withTable(tblName) { + withSQLConf(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "false") { + // create version with cdc disabled - v0 + spark.range(10).write.format("delta").saveAsTable(tblName) + } + val deltaTable = io.delta.tables.DeltaTable.forName(tblName) + // v1 + deltaTable.delete("id > 8") + + // v2 + sql(s"ALTER TABLE ${tblName} SET TBLPROPERTIES " + + s"(${DeltaConfigs.CHANGE_DATA_FEED.key}=true)") + + // v3 + spark.range(10, 20).write.format("delta").mode("append").saveAsTable(tblName) + + // v4 + deltaTable.delete("id > 18") + + // v5 + sql(s"ALTER TABLE ${tblName} SET TBLPROPERTIES " + + s"(${DeltaConfigs.CHANGE_DATA_FEED.key}=false)") + + var e = intercept[AnalysisException] { + cdcRead(new TableName(tblName), StartingVersion("0"), EndingVersion("4")).collect() + } + assert(e.getMessage === DeltaErrors.changeDataNotRecordedException(0, 0, 4).getMessage) + + val cdcDf = cdcRead(new TableName(tblName), StartingVersion("2"), EndingVersion("4")) + assert(cdcDf.count() == 11) // 10 rows inserted, 1 row deleted + + // Check that we correctly detect CDC is disabled and fail the query for multiple types of + // ranges: + // * disabled at the end but not start - (2, 5) + // * disabled at the start but not end - (1, 4) + // * disabled at both start and end (even though enabled in the middle) - (1, 5) + for ((start, end, firstDisabledVersion) <- Seq((2, 5, 5), (1, 4, 1), (1, 5, 1))) { + e = intercept[AnalysisException] { + cdcRead( + new TableName(tblName), + StartingVersion(start.toString), EndingVersion(end.toString)).collect() + } + assert(e.getMessage === DeltaErrors.changeDataNotRecordedException( + firstDisabledVersion, start, end).getMessage) + 
} + } + } + + test("changes - start timestamp exceeding latest commit timestamp") { + withTempDir { tempDir => + withSQLConf(DeltaSQLConf.DELTA_CDF_ALLOW_OUT_OF_RANGE_TIMESTAMP.key -> "true") { + val path = tempDir.getAbsolutePath + createTblWithThreeVersions(path = Some(path)) + val deltaLog = DeltaLog.forTable(spark, path) + + // modify timestamps + // version 0 + modifyDeltaTimestamp(deltaLog, 0, 0) + + // version 1 + modifyDeltaTimestamp(deltaLog, 1, 1000) + + // version 2 + modifyDeltaTimestamp(deltaLog, 2, 2000) + + val tsStart = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .format(new Date(3000)) + val tsEnd = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .format(new Date(4000)) + + val readDf = cdcRead( + new TablePath(path), + StartingTimestamp(tsStart), + EndingTimestamp(tsEnd)) + checkCDCAnswer( + DeltaLog.forTable(spark, tempDir), + readDf, + sqlContext.emptyDataFrame) + } + } + } + + test("changes - end timestamp exceeding latest commit timestamp") { + withTempDir { tempDir => + withSQLConf(DeltaSQLConf.DELTA_CDF_ALLOW_OUT_OF_RANGE_TIMESTAMP.key -> "true") { + createTblWithThreeVersions(path = Some(tempDir.getAbsolutePath)) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + // modify timestamps + // version 0 + modifyDeltaTimestamp(deltaLog, 0, 0) + + // version 1 + modifyDeltaTimestamp(deltaLog, 1, 1000) + + // version 2 + modifyDeltaTimestamp(deltaLog, 2, 2000) + + val tsStart = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .format(new Date(0)) + val tsEnd = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .format(new Date(4000)) + + val readDf = cdcRead( + new TablePath(tempDir.getAbsolutePath), + StartingTimestamp(tsStart), EndingTimestamp(tsEnd)) + checkCDCAnswer( + DeltaLog.forTable(spark, tempDir), + readDf, + spark.range(30) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", (col("id") / 10).cast(LongType))) + } + } + } + + test("batch write: append, dynamic partition overwrite + CDF") { + withSQLConf( + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true", + DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(("a", "x"), ("b", "y"), ("c", "x")).toDF("value", "part") + .write + .format("delta") + .partitionBy("part") + .mode("append") + .save(tempDir.getCanonicalPath) + checkAnswer( + cdcRead(new TablePath(tempDir.getCanonicalPath), StartingVersion("0"), EndingVersion("0")) + .drop(CDC_COMMIT_TIMESTAMP), + Row("a", "x", "insert", 0) :: Row("b", "y", "insert", 0) :: + Row("c", "x", "insert", 0) :: Nil + ) + + // ovewrite nothing + Seq(("d", "z")).toDF("value", "part") + .write + .format("delta") + .partitionBy("part") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value", "part").as[(String, String)], + ("a", "x"), ("b", "y"), ("c", "x"), ("d", "z")) + checkAnswer( + cdcRead(new TablePath(tempDir.getCanonicalPath), StartingVersion("1"), EndingVersion("1")) + .drop(CDC_COMMIT_TIMESTAMP), + Row("d", "z", "insert", 1) :: Nil + ) + + // overwrite partition `part`="x" + Seq(("a", "x"), ("e", "x")).toDF("value", "part") + .write + .format("delta") + .partitionBy("part") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value", "part").as[(String, String)], + ("a", "x"), ("b", "y"), 
("d", "z"), ("e", "x")) + checkAnswer( + cdcRead(new TablePath(tempDir.getCanonicalPath), StartingVersion("2"), EndingVersion("2")) + .drop(CDC_COMMIT_TIMESTAMP), + Row("a", "x", "delete", 2) :: Row("c", "x", "delete", 2) :: + Row("a", "x", "insert", 2) :: Row("e", "x", "insert", 2) :: Nil + ) + } + } + } +} + +class DeltaCDCScalaSuite extends DeltaCDCSuiteBase { + + /** Single method to do all kinds of CDC reads */ + def cdcRead( + tblId: TblId, + start: Boundary, + end: Boundary, + schemaMode: Option[DeltaBatchCDFSchemaMode] = Some(BatchCDFSchemaLegacy), + readerOptions: Map[String, String] = Map.empty): DataFrame = { + + // Set the batch CDF schema mode using SQL conf if we specified it + if (schemaMode.isDefined) { + var result: DataFrame = null + withSQLConf(DeltaSQLConf.DELTA_CDF_DEFAULT_SCHEMA_MODE_FOR_COLUMN_MAPPING_TABLE.key -> + schemaMode.get.name) { + result = cdcRead(tblId, start, end, None, readerOptions) + } + return result + } + + val startPrefix: (String, String) = start match { + case startingVersion: StartingVersion => + ("startingVersion", startingVersion.value) + + case startingTimestamp: StartingTimestamp => + ("startingTimestamp", startingTimestamp.value) + + case Unbounded => + ("", "") + } + val endPrefix: (String, String) = end match { + case endingVersion: EndingVersion => + ("endingVersion", endingVersion.value) + + case endingTimestamp: EndingTimestamp => + ("endingTimestamp", endingTimestamp.value) + + case Unbounded => + ("", "") + } + + var dfr = spark.read.format("delta") + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option(startPrefix._1, startPrefix._2) + .option(endPrefix._1, endPrefix._2) + + readerOptions.foreach { case (k, v) => + dfr = dfr.option(k, v) + } + + tblId match { + case path: TablePath => + dfr.load(path.id) + + case tblName: TableName => + dfr.table(tblName.id) + + case _ => + throw new IllegalArgumentException("No table name or path provided") + } + } + + + test("start version or timestamp is not provided") { + val tblName = "tbl" + withTable(tblName) { + createTblWithThreeVersions(tblName = Some(tblName)) + + val e = intercept[AnalysisException] { + spark.read.format("delta") + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("endingVersion", 1) + .table(tblName) + .show() + } + assert(e.getMessage.contains(DeltaErrors.noStartVersionForCDC().getMessage)) + } + } + + test("Not having readChangeFeed will not output cdc columns") { + val tblName = "tbl2" + withTable(tblName) { + spark.range(0, 10).write.format("delta").saveAsTable(tblName) + checkAnswer(spark.read.format("delta").table(tblName), spark.range(0, 10).toDF("id")) + + checkAnswer( + spark.read.format("delta") + .option("startingVersion", "0") + .option("endingVersion", "0") + .table(tblName), + spark.range(0, 10).toDF("id")) + } + } + + test("non-monotonic timestamps") { + withTempDir { dir => + val path = dir.getAbsolutePath + val deltaLog = DeltaLog.forTable(spark, path) + (0 to 3).foreach { i => + spark.range(i * 10, (i + 1) * 10).write.format("delta").mode("append").save(path) + val file = new File(FileNames.deltaFile(deltaLog.logPath, i).toUri) + file.setLastModified(300 - i) + } + + checkCDCAnswer( + deltaLog, + cdcRead(new TablePath(path), StartingVersion("0"), EndingVersion("3")), + spark.range(0, 40) + .withColumn("_change_type", lit("insert")) + .withColumn("_commit_version", floor(col("id") / 10))) + } + } + + test("Repeated delete") { + withTempDir { dir => + val path = dir.getAbsolutePath + val deltaLog = DeltaLog.forTable(spark, path) + spark.range(0, 
5, 1, numPartitions = 1).write.format("delta").save(path) + sql(s"DELETE FROM delta.`$path` WHERE id = 3") // Version 1 + sql(s"DELETE FROM delta.`$path` WHERE id = 4") // Version 2 + sql(s"DELETE FROM delta.`$path` WHERE id IN (0, 1, 2)") // Version 3, remove the whole file + + val allChanges: Map[Int, Seq[Row]] = Map( + 1 -> (Row(3, "delete", 1) :: Nil), + 2 -> (Row(4, "delete", 2) :: Nil), + 3 -> (Row(0, "delete", 3) :: Row(1, "delete", 3) :: Row(2, "delete", 3) :: Nil) + ) + + for(start <- 1 to 3; end <- start to 3) { + checkCDCAnswer( + deltaLog, + cdcRead( + new TablePath(path), + StartingVersion(start.toString), + EndingVersion(end.toString)), + (start to end).flatMap(v => allChanges(v))) + } + } + } +} + +class DeltaCDCScalaWithDeletionVectorsSuite extends DeltaCDCScalaSuite + with DeletionVectorsTestUtils { + override def beforeAll(): Unit = { + super.beforeAll() + enableDeletionVectorsForAllSupportedOperations(spark) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCheckpointWithStructColsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCheckpointWithStructColsSuite.scala new file mode 100644 index 00000000000..b506ba36d31 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCheckpointWithStructColsSuite.scala @@ -0,0 +1,328 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.functions.{col, struct} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ + +class DeltaCheckpointWithStructColsSuite + extends QueryTest + with SharedSparkSession + with DeltaColumnMappingTestUtils + with DeltaSQLCommandTest { + + import testImplicits._ + + protected val checkpointFnsWithStructAndJsonStats: Seq[DeltaLog => Long] = Seq( + checkpointWithProperty(writeStatsAsJson = Some(true)), + checkpointWithProperty(writeStatsAsJson = None)) + + protected val checkpointFnsWithStructWithoutJsonStats: Seq[DeltaLog => Long] = Seq( + checkpointWithProperty(writeStatsAsJson = Some(false))) + + protected val checkpointFnsWithoutStructWithJsonStats: Seq[DeltaLog => Long] = Seq( + checkpointWithProperty(writeStatsAsJson = Some(true), writeStatsAsStruct = false), + checkpointWithProperty(writeStatsAsJson = None, writeStatsAsStruct = false)) + + /** + * Creates a table from the given DataFrame and partitioning. Then for each checkpointing + * function, it runs the given validation function. 
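+   *
+   * @param checkpointingFns functions that trigger a checkpoint on the table's DeltaLog and
+   *                         return the version that was checkpointed
+   * @param expectedCols columns (and their types) that must appear under the checkpoint's
+   *                     `add` field, in addition to the always-required ones
+   * @param additionalValidationFn extra assertions run against the full set of exploded
+   *                               column names found under `add`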
+ */ + protected def checkpointSchemaForTable(df: DataFrame, partitionBy: String*)( + checkpointingFns: Seq[DeltaLog => Long], + expectedCols: Seq[(String, DataType)], + additionalValidationFn: Set[String] => Unit = _ => ()): Unit = { + checkpointingFns.foreach { checkpointingFn => + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + df.write.format("delta").partitionBy(partitionBy: _*).save(dir.getCanonicalPath) + val version = checkpointingFn(deltaLog) + + val f = spark.read.parquet( + FileNames.checkpointFileSingular(deltaLog.logPath, version).toString) + assert(f.schema.getFieldIndex("commitInfo").isEmpty, + "commitInfo should not be written to the checkpoint") + val baseCols = Set("add", "metaData", "protocol", "remove", "txn") + baseCols.foreach { name => + assert(f.schema.getFieldIndex(name).nonEmpty, s"Couldn't find required field $name " + + s"among: ${f.schema.fieldNames.mkString("[", ", ", " ]")}") + } + + val addSchema = f.schema("add").dataType.asInstanceOf[StructType] + val addColumns = SchemaMergingUtils.explodeNestedFieldNames(addSchema).toSet + + val requiredCols = Seq( + "path" -> StringType, + "partitionValues" -> MapType(StringType, StringType), + "size" -> LongType, + "modificationTime" -> LongType, + "dataChange" -> BooleanType, + "tags" -> MapType(StringType, StringType) + ) + + val schema = deltaLog.update().schema + (requiredCols ++ expectedCols).foreach { case (expectedField, dataType) => + // use physical name if possible + val expectedPhysicalField = + convertColumnNameToAttributeWithPhysicalName(expectedField, schema).name + assert(addColumns.contains(expectedPhysicalField)) + // Check data type + assert(f.select(col(s"add.$expectedPhysicalField")).schema.head.dataType === dataType) + } + + additionalValidationFn(addColumns) + + DeltaLog.clearCache() + checkAnswer( + spark.read.format("delta").load(dir.getCanonicalPath), + df + ) + } + } + } + + test("unpartitioned table") { + val df = spark.range(10).withColumn("part", ('id / 2).cast("int")) + checkpointSchemaForTable(df)( + checkpointingFns = checkpointFnsWithStructAndJsonStats, + expectedCols = Nil, + additionalValidationFn = addColumns => { + checkFields( + addColumns, + statsAsJsonExists = true, + unexpected = Seq(Checkpoints.STRUCT_PARTITIONS_COL_NAME)) + } + ) + + checkpointSchemaForTable(df)( + checkpointingFns = checkpointFnsWithStructWithoutJsonStats, + expectedCols = Nil, + additionalValidationFn = addColumns => { + checkFields( + addColumns, + statsAsJsonExists = false, + unexpected = Seq(Checkpoints.STRUCT_PARTITIONS_COL_NAME)) + } + ) + + checkpointSchemaForTable(df)( + checkpointingFns = checkpointFnsWithoutStructWithJsonStats, + expectedCols = Nil, + additionalValidationFn = addColumns => { + checkFields( + addColumns, + statsAsJsonExists = true, + unexpected = Seq(Checkpoints.STRUCT_PARTITIONS_COL_NAME)) + } + ) + + checkpointSchemaForTable(df)( + checkpointingFns = Seq(checkpointWithoutStats), + expectedCols = Nil, + additionalValidationFn = addColumns => { + checkFields( + addColumns, statsAsJsonExists = false, Seq(Checkpoints.STRUCT_PARTITIONS_COL_NAME)) + } + ) + } + + test("partitioned table") { + val df = spark.range(10).withColumn("part", ('id / 2).cast("int")) + // partitioned by "part" + checkpointSchemaForTable(df, "part")( + checkpointingFns = checkpointFnsWithStructAndJsonStats, + expectedCols = Seq("partitionValues_parsed.part" -> IntegerType), + additionalValidationFn = addColumns => { + checkFields( + addColumns, + statsAsJsonExists = true, + Nil) + } + ) + + 
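+    // Same partitioned table, but with struct stats only (writeStatsAsJson = false): the parsed
+    // partition values column is still expected, just without the JSON `stats` column.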
checkpointSchemaForTable(df, "part")( + checkpointingFns = checkpointFnsWithStructWithoutJsonStats, + expectedCols = Seq("partitionValues_parsed.part" -> IntegerType), + additionalValidationFn = addColumns => { + checkFields( + addColumns, + statsAsJsonExists = false, + Nil) + } + ) + + checkpointSchemaForTable(df, "part")( + checkpointingFns = checkpointFnsWithoutStructWithJsonStats, + expectedCols = Nil, + additionalValidationFn = addColumns => { + checkFields( + addColumns, + statsAsJsonExists = true, + Seq(Checkpoints.STRUCT_PARTITIONS_COL_NAME)) + } + ) + + checkpointSchemaForTable(df, "part")( + checkpointingFns = Seq(checkpointWithoutStats), + expectedCols = Nil, + additionalValidationFn = addColumns => { + checkFields( + addColumns, statsAsJsonExists = false, Seq(Checkpoints.STRUCT_PARTITIONS_COL_NAME)) + } + ) + } + + test("special characters") { + val weirdName1 = "part%!@#_$%^&*-" + val weirdName2 = "part?_.+<>|/" + val df = spark.range(10) + .withColumn(weirdName1, ('id / 2).cast("int")) + .withColumn(weirdName2, ('id / 3).cast("int")) + .withColumn("struct", struct($"id", col(weirdName1), $"id".as(weirdName2))) + + val structColumns = Seq( + s"partitionValues_parsed.$weirdName1" -> IntegerType, + s"partitionValues_parsed.`$weirdName2`" -> IntegerType) + + // partitioned by weirdName1, weirdName2 + checkpointSchemaForTable(df, weirdName1, weirdName2)( + checkpointingFns = checkpointFnsWithStructAndJsonStats, + expectedCols = structColumns, + additionalValidationFn = addColumns => { + checkFields( + addColumns, + statsAsJsonExists = true, + Nil) + } + ) + + checkpointSchemaForTable(df, weirdName1, weirdName2)( + checkpointingFns = checkpointFnsWithStructWithoutJsonStats, + expectedCols = Nil, + additionalValidationFn = addColumns => { + checkFields( + addColumns, + statsAsJsonExists = false, + Nil) + } + ) + + checkpointSchemaForTable(df, weirdName1, weirdName2)( + checkpointingFns = checkpointFnsWithoutStructWithJsonStats, + expectedCols = Nil, + additionalValidationFn = addColumns => { + checkFields( + addColumns, + statsAsJsonExists = true, + structColumns.map(_._1)) + } + ) + } + + test("timestamps as partition values") { + withTempDir { dir => + val df = Seq( + (java.sql.Timestamp.valueOf("2012-12-31 16:00:10.011"), 2), + (java.sql.Timestamp.valueOf("2099-12-31 16:00:10.011"), 4)).toDF("key", "value") + + df.write.format("delta").partitionBy("key").save(dir.getCanonicalPath) + val deltaLog = DeltaLog.forTable(spark, dir) + val version = checkpointWithProperty( + writeStatsAsJson = Some(true), writeStatsAsStruct = true)(deltaLog) + val f = spark.read.parquet( + FileNames.checkpointFileSingular(deltaLog.logPath, version).toString) + + // use physical name + val key = getPhysicalName("key", deltaLog.snapshot.schema) + checkAnswer( + f.select(s"add.partitionValues_parsed.`$key`"), + Seq(Row(null), Row(null)) ++ df.select("key").collect() + ) + + sql(s"DELETE FROM delta.`${dir.getCanonicalPath}` WHERE CURRENT_TIMESTAMP > key") + checkAnswer( + spark.read.format("delta").load(dir.getCanonicalPath), + Row(java.sql.Timestamp.valueOf("2099-12-31 16:00:10.011"), 4) + ) + + sql(s"DELETE FROM delta.`${dir.getCanonicalPath}` WHERE CURRENT_TIMESTAMP < key") + } + } + + + /** + * Creates a checkpoint by based on `writeStatsAsJson`/`writeStatsAsStruct` properties. 
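+   * Passing `None` for `writeStatsAsJson` leaves that property unset so the table default
+   * applies.
+   *
+   * @return the version recorded in the `_last_checkpoint` file after the checkpoint is written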
+ */ + protected def checkpointWithProperty( + writeStatsAsJson: Option[Boolean], + writeStatsAsStruct: Boolean = true)(deltaLog: DeltaLog): Long = { + val asJson = writeStatsAsJson.map { v => + s", delta.checkpoint.writeStatsAsJson = $v" + }.getOrElse("") + sql(s"ALTER TABLE delta.`${deltaLog.dataPath}` " + + s"SET TBLPROPERTIES (delta.checkpoint.writeStatsAsStruct = ${writeStatsAsStruct}${asJson})") + deltaLog.checkpoint() + deltaLog.readLastCheckpointFile().get.version + } + + /** A checkpoint that doesn't have any stats columns, i.e. `stats` and `stats_parsed`. */ + protected def checkpointWithoutStats(deltaLog: DeltaLog): Long = { + sql(s"ALTER TABLE delta.`${deltaLog.dataPath}` " + + s"SET TBLPROPERTIES (delta.checkpoint.writeStatsAsStruct = false, " + + "delta.checkpoint.writeStatsAsJson = false)") + deltaLog.checkpoint() + deltaLog.readLastCheckpointFile().get.version + } + + /** + * Check the existence of the stats field and also not existence of the `unexpected` fields. The + * `addColumns` is a Set of column names that contain the entire tree of columns in the `add` + * field of the schema. + */ + protected def checkFields( + addColumns: Set[String], + statsAsJsonExists: Boolean, + unexpected: Seq[String]): Unit = { + if (statsAsJsonExists) { + assert(addColumns.contains("stats")) + } else { + assert(!addColumns.contains("stats")) + } + unexpected.foreach { colName => + assert(!addColumns.contains(colName), s"$colName shouldn't be part of the " + + "schema because it is of null type.") + } + } +} + + +class DeltaCheckpointWithStructColsNameColumnMappingSuite extends DeltaCheckpointWithStructColsSuite + with DeltaColumnMappingEnableNameMode { + + override protected def runOnlyTests = Seq( + "unpartitioned table", + "partitioned table" + ) +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaColumnMappingSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaColumnMappingSuite.scala new file mode 100644 index 00000000000..c44a88aaa52 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaColumnMappingSuite.scala @@ -0,0 +1,1918 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.File +import java.nio.file.Files + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.sql.delta.DeltaOperations.ManualUpdate +import org.apache.spark.sql.delta.actions.{Action, AddCDCFile, AddFile, Metadata => MetadataAction, Protocol, SetTransaction} +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.schema.SchemaMergingUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.hadoop.fs.Path +import org.apache.parquet.format.converter.ParquetMetadataConverter +import org.apache.parquet.hadoop.ParquetFileReader +import org.scalatest.GivenWhenThen + +import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ +// scalastyle:on import.ordering.noEmptyLine + +trait DeltaColumnMappingSuiteUtils extends SharedSparkSession with DeltaSQLCommandTest { + + + protected def supportedModes: Seq[String] = Seq("id", "name") + + protected def colName(name: String) = s"$name with special chars ,;{}()\n\t=" + + protected def partitionStmt(partCols: Seq[String]): String = { + if (partCols.nonEmpty) s"PARTITIONED BY (${partCols.map(name => s"`$name`").mkString(",")})" + else "" + } + + protected def propString(props: Map[String, String]) = if (props.isEmpty) "" + else { + props + .map { case (key, value) => s"'$key' = '$value'" } + .mkString("TBLPROPERTIES (", ",", ")") + } + + protected def alterTableWithProps( + tableName: String, + props: Map[String, String]): Unit = + spark.sql( + s""" + | ALTER TABLE $tableName SET ${propString(props)} + |""".stripMargin) + + protected def mode(props: Map[String, String]): String = + props.get(DeltaConfigs.COLUMN_MAPPING_MODE.key).getOrElse("none") + + protected def testColumnMapping( + testName: String, + enableSQLConf: Boolean = false, + modes: Option[Seq[String]] = None)(testCode: String => Unit): Unit = { + test(testName) { + modes.getOrElse(supportedModes).foreach { mode => { + withClue(s"Testing under mode: $mode") { + if (enableSQLConf) { + withSQLConf(DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey -> mode) { + testCode(mode) + } + } else { + testCode(mode) + } + } + }} + } + } + + +} + +class DeltaColumnMappingSuite extends QueryTest + with GivenWhenThen + with DeltaColumnMappingSuiteUtils { + + import testImplicits._ + + protected def withId(id: Long): Metadata = + new MetadataBuilder() + .putLong(DeltaColumnMapping.COLUMN_MAPPING_METADATA_ID_KEY, id) + .build() + + protected def withPhysicalName(pname: String) = + new MetadataBuilder() + .putString(DeltaColumnMapping.COLUMN_MAPPING_PHYSICAL_NAME_KEY, pname) + .build() + + protected def withIdAndPhysicalName(id: Long, pname: String): Metadata = + new MetadataBuilder() + .putLong(DeltaColumnMapping.COLUMN_MAPPING_METADATA_ID_KEY, id) + .putString(DeltaColumnMapping.COLUMN_MAPPING_PHYSICAL_NAME_KEY, pname) + .build() + + protected def assertEqual( + actual: StructType, + expected: StructType, + ignorePhysicalName: Boolean = true): Unit = { + + var actualSchema = actual + var expectedSchema = expected + + val fieldsToRemove = 
mutable.Set[String]() + if (ignorePhysicalName) { + fieldsToRemove.add(DeltaColumnMapping.COLUMN_MAPPING_PHYSICAL_NAME_KEY) + } + + def removeFields(metadata: Metadata): Metadata = { + val metadataBuilder = new MetadataBuilder().withMetadata(metadata) + fieldsToRemove.foreach { field => { + if (metadata.contains(field)) { + metadataBuilder.remove(field) + } + } + } + metadataBuilder.build() + } + + // drop fields if needed + actualSchema = SchemaMergingUtils.transformColumns(actual) { (_, field, _) => + field.copy(metadata = removeFields(field.metadata)) + } + expectedSchema = SchemaMergingUtils.transformColumns(expected) { (_, field, _) => + field.copy(metadata = removeFields(field.metadata)) + } + + assert(expectedSchema === actualSchema, + s""" + |Schema mismatch: + | + |expected: + |${expectedSchema.prettyJson} + | + |actual: + |${actualSchema.prettyJson} + |""".stripMargin) + + } + + protected def checkSchema( + tableName: String, + expectedSchema: StructType, + ignorePhysicalName: Boolean = true): Unit = { + + // snapshot schema should have all the expected metadata + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tableName)) + assertEqual(deltaLog.update().schema, expectedSchema, ignorePhysicalName) + + // table schema should not have any metadata + assert(spark.table(tableName).schema === + DeltaColumnMapping.dropColumnMappingMetadata(expectedSchema)) + } + + // NOTE: + // All attached metadata to the following sample inputs, if used in source dataframe, + // will be CLEARED out after metadata is imported into the target table + // See ImplicitMetadataOperation.updateMetadata() for how the old metadata is cleared + protected val schema = new StructType() + .add("a", StringType, true) + .add("b", IntegerType, true) + + protected val schemaNested = new StructType() + .add("a", StringType, true) + .add("b", + new StructType() + .add("c", StringType, true) + .add("d", IntegerType, true), + true + ) + + protected val schemaWithId = new StructType() + .add("a", StringType, true, withId(1)) + .add("b", IntegerType, true, withId(2)) + + protected val schemaWithIdRandom = new StructType() + .add("a", StringType, true, withId(111)) + .add("b", IntegerType, true, withId(222)) + + protected val schemaWithIdAndPhysicalNameRandom = new StructType() + .add("a", StringType, true, withIdAndPhysicalName(111, "asjdklsajdkl")) + .add("b", IntegerType, true, withIdAndPhysicalName(222, "iotiyoiopio")) + + protected val schemaWithDuplicatingIds = new StructType() + .add("a", StringType, true, withId(1)) + .add("b", IntegerType, true, withId(2)) + .add("c", IntegerType, true, withId(2)) + + protected val schemaWithIdAndDuplicatingPhysicalNames = new StructType() + .add("a", StringType, true, withIdAndPhysicalName(1, "aaa")) + .add("b", IntegerType, true, withIdAndPhysicalName(2, "bbb")) + .add("c", IntegerType, true, withIdAndPhysicalName(3, "bbb")) + + protected val schemaWithDuplicatingPhysicalNames = new StructType() + .add("a", StringType, true, withPhysicalName("aaa")) + .add("b", IntegerType, true, withPhysicalName("bbb")) + .add("c", IntegerType, true, withPhysicalName("bbb")) + + protected val schemaWithDuplicatingPhysicalNamesNested = new StructType() + .add("b", + new StructType() + .add("c", StringType, true, withPhysicalName("dupName")) + .add("d", IntegerType, true, withPhysicalName("dupName")), + true, + withPhysicalName("b") + ) + + protected val schemaWithIdNested = new StructType() + .add("a", StringType, true, withId(1)) + .add("b", + new StructType() + .add("c", StringType, 
true, withId(3)) + .add("d", IntegerType, true, withId(4)), + true, + withId(2) + ) + + protected val schemaWithPhysicalNamesNested = new StructType() + .add("a", StringType, true, withIdAndPhysicalName(1, "aaa")) + .add("b", + // let's call this nested struct 'X'. + new StructType() + .add("c", StringType, true, withIdAndPhysicalName(2, "ccc")) + .add("d", IntegerType, true, withIdAndPhysicalName(3, "ddd")) + .add("foo.bar", + new StructType().add("f", LongType, true, withIdAndPhysicalName(4, "fff")), + true, + withIdAndPhysicalName(5, "foo.foo.foo.bar.bar.bar")), + true, + withIdAndPhysicalName(6, "bbb") + ) + .add("g", + // nested struct 'X' (see above) is repeated here. + new StructType() + .add("c", StringType, true, withIdAndPhysicalName(7, "ccc")) + .add("d", IntegerType, true, withIdAndPhysicalName(8, "ddd")) + .add("foo.bar", + new StructType().add("f", LongType, true, withIdAndPhysicalName(9, "fff")), + true, + withIdAndPhysicalName(10, "foo.foo.foo.bar.bar.bar")), + true, + withIdAndPhysicalName(11, "ggg") + ) + .add("h", IntegerType, true, withIdAndPhysicalName(12, "hhh")) + + protected val schemaWithIdNestedRandom = new StructType() + .add("a", StringType, true, withId(111)) + .add("b", + new StructType() + .add("c", StringType, true, withId(333)) + .add("d", IntegerType, true, withId(444)), + true, + withId(222) + ) + + // This schema has both a.b and a . b as physical path for its columns, we would like to make sure + // it shouldn't trigger the duplicated physical name check + protected val schemaWithDottedColumnNames = new StructType() + .add("a.b", StringType, true, withIdAndPhysicalName(1, "a.b")) + .add("a", new StructType() + .add("b", StringType, true, withIdAndPhysicalName(3, "b")), + true, withIdAndPhysicalName(2, "a")) + + protected def dfWithoutIds(spark: SparkSession) = + spark.createDataFrame(Seq(Row("str1", 1), Row("str2", 2)).asJava, schema) + + protected def dfWithoutIdsNested(spark: SparkSession) = + spark.createDataFrame( + Seq(Row("str1", Row("str1.1", 1)), Row("str2", Row("str1.2", 2))).asJava, schemaNested) + + protected def dfWithIds(spark: SparkSession, randomIds: Boolean = false) = + spark.createDataFrame(Seq(Row("str1", 1), Row("str2", 2)).asJava, + if (randomIds) schemaWithIdRandom else schemaWithId) + + protected def dfWithIdsNested(spark: SparkSession, randomIds: Boolean = false) = + spark.createDataFrame( + Seq(Row("str1", Row("str1.1", 1)), Row("str2", Row("str1.2", 2))).asJava, + if (randomIds) schemaWithIdNestedRandom else schemaWithIdNested) + + protected def checkProperties( + tableName: String, + mode: Option[String] = None, + readerVersion: Int = 1, + writerVersion: Int = 2, + curMaxId: Long = 0): Unit = { + val props = + spark.sql(s"SHOW TBLPROPERTIES $tableName").as[(String, String)].collect().toMap + assert(props.get("delta.minReaderVersion").map(_.toInt) == Some(readerVersion)) + assert(props.get("delta.minWriterVersion").map(_.toInt) == Some(writerVersion)) + + assert(props.get(DeltaConfigs.COLUMN_MAPPING_MODE.key) == mode) + assert(props.get(DeltaConfigs.COLUMN_MAPPING_MAX_ID.key).map(_.toLong).getOrElse(0) == curMaxId) + } + + protected def createTableWithDeltaTableAPI( + tableName: String, + props: Map[String, String] = Map.empty, + withColumnIds: Boolean = false, + isPartitioned: Boolean = false): Unit = { + val schemaToUse = if (withColumnIds) schemaWithId else schema + val builder = io.delta.tables.DeltaTable.createOrReplace(spark) + .tableName(tableName) + .addColumn(schemaToUse.fields(0)) + .addColumn(schemaToUse.fields(1)) + 
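+    // Attach any requested table properties (e.g. the column mapping mode) and the optional
+    // partitioning before executing the builder.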
props.foreach { case (key, value) => + builder.property(key, value) + } + if (isPartitioned) { + builder.partitionedBy("a") + } + builder.execute() + } + + protected def createTableWithSQLCreateOrReplaceAPI( + tableName: String, + props: Map[String, String] = Map.empty, + withColumnIds: Boolean = false, + isPartitioned: Boolean = false, + nested: Boolean = false, + randomIds: Boolean = false): Unit = { + withTable("source") { + val dfToWrite = if (withColumnIds) { + if (nested) { + dfWithIdsNested(spark, randomIds) + } else { + dfWithIds(spark, randomIds) + } + } else { + if (nested) { + dfWithoutIdsNested(spark) + } else { + dfWithoutIds(spark) + } + } + dfToWrite.write.saveAsTable("source") + val partitionStmt = if (isPartitioned) "PARTITIONED BY (a)" else "" + spark.sql( + s""" + |CREATE OR REPLACE TABLE $tableName + |USING DELTA + |$partitionStmt + |${propString(props)} + |AS SELECT * FROM source + |""".stripMargin) + } + } + + protected def createTableWithSQLAPI( + tableName: String, + props: Map[String, String] = Map.empty, + withColumnIds: Boolean = false, + isPartitioned: Boolean = false, + nested: Boolean = false, + randomIds: Boolean = false): Unit = { + withTable("source") { + val dfToWrite = if (withColumnIds) { + if (nested) { + dfWithIdsNested(spark, randomIds) + } else { + dfWithIds(spark, randomIds) + } + } else { + if (nested) { + dfWithoutIdsNested(spark) + } else { + dfWithoutIds(spark) + } + } + dfToWrite.write.saveAsTable("source") + val partitionStmt = if (isPartitioned) "PARTITIONED BY (a)" else "" + spark.sql( + s""" + |CREATE TABLE $tableName + |USING DELTA + |$partitionStmt + |${propString(props)} + |AS SELECT * FROM source + |""".stripMargin) + } + } + + protected def createTableWithDataFrameAPI( + tableName: String, + props: Map[String, String] = Map.empty, + withColumnIds: Boolean = false, + isPartitioned: Boolean = false, + nested: Boolean = false, + randomIds: Boolean = false): Unit = { + val sqlConfs = props.map { case (key, value) => + "spark.databricks.delta.properties.defaults." 
+ key.stripPrefix("delta.") -> value + } + withSQLConf(sqlConfs.toList: _*) { + val dfToWrite = if (withColumnIds) { + if (nested) { + dfWithIdsNested(spark, randomIds) + } else { + dfWithIds(spark, randomIds) + } + } else { + if (nested) { + dfWithoutIdsNested(spark) + } else { + dfWithoutIds(spark) + } + } + if (isPartitioned) { + dfToWrite.write.format("delta").partitionBy("a").saveAsTable(tableName) + } else { + dfToWrite.write.format("delta").saveAsTable(tableName) + } + } + } + + protected def createTableWithDataFrameWriterV2API( + tableName: String, + props: Map[String, String] = Map.empty, + withColumnIds: Boolean = false, + isPartitioned: Boolean = false, + nested: Boolean = false, + randomIds: Boolean = false): Unit = { + val dfToWrite = if (withColumnIds) { + if (nested) { + dfWithIdsNested(spark, randomIds) + } else { + dfWithIds(spark, randomIds) + } + } else { + if (nested) { + dfWithoutIdsNested(spark) + } else { + dfWithoutIds(spark) + } + } + val writer = dfToWrite.writeTo(tableName).using("delta") + props.foreach(prop => writer.tableProperty(prop._1, prop._2)) + if (isPartitioned) writer.partitionedBy('a) + writer.create() + } + + protected def createStrictSchemaTableWithDeltaTableApi( + tableName: String, + schema: StructType, + props: Map[String, String] = Map.empty, + isPartitioned: Boolean = false): Unit = { + val builder = io.delta.tables.DeltaTable.createOrReplace(spark) + .tableName(tableName) + builder.addColumns(schema) + props.foreach(prop => builder.property(prop._1, prop._2)) + if (isPartitioned) builder.partitionedBy("a") + builder.execute() + } + + protected def testCreateTableColumnMappingMode( + tableName: String, + expectedSchema: StructType, + ignorePhysicalName: Boolean, + mode: String, + createNewTable: Boolean = true)(fn: => Unit): Unit = { + withTable(tableName) { + fn + checkProperties(tableName, + readerVersion = 2, + writerVersion = 5, + mode = Some(mode), + curMaxId = DeltaColumnMapping.findMaxColumnId(expectedSchema) + ) + checkSchema(tableName, expectedSchema, ignorePhysicalName) + } + } + + test("find max column id in existing columns") { + assert(DeltaColumnMapping.findMaxColumnId(schemaWithId) == 2) + assert(DeltaColumnMapping.findMaxColumnId(schemaWithIdNested) == 4) + assert(DeltaColumnMapping.findMaxColumnId(schemaWithIdRandom) == 222) + assert(DeltaColumnMapping.findMaxColumnId(schemaWithIdNestedRandom) == 444) + assert(DeltaColumnMapping.findMaxColumnId(schema) == 0) + assert(DeltaColumnMapping.findMaxColumnId(new StructType()) == 0) + } + + test("Enable column mapping with schema change on table with no schema") { + withTempDir { dir => + val tablePath = dir.getCanonicalPath + Seq((1, "a"), (2, "b")).toDF("id", "name") + .write.mode("append").format("delta").save(tablePath) + val deltaLog = DeltaLog.forTable(spark, tablePath) + val txn = deltaLog.startTransaction() + txn.commitManually(actions.Metadata()) // Whip the schema out + val txn2 = deltaLog.startTransaction() + txn2.commitManually(Protocol(2, 5)) + txn2.updateMetadata(actions.Metadata( + configuration = Map("delta.columnMapping.mode" -> "name"), + schemaString = new StructType().add("a", StringType).json)) + + // Now ensure that it is not allowed to enable column mapping with schema change + // on a table with a schema + Seq((1, "a"), (2, "b")).toDF("id", "name") + .write.mode("overwrite").format("delta") + .option("overwriteSchema", "true") + .save(tablePath) + val txn3 = deltaLog.startTransaction() + txn3.commitManually(Protocol(2, 5)) + val e = 
intercept[DeltaColumnMappingUnsupportedException] { + txn3.updateMetadata( + actions.Metadata( + configuration = Map("delta.columnMapping.mode" -> "name"), + schemaString = new StructType().add("a", StringType).json)) + } + val msg = "Schema changes are not allowed during the change of column mapping mode." + assert(e.getMessage.contains(msg)) + } + } + + // TODO: repurpose this once we roll out the proper semantics for CM + streaming + testColumnMapping("isColumnMappingReadCompatible") { mode => + // Set up table based on mode and return the initial metadata actions for comparison + def setupInitialTable(deltaLog: DeltaLog): (MetadataAction, MetadataAction) = { + val tablePath = deltaLog.dataPath.toString + if (mode == NameMapping.name) { + Seq((1, "a"), (2, "b")).toDF("id", "name") + .write.mode("append").format("delta").save(tablePath) + // schema: + val m0 = deltaLog.update().metadata + + // add a column + sql(s"ALTER TABLE delta.`$tablePath` ADD COLUMN (score long)") + // schema: + val m1 = deltaLog.update().metadata + + // column mapping not enabled -> not blocked at all + assert(DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m1, m0)) + + // upgrade to name mode + alterTableWithProps(s"delta.`$tablePath`", Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name", + DeltaConfigs.MIN_READER_VERSION.key -> "2", + DeltaConfigs.MIN_WRITER_VERSION.key -> "5")) + + (m0, m1) + } else { + // for id mode, just create the table + withSQLConf(DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey -> "id") { + Seq((1, "a"), (2, "b")).toDF("id", "name") + .write.mode("append").format("delta").save(tablePath) + } + // schema: + val m0 = deltaLog.update().metadata + + // add a column + sql(s"ALTER TABLE delta.`$tablePath` ADD COLUMN (score long)") + // schema: + val m1 = deltaLog.update().metadata + + // add column shouldn't block + assert(DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m1, m0)) + + (m0, m1) + } + } + + withTempDir { dir => + val tablePath = dir.getCanonicalPath + val deltaLog = DeltaLog.forTable(spark, tablePath) + + val (m0, m1) = setupInitialTable(deltaLog) + + // schema: + val m2 = deltaLog.update().metadata + + assert(DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m2, m1)) + assert(DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m2, m0)) + + // rename column + sql(s"ALTER TABLE delta.`$tablePath` RENAME COLUMN score TO age") + // schema: + val m3 = deltaLog.update().metadata + + assert(!DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m3, m2)) + assert(!DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m3, m1)) + // But IS read compatible with the initial schema, because the added column should not + // be blocked by this column mapping check. 
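+      // (m0 never contained `score`, so relative to m0 the new schema only adds a column, and
+      // added columns are not blocked by this check.)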
+ assert(DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m3, m0)) + + // drop a column + sql(s"ALTER TABLE delta.`$tablePath` DROP COLUMN age") + // schema: + val m4 = deltaLog.update().metadata + + assert(!DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m4, m3)) + assert(!DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m4, m2)) + assert(!DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m4, m1)) + // but IS read compatible with the initial schema, because the added column is dropped + assert(DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m4, m0)) + + // add back the same column + sql(s"ALTER TABLE delta.`$tablePath` ADD COLUMN (score long)") + // schema: + val m5 = deltaLog.update().metadata + + // It IS read compatible with the previous schema, because the added column should not + // blocked by this column mapping check. + assert(DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m5, m4)) + assert(!DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m5, m3)) + assert(!DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m5, m2)) + // But Since the new added column has a different physical name as all previous columns, + // even it has the same logical name as say, m1.schema, we will still block + assert(!DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m5, m1)) + // But it IS read compatible with the initial schema, because the added column should not + // be blocked by this column mapping check. + assert(DeltaColumnMapping.hasNoColumnMappingSchemaChanges(m5, m0)) + } + } + + testColumnMapping("create table through raw schema API should " + + "auto bump the version and retain input metadata") { mode => + + // provides id only (let Delta generate physical name for me) + testCreateTableColumnMappingMode( + "t1", schemaWithIdRandom, ignorePhysicalName = true, mode = mode) { + createStrictSchemaTableWithDeltaTableApi( + "t1", + schemaWithIdRandom, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode)) + } + + // provides id and physical name (Delta shouldn't rebuild/override) + // we use random ids as input, which shouldn't be changed too + testCreateTableColumnMappingMode( + "t1", schemaWithIdAndPhysicalNameRandom, ignorePhysicalName = false, mode = mode) { + createStrictSchemaTableWithDeltaTableApi( + "t1", + schemaWithIdAndPhysicalNameRandom, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode)) + } + + } + + testColumnMapping("create table through dataframe should " + + "auto bumps the version and rebuild schema metadata/drop dataframe metadata") { mode => + // existing ids should be dropped/ignored and ids should be regenerated + // so for tests below even if we are ingesting dfs with random ids + // we should still expect schema with normal sequential ids + val expectedSchema = schemaWithId + + testCreateTableColumnMappingMode( + "t1", expectedSchema, ignorePhysicalName = true, mode = mode) { + createTableWithSQLAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + withColumnIds = true, + randomIds = true) + } + + testCreateTableColumnMappingMode( + "t1", expectedSchema, ignorePhysicalName = true, mode = mode) { + createTableWithDataFrameAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + withColumnIds = true, + randomIds = true) + } + + testCreateTableColumnMappingMode( + "t1", expectedSchema, ignorePhysicalName = true, mode = mode) { + createTableWithSQLCreateOrReplaceAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + withColumnIds = true, + randomIds = true) + } + + testCreateTableColumnMappingMode( + "t1", expectedSchema, 
ignorePhysicalName = true, mode = mode) { + createTableWithDataFrameWriterV2API( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + withColumnIds = true, + randomIds = true) + } + } + + test("create table with none mode") { + withTable("t1") { + // column ids will be dropped, having the options here to make sure such happens + createTableWithSQLAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "none"), + withColumnIds = true, + randomIds = true) + + // Should be still on old protocol, the schema shouldn't have any metadata + checkProperties( + "t1", + mode = Some("none")) + + checkSchema("t1", schema, ignorePhysicalName = false) + } + } + + testColumnMapping("update column mapped table invalid max id property is blocked") { mode => + withTable("t1") { + createTableWithSQLAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + withColumnIds = true + ) + + val log = DeltaLog.forTable(spark, TableIdentifier("t1")) + // Get rid of max column id prop + assert { + intercept[DeltaAnalysisException] { + log.withNewTransaction { txn => + val existingMetadata = log.update().metadata + txn.commit(existingMetadata.copy(configuration = + existingMetadata.configuration - DeltaConfigs.COLUMN_MAPPING_MAX_ID.key) :: Nil, + DeltaOperations.ManualUpdate) + } + }.getErrorClass == "DELTA_COLUMN_MAPPING_MAX_COLUMN_ID_NOT_SET" + } + // Use an invalid max column id prop + assert { + intercept[DeltaAnalysisException] { + log.withNewTransaction { txn => + val existingMetadata = log.update().metadata + txn.commit(existingMetadata.copy(configuration = + existingMetadata.configuration ++ Map( + // '1' is less than the current max + DeltaConfigs.COLUMN_MAPPING_MAX_ID.key -> "1" + )) :: Nil, + DeltaOperations.ManualUpdate) + } + }.getErrorClass == "DELTA_COLUMN_MAPPING_MAX_COLUMN_ID_NOT_SET_CORRECTLY" + } + } + } + + testColumnMapping( + "create column mapped table with duplicated id/physical name should error" + ) { mode => + withTable("t1") { + val e = intercept[ColumnMappingException] { + createStrictSchemaTableWithDeltaTableApi( + "t1", + schemaWithDuplicatingIds, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode)) + } + assert( + e.getMessage.contains( + s"Found duplicated column id `2` in column mapping mode `$mode`")) + assert(e.getMessage.contains(DeltaColumnMapping.COLUMN_MAPPING_METADATA_ID_KEY)) + + val e2 = intercept[ColumnMappingException] { + createStrictSchemaTableWithDeltaTableApi( + "t1", + schemaWithIdAndDuplicatingPhysicalNames, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode)) + } + assert( + e2.getMessage.contains( + s"Found duplicated physical name `bbb` in column mapping mode `$mode`")) + assert(e2.getMessage.contains(DeltaColumnMapping.COLUMN_MAPPING_PHYSICAL_NAME_KEY)) + } + + // for name mode specific, we would also like to check for name duplication + if (mode == "name") { + val e = intercept[ColumnMappingException] { + createStrictSchemaTableWithDeltaTableApi( + "t1", + schemaWithDuplicatingPhysicalNames, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode)) + } + assert( + e.getMessage.contains( + s"Found duplicated physical name `bbb` in column mapping mode `$mode`") + ) + + val e2 = intercept[ColumnMappingException] { + createStrictSchemaTableWithDeltaTableApi( + "t1", + schemaWithDuplicatingPhysicalNamesNested, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode)) + } + assert( + e2.getMessage.contains( + s"Found duplicated physical name `b.dupName` in column mapping mode `$mode`") + ) + } + } + + testColumnMapping( + "create table in column mapping mode 
without defining ids explicitly" + ) { mode => + withTable("t1") { + // column ids will be dropped, having the options here to make sure such happens + createTableWithSQLAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + withColumnIds = true, + randomIds = true) + checkSchema("t1", schemaWithId) + checkProperties("t1", + readerVersion = 2, + writerVersion = 5, + mode = Some(mode), + curMaxId = DeltaColumnMapping.findMaxColumnId(schemaWithId) + ) + } + } + + testColumnMapping("alter column order in schema on new protocol") { mode => + withTable("t1") { + // column ids will be dropped, having the options here to make sure such happens + createTableWithSQLAPI("t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + withColumnIds = true, + nested = true, + randomIds = true) + spark.sql( + """ + |ALTER TABLE t1 ALTER COLUMN a AFTER b + |""".stripMargin + ) + + checkProperties("t1", + readerVersion = 2, + writerVersion = 5, + mode = Some(mode), + curMaxId = DeltaColumnMapping.findMaxColumnId(schemaWithIdNested)) + checkSchema( + "t1", + schemaWithIdNested.copy(fields = schemaWithIdNested.fields.reverse)) + } + } + + testColumnMapping("add column in schema on new protocol") { mode => + + def check(expectedSchema: StructType): Unit = { + val curMaxId = DeltaColumnMapping.findMaxColumnId(expectedSchema) + 1 + checkSchema("t1", expectedSchema) + spark.sql( + """ + |ALTER TABLE t1 ADD COLUMNS (c STRING AFTER b) + |""".stripMargin + ) + + checkProperties("t1", + readerVersion = 2, + writerVersion = 5, + mode = Some(mode), + curMaxId = curMaxId) + + checkSchema("t1", expectedSchema.add("c", StringType, true, withId(curMaxId))) + + val curMaxId2 = DeltaColumnMapping.findMaxColumnId(expectedSchema) + 2 + + spark.sql( + """ + |ALTER TABLE t1 ADD COLUMNS (d STRING AFTER c) + |""".stripMargin + ) + checkProperties("t1", + readerVersion = 2, + writerVersion = 5, + mode = Some(mode), + curMaxId = curMaxId2) + checkSchema("t1", + expectedSchema + .add("c", StringType, true, withId(curMaxId)) + .add("d", StringType, true, withId(curMaxId2))) + } + + withTable("t1") { + // column ids will be dropped, having the options here to make sure such happens + createTableWithSQLAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), withColumnIds = true, randomIds = true) + + check(schemaWithId) + } + + withTable("t1") { + // column ids will NOT be dropped, so future ids should update based on the current max + createStrictSchemaTableWithDeltaTableApi( + "t1", + schemaWithIdRandom, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode) + ) + + check(schemaWithIdRandom) + } + } + + testColumnMapping("add nested column in schema on new protocol") { mode => + withTable("t1") { + // column ids will be dropped, having the options here to make sure such happens + createTableWithSQLAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + withColumnIds = true, + nested = true, + randomIds = true) + + checkSchema("t1", schemaWithIdNested) + + val curMaxId = DeltaColumnMapping.findMaxColumnId(schemaWithIdNested) + 1 + + spark.sql( + """ + |ALTER TABLE t1 ADD COLUMNS (b.e STRING AFTER d) + |""".stripMargin + ) + + checkProperties("t1", + readerVersion = 2, + writerVersion = 5, + mode = Some(mode), + curMaxId = curMaxId) + checkSchema("t1", + schemaWithIdNested.merge( + new StructType().add( + "b", + new StructType().add( + "e", StringType, true, withId(5)), + true, + withId(2) + )) + ) + + val curMaxId2 = DeltaColumnMapping.findMaxColumnId(schemaWithIdNested) + 2 + spark.sql( + """ + |ALTER 
TABLE t1 ADD COLUMNS (b.f STRING AFTER e) + |""".stripMargin + ) + checkProperties("t1", + readerVersion = 2, + writerVersion = 5, + mode = Some(mode), + curMaxId = curMaxId2) + checkSchema("t1", + schemaWithIdNested.merge( + new StructType().add( + "b", + new StructType().add( + "e", StringType, true, withId(5)), + true, + withId(2) + )).merge( + new StructType().add( + "b", + new StructType() + .add("f", StringType, true, withId(6)), + true, + withId(2)) + )) + + } + } + + testColumnMapping("write/merge df to table") { mode => + withTable("t1") { + // column ids will be dropped, having the options here to make sure such happens + createTableWithDataFrameAPI("t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), withColumnIds = true, randomIds = true) + val curMaxId = DeltaColumnMapping.findMaxColumnId(schemaWithId) + + val df1 = dfWithIds(spark) + df1.write + .format("delta") + .mode("append") + .saveAsTable("t1") + + checkProperties("t1", + readerVersion = 2, + writerVersion = 5, + mode = Some(mode), + curMaxId = curMaxId) + checkSchema("t1", schemaWithId) + + val previousSchema = spark.table("t1").schema + // ingest df with random id should not cause existing schema col id to change + val df2 = dfWithIds(spark, randomIds = true) + df2.write + .format("delta") + .mode("append") + .saveAsTable("t1") + + checkProperties("t1", + readerVersion = 2, + writerVersion = 5, + mode = Some(mode), + curMaxId = curMaxId) + + // with checkPhysicalSchema check + checkSchema("t1", schemaWithId) + + // compare with before + assertEqual(spark.table("t1").schema, + previousSchema, ignorePhysicalName = false) + + val df3 = spark.createDataFrame( + Seq(Row("str3", 3, "str3.1"), Row("str4", 4, "str4.1")).asJava, + schemaWithId.add("c", StringType, true, withId(3)) + ) + df3.write + .option("mergeSchema", "true") + .format("delta") + .mode("append") + .saveAsTable("t1") + + val curMaxId2 = DeltaColumnMapping.findMaxColumnId(schemaWithId) + 1 + checkProperties("t1", + readerVersion = 2, + writerVersion = 5, + mode = Some(mode), + curMaxId = curMaxId2) + checkSchema("t1", schemaWithId.add("c", StringType, true, withId(3))) + } + } + + testColumnMapping(s"try modifying restricted max id property should fail") { mode => + withTable("t1") { + val e = intercept[UnsupportedOperationException] { + createTableWithSQLAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode, + DeltaConfigs.COLUMN_MAPPING_MAX_ID.key -> "100"), + withColumnIds = true, + nested = true) + } + assert(e.getMessage.contains(s"The Delta table configuration " + + s"${DeltaConfigs.COLUMN_MAPPING_MAX_ID.key} cannot be specified by the user")) + } + + withTable("t1") { + createTableWithSQLAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + withColumnIds = true, + nested = true) + + val e2 = intercept[UnsupportedOperationException] { + alterTableWithProps("t1", Map(DeltaConfigs.COLUMN_MAPPING_MAX_ID.key -> "100")) + } + + assert(e2.getMessage.contains(s"The Delta table configuration " + + s"${DeltaConfigs.COLUMN_MAPPING_MAX_ID.key} cannot be specified by the user")) + } + + withTable("t1") { + val e = intercept[UnsupportedOperationException] { + createTableWithDataFrameAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode, + DeltaConfigs.COLUMN_MAPPING_MAX_ID.key -> "100"), + withColumnIds = true, + nested = true) + } + assert(e.getMessage.contains(s"The Delta table configuration " + + s"${DeltaConfigs.COLUMN_MAPPING_MAX_ID.key} cannot be specified by the user")) + } + } + + testColumnMapping("physical data and 
partition schema") { mode => + withTable("t1") { + // column ids will be dropped, having the options here to make sure such happens + createTableWithSQLAPI("t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + withColumnIds = true, + randomIds = true) + + val metadata = DeltaLog.forTableWithSnapshot(spark, TableIdentifier("t1"))._2.metadata + + assertEqual(metadata.schema, schemaWithId) + assertEqual(metadata.schema, StructType(metadata.partitionSchema ++ metadata.dataSchema)) + } + } + + testColumnMapping("block CONVERT TO DELTA") { mode => + withSQLConf(DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey -> mode) { + withTempDir { tablePath => + val tempDir = tablePath.getCanonicalPath + val df1 = Seq(0).toDF("id") + .withColumn("key1", lit("A1")) + .withColumn("key2", lit("A2")) + + df1.write + .partitionBy(Seq("key1"): _*) + .format("parquet") + .mode("overwrite") + .save(tempDir) + + val e = intercept[UnsupportedOperationException] { + sql(s"convert to delta parquet.`$tempDir` partitioned by (key1 String)") + } + assert(e.getMessage.contains(s"cannot be set to `$mode` when using CONVERT TO DELTA")) + } + } + } + + testColumnMapping( + "column mapping batch scan should detect physical name changes", + enableSQLConf = true + ) { _ => + withTempDir { dir => + spark.range(10).toDF("id") + .write.format("delta").save(dir.getCanonicalPath) + // Analysis phase + val df = spark.read.format("delta").load(dir.getCanonicalPath) + // Overwrite schema but with same logical schema + withSQLConf(DeltaSQLConf.REUSE_COLUMN_MAPPING_METADATA_DURING_OVERWRITE.key -> "false") { + spark.range(10).toDF("id") + .write.format("delta").option("overwriteSchema", "true").mode("overwrite") + .save(dir.getCanonicalPath) + } + // The previous analyzed DF no longer is able to read the data any more because it generates + // new physical name for the underlying columns, so we should fail. + assert { + intercept[DeltaAnalysisException] { + df.collect() + }.getErrorClass == "DELTA_SCHEMA_CHANGE_SINCE_ANALYSIS" + } + // See we can't read back the same data any more + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_ON_READ_CHECK_ENABLED.key -> "false") { + checkAnswer( + df, + (0 until 10).map(_ => Row(null)) + ) + } + } + } + + protected def testPartitionPath(tableName: String)(createFunc: Boolean => Unit): Unit = { + withTable(tableName) { + Seq(true, false).foreach { isPartitioned => + spark.sql(s"drop table if exists $tableName") + createFunc(isPartitioned) + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, TableIdentifier(tableName)) + val prefixLen = DeltaConfigs.RANDOM_PREFIX_LENGTH.fromMetaData(snapshot.metadata) + Seq(("str3", 3), ("str4", 4)).toDF(schema.fieldNames: _*) + .write.format("delta").mode("append").saveAsTable(tableName) + checkAnswer(spark.table(tableName), + Row("str1", 1) :: Row("str2", 2) :: Row("str3", 3) :: Row("str4", 4) :: Nil) + // both new table writes and appends should use prefix + val pattern = s"[A-Za-z0-9]{$prefixLen}/part-.*parquet" + assert(snapshot.allFiles.collect().map(_.path).forall(_.matches(pattern))) + } + } + } + + // Copied verbatim from the "valid replaceWhere" test in DeltaSuite + protected def testReplaceWhere(): Unit = + Seq(true, false).foreach { enabled => + withSQLConf(DeltaSQLConf.REPLACEWHERE_DATACOLUMNS_ENABLED.key -> enabled.toString) { + Seq(true, false).foreach { partitioned => + // Skip when it's not enabled and not partitioned. 
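+        // (replaceWhere on arbitrary data columns only exists behind the
+        // REPLACEWHERE_DATACOLUMNS_ENABLED flag; with it off, only the partition-column form is
+        // meaningful, so the disabled + unpartitioned combination has nothing to exercise.)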
+ if (enabled || partitioned) { + withTempDir { dir => + val writer = Seq(1, 2, 3, 4).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0) + .write + .format("delta") + + if (partitioned) { + writer.partitionBy("is_odd").save(dir.toString) + } else { + writer.save(dir.toString) + } + + def data: DataFrame = spark.read.format("delta").load(dir.toString) + + Seq(5, 7).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "is_odd = true") + .save(dir.toString) + checkAnswer( + data, + Seq(2, 4, 5, 7).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0)) + + // replaceWhere on non-partitioning columns if enabled. + if (enabled) { + Seq(6, 8).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "is_even = true") + .save(dir.toString) + checkAnswer( + data, + Seq(5, 6, 7, 8).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0)) + } + } + } + } + } + } + + testColumnMapping("valid replaceWhere", enableSQLConf = true) { _ => + testReplaceWhere() + } + + protected def verifyUpgradeAndTestSchemaEvolution(tableName: String): Unit = { + checkProperties(tableName, + readerVersion = 2, + writerVersion = 5, + mode = Some("name"), + curMaxId = 4) + checkSchema(tableName, schemaWithIdNested) + val expectedSchema = new StructType() + .add("a", StringType, true, withIdAndPhysicalName(1, "a")) + .add("b", + new StructType() + .add("c", StringType, true, withIdAndPhysicalName(3, "c")) + .add("d", IntegerType, true, withIdAndPhysicalName(4, "d")), + true, + withIdAndPhysicalName(2, "b")) + + assertEqual( + DeltaLog.forTableWithSnapshot(spark, TableIdentifier(tableName))._2.schema, + expectedSchema, + ignorePhysicalName = false) + + checkAnswer(spark.table(tableName), dfWithoutIdsNested(spark)) + + // test schema evolution + val newNestedData = + spark.createDataFrame( + Seq(Row("str3", Row("str1.3", 3), "new value")).asJava, + schemaNested.add("e", StringType)) + newNestedData.write.format("delta") + .option("mergeSchema", "true") + .mode("append").saveAsTable(tableName) + checkAnswer( + spark.table(tableName), + dfWithoutIdsNested(spark).withColumn("e", lit(null)).union(newNestedData)) + + val newTableSchema = DeltaLog.forTableWithSnapshot(spark, TableIdentifier(tableName))._2.schema + val newPhysicalName = DeltaColumnMapping.getPhysicalName(newTableSchema("e")) + + // physical name of new column should be GUID, not display name + assert(newPhysicalName.startsWith("col-")) + assertEqual( + newTableSchema, + expectedSchema.add("e", StringType, true, withIdAndPhysicalName(5, newPhysicalName)), + ignorePhysicalName = false) + } + + test("change mode on new protocol table") { + withTable("t1") { + createTableWithSQLAPI( + "t1", + isPartitioned = true, + nested = true, + props = Map( + DeltaConfigs.MIN_READER_VERSION.key -> "2", + DeltaConfigs.MIN_WRITER_VERSION.key -> "5")) + + alterTableWithProps("t1", Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name")) + verifyUpgradeAndTestSchemaEvolution("t1") + } + } + + test("upgrade first and then change mode") { + withTable("t1") { + createTableWithSQLAPI("t1", isPartitioned = true, nested = true) + alterTableWithProps("t1", Map( + DeltaConfigs.MIN_READER_VERSION.key -> "2", 
+ DeltaConfigs.MIN_WRITER_VERSION.key -> "5")) + + alterTableWithProps("t1", Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name")) + verifyUpgradeAndTestSchemaEvolution("t1") + } + } + + test("upgrade and change mode in one ALTER TABLE cmd") { + withTable("t1") { + createTableWithSQLAPI("t1", isPartitioned = true, nested = true) + + alterTableWithProps("t1", Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name", + DeltaConfigs.MIN_READER_VERSION.key -> "2", + DeltaConfigs.MIN_WRITER_VERSION.key -> "5")) + verifyUpgradeAndTestSchemaEvolution("t1") + } + } + + test("illegal mode changes") { + val oldModes = Seq("none") ++ supportedModes + val newModes = Seq("none") ++ supportedModes + val upgrade = Seq(true, false) + oldModes.foreach { oldMode => + newModes.foreach { newMode => + upgrade.foreach { ug => + val oldProps = Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> oldMode) + val newProps = Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> newMode) ++ + (if (!ug) Map.empty else Map( + DeltaConfigs.MIN_READER_VERSION.key -> "2", + DeltaConfigs.MIN_WRITER_VERSION.key -> "5")) + + if (oldMode != newMode && !(oldMode == "none" && newMode == "name")) { + Given(s"old mode: $oldMode, new mode: $newMode, upgrade: $ug") + val e = intercept[UnsupportedOperationException] { + withTable("t1") { + createTableWithSQLAPI("t1", props = oldProps) + alterTableWithProps("t1", props = newProps) + } + } + assert(e.getMessage.contains("Changing column mapping mode from")) + } + } + } + } + } + + test("legal mode change without explicit upgrade") { + val e = intercept[UnsupportedOperationException] { + withTable("t1") { + createTableWithSQLAPI("t1") + alterTableWithProps("t1", props = Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name")) + } + } + assert(e.getMessage.contains("Your current table protocol version does not" + + " support changing column mapping modes")) + } + + test("getPhysicalNameFieldMap") { + // To keep things simple, we use schema `schemaWithPhysicalNamesNested` such that the + // physical name is just the logical name repeated three times. 
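+    // Minimal sketch of the mapping this test verifies (hypothetical two-column schema built
+    // with the withIdAndPhysicalName helper from this suite): each entry is keyed by the
+    // *physical* name path while the value keeps the logical field.
+    val exampleSchema = new StructType()
+      .add("x", StringType, true, withIdAndPhysicalName(1, "xxx"))
+      .add("y", StringType, true, withIdAndPhysicalName(2, "yyy"))
+    assert(DeltaColumnMapping.getPhysicalNameFieldMap(exampleSchema)
+      .map { case (path, field) => (path, field.name) } ===
+      Map(Seq("xxx") -> "x", Seq("yyy") -> "y"))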
+ + val actual = DeltaColumnMapping + .getPhysicalNameFieldMap(schemaWithPhysicalNamesNested) + .map { case (physicalPath, field) => (physicalPath, field.name) } + + val expected = Map[Seq[String], String]( + Seq("aaa") -> "a", + Seq("bbb") -> "b", + Seq("bbb", "ccc") -> "c", + Seq("bbb", "ddd") -> "d", + Seq("bbb", "foo.foo.foo.bar.bar.bar") -> "foo.bar", + Seq("bbb", "foo.foo.foo.bar.bar.bar", "fff") -> "f", + Seq("ggg") -> "g", + Seq("ggg", "ccc") -> "c", + Seq("ggg", "ddd") -> "d", + Seq("ggg", "foo.foo.foo.bar.bar.bar") -> "foo.bar", + Seq("ggg", "foo.foo.foo.bar.bar.bar", "fff") -> "f", + Seq("hhh") -> "h" + ) + + assert(expected === actual, + s""" + |The actual physicalName -> logicalName map + |${actual.mkString("\n")} + |did not equal the expected map + |${expected.mkString("\n")} + |""".stripMargin) + } + + testColumnMapping("is drop/rename column operation") { mode => + import DeltaColumnMapping.{isDropColumnOperation, isRenameColumnOperation} + + withTable("t1") { + def getMetadata(): MetadataAction = { + DeltaLog.forTableWithSnapshot(spark, TableIdentifier("t1"))._2.metadata + } + + createStrictSchemaTableWithDeltaTableApi( + "t1", + schemaWithPhysicalNamesNested, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode) + ) + + // case 1: currentSchema compared with itself + var currentMetadata = getMetadata() + var newMetadata = getMetadata() + assert( + !isDropColumnOperation(newMetadata, currentMetadata) && + !isRenameColumnOperation(newMetadata, currentMetadata) + ) + + // case 2: add a top-level column + sql("ALTER TABLE t1 ADD COLUMNS (ping INT)") + currentMetadata = newMetadata + newMetadata = getMetadata() + assert( + !isDropColumnOperation(newMetadata, currentMetadata) && + !isRenameColumnOperation(newMetadata, currentMetadata) + ) + + // case 3: add a nested column + sql("ALTER TABLE t1 ADD COLUMNS (b.`foo.bar`.`my.new;col()` LONG)") + currentMetadata = newMetadata + newMetadata = getMetadata() + assert( + !isDropColumnOperation(newMetadata, currentMetadata) && + !isRenameColumnOperation(newMetadata, currentMetadata) + ) + + // case 4: drop a top-level column + sql("ALTER TABLE t1 DROP COLUMN (ping)") + currentMetadata = newMetadata + newMetadata = getMetadata() + assert( + isDropColumnOperation(newMetadata, currentMetadata) && + !isRenameColumnOperation(newMetadata, currentMetadata) + ) + + // case 5: drop a nested column + sql("ALTER TABLE t1 DROP COLUMN (g.`foo.bar`)") + currentMetadata = newMetadata + newMetadata = getMetadata() + assert( + isDropColumnOperation(newMetadata, currentMetadata) && + !isRenameColumnOperation(newMetadata, currentMetadata) + ) + + // case 6: rename a top-level column + sql("ALTER TABLE t1 RENAME COLUMN a TO pong") + currentMetadata = newMetadata + newMetadata = getMetadata() + assert( + !isDropColumnOperation(newMetadata, currentMetadata) && + isRenameColumnOperation(newMetadata, currentMetadata) + ) + + // case 7: rename a nested column + sql("ALTER TABLE t1 RENAME COLUMN b.c TO c2") + currentMetadata = newMetadata + newMetadata = getMetadata() + assert( + !isDropColumnOperation(newMetadata, currentMetadata) && + isRenameColumnOperation(newMetadata, currentMetadata) + ) + } + } + + Seq(true, false).foreach { cdfEnabled => + var shouldBlock = cdfEnabled + + val shouldBlockStr = if (shouldBlock) "should block" else "should not block" + + def checkHelper( + log: DeltaLog, + newSchema: StructType, + action: Action, + shouldFail: Boolean = shouldBlock): Unit = { + val txn = log.startTransaction() + 
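+      // Stage the dropped/renamed schema on the transaction first; whether committing `action`
+      // below is then allowed depends on the combination under test: file actions on a
+      // CDF-enabled table are blocked after such schema changes, everything else goes through.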
txn.updateMetadata(txn.metadata.copy(schemaString = newSchema.json)) + + if (shouldFail) { + val e = intercept[DeltaUnsupportedOperationException] { + txn.commit(Seq(action), DeltaOperations.ManualUpdate) + }.getMessage + assert(e == "[DELTA_BLOCK_COLUMN_MAPPING_AND_CDC_OPERATION] " + + "Operation \"Manual Update\" is not allowed when the table has enabled " + + "change data feed (CDF) and has undergone schema changes using DROP COLUMN or RENAME " + + "COLUMN.") + } else { + txn.commit(Seq(action), DeltaOperations.ManualUpdate) + } + } + + val fileActions = Seq( + AddFile("foo", Map.empty, 1L, 1L, dataChange = true), + AddFile("foo", Map.empty, 1L, 1L, dataChange = true).remove) ++ + (if (cdfEnabled) AddCDCFile("foo", Map.empty, 1L) :: Nil else Nil) + + testColumnMapping( + s"CDF and Column Mapping: $shouldBlockStr when CDF=$cdfEnabled", + enableSQLConf = true) { mode => + + def createTable(): Unit = { + createStrictSchemaTableWithDeltaTableApi( + "t1", + schemaWithPhysicalNamesNested, + Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode, + DeltaConfigs.CHANGE_DATA_FEED.key -> cdfEnabled.toString + ) + ) + } + + Seq("h", "b.`foo.bar`.f").foreach { colName => + + // case 1: drop column with non-FileAction action should always pass + withTable("t1") { + createTable() + val log = DeltaLog.forTable(spark, TableIdentifier("t1")) + val droppedColumnSchema = sql("SELECT * FROM t1").drop(colName).schema + checkHelper(log, droppedColumnSchema, SetTransaction("id", 1, None), shouldFail = false) + } + + // case 2: rename column with FileAction should fail if $shouldBlock == true + fileActions.foreach { fileAction => + withTable("t1") { + createTable() + val log = DeltaLog.forTable(spark, TableIdentifier("t1")) + withSQLConf( + DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey -> mode) { + withTable("t2") { + sql("DROP TABLE IF EXISTS t2") + sql("CREATE TABLE t2 USING DELTA AS SELECT * FROM t1") + sql(s"ALTER TABLE t2 RENAME COLUMN $colName TO ii") + val renamedColumnSchema = sql("SELECT * FROM t2").schema + checkHelper(log, renamedColumnSchema, fileAction) + } + } + } + } + + // case 3: drop column with FileAction should fail if $shouldBlock == true + fileActions.foreach { fileAction => + { + withTable("t1") { + createTable() + val log = DeltaLog.forTable(spark, TableIdentifier("t1")) + val droppedColumnSchema = sql("SELECT * FROM t1").drop(colName).schema + checkHelper(log, droppedColumnSchema, fileAction) + } + } + } + } + } + } + + testColumnMapping("id and name mode should write field_id in parquet schema", + modes = Some(Seq("name", "id"))) { mode => + withTable("t1") { + createTableWithSQLAPI( + "t1", + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode)) + val (log, snapshot) = DeltaLog.forTableWithSnapshot(spark, TableIdentifier("t1")) + val files = snapshot.allFiles.collect() + files.foreach { f => + val footer = ParquetFileReader.readFooter( + log.newDeltaHadoopConf(), + new Path(log.dataPath, f.path), + ParquetMetadataConverter.NO_FILTER) + footer.getFileMetaData.getSchema.getFields.asScala.foreach(f => + // getId.intValue will throw NPE if field id does not exist + assert(f.getId.intValue >= 0) + ) + } + } + } + + test("should block CM upgrade when commit has FileActions and CDF enabled") { + Seq(true, false).foreach { cdfEnabled => + var shouldBlock = cdfEnabled + + withTable("t1") { + createTableWithSQLAPI( + "t1", + props = Map(DeltaConfigs.CHANGE_DATA_FEED.key -> cdfEnabled.toString)) + + val table = DeltaTableV2(spark, TableIdentifier("t1")) + val currMetadata = 
table.snapshot.metadata + val upgradeMetadata = currMetadata.copy( + configuration = currMetadata.configuration ++ Map( + DeltaConfigs.MIN_READER_VERSION.key -> "2", + DeltaConfigs.MIN_WRITER_VERSION.key -> "5", + DeltaConfigs.COLUMN_MAPPING_MODE.key -> NameMapping.name + ) + ) + + val txn = table.startTransactionWithInitialSnapshot() + txn.updateMetadata(upgradeMetadata) + + if (shouldBlock) { + val e = intercept[DeltaUnsupportedOperationException] { + txn.commit( + AddFile("foo", Map.empty, 1L, 1L, dataChange = true) :: Nil, + DeltaOperations.ManualUpdate) + }.getMessage + assert(e == "[DELTA_BLOCK_COLUMN_MAPPING_AND_CDC_OPERATION] " + + "Operation \"Manual Update\" is not allowed when the table has enabled " + + "change data feed (CDF) and has undergone schema changes using DROP COLUMN or RENAME " + + "COLUMN.") + } else { + txn.commit( + AddFile("foo", Map.empty, 1L, 1L, dataChange = true) :: Nil, + DeltaOperations.ManualUpdate) + } + } + } + } + + test("upgrade with dot column name should not be blocked") { + testCreateTableColumnMappingMode( + "t1", + schemaWithDottedColumnNames, + false, + "name", + createNewTable = false + ) { + sql(s"CREATE TABLE t1 (${schemaWithDottedColumnNames.toDDL}) USING DELTA") + alterTableWithProps("t1", props = Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name", + DeltaConfigs.MIN_READER_VERSION.key -> "2", + DeltaConfigs.MIN_WRITER_VERSION.key -> "5")) + } + } + + test("explicit id matching") { + // Explicitly disable field id reading to test id mode reinitialization + val requiredConfs = Seq( + SQLConf.PARQUET_FIELD_ID_READ_ENABLED, + SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED) + + requiredConfs.foreach { conf => + withSQLConf(conf.key -> "false") { + val e = intercept[IllegalArgumentException] { + withTable("t1") { + createStrictSchemaTableWithDeltaTableApi( + "t1", + schemaWithIdNested, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "id") + ) + val testData = spark.createDataFrame( + Seq(Row("str3", Row("str1.3", 3))).asJava, schemaWithIdNested) + testData.write.format("delta").mode("append").saveAsTable("t1") + } + } + assert(e.getMessage.contains(conf.key)) + } + } + + // The above configs are enabled by default, so no need to explicitly enable. 
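+    // In id mode the reader resolves columns by the parquet field id stamped at write time
+    // rather than by name. The rest of this test tampers with the ids recorded in the Delta
+    // schema to show exactly that: a dangling id reads back as null, and an id pointing at a
+    // dropped column's data reads that column's old values.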
+ withTable("t1") { + val testSchema = schemaWithIdNested.add("e", StringType, true, withId(5)) + val testData = spark.createDataFrame( + Seq(Row("str3", Row("str1.3", 3), "str4")).asJava, testSchema) + + createStrictSchemaTableWithDeltaTableApi( + "t1", + testSchema, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "id") + ) + + testData.write.format("delta").mode("append").saveAsTable("t1") + + def read: DataFrame = spark.read.format("delta").table("t1") + val deltaLog = DeltaLog.forTable(spark, TableIdentifier("t1")) + + def updateFieldIdFor(fieldName: String, newId: Int): Unit = { + val currentMetadata = deltaLog.update().metadata + val currentSchema = currentMetadata.schema + val field = currentSchema(fieldName) + deltaLog.withNewTransaction { txn => + val updated = field.copy(metadata = + new MetadataBuilder().withMetadata(field.metadata) + .putLong(DeltaColumnMapping.PARQUET_FIELD_ID_METADATA_KEY, newId) + .putLong(DeltaColumnMapping.COLUMN_MAPPING_METADATA_ID_KEY, newId) + .build()) + val newSchema = StructType(Seq(updated) ++ currentSchema.filter(_.name != field.name)) + txn.commit(currentMetadata.copy( + schemaString = newSchema.json, + configuration = currentMetadata.configuration ++ + // Just a big id to bypass the check + Map(DeltaConfigs.COLUMN_MAPPING_MAX_ID.key -> "10000")) :: Nil, ManualUpdate) + } + } + + // Case 1: manually modify the schema to read a non-existing id + updateFieldIdFor("a", 100) + // Reading non-existing id should return null + checkAnswer(read.select("a"), Row(null) :: Nil) + + // Case 2: manually modify the schema to read another field's id + // First let's drop e, because Delta detects duplicated field + sql(s"ALTER TABLE t1 DROP COLUMN e") + // point to the dropped field 's data + updateFieldIdFor("a", 5) + checkAnswer(read.select("a"), Row("str4")) + } + } + + test("drop and recreate external Delta table with name column mapping enabled") { + withTempDir { dir => + withTable("t1") { + val createExternalTblCmd: String = + s""" + |CREATE EXTERNAL TABLE t1 (a long) + |USING DELTA + |LOCATION '${dir.getCanonicalPath}' + |TBLPROPERTIES('delta.columnMapping.mode'='name')""".stripMargin + sql(createExternalTblCmd) + // Add column and drop the old one to increment max column ID + sql(s"ALTER TABLE t1 ADD COLUMN (b long)") + sql(s"ALTER TABLE t1 DROP COLUMN a") + sql(s"ALTER TABLE t1 RENAME COLUMN b to a") + val log = DeltaLog.forTable(spark, dir.getCanonicalPath) + val configBeforeDrop = log.update().metadata.configuration + assert(configBeforeDrop("delta.columnMapping.maxColumnId") == "2") + sql(s"DROP TABLE t1") + sql(createExternalTblCmd) + // Configuration after recreating the external table should match the config right + // before initially dropping it. 
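+      // i.e. roughly the following map (sketch; any other table properties are elided):
+      //   Map("delta.columnMapping.mode" -> "name", "delta.columnMapping.maxColumnId" -> "2", ...)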
+ assert(log.update().metadata.configuration == configBeforeDrop) + // Adding another column picks up from the last maxColumnId and increments it + sql(s"ALTER TABLE t1 ADD COLUMN (c string)") + assert(log.update().metadata.configuration("delta.columnMapping.maxColumnId") == "3") + } + } + } + + test("replace external Delta table with name column mapping enabled") { + withTempDir { dir => + withTable("t1") { + val replaceExternalTblCmd: String = + s""" + |CREATE OR REPLACE TABLE t1 (a long) + |USING DELTA + |LOCATION '${dir.getCanonicalPath}' + |TBLPROPERTIES('delta.columnMapping.mode'='name')""".stripMargin + sql(replaceExternalTblCmd) + // Add column and drop the old one to increment max column ID + sql(s"ALTER TABLE t1 ADD COLUMN (b long)") + sql(s"ALTER TABLE t1 DROP COLUMN a") + sql(s"ALTER TABLE t1 RENAME COLUMN b to a") + val log = DeltaLog.forTable(spark, dir.getCanonicalPath) + assert(log.update().metadata.configuration("delta.columnMapping.maxColumnId") == "2") + sql(replaceExternalTblCmd) + // Configuration after replacing existing table should be like the table has started new. + assert(log.update().metadata.configuration("delta.columnMapping.maxColumnId") == "1") + } + } + } + + test("verify internal table properties only if property exists in spec and existing metadata") { + val withoutMaxColumnId = Map[String, String]("delta.columnMapping.mode" -> "name") + val maxColumnIdOne = Map[String, String]( + "delta.columnMapping.mode" -> "name", + "delta.columnMapping.maxColumnId" -> "1" + ) + val maxColumnIdOneWithOthers = Map[String, String]( + "delta.columnMapping.mode" -> "name", + "delta.columnMapping.maxColumnId" -> "1", + "dummy.property" -> "dummy" + ) + val maxColumnIdTwo = Map[String, String]( + "delta.columnMapping.mode" -> "name", + "delta.columnMapping.maxColumnId" -> "2" + ) + // Max column ID is missing in first set of configs. So don't block on verification. + assert(DeltaColumnMapping.verifyInternalProperties(withoutMaxColumnId, maxColumnIdOne)) + // Max column ID matches. + assert(DeltaColumnMapping.verifyInternalProperties(maxColumnIdOne, maxColumnIdOneWithOthers)) + // Max column IDs don't match + assert(!DeltaColumnMapping.verifyInternalProperties(maxColumnIdOne, maxColumnIdTwo)) + } + + testColumnMapping( + "overwrite a column mapping table should preserve column mapping metadata", + enableSQLConf = true) { _ => + val data = spark.range(10).toDF("id").withColumn("value", lit(1)) + + def checkReadability( + oldDf: DataFrame, + expected: DataFrame, + overwrite: () => Unit, + // Whether the new data files are readable after applying the fix. + readableWithFix: Boolean = true, + // Whether the method can read the new data files out of box, regardless of the fix. + readableOutOfBox: Boolean = false): Unit = { + // Overwrite + overwrite() + if (readableWithFix) { + // Previous analyzed DF is still readable + // Apply a .select so the plan cache won't kick in. 
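+      // (With the fix, the overwrite re-uses the existing column ids and physical names for
+      // columns whose logical name and type survive, so the previously analyzed plan still
+      // resolves against the rewritten files.)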
+ checkAnswer(oldDf.select("id"), expected.select("id").collect()) + withSQLConf(DeltaSQLConf.REUSE_COLUMN_MAPPING_METADATA_DURING_OVERWRITE.key -> "false") { + // Overwrite again + overwrite() + if (readableOutOfBox) { + checkAnswer(oldDf.select("value"), expected.select("value").collect()) + } else { + // Without the fix, will fail + assert { + intercept[DeltaAnalysisException] { + oldDf.select("value").collect() + }.getErrorClass == "DELTA_SCHEMA_CHANGE_SINCE_ANALYSIS" + } + } + } + } else { + // Not readable, just fail + assert { + intercept[DeltaAnalysisException] { + oldDf.select("value").collect() + }.getErrorClass == "DELTA_SCHEMA_CHANGE_SINCE_ANALYSIS" + } + } + } + + // Readable - overwrite using DF + val overwriteData1 = spark.range(10, 20).toDF("id").withColumn("value", lit(2)) + withTempDir { dir => + data.write.format("delta").save(dir.getCanonicalPath) + val df = spark.read.format("delta").load(dir.getCanonicalPath) + checkAnswer(df, data.collect()) + checkReadability(df, overwriteData1, () => { + overwriteData1.write.mode("overwrite") + .option("overwriteSchema", "true") + .format("delta") + .save(dir.getCanonicalPath) + }) + } + + // Unreadable - data type changes + val overwriteIncompatibleDatatType = + spark.range(10, 20).toDF("id").withColumn("value", lit("name")) + withTempDir { dir => + data.write.format("delta").save(dir.getCanonicalPath) + val df = spark.read.format("delta").load(dir.getCanonicalPath) + checkAnswer(df, data.collect()) + checkReadability(df, overwriteIncompatibleDatatType, () => { + overwriteIncompatibleDatatType.write.mode("overwrite") + .option("overwriteSchema", "true") + .format("delta") + .save(dir.getCanonicalPath) + }, readableWithFix = false) + } + + def withTestTable(f: (String, DataFrame) => Unit): Unit = { + val tableName = s"cm_table" + withTable(tableName) { + data.createOrReplaceTempView("src_data") + spark.sql(s"CREATE TABLE $tableName USING DELTA AS SELECT * FROM src_data") + val df = spark.read.table(tableName) + checkAnswer(df, data.collect()) + + f(tableName, df) + } + } + + withTestTable { (tableName, df) => + // "overwrite" using REPLACE won't be covered by this fix because this is logically equivalent + // to DROP and RECREATE a new table. Therefore this optimization won't kick in. + overwriteData1.createOrReplaceTempView("overwrite_data") + checkReadability(df, overwriteData1, () => { + spark.sql(s"REPLACE TABLE $tableName USING DELTA AS SELECT * FROM overwrite_data") + }, readableWithFix = false) + } + + withTestTable { (tableName, df) => + // "overwrite" using INSERT OVERWRITE actually works without this fix because it will NOT + // trigger the overwriteSchema code path. In this case, the pre and post schema are exactly + // the same, so in fact no schema updates would occur. 
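+      // A more direct way to observe the preserved metadata would be to compare physical names
+      // across an overwrite (sketch only; `log` and `path` are hypothetical handles that are not
+      // defined in this test):
+      //   val before = DeltaColumnMapping.getPhysicalName(log.update().schema("value"))
+      //   overwriteData1.write.mode("overwrite").format("delta").save(path)
+      //   val after = DeltaColumnMapping.getPhysicalName(log.update().schema("value"))
+      //   assert(before == after)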
+ val overwriteData2 = spark.range(20, 30).toDF("id").withColumn("value", lit(2)) + overwriteData2.createOrReplaceTempView("overwrite_data2") + checkReadability(df, overwriteData2, () => { + spark.sql(s"INSERT OVERWRITE $tableName SELECT * FROM overwrite_data2") + }, readableOutOfBox = true) + } + } + + test("column mapping upgrade with table features") { + val testTableName = "columnMappingTestTable" + withTable(testTableName) { + val minReaderKey = DeltaConfigs.MIN_READER_VERSION.key + val minWriterKey = DeltaConfigs.MIN_WRITER_VERSION.key + sql( + s"""CREATE TABLE $testTableName + |USING DELTA + |TBLPROPERTIES( + |'$minReaderKey' = '2', + |'$minWriterKey' = '7' + |) + |AS SELECT * FROM RANGE(1) + |""".stripMargin) + + // [[DeltaColumnMapping.verifyAndUpdateMetadataChange]] should not throw an error. The table + // does not need to support read table features too. + val columnMappingMode = DeltaConfigs.COLUMN_MAPPING_MODE.key + sql( + s"""ALTER TABLE $testTableName SET TBLPROPERTIES( + |'$columnMappingMode'='name' + |)""".stripMargin) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaColumnMappingTestUtils.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaColumnMappingTestUtils.scala new file mode 100644 index 00000000000..11d024e9ec4 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaColumnMappingTestUtils.scala @@ -0,0 +1,481 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File + +import scala.collection.mutable + +import org.apache.spark.sql.delta.actions.{Metadata, Protocol, TableFeatureProtocolUtils} +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaColumnMappingSelectedTestMixin +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import io.delta.tables.{DeltaTable => OSSDeltaTable} +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Column, DataFrame, DataFrameWriter, Dataset, QueryTest, Row, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{AtomicType, StructField, StructType} + +trait DeltaColumnMappingTestUtilsBase extends SharedSparkSession { + + import testImplicits._ + + protected def columnMappingMode: String = NoMapping.name + + private val PHYSICAL_NAME_REGEX = + "col-[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}".r + + implicit class PhysicalNameString(s: String) { + def phy(deltaLog: DeltaLog): String = { + PHYSICAL_NAME_REGEX + .findFirstIn(s) + .getOrElse(getPhysicalName(s, deltaLog)) + } + } + + protected def columnMappingEnabled: Boolean = { + columnMappingModeString != "none" + } + + protected def columnMappingModeString: String = { + spark.conf.getOption(DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey) + .getOrElse("none") + } + + /** + * Check if two schemas are equal ignoring column mapping metadata + * @param schema1 Schema + * @param schema2 Schema + */ + protected def assertEqual(schema1: StructType, schema2: StructType): Unit = { + if (columnMappingEnabled) { + assert( + DeltaColumnMapping.dropColumnMappingMetadata(schema1) == + DeltaColumnMapping.dropColumnMappingMetadata(schema2) + ) + } else { + assert(schema1 == schema2) + } + } + + /** + * Check if two table configurations are equal ignoring column mapping metadata + * @param config1 Table config + * @param config2 Table config + */ + protected def assertEqual( + config1: Map[String, String], + config2: Map[String, String]): Unit = { + if (columnMappingEnabled) { + assert(dropColumnMappingConfigurations(config1) == dropColumnMappingConfigurations(config2)) + } else { + assert(config1 == config2) + } + } + + /** + * Check if a partition with specific values exists. 
+ * Handles both column mapped and non-mapped cases + * @param partCol Partition column name + * @param partValue Partition value + * @param deltaLog DeltaLog + */ + protected def assertPartitionWithValueExists( + partCol: String, + partValue: String, + deltaLog: DeltaLog): Unit = { + assert(getPartitionFilePathsWithValue(partCol, partValue, deltaLog).nonEmpty) + } + + /** + * Assert partition exists in an array of set of partition names/paths + * @param partCol Partition column name + * @param deltaLog Delta log + * @param inputFiles Input files to scan for DF + */ + protected def assertPartitionExists( + partCol: String, + deltaLog: DeltaLog, + inputFiles: Array[String]): Unit = { + val physicalName = partCol.phy(deltaLog) + val allFiles = deltaLog.snapshot.allFiles.collect() + val filesWithPartitions = inputFiles.map { f => + allFiles.filter(af => f.contains(af.path)).flatMap(_.partitionValues.keys).toSet + } + assert(filesWithPartitions.forall(p => p.count(_ == physicalName) > 0)) + // for non-column mapped mode, we can check the file paths as well + if (!columnMappingEnabled) { + assert(inputFiles.forall(path => path.contains(s"$physicalName=")), + s"${inputFiles.toSeq.mkString("\n")}\ndidn't contain partition columns $physicalName") + } + } + + /** + * Load Deltalog from path + * @param pathOrIdentifier Location + * @param isIdentifier Whether the previous argument is a metastore identifier + * @return + */ + protected def loadDeltaLog(pathOrIdentifier: String, isIdentifier: Boolean = false): DeltaLog = { + if (isIdentifier) { + DeltaLog.forTable(spark, TableIdentifier(pathOrIdentifier)) + } else { + DeltaLog.forTable(spark, pathOrIdentifier) + } + } + + /** + * Convert a (nested) column string to sequence of name parts + * @param col Column string + * @return Sequence of parts + */ + protected def columnNameToParts(col: String): Seq[String] = { + UnresolvedAttribute.parseAttributeName(col) + } + + /** + * Get partition file paths for a specific partition value + * @param partCol Logical or physical partition name + * @param partValue Partition value + * @param deltaLog DeltaLog + * @return List of paths + */ + protected def getPartitionFilePathsWithValue( + partCol: String, + partValue: String, + deltaLog: DeltaLog): Array[String] = { + getPartitionFilePaths(partCol, deltaLog).getOrElse(partValue, Array.empty) + } + + /** + * Get the partition value for null + */ + protected def nullPartitionValue: String = { + if (columnMappingEnabled) { + null + } else { + ExternalCatalogUtils.DEFAULT_PARTITION_NAME + } + } + + /** + * Get partition file paths grouped by partition value + * @param partCol Logical or physical partition name + * @param deltaLog DeltaLog + * @return Partition value to paths + */ + protected def getPartitionFilePaths( + partCol: String, + deltaLog: DeltaLog): Map[String, Array[String]] = { + if (columnMappingEnabled) { + val colName = partCol.phy(deltaLog) + deltaLog.update().allFiles.collect() + .groupBy(_.partitionValues(colName)) + .mapValues(_.map(deltaLog.dataPath.toUri.getPath + "/" + _.path)).toMap + } else { + val partColEscaped = s"${ExternalCatalogUtils.escapePathName(partCol)}" + val dataPath = new File(deltaLog.dataPath.toUri.getPath) + dataPath.listFiles().filter(_.getName.startsWith(s"$partColEscaped=")) + .groupBy(_.getName.split("=").last).mapValues(_.map(_.getPath)).toMap + } + } + + /** + * Group a list of input file paths by partition key-value pair w.r.t. 
delta log + * @param inputFiles Input file paths + * @param deltaLog Delta log + * @return A mapped array each with the corresponding partition keys + */ + protected def groupInputFilesByPartition( + inputFiles: Array[String], + deltaLog: DeltaLog): Map[(String, String), Array[String]] = { + if (columnMappingEnabled) { + val allFiles = deltaLog.update().allFiles.collect() + val grouped = inputFiles.flatMap { f => + allFiles.find(af => f.contains(af.path)).head.partitionValues.map(entry => (f, entry)) + }.groupBy(_._2) + grouped.mapValues(_.map(_._1)).toMap + } else { + inputFiles.groupBy(p => { + val nameParts = new Path(p).getParent.getName.split("=") + (nameParts(0), nameParts(1)) + }) + } + } + + /** + * Drop column mapping configurations from Map + * @param configuration Table configuration + * @return Configuration + */ + protected def dropColumnMappingConfigurations( + configuration: Map[String, String]): Map[String, String] = { + configuration - DeltaConfigs.COLUMN_MAPPING_MODE.key - DeltaConfigs.COLUMN_MAPPING_MAX_ID.key + } + + /** + * Drop column mapping configurations from Dataset (e.g. sql("SHOW TBLPROPERTIES t1") + * @param configs Table configuration + * @return Configuration Dataset + */ + protected def dropColumnMappingConfigurations( + configs: Dataset[(String, String)]): Dataset[(String, String)] = { + spark.createDataset(configs.collect().filter(p => + !Seq( + DeltaConfigs.COLUMN_MAPPING_MAX_ID.key, + DeltaConfigs.COLUMN_MAPPING_MODE.key + ).contains(p._1) + )) + } + + /** Return KV pairs of Protocol-related stuff for checking the result of DESCRIBE TABLE. */ + protected def buildProtocolProps(snapshot: Snapshot): Seq[(String, String)] = { + val mergedConf = + DeltaConfigs.mergeGlobalConfigs(spark.sessionState.conf, snapshot.metadata.configuration) + val metadata = snapshot.metadata.copy(configuration = mergedConf) + var props = Seq( + (Protocol.MIN_READER_VERSION_PROP, + Protocol.forNewTable(spark, Some(metadata)).minReaderVersion.toString), + (Protocol.MIN_WRITER_VERSION_PROP, + Protocol.forNewTable(spark, Some(metadata)).minWriterVersion.toString)) + if (snapshot.protocol.supportsReaderFeatures || snapshot.protocol.supportsWriterFeatures) { + props ++= + Protocol.minProtocolComponentsFromAutomaticallyEnabledFeatures(spark, metadata)._3 + .map(f => ( + s"${TableFeatureProtocolUtils.FEATURE_PROP_PREFIX}${f.name}", + TableFeatureProtocolUtils.FEATURE_PROP_SUPPORTED)) + } + props + } + + /** + * Convert (nested) column name string into physical name with reference from DeltaLog + * If target field does not have physical name, display name is returned + * @param col Logical column name + * @param deltaLog Reference DeltaLog + * @return Physical column name + */ + protected def getPhysicalName(col: String, deltaLog: DeltaLog): String = { + val nameParts = UnresolvedAttribute.parseAttributeName(col) + val realSchema = deltaLog.update().schema + getPhysicalName(nameParts, realSchema) + } + + protected def getPhysicalName(col: String, schema: StructType): String = { + val nameParts = UnresolvedAttribute.parseAttributeName(col) + getPhysicalName(nameParts, schema) + } + + protected def getPhysicalName(nameParts: Seq[String], schema: StructType): String = { + SchemaUtils.findNestedFieldIgnoreCase(schema, nameParts, includeCollections = true) + .map(DeltaColumnMapping.getPhysicalName) + .get + } + + protected def withColumnMappingConf(mode: String)(f: => Any): Any = { + withSQLConf(DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey -> mode) { + f + } + } + + protected def 
withMaxColumnIdConf(maxId: String)(f: => Any): Any = { + withSQLConf(DeltaConfigs.COLUMN_MAPPING_MAX_ID.defaultTablePropertyKey -> maxId) { + f + } + } + + /** + * Gets the physical names of a path. This is used for converting column paths in stats schema, + * so it's ok to not support MapType and ArrayType. + */ + def getPhysicalPathForStats(path: Seq[String], schema: StructType): Option[Seq[String]] = { + if (path.isEmpty) return Some(Seq.empty) + val field = schema.fields.find(_.name.equalsIgnoreCase(path.head)) + field match { + case Some(f @ StructField(_, _: AtomicType, _, _ )) => + if (path.size == 1) Some(Seq(DeltaColumnMapping.getPhysicalName(f))) else None + case Some(f @ StructField(_, st: StructType, _, _)) => + val tail = getPhysicalPathForStats(path.tail, st) + tail.map(DeltaColumnMapping.getPhysicalName(f) +: _) + case _ => + None + } + } + + /** + * Convert (nested) column name string into physical name. + * Ignore parts of special paths starting with: + * 1. stats columns: minValues, maxValues, numRecords + * 2. stats df: stats_parsed + * 3. partition values: partitionValues_parsed, partitionValues + * @param col Logical column name (e.g. a.b.c) + * @param schema Reference schema with metadata + * @return Unresolved attribute with physical name paths + */ + protected def convertColumnNameToAttributeWithPhysicalName( + col: String, + schema: StructType): UnresolvedAttribute = { + val parts = UnresolvedAttribute.parseAttributeName(col) + val shouldIgnoreFirstPart = Set( + "minValues", + "maxValues", + "numRecords", + Checkpoints.STRUCT_PARTITIONS_COL_NAME, + "partitionValues") + val shouldIgnoreSecondPart = Set(Checkpoints.STRUCT_STATS_COL_NAME, "stats") + val physical = if (shouldIgnoreFirstPart.contains(parts.head)) { + parts.head +: getPhysicalPathForStats(parts.tail, schema).getOrElse(parts.tail) + } else if (shouldIgnoreSecondPart.contains(parts.head)) { + parts.take(2) ++ getPhysicalPathForStats(parts.slice(2, parts.length), schema) + .getOrElse(parts.slice(2, parts.length)) + } else { + getPhysicalPathForStats(parts, schema).getOrElse(parts) + } + UnresolvedAttribute(physical) + } + + /** + * Convert a list of (nested) stats columns into physical name with reference from DeltaLog + * @param columns Logical columns + * @param deltaLog Reference DeltaLog + * @return Physical columns + */ + protected def convertToPhysicalColumns( + columns: Seq[Column], + deltaLog: DeltaLog): Seq[Column] = { + val schema = deltaLog.update().schema + columns.map { col => + val newExpr = col.expr.transform { + case a: Attribute => + convertColumnNameToAttributeWithPhysicalName(a.name, schema) + } + new Column(newExpr) + } + } + + /** + * Standard CONVERT TO DELTA + * @param tableOrPath String + */ + protected def convertToDelta(tableOrPath: String): Unit = { + sql(s"CONVERT TO DELTA $tableOrPath") + } + + /** + * Force enable streaming read (with possible data loss) on column mapping enabled table with + * drop / rename schema changes. 
+ */ + protected def withStreamingReadOnColumnMappingTableEnabled(f: => Unit): Unit = { + if (columnMappingEnabled) { + withSQLConf(DeltaSQLConf + .DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_COLUMN_MAPPING_SCHEMA_CHANGES.key -> "true") { + f + } + } else { + f + } + } + +} + +trait DeltaColumnMappingTestUtils extends DeltaColumnMappingTestUtilsBase + +/** + * Include this trait to enable Id column mapping mode for a suite + */ +trait DeltaColumnMappingEnableIdMode extends SharedSparkSession + with DeltaColumnMappingTestUtils + with DeltaColumnMappingSelectedTestMixin { + + protected override def columnMappingMode: String = IdMapping.name + + protected override def sparkConf: SparkConf = + super.sparkConf.set(DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey, "id") + + /** + * CONVERT TO DELTA blocked in id mode + */ + protected override def convertToDelta(tableOrPath: String): Unit = + throw DeltaErrors.convertToDeltaWithColumnMappingNotSupported( + DeltaColumnMappingMode(columnMappingModeString) + ) +} + +/** + * Include this trait to enable Name column mapping mode for a suite + */ +trait DeltaColumnMappingEnableNameMode extends SharedSparkSession + with DeltaColumnMappingTestUtils + with DeltaColumnMappingSelectedTestMixin { + + protected override def columnMappingMode: String = NameMapping.name + + protected override def sparkConf: SparkConf = + super.sparkConf.set(DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey, columnMappingMode) + + /** + * CONVERT TO DELTA can be possible under name mode in tests + */ + protected override def convertToDelta(tableOrPath: String): Unit = { + withColumnMappingConf("none") { + super.convertToDelta(tableOrPath) + } + + val (deltaPath, deltaLog) = + if (tableOrPath.contains("parquet") && tableOrPath.contains("`")) { + // parquet.`PATH` + val plainPath = tableOrPath.split('.').last.drop(1).dropRight(1) + (s"delta.`$plainPath`", DeltaLog.forTable(spark, plainPath)) + } else { + (tableOrPath, DeltaLog.forTable(spark, TableIdentifier(tableOrPath))) + } + + val tableReaderVersion = deltaLog.unsafeVolatileSnapshot.protocol.minReaderVersion + val tableWriterVersion = deltaLog.unsafeVolatileSnapshot.protocol.minWriterVersion + val requiredReaderVersion = if (tableWriterVersion >= + TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION) { + // If the writer version of the table supports table features, we need to + // bump the reader version to table features to enable column mapping. 
+ TableFeatureProtocolUtils.TABLE_FEATURES_MIN_READER_VERSION + } else { + ColumnMappingTableFeature.minReaderVersion + } + val readerVersion = spark.conf.get(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION).max( + requiredReaderVersion) + val writerVersion = spark.conf.get(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION).max( + ColumnMappingTableFeature.minWriterVersion) + + val properties = mutable.ListBuffer(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name") + if (tableReaderVersion < readerVersion) { + properties += DeltaConfigs.MIN_READER_VERSION.key -> readerVersion.toString + } + if (tableWriterVersion < writerVersion) { + properties += DeltaConfigs.MIN_WRITER_VERSION.key -> writerVersion.toString + } + val propertiesStr = properties.map(kv => s"'${kv._1}' = '${kv._2}'").mkString(", ") + sql(s"ALTER TABLE $deltaPath SET TBLPROPERTIES ($propertiesStr)") + } + +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaColumnRenameSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaColumnRenameSuite.scala new file mode 100644 index 00000000000..f7550de4d24 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaColumnRenameSuite.scala @@ -0,0 +1,570 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.scalatest.GivenWhenThen + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.types._ + +class DeltaColumnRenameSuite extends QueryTest + with DeltaArbitraryColumnNameSuiteBase + with GivenWhenThen { + + testColumnMapping("rename in column mapping mode") { mode => + withTable("t1") { + createTableWithSQLAPI("t1", + simpleNestedData, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode), + partCols = Seq("a")) + + spark.sql(s"Alter table t1 RENAME COLUMN b to b1") + + // insert data after rename + spark.sql("insert into t1 " + + "values ('str3', struct('str1.3', 3), map('k3', 'v3'), array(3, 33))") + + // some queries + checkAnswer( + spark.table("t1"), + Seq( + Row("str1", Row("str1.1", 1), Map("k1" -> "v1"), Array(1, 11)), + Row("str2", Row("str1.2", 2), Map("k2" -> "v2"), Array(2, 22)), + Row("str3", Row("str1.3", 3), Map("k3" -> "v3"), Array(3, 33)))) + + checkAnswer( + spark.table("t1").select("b1"), + Seq(Row(Row("str1.1", 1)), Row(Row("str1.2", 2)), Row(Row("str1.3", 3)))) + + checkAnswer( + spark.table("t1").select("a", "b1.c").where("b1.c = 'str1.2'"), + Seq(Row("str2", "str1.2"))) + + // b is no longer visible + val e = intercept[AnalysisException] { + spark.table("t1").select("b").collect() + } + // The error class is renamed in Spark 3.4 + assert(e.getErrorClass == "UNRESOLVED_COLUMN.WITH_SUGGESTION" + || e.getErrorClass == "MISSING_COLUMN" ) + + // rename partition column + spark.sql(s"Alter table t1 RENAME COLUMN a to a1") + // rename nested column + spark.sql(s"Alter table t1 RENAME COLUMN b1.c to c1") + + // rename and verify rename history + val renameHistoryDf = sql("DESCRIBE HISTORY t1") + .where("operation = 'RENAME COLUMN'") + .select("version", "operationParameters") + + checkAnswer(renameHistoryDf, + Row(2, Map("oldColumnPath" -> "b", "newColumnPath" -> "b1")) :: + Row(4, Map("oldColumnPath" -> "a", "newColumnPath" -> "a1")) :: + Row(5, Map("oldColumnPath" -> "b1.c", "newColumnPath" -> "b1.c1")) :: Nil) + + // cannot rename column to the same name + assert( + intercept[AnalysisException] { + spark.sql(s"Alter table t1 RENAME COLUMN map to map") + }.getMessage.contains("already exists")) + + // cannot rename to a different casing + assert( + intercept[AnalysisException] { + spark.sql("Alter table t1 RENAME COLUMN arr to Arr") + }.getMessage.contains("already exists")) + + // a is no longer visible + val e2 = intercept[AnalysisException] { + spark.table("t1").select("a").collect() + } + // The error class is renamed in Spark 3.4 + assert(e2.getErrorClass == "UNRESOLVED_COLUMN.WITH_SUGGESTION" + || e2.getErrorClass == "MISSING_COLUMN" ) + + // b1.c is no longer visible + val e3 = intercept[AnalysisException] { + spark.table("t1").select("b1.c").collect() + } + assert(e3.getMessage.contains("No such struct field")) + + // insert data after rename + spark.sql("insert into t1 " + + "values ('str4', struct('str1.4', 4), map('k4', 'v4'), array(4, 44))") + + checkAnswer( + spark.table("t1").select("a1", "b1.c1", "map") + .where("b1.c1 = 'str1.4'"), + Seq(Row("str4", "str1.4", Map("k4" -> "v4")))) + } + } + + test("rename workflow: error, upgrade to name mode and then rename") { + // error when not in the correct protocol and mode + withTable("t1") { + createTableWithSQLAPI("t1", + simpleNestedData, + partCols = Seq("a")) + val e = intercept[AnalysisException] { + 
spark.sql(s"Alter table t1 RENAME COLUMN map to map1") + } + assert(e.getMessage.contains("enable Column Mapping") && + e.getMessage.contains("mapping mode 'name'")) + + alterTableWithProps("t1", Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name", + DeltaConfigs.MIN_READER_VERSION.key -> "2", + DeltaConfigs.MIN_WRITER_VERSION.key -> "5")) + + // rename a column to have arbitrary chars + spark.sql(s"Alter table t1 RENAME COLUMN a to `${colName("a")}`") + + // rename a column that already has arbitrary chars + spark.sql(s"Alter table t1" + + s" RENAME COLUMN `${colName("a")}` to `${colName("a1")}`") + + // rename partition column + spark.sql(s"Alter table t1 RENAME COLUMN map to `${colName("map")}`") + + // insert data after rename + spark.sql("insert into t1 " + + "values ('str3', struct('str1.3', 3), map('k3', 'v3'), array(3, 33))") + + checkAnswer( + spark.table("t1").select(colName("a1"), "b.d", colName("map")) + .where("b.c >= 'str1.2'"), + Seq(Row("str2", 2, Map("k2" -> "v2")), + Row("str3", 3, Map("k3" -> "v3")))) + + // add old column back? + spark.sql(s"alter table t1 add columns (a string, map map)") + + // insert data after rename + spark.sql("insert into t1 " + + "values ('str4', struct('str1.4', 4), map('k4', 'v4'), array(4, 44)," + + " 'new_str4', map('new_k4', 'new_v4'))") + + checkAnswer( + spark.table("t1").select(colName("a1"), "a", colName("map"), "map") + .where("b.c >= 'str1.2'"), + Seq( + Row("str2", null, Map("k2" -> "v2"), null), + Row("str3", null, Map("k3" -> "v3"), null), + Row("str4", "new_str4", Map("k4" -> "v4"), Map("new_k4" -> "new_v4")))) + } + } + + test("rename workflow: error, upgrade to name mode and then rename - " + + "nested data with duplicated column name") { + withTable("t1") { + createTableWithSQLAPI("t1", simpleNestedDataWithDuplicatedNestedColumnName) + val e = intercept[AnalysisException] { + spark.sql(s"Alter table t1 RENAME COLUMN map to map1") + } + assert(e.getMessage.contains("enable Column Mapping") && + e.getMessage.contains("mapping mode 'name'")) + + // Upgrading this schema shouldn't cause any errors even if there are leaf column name + // duplications such as a.c, b.c. + alterTableWithProps("t1", Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name", + DeltaConfigs.MIN_READER_VERSION.key -> "2", + DeltaConfigs.MIN_WRITER_VERSION.key -> "5")) + + // rename shouldn't cause duplicates in column names + Seq(("a", "b"), ("arr", "map")).foreach { case (from, to) => + val e = intercept[AnalysisException] { + spark.sql(s"Alter table t1 RENAME COLUMN $from to $to") + } + assert(e.getMessage.contains("Cannot rename column")) + } + + // spice things up by changing name to arbitrary chars + spark.sql(s"Alter table t1 RENAME COLUMN a to `${colName("a")}`") + // rename partition column + spark.sql(s"Alter table t1 RENAME COLUMN map to `${colName("map")}`") + + // insert data after rename + spark.sql("insert into t1 " + + "values (struct('str3', 3), struct('str1.3', 3), map('k3', 'v3'), array(3, 33))") + + checkAnswer( + spark.table("t1").select(colName("a"), "b.d", colName("map")) + .where("b.c >= 'str1.2'"), + Seq(Row(Row("str2", 2), 2, Map("k2" -> "v2")), + Row(Row("str3", 3), 3, Map("k3" -> "v3")))) + + // add old column back? 
+ spark.sql(s"alter table t1 add columns (a string, map map)") + + // insert data after rename + spark.sql("insert into t1 " + + "values (struct('str4', 4), struct('str1.4', 4), map('k4', 'v4'), array(4, 44)," + + " 'new_str4', map('new_k4', 'new_v4'))") + + checkAnswer( + spark.table("t1").select(colName("a"), "a", colName("map"), "map") + .where("b.c >= 'str1.2'"), + Seq( + Row(Row("str2", 2), null, Map("k2" -> "v2"), null), + Row(Row("str3", 3), null, Map("k3" -> "v3"), null), + Row(Row("str4", 4), "new_str4", Map("k4" -> "v4"), Map("new_k4" -> "new_v4")))) + } + } + + test("rename with constraints") { + withTable("t1") { + val schemaWithNotNull = + simpleNestedData.schema.toDDL.replace("c: STRING", "c: STRING NOT NULL") + .replace("`c`: STRING", "`c`: STRING NOT NULL") + + withTable("source") { + spark.sql( + s""" + |CREATE TABLE t1 ($schemaWithNotNull) + |USING DELTA + |${partitionStmt(Seq("a"))} + |${propString(Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name"))} + |""".stripMargin) + simpleNestedData.write.format("delta").mode("append").saveAsTable("t1") + } + + spark.sql("alter table t1 add constraint rangeABC check (concat(a, a) > 'str')") + spark.sql("alter table t1 add constraint rangeBD check (`b`.`d` > 0)") + + spark.sql("alter table t1 add constraint arrValue check (arr[0] > 0)") + + assertException("Cannot rename column a") { + spark.sql("alter table t1 rename column a to a1") + } + + assertException("Cannot rename column arr") { + spark.sql("alter table t1 rename column arr to arr1") + } + + + // cannot rename b because its child is referenced + assertException("Cannot rename column b") { + spark.sql("alter table t1 rename column b to b1") + } + + // can still rename b.c because it's referenced by a null constraint + spark.sql("alter table t1 rename column b.c to c1") + + spark.sql("insert into t1 " + + "values ('str3', struct('str1.3', 3), map('k3', 'v3'), array(3, 33))") + + assertException("CHECK constraint rangeabc (concat(a, a) > 'str')") { + spark.sql("insert into t1 " + + "values ('fail constraint', struct('str1.3', 3), map('k3', 'v3'), array(3, 33))") + } + + assertException("CHECK constraint rangebd (b.d > 0)") { + spark.sql("insert into t1 " + + "values ('str3', struct('str1.3', -1), map('k3', 'v3'), array(3, 33))") + } + + assertException("NOT NULL constraint violated for column: b.c1") { + spark.sql("insert into t1 " + + "values ('str3', struct(null, 3), map('k3', 'v3'), array(3, 33))") + } + + // this is a safety flag - it won't error when you turn it off + withSQLConf(DeltaSQLConf.DELTA_ALTER_TABLE_CHANGE_COLUMN_CHECK_EXPRESSIONS.key -> "false") { + spark.sql("alter table t1 rename column a to a1") + spark.sql("alter table t1 rename column arr to arr1") + spark.sql("alter table t1 rename column b to b1") + } + } + } + + test("rename with constraints - map element") { + withTable("t1") { + val schemaWithNotNull = + simpleNestedData.schema.toDDL.replace("c: STRING", "c: STRING NOT NULL") + .replace("`c`: STRING", "`c`: STRING NOT NULL") + + withTable("source") { + spark.sql( + s""" + |CREATE TABLE t1 ($schemaWithNotNull) + |USING DELTA + |${partitionStmt(Seq("a"))} + |${propString(Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name"))} + |""".stripMargin) + simpleNestedData.write.format("delta").mode("append").saveAsTable("t1") + } + + spark.sql("alter table t1 add constraint" + + " mapValue check (not array_contains(map_keys(map), 'k1') or map['k1'] = 'v1')") + + assertException("Cannot rename column map") { + spark.sql("alter table t1 rename column map to 
map1") + } + + spark.sql("insert into t1 " + + "values ('str3', struct('str1.3', 3), map('k3', 'v3'), array(3, 33))") + } + } + + test("rename with generated column") { + withTable("t1") { + val tableBuilder = io.delta.tables.DeltaTable.create(spark).tableName("t1") + tableBuilder.property("delta.columnMapping.mode", "name") + + // add existing columns + simpleNestedSchema.map(field => (field.name, field.dataType)).foreach(col => { + val (colName, dataType) = col + val columnBuilder = io.delta.tables.DeltaTable.columnBuilder(spark, colName) + columnBuilder.dataType(dataType.sql) + tableBuilder.addColumn(columnBuilder.build()) + }) + + // add generated columns + val genCol1 = io.delta.tables.DeltaTable.columnBuilder(spark, "genCol1") + .dataType("int") + .generatedAlwaysAs("length(a)") + .build() + + val genCol2 = io.delta.tables.DeltaTable.columnBuilder(spark, "genCol2") + .dataType("int") + .generatedAlwaysAs("b.d * 100 + arr[0]") + .build() + + val genCol3 = io.delta.tables.DeltaTable.columnBuilder(spark, "genCol3") + .dataType("string") + .generatedAlwaysAs("concat(a, a)") + .build() + + tableBuilder + .addColumn(genCol1) + .addColumn(genCol2) + .addColumn(genCol3) + .partitionedBy("genCol2") + .execute() + + simpleNestedData.write.format("delta").mode("append").saveAsTable("t1") + + assertException("Cannot rename column a") { + spark.sql("alter table t1 rename column a to a1") + } + + assertException("Cannot rename column b") { + spark.sql("alter table t1 rename column b to b1") + } + + assertException("Cannot rename column b.d") { + spark.sql("alter table t1 rename column b.d to d1") + } + + assertException("Cannot rename column arr") { + spark.sql("alter table t1 rename column arr to arr1") + } + + // you can still rename b.c + spark.sql("alter table t1 rename column b.c to c1") + + // The following is just to show generated columns are actually there + + // add new data (without data for generated columns so that they are auto populated) + spark.createDataFrame( + Seq(Row("str3", Row("str1.3", 3), Map("k3" -> "v3"), Array(3, 33))).asJava, + new StructType() + .add("a", StringType, true) + .add("b", + new StructType() + .add("c1", StringType, true) + .add("d", IntegerType, true)) + .add("map", MapType(StringType, StringType), true) + .add("arr", ArrayType(IntegerType), true)) + .write.format("delta").mode("append").saveAsTable("t1") + + checkAnswer(spark.table("t1"), + Seq( + Row("str1", Row("str1.1", 1), Map("k1" -> "v1"), Array(1, 11), 4, 101, "str1str1"), + Row("str2", Row("str1.2", 2), Map("k2" -> "v2"), Array(2, 22), 4, 202, "str2str2"), + Row("str3", Row("str1.3", 3), Map("k3" -> "v3"), Array(3, 33), 4, 303, "str3str3"))) + + // this is a safety flag - if you turn it off, it will still error but msg is not as helpful + withSQLConf(DeltaSQLConf.DELTA_ALTER_TABLE_CHANGE_COLUMN_CHECK_EXPRESSIONS.key -> "false") { + assertException("A generated column cannot use a non-existent column") { + spark.sql("alter table t1 rename column arr to arr1") + } + assertExceptionOneOf(Seq("No such struct field d in c1, d1", + "No such struct field `d` in `c1`, `d1`")) { + spark.sql("alter table t1 rename column b.d to d1") + } + } + } + } + + /** + * Covers renaming a nested field using the ALTER TABLE command. + * @param initialColumnType Type of the single column used to create the initial test table. + * @param fieldToRename Old and new name of the field to rename. + * @param updatedColumnType Expected type of the single column after renaming the nested field. 
+   */
+  def testRenameNestedField(testName: String)(
+      initialColumnType: String,
+      fieldToRename: (String, String),
+      updatedColumnType: String): Unit =
+    testColumnMapping(s"ALTER TABLE RENAME COLUMN - nested $testName") { mode =>
+      withTempDir { dir =>
+        withTable("delta_test") {
+          sql(
+            s"""
+               |CREATE TABLE delta_test (data $initialColumnType)
+               |USING delta
+               |TBLPROPERTIES (${DeltaConfigs.COLUMN_MAPPING_MODE.key} = '${mode}')
+               |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin)
+
+          val expectedInitialType = initialColumnType.filterNot(_.isWhitespace)
+          val expectedUpdatedType = updatedColumnType.filterNot(_.isWhitespace)
+          val fieldName = s"data.${fieldToRename._1}"
+
+          def columnType: DataFrame =
+            sql("DESCRIBE TABLE delta_test")
+              .filter("col_name = 'data'")
+              .select("data_type")
+          checkAnswer(columnType, Row(expectedInitialType))
+
+          sql(s"ALTER TABLE delta_test RENAME COLUMN $fieldName TO ${fieldToRename._2}")
+          checkAnswer(columnType, Row(expectedUpdatedType))
+        }
+      }
+    }
+
+  testRenameNestedField("struct in map key")(
+    initialColumnType = "map<struct<a: int, b: int>, int>",
+    fieldToRename = "key.b" -> "c",
+    updatedColumnType = "map<struct<a: int, c: int>, int>")
+
+  testRenameNestedField("struct in map value")(
+    initialColumnType = "map<int, struct<a: int, b: int>>",
+    fieldToRename = "value.b" -> "c",
+    updatedColumnType = "map<int, struct<a: int, c: int>>")
+
+  testRenameNestedField("struct in array")(
+    initialColumnType = "array<struct<a: int, b: int>>",
+    fieldToRename = "element.b" -> "c",
+    updatedColumnType = "array<struct<a: int, c: int>>")
+
+  testRenameNestedField("struct in nested map keys")(
+    initialColumnType = "map<map<struct<a: int, b: int>, int>, int>",
+    fieldToRename = "key.key.b" -> "c",
+    updatedColumnType = "map<map<struct<a: int, c: int>, int>, int>")
+
+  testRenameNestedField("struct in nested map values")(
+    initialColumnType = "map<int, map<int, struct<a: int, b: int>>>",
+    fieldToRename = "value.value.b" -> "c",
+    updatedColumnType = "map<int, map<int, struct<a: int, c: int>>>")
+
+  testRenameNestedField("struct in nested arrays")(
+    initialColumnType = "array<array<struct<a: int, b: int>>>",
+    fieldToRename = "element.element.b" -> "c",
+    updatedColumnType = "array<array<struct<a: int, c: int>>>")
+
+  testRenameNestedField("struct in nested array and map")(
+    initialColumnType = "array<map<int, struct<a: int, b: int>>>",
+    fieldToRename = "element.value.b" -> "c",
+    updatedColumnType = "array<map<int, struct<a: int, c: int>>>")
+
+  testRenameNestedField("struct in nested map key and array")(
+    initialColumnType = "map<array<struct<a: int, b: int>>, int>",
+    fieldToRename = "key.element.b" -> "c",
+    updatedColumnType = "map<array<struct<a: int, c: int>>, int>")
+
+  testRenameNestedField("struct in nested map value and array")(
+    initialColumnType = "map<int, array<struct<a: int, b: int>>>",
+    fieldToRename = "value.element.b" -> "c",
+    updatedColumnType = "map<int, array<struct<a: int, c: int>>>")
+
+  testColumnMapping("ALTER TABLE RENAME COLUMN - rename fields nested in maps") { mode =>
+    withTable("t1") {
+      val rows = Seq(
+        Row(Map(Row(1) -> Map(Row(10) -> Row(11)))),
+        Row(Map(Row(2) -> Map(Row(20) -> Row(21)))))
+
+      val df = spark.createDataFrame(
+        rows = rows.asJava,
+        schema = new StructType()
+          .add("a", MapType(
+            new StructType().add("x", IntegerType),
+            MapType(
+              new StructType().add("y", IntegerType),
+              new StructType().add("z", IntegerType)))))
+
+      createTableWithSQLAPI("t1", df, Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode))
+
+      spark.sql(s"ALTER TABLE t1 RENAME COLUMN a.key.x to x1")
+      checkAnswer(spark.table("t1"), rows)
+
+      spark.sql(s"ALTER TABLE t1 RENAME COLUMN a.value.key.y to y1")
+      checkAnswer(spark.table("t1"), rows)
+
+      spark.sql(s"ALTER TABLE t1 RENAME COLUMN a.value.value.z to z1")
+      checkAnswer(spark.table("t1"), rows)
+
+      // Insert data after rename.
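+      // Editor's note (illustrative, not part of the original patch): with column mapping the
+      // renames above are metadata-only, which is why the three checkAnswer calls still see the
+      // original rows. The logical schema at this point is, roughly:
+      //
+      //   a: map<struct<x1: int>, map<struct<y1: int>, struct<z1: int>>>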
+ spark.sql("INSERT INTO t1 " + + "VALUES (map(named_struct('x', 3), map(named_struct('y', 30), named_struct('z', 31))))") + checkAnswer(spark.table("t1"), rows :+ Row(Map(Row(3) -> Map(Row(30) -> Row(31))))) + } + } + + testColumnMapping("ALTER TABLE RENAME COLUMN - rename fields nested in arrays") { mode => + withTable("t1") { + val rows = Seq( + Row(Array(Array(Row(10, 11), Row(12, 13)), Array(Row(14, 15), Row(16, 17)))), + Row(Array(Array(Row(20, 21), Row(22, 23)), Array(Row(24, 25), Row(26, 27))))) + + val schema = new StructType() + .add("a", ArrayType(ArrayType( + new StructType() + .add("x", IntegerType) + .add("y", IntegerType)))) + val df = spark.createDataFrame(rows.asJava, schema) + + createTableWithSQLAPI("t1", df, Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> mode)) + + spark.sql(s"ALTER TABLE t1 RENAME COLUMN a.element.element.x to x1") + checkAnswer(spark.table("t1"), df) + + spark.sql(s"ALTER TABLE t1 RENAME COLUMN a.element.element.y to y1") + checkAnswer(spark.table("t1"), df) + + // Insert data after rename. + spark.sql( + """ + |INSERT INTO t1 VALUES ( + |array( + | array(named_struct('x', 30, 'y', 31), named_struct('x', 32, 'y', 33)), + | array(named_struct('x', 34, 'y', 35), named_struct('x', 36, 'y', 37)))) + """.stripMargin) + + val expDf3 = spark.createDataFrame( + (rows :+ Row(Array(Array(Row(30, 31), Row(32, 33)), Array(Row(34, 35), Row(36, 37))))) + .asJava, + schema) + checkAnswer(spark.table("t1"), expDf3) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCommitLockSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCommitLockSuite.scala new file mode 100644 index 00000000000..1dd946bf7c1 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCommitLockSuite.scala @@ -0,0 +1,81 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.storage.{AzureLogStore, S3SingleDriverLogStore} +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.{LocalSparkSession, SparkSession} +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.delta.catalog.DeltaCatalog +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.Utils + +class DeltaCommitLockSuite extends SparkFunSuite with LocalSparkSession with SQLHelper { + + private def verifyIsCommitLockEnabled(path: File, expected: Boolean): Unit = { + val deltaLog = DeltaLog.forTable(spark, path) + val txn = deltaLog.startTransaction() + assert(txn.isCommitLockEnabled == expected) + } + + test("commit lock flag on Azure") { + spark = SparkSession.builder() + .config("spark.delta.logStore.class", classOf[AzureLogStore].getName) + .master("local[2]") + .config(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[DeltaCatalog].getName) + .getOrCreate() + val path = Utils.createTempDir() + try { + // Should lock by default on Azure + verifyIsCommitLockEnabled(path, expected = true) + // Should respect user config + for (enabled <- true :: false :: Nil) { + withSQLConf(DeltaSQLConf.DELTA_COMMIT_LOCK_ENABLED.key -> enabled.toString) { + verifyIsCommitLockEnabled(path, expected = enabled) + } + } + } finally { + Utils.deleteRecursively(path) + } + } + + test("commit lock flag on S3") { + spark = SparkSession.builder() + .config("spark.delta.logStore.class", classOf[S3SingleDriverLogStore].getName) + .master("local[2]") + .config(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[DeltaCatalog].getName) + .getOrCreate() + val path = Utils.createTempDir() + try { + // Should not lock by default on S3 + verifyIsCommitLockEnabled(path, expected = false) + // Should respect user config + for (enabled <- true :: false :: Nil) { + withSQLConf(DeltaSQLConf.DELTA_COMMIT_LOCK_ENABLED.key -> enabled.toString) { + verifyIsCommitLockEnabled(path, expected = enabled) + } + } + } finally { + Utils.deleteRecursively(path) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaConfigSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaConfigSuite.scala new file mode 100644 index 00000000000..672b732f641 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaConfigSuite.scala @@ -0,0 +1,206 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.util.concurrent.TimeUnit + +import org.apache.spark.sql.delta.DeltaConfigs.{getMilliSeconds, isValidIntervalConfigValue, parseCalendarInterval} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.util.ManualClock + +class DeltaConfigSuite extends SparkFunSuite + with SharedSparkSession + with DeltaSQLCommandTest { + + test("parseCalendarInterval") { + for (input <- Seq("5 MINUTES", "5 minutes", "5 Minutes", "inTERval 5 minutes")) { + assert(parseCalendarInterval(input) === + new CalendarInterval(0, 0, TimeUnit.MINUTES.toMicros(5))) + } + + for (input <- Seq(null, "", " ")) { + val e = intercept[IllegalArgumentException] { + parseCalendarInterval(input) + } + assert(e.getMessage.contains("cannot be null or blank")) + } + + for (input <- Seq("interval", "interval1 day", "foo", "foo 1 day")) { + val e = intercept[IllegalArgumentException] { + parseCalendarInterval(input) + } + assert(e.getMessage.contains("not a valid INTERVAL")) + } + } + + test("isValidIntervalConfigValue") { + for (input <- Seq( + // Allow 0 microsecond because we always convert microseconds to milliseconds so 0 + // microsecond is the same as 100 microseconds. + "0 microsecond", + "1 microsecond", + "1 millisecond", + "1 day", + "-1 day 86400001 milliseconds", // This is 1 millisecond + "1 day -1 microseconds")) { + assert(isValidIntervalConfigValue(parseCalendarInterval(input))) + } + for (input <- Seq( + "-1 microseconds", + "-1 millisecond", + "-1 day", + "1 day -86400001 milliseconds", // This is -1 millisecond + "1 month", + "1 year")) { + assert(!isValidIntervalConfigValue(parseCalendarInterval(input)), s"$input") + } + } + + test("Optional Calendar Interval config") { + val clock = new ManualClock(System.currentTimeMillis()) + + // case 1: duration not specified + withTempDir { dir => + sql(s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta") + + val retentionTimestampOpt = DeltaLog.forTable(spark, dir, clock) + .snapshot.minSetTransactionRetentionTimestamp + + assert(retentionTimestampOpt.isEmpty) + } + + // case 2: valid duration specified + withTempDir { dir => + sql( + s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ('delta.setTransactionRetentionDuration' = 'interval 1 days') + |""".stripMargin) + + DeltaLog.clearCache() // we want to ensure we can use the ManualClock we pass in + + val log = DeltaLog.forTable(spark, dir, clock) + val retentionTimestampOpt = log.snapshot.minSetTransactionRetentionTimestamp + assert(log.clock.getTimeMillis() == clock.getTimeMillis()) + val expectedRetentionTimestamp = + clock.getTimeMillis() - getMilliSeconds(parseCalendarInterval("interval 1 days")) + + assert(retentionTimestampOpt.contains(expectedRetentionTimestamp)) + } + + // case 3: invalid duration specified + withTempDir { dir => + val e = intercept[IllegalArgumentException] { + sql( + s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ('delta.setTransactionRetentionDuration' = 'interval 1 foo') + |""".stripMargin) + } + assert(e.getMessage.contains("not a valid INTERVAL")) + } + } + + 
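+  // Editor's sketch (illustrative, not part of the original patch): how a retention duration
+  // expressed as a calendar interval is typically turned into a millisecond cutoff, using the
+  // same DeltaConfigs helpers imported above:
+  //
+  //   val interval = parseCalendarInterval("interval 1 days")
+  //   assert(isValidIntervalConfigValue(interval))
+  //   val cutoffMillis = System.currentTimeMillis() - getMilliSeconds(interval)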
test("DeltaSQLConf.ALLOW_ARBITRARY_TABLE_PROPERTIES = true") { + withSQLConf(DeltaSQLConf.ALLOW_ARBITRARY_TABLE_PROPERTIES.key -> "true") { + // (1) we can set arbitrary table properties + withTempDir { tempDir => + sql( + s"""CREATE TABLE delta.`${tempDir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ('delta.autoOptimize.autoCompact' = true) + |""".stripMargin) + } + + // (2) we still validate matching properties + withTempDir { tempDir => + val e = intercept[IllegalArgumentException] { + sql( + s"""CREATE TABLE delta.`${tempDir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ('delta.setTransactionRetentionDuration' = 'interval 1 foo') + |""".stripMargin) + } + assert(e.getMessage.contains("not a valid INTERVAL")) + } + } + } + + test("we don't allow arbitrary delta-prefixed table properties") { + + // standard behavior + withSQLConf(DeltaSQLConf.ALLOW_ARBITRARY_TABLE_PROPERTIES.key -> "false") { + val e = intercept[AnalysisException] { + withTempDir { tempDir => + sql( + s"""CREATE TABLE delta.`${tempDir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ('delta.foo' = true) + |""".stripMargin) + } + } + var msg = "[DELTA_UNKNOWN_CONFIGURATION] " + + "Unknown configuration was specified: delta.foo\nTo disable this check, set " + + "spark.databricks.delta.allowArbitraryProperties.enabled=true in the Spark session " + + "configuration." + assert(e.getMessage == msg) + } + } + + test("allow setting valid and supported isolation level") { + // currently only Serializable isolation level is supported + withTempDir { dir => + sql( + s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ('delta.isolationLevel' = 'Serializable') + |""".stripMargin) + + val isolationLevel = + DeltaLog.forTable(spark, dir.getCanonicalPath).startTransaction().getDefaultIsolationLevel() + + assert(isolationLevel == Serializable) + } + } + + test("do not allow setting valid but unsupported isolation level") { + withTempDir { dir => + val e = intercept[IllegalArgumentException] { + sql( + s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ('delta.isolationLevel' = 'WriteSerializable') + |""".stripMargin) + } + val msg = "requirement failed: delta.isolationLevel must be Serializable" + assert(e.getMessage == msg) + } + } + + test("do not allow setting invalid isolation level") { + withTempDir { dir => + val e = intercept[IllegalArgumentException] { + sql( + s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ('delta.isolationLevel' = 'InvalidSerializable') + |""".stripMargin) + } + val msg = "[DELTA_INVALID_ISOLATION_LEVEL] invalid isolation level 'InvalidSerializable'" + assert(e.getMessage == msg) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCreateTableLikeSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCreateTableLikeSuite.scala new file mode 100644 index 00000000000..d5c9ac65342 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCreateTableLikeSuite.scala @@ -0,0 +1,400 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File + +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.test.SharedSparkSession + +class DeltaCreateTableLikeSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + + def checkTableEmpty(tblName: String): Boolean = { + val numRows = spark.sql(s"SELECT * FROM $tblName") + numRows.count() == 0 + } + + /** + * This method checks if certain properties and fields of delta tables are the + * same between the two delta tables. Boolean values can be passed in to check + * or not to check (assert) the specific property. Note that for checkLocation + * a boolean value is not passed in. If checkLocation argument is None, location + * of target table will not be checked. + * + * @param checkTargetTableByPath when true, targetTbl must be a path not table name + * @param checkSourceTableByPath when true, srcTbl must be a path not table name + */ + def checkTableCopyDelta( + srcTbl: String, + targetTbl: String, + checkDesc: Boolean = true, + checkSchemaString: Boolean = true, + checkPartitionColumns: Boolean = true, + checkConfiguration: Boolean = true, + checkTargetTableByPath: Boolean = false, + checkSourceTableByPath: Boolean = false, + checkLocation: Option[String] = None): Unit = { + val src = + if (checkSourceTableByPath) { + DeltaLog.forTable(spark, srcTbl) + } else { + DeltaLog.forTable(spark, TableIdentifier(srcTbl)) + } + + val target = + if (checkTargetTableByPath) { + DeltaLog.forTable(spark, targetTbl) + } else { + DeltaLog.forTable(spark, TableIdentifier(targetTbl)) + } + assert(src.unsafeVolatileSnapshot.protocol == + target.unsafeVolatileSnapshot.protocol, + "protocol does not match") + if (checkDesc) { + assert(src.unsafeVolatileSnapshot.metadata.description == + target.unsafeVolatileSnapshot.metadata.description, + "description/comment does not match") + } + if (checkSchemaString) { + assert(src.unsafeVolatileSnapshot.metadata.schemaString == + target.unsafeVolatileSnapshot.metadata.schemaString, + "schema does not match") + } + if (checkPartitionColumns) { + assert(src.unsafeVolatileSnapshot.metadata.partitionColumns == + target.unsafeVolatileSnapshot.metadata.partitionColumns, + "partition columns do not match") + } + if (checkConfiguration) { + // Checks Table properties and table constraints + assert(src.unsafeVolatileSnapshot.metadata.configuration == + target.unsafeVolatileSnapshot.metadata.configuration, + "configuration does not match") + } + + val catalog = spark.sessionState.catalog + if(checkLocation.isDefined) { + assert( + catalog.getTableMetadata(TableIdentifier(targetTbl)).location.toString + "/" + == checkLocation.get || + catalog.getTableMetadata(TableIdentifier(targetTbl)).location.toString == + checkLocation.get, "location does not match") + } + + } + + /** + * This method checks if certain properties and fields of a table are the + * same 
between two tables. Boolean values can be passed in to check + * or not to check (assert) the specific property. Note that for checkLocation + * a boolean value is not passed in. If checkLocation argument is None, location + * of target table will not be checked. + */ + def checkTableCopy( + srcTbl: String, targetTbl: String, + checkDesc: Boolean = true, + checkSchemaString: Boolean = true, + checkPartitionColumns: Boolean = true, + checkConfiguration: Boolean = true, + checkProvider: Boolean = true, + checkLocation: Option[String] = None): Unit = { + val srcTblDesc = spark.sessionState.catalog. + getTempViewOrPermanentTableMetadata(TableIdentifier(srcTbl)) + val targetTblDesc = DeltaLog.forTable(spark, TableIdentifier(targetTbl)) + val targetTblMetadata = targetTblDesc.unsafeVolatileSnapshot.metadata + if (checkDesc) { + assert(srcTblDesc.comment == Some(targetTblMetadata.description), + "description/comment does not match") + } + if (checkSchemaString) { + assert(srcTblDesc.schema == + targetTblDesc.unsafeVolatileSnapshot.metadata.schema, + "schema does not match") + } + if (checkPartitionColumns) { + assert(srcTblDesc.partitionColumnNames == + targetTblMetadata.partitionColumns, + "partition columns do not match") + } + if (checkConfiguration) { + // Checks Table properties + assert(srcTblDesc.properties == targetTblMetadata.configuration, + "configuration does not match") + } + if (checkProvider) { + val targetTblProvider = spark.sessionState.catalog. + getTempViewOrPermanentTableMetadata(TableIdentifier(targetTbl)).provider + assert(srcTblDesc.provider == targetTblProvider, + "provider does not match") + } + val catalog = spark.sessionState.catalog + if(checkLocation.isDefined) { + assert( + catalog.getTableMetadata(TableIdentifier(targetTbl)).location.toString + "/" + == checkLocation.get || + catalog.getTableMetadata(TableIdentifier(targetTbl)).location.toString == + checkLocation.get) + } + } + + def createTable( + srcTbl: String, format: String = "delta", + addTblProperties: Boolean = true, + addComment: Boolean = true): Unit = { + spark.range(100) + .withColumnRenamed("id", "key") + .withColumn("newCol", lit(1)) + .write + .format(format) + .partitionBy("key") + .saveAsTable(srcTbl) + + if (addTblProperties) { + spark.sql(s"ALTER TABLE $srcTbl" + + " SET TBLPROPERTIES(this.is.my.key = 14, 'this.is.my.key2' = false)") + } + if (format == "delta") { + spark.sql(s"ALTER TABLE $srcTbl SET TBLPROPERTIES('delta.minReaderVersion' = '2'," + + " 'delta.minWriterVersion' = '5')") + } + if (addComment) { + spark.sql(s"COMMENT ON TABLE $srcTbl IS 'srcTbl'") + } + } + + test("CREATE TABLE LIKE basic test") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTable(srcTbl, targetTbl) { + createTable(srcTbl) + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcTbl") + checkTableCopyDelta(srcTbl, targetTbl) + } + } + + test("CREATE TABLE LIKE with no comment") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTable(srcTbl, targetTbl) { + createTable(srcTbl, addComment = false) + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcTbl") + checkTableCopyDelta(srcTbl, targetTbl) + } + } + + test("CREATE TABLE LIKE with no added table properties") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTable(srcTbl, targetTbl) { + createTable(srcTbl, addTblProperties = false) + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcTbl") + checkTableCopyDelta(srcTbl, targetTbl) + } + } + + test("CREATE TABLE LIKE where table has no schema") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" 
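+    // Editor's note (illustrative, not part of the original patch): this test covers the
+    // degenerate case of an empty source schema; the pattern under test is simply
+    //
+    //   CREATE TABLE srcTbl USING DELTA
+    //   CREATE TABLE targetTbl LIKE srcTbl
+    //
+    // after which checkTableCopyDelta compares protocol, schema, partition columns and
+    // configuration between the two tables.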
+ withTable(srcTbl, targetTbl) { + spark.sql(s"CREATE TABLE $srcTbl USING DELTA") + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcTbl") + checkTableCopyDelta(srcTbl, targetTbl) + } + } + + test("CREATE TABLE LIKE with no added constraints") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTable(srcTbl, targetTbl) { + createTable(srcTbl + ) + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcTbl") + checkTableCopyDelta(srcTbl, targetTbl) + } + } + + test("CREATE TABLE LIKE with IF NOT EXISTS, given that targetTable does not exist") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTable(srcTbl, targetTbl) { + createTable(srcTbl) + spark.sql(s"CREATE TABLE IF NOT EXISTS $targetTbl LIKE $srcTbl USING DELTA") + checkTableCopyDelta(srcTbl, targetTbl) + } + } + + test("CREATE TABLE LIKE with IF NOT EXISTS, given that targetTable does exist") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTable(srcTbl, targetTbl) { + createTable(srcTbl) + spark.sql(s"CREATE TABLE $targetTbl(key4 INT) USING DELTA") + spark.sql(s"CREATE TABLE IF NOT EXISTS $targetTbl LIKE $srcTbl") + + val msg = intercept[TestFailedException] { + checkTableCopyDelta(srcTbl, targetTbl) + }.getMessage + assert(msg.contains("protocol does not match")) + } + } + + test("CREATE TABLE LIKE without IF NOT EXISTS, given that targetTable does exist") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTable(srcTbl, targetTbl) { + createTable(srcTbl) + spark.range(100).repartition(3) + .withColumnRenamed("id4", "key4") + .write + .format("delta") + .saveAsTable(targetTbl) + + val msg = intercept[DeltaAnalysisException] { + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcTbl") + }.getMessage + msg.contains("Table `default`.`targetTbl` already exists.") + } + } + + test("CREATE TABLE LIKE where sourceTable is a json table") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTable(srcTbl, targetTbl) { + createTable(srcTbl, format = "json") + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcTbl USING DELTA") + // Provider should be different, expected exception to be thrown + val msg = intercept[TestFailedException] { + checkTableCopy(srcTbl, targetTbl, checkDesc = false) + }.getMessage + assert(msg.contains("provider does not match")) + } + } + + test("CREATE TABLE LIKE where sourceTable is a parquet table") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTable(srcTbl, targetTbl) { + createTable(srcTbl, format = "parquet") + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcTbl USING DELTA") + // Provider should be different, expected exception to be thrown + val msg = intercept[TestFailedException] { + checkTableCopy(srcTbl, targetTbl, checkDesc = false) + }.getMessage + assert(msg.contains("provider does not match")) + } + } + + test("CREATE TABLE LIKE test where source table is an external table") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTempDir { dir => + val path = dir.toURI.toString + + new File(dir.getAbsolutePath, srcTbl).mkdir() + withTable(srcTbl, targetTbl) { + spark.sql(s"CREATE TABLE $srcTbl (key STRING) USING DELTA LOCATION '$path/$srcTbl'") + spark.sql(s"ALTER TABLE $srcTbl" + + s" SET TBLPROPERTIES(this.is.my.key = 14, 'this.is.my.key2' = false)") + spark.sql(s"COMMENT ON TABLE $srcTbl IS 'srcTbl'") + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcTbl") + + checkTableCopyDelta(srcTbl, targetTbl) + } + } + } + + test("CREATE TABLE LIKE where target table is a named external table") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTempDir { 
dir => + withTable(srcTbl) { + createTable(srcTbl) + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcTbl LOCATION '${dir.toURI.toString}'") + checkTableCopyDelta(srcTbl, targetTbl, checkLocation = Some(dir.toURI.toString)) + } + } + } + + test("CREATE TABLE LIKE where target table is a nameless table") { + val srcTbl = "srcTbl" + withTempDir { dir => + withTable(srcTbl) { + createTable(srcTbl) + spark.sql(s"CREATE TABLE delta.`${dir.toURI.toString}` LIKE $srcTbl") + checkTableCopyDelta(srcTbl, dir.toString, checkTargetTableByPath = true + ) + } + } + } + + test("CREATE TABLE LIKE where source is a view") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + val srcView = "srcView" + withTable(srcTbl, targetTbl) { + withView(srcView) { + createTable(srcTbl) + spark.sql(s"DROP TABLE IF EXISTS $targetTbl") + spark.sql(s"CREATE VIEW srcView AS SELECT * FROM $srcTbl") + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcView USING DELTA") + val targetTableDesc = DeltaLog.forTable(spark, TableIdentifier(targetTbl)) + val srcViewDesc = spark.sessionState.catalog. + getTempViewOrPermanentTableMetadata(TableIdentifier(srcView)) + assert(targetTableDesc.unsafeVolatileSnapshot.metadata.schema == srcViewDesc.schema) + } + } + } + + test("CREATE TABLE LIKE where source is a temporary view") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + val srcView = "srcView" + withTable(srcTbl, targetTbl) { + createTable(srcTbl) + spark.sql(s"CREATE TEMPORARY VIEW srcView AS SELECT * FROM $srcTbl") + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcView USING DELTA") + val targetTableDesc = DeltaLog.forTable(spark, TableIdentifier(targetTbl)) + val srcViewDesc = spark.sessionState.catalog. + getTempViewOrPermanentTableMetadata(TableIdentifier(srcView)) + assert(targetTableDesc.unsafeVolatileSnapshot.metadata.schema == srcViewDesc.schema) + } + } + + test("CREATE TABLE LIKE where source table has a column mapping") { + val srcTbl = "srcTbl" + val targetTbl = "targetTbl" + withTable(srcTbl, targetTbl) { + createTable(srcTbl + ) + // Need to set minWriterVersion to 5 for column mappings to work + spark.sql(s"ALTER TABLE $srcTbl SET TBLPROPERTIES('delta.minReaderVersion' = '2'," + + " 'delta.minWriterVersion' = '5')") + // Need to set delta.columnMapping.mode to 'name' for column mappings to work + spark.sql(s"ALTER TABLE $srcTbl SET TBLPROPERTIES ('delta.columnMapping.mode' = 'name')") + spark.sql(s"ALTER TABLE $srcTbl RENAME COLUMN key TO key2") + spark.sql(s"CREATE TABLE $targetTbl LIKE $srcTbl USING DELTA") + checkTableCopyDelta(srcTbl, targetTbl) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDDLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDDLSuite.scala new file mode 100644 index 00000000000..e2ba9cc5e62 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDDLSuite.scala @@ -0,0 +1,628 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.schema.InvariantViolationException +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkEnv +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException +import org.apache.spark.sql.catalyst.catalog.CatalogUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} + +class DeltaDDLSuite extends DeltaDDLTestBase with SharedSparkSession + with DeltaSQLCommandTest { + + override protected def verifyNullabilityFailure(exception: AnalysisException): Unit = { + exception.getMessage.contains("Cannot change nullable column to non-nullable") + } + + test("table creation with ambiguous paths only allowed with legacy flag") { + // ambiguous paths not allowed + withTempDir { foo => + withTempDir { bar => + val fooPath = foo.getCanonicalPath() + val barPath = bar.getCanonicalPath() + val e = intercept[AnalysisException] { + sql(s"CREATE TABLE delta.`$fooPath`(id LONG) USING delta LOCATION '$barPath'") + } + assert(e.message.contains("legacy.allowAmbiguousPathsInCreateTable")) + } + } + + // allowed with legacy flag + withTempDir { foo => + withTempDir { bar => + val fooPath = foo.getCanonicalPath() + val barPath = bar.getCanonicalPath() + withSQLConf(DeltaSQLConf.DELTA_LEGACY_ALLOW_AMBIGUOUS_PATHS.key -> "true") { + sql(s"CREATE TABLE delta.`$fooPath`(id LONG) USING delta LOCATION '$barPath'") + assert(io.delta.tables.DeltaTable.isDeltaTable(fooPath)) + assert(!io.delta.tables.DeltaTable.isDeltaTable(barPath)) + } + } + } + + // allowed if paths are the same + withTempDir { foo => + val fooPath = foo.getCanonicalPath() + sql(s"CREATE TABLE delta.`$fooPath`(id LONG) USING delta LOCATION '$fooPath'") + assert(io.delta.tables.DeltaTable.isDeltaTable(fooPath)) + } + } + + test("append table when column name with special chars") { + withTable("t") { + val schema = new StructType().add("a`b", "int") + val df = spark.createDataFrame(sparkContext.emptyRDD[Row], schema) + df.write.format("delta").saveAsTable("t") + df.write.format("delta").mode("append").saveAsTable("t") + assert(spark.table("t").collect().isEmpty) + } + } +} + + +class DeltaDDLNameColumnMappingSuite extends DeltaDDLSuite + with DeltaColumnMappingEnableNameMode { + + override protected def runOnlyTests = Seq( + "create table with NOT NULL - check violation through file writing", + "ALTER TABLE CHANGE COLUMN with nullability change in struct type - relaxed" + ) +} + + +abstract class DeltaDDLTestBase extends QueryTest with SQLTestUtils { + import testImplicits._ + + protected def verifyDescribeTable(tblName: String): Unit = { + val res = sql(s"DESCRIBE TABLE $tblName").collect() + assert(res.takeRight(2).map(_.getString(0)) === Seq("name", "dept")) + } + + protected def verifyNullabilityFailure(exception: AnalysisException): Unit + + protected def getDeltaLog(tableLocation: String): DeltaLog = { + DeltaLog.forTable(spark, tableLocation) + } + + + testQuietly("create table with NOT NULL - check violation through file writing") { + withTempDir { dir => + 
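+      // Editor's note (illustrative, not part of the original patch): the NOT NULL constraint
+      // declared in the CREATE TABLE below is enforced at write time, so appending a DataFrame
+      // that carries a null for column b is expected to fail, e.g.
+      //
+      //   Seq((2L, null)).toDF("a", "b")
+      //     .write.format("delta").mode("append").save(table.location.toString)
+      //   // throws InvariantViolationException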
withTable("delta_test") { + sql(s""" + |CREATE TABLE delta_test(a LONG, b String NOT NULL) + |USING delta + |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin) + val expectedSchema = new StructType() + .add("a", LongType, nullable = true) + .add("b", StringType, nullable = false) + assert(spark.table("delta_test").schema === expectedSchema) + + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("delta_test")) + assert(table.location == makeQualifiedPath(dir.getAbsolutePath)) + + Seq((1L, "a")).toDF("a", "b") + .write.format("delta").mode("append").save(table.location.toString) + val read = spark.read.format("delta").load(table.location.toString) + checkAnswer(read, Seq(Row(1L, "a"))) + + intercept[InvariantViolationException] { + Seq((2L, null)).toDF("a", "b") + .write.format("delta").mode("append").save(table.location.toString) + } + } + } + } + + test("ALTER TABLE ADD COLUMNS with NOT NULL - not supported") { + withTempDir { dir => + val tableName = "delta_test_add_not_null" + withTable(tableName) { + sql(s""" + |CREATE TABLE $tableName(a LONG) + |USING delta + |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin) + + val expectedSchema = new StructType().add("a", LongType, nullable = true) + assert(spark.table(tableName).schema === expectedSchema) + + val e = intercept[AnalysisException] { + sql( + s""" + |ALTER TABLE $tableName + |ADD COLUMNS (b String NOT NULL, c Int)""".stripMargin) + } + val msg = "`NOT NULL in ALTER TABLE ADD COLUMNS` is not supported for Delta tables" + assert(e.getMessage.contains(msg)) + } + } + } + + test("ALTER TABLE CHANGE COLUMN from nullable to NOT NULL - not supported") { + withTempDir { dir => + val tableName = "delta_test_from_nullable_to_not_null" + withTable(tableName) { + sql(s""" + |CREATE TABLE $tableName(a LONG, b String) + |USING delta + |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin) + + val expectedSchema = new StructType() + .add("a", LongType, nullable = true) + .add("b", StringType, nullable = true) + assert(spark.table(tableName).schema === expectedSchema) + + val e = intercept[AnalysisException] { + sql( + s""" + |ALTER TABLE $tableName + |CHANGE COLUMN b b String NOT NULL""".stripMargin) + } + verifyNullabilityFailure(e) + } + } + } + + test("ALTER TABLE CHANGE COLUMN from NOT NULL to nullable") { + withTempDir { dir => + val tableName = "delta_test_not_null_to_nullable" + withTable(tableName) { + sql( + s""" + |CREATE TABLE $tableName(a LONG NOT NULL, b String) + |USING delta + |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin) + + val expectedSchema = new StructType() + .add("a", LongType, nullable = false) + .add("b", StringType, nullable = true) + assert(spark.table(tableName).schema === expectedSchema) + + sql(s"INSERT INTO $tableName SELECT 1, 'a'") + checkAnswer( + sql(s"SELECT * FROM $tableName"), + Seq(Row(1L, "a"))) + + sql( + s""" + |ALTER TABLE $tableName + |ALTER COLUMN a DROP NOT NULL""".stripMargin) + val expectedSchema2 = new StructType() + .add("a", LongType, nullable = true) + .add("b", StringType, nullable = true) + assert(spark.table(tableName).schema === expectedSchema2) + + sql(s"INSERT INTO $tableName SELECT NULL, 'b'") + checkAnswer( + sql(s"SELECT * FROM $tableName"), + Seq(Row(1L, "a"), Row(null, "b"))) + } + } + } + + testQuietly("create table with NOT NULL - check violation through SQL") { + withTempDir { dir => + withTable("delta_test") { + sql(s""" + |CREATE TABLE delta_test(a LONG, b String NOT NULL) + |USING delta + 
|OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin) + val expectedSchema = new StructType() + .add("a", LongType, nullable = true) + .add("b", StringType, nullable = false) + assert(spark.table("delta_test").schema === expectedSchema) + + sql("INSERT INTO delta_test SELECT 1, 'a'") + checkAnswer( + sql("SELECT * FROM delta_test"), + Seq(Row(1L, "a"))) + + val e = intercept[InvariantViolationException] { + sql("INSERT INTO delta_test VALUES (2, null)") + } + if (!e.getMessage.contains("nullable values to non-null column")) { + verifyInvariantViolationException(e) + } + } + } + } + + testQuietly("create table with NOT NULL in struct type - check violation") { + withTempDir { dir => + withTable("delta_test") { + sql(s""" + |CREATE TABLE delta_test + |(x struct, y LONG) + |USING delta + |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin) + val expectedSchema = new StructType() + .add("x", new StructType(). + add("a", LongType, nullable = true) + .add("b", StringType, nullable = false)) + .add("y", LongType, nullable = true) + assert(spark.table("delta_test").schema === expectedSchema) + + sql("INSERT INTO delta_test SELECT (1, 'a'), 1") + checkAnswer( + sql("SELECT * FROM delta_test"), + Seq(Row(Row(1L, "a"), 1))) + + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("delta_test")) + assert(table.location == makeQualifiedPath(dir.getAbsolutePath)) + + val schema = new StructType() + .add("x", + new StructType() + .add("a", "bigint") + .add("b", "string")) + .add("y", "bigint") + val e = intercept[InvariantViolationException] { + spark.createDataFrame( + Seq(Row(Row(2L, null), 2L)).asJava, + schema + ).write.format("delta").mode("append").save(table.location.toString) + } + verifyInvariantViolationException(e) + } + } + } + + test("ALTER TABLE ADD COLUMNS with NOT NULL in struct type - not supported") { + withTempDir { dir => + val tableName = "delta_test_not_null_struct" + withTable(tableName) { + sql(s""" + |CREATE TABLE $tableName + |(y LONG) + |USING delta + |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin) + val expectedSchema = new StructType() + .add("y", LongType, nullable = true) + assert(spark.table(tableName).schema === expectedSchema) + + val e = intercept[AnalysisException] { + sql( + s""" + |ALTER TABLE $tableName + |ADD COLUMNS (x struct, z INT)""".stripMargin) + } + val msg = "Operation not allowed: " + + "`NOT NULL in ALTER TABLE ADD COLUMNS` is not supported for Delta tables" + assert(e.getMessage.contains(msg)) + } + } + } + + test("ALTER TABLE ADD COLUMNS to table with existing NOT NULL fields") { + withTempDir { dir => + val tableName = "delta_test_existing_not_null" + withTable(tableName) { + sql( + s""" + |CREATE TABLE $tableName + |(y LONG NOT NULL) + |USING delta + |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin) + val expectedSchema = new StructType() + .add("y", LongType, nullable = false) + assert(spark.table(tableName).schema === expectedSchema) + + sql( + s""" + |ALTER TABLE $tableName + |ADD COLUMNS (x struct, z INT)""".stripMargin) + val expectedSchema2 = new StructType() + .add("y", LongType, nullable = false) + .add("x", new StructType() + .add("a", LongType) + .add("b", StringType)) + .add("z", IntegerType) + assert(spark.table(tableName).schema === expectedSchema2) + } + } + } + + /** + * Covers adding and changing a nested field using the ALTER TABLE command. + * @param initialColumnType Type of the single column used to create the initial test table. 
+   * @param fieldToAdd Tuple (name, type) of the nested field to add and change.
+   * @param updatedColumnType Expected type of the single column after adding the nested field.
+   */
+  def testAlterTableNestedFields(testName: String)(
+      initialColumnType: String,
+      fieldToAdd: (String, String),
+      updatedColumnType: String): Unit = {
+    // Remove spaces in test name so we can re-use it as a unique table name.
+    val tableName = testName.replaceAll(" ", "")
+    test(s"ALTER TABLE ADD/CHANGE COLUMNS - nested $testName") {
+      withTempDir { dir =>
+        withTable(tableName) {
+          sql(
+            s"""
+               |CREATE TABLE $tableName (data $initialColumnType)
+               |USING delta
+               |TBLPROPERTIES (${DeltaConfigs.COLUMN_MAPPING_MODE.key} = 'name')
+               |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin)
+
+          val expectedInitialType = initialColumnType.filterNot(_.isWhitespace)
+          val expectedUpdatedType = updatedColumnType.filterNot(_.isWhitespace)
+          val fieldName = s"data.${fieldToAdd._1}"
+          val fieldType = fieldToAdd._2
+
+          def columnType: DataFrame =
+            sql(s"DESCRIBE TABLE $tableName")
+              .where("col_name = 'data'")
+              .select("data_type")
+          checkAnswer(columnType, Row(expectedInitialType))
+
+          sql(s"ALTER TABLE $tableName ADD COLUMNS ($fieldName $fieldType)")
+          checkAnswer(columnType, Row(expectedUpdatedType))
+
+          sql(s"ALTER TABLE $tableName CHANGE COLUMN $fieldName TYPE $fieldType")
+          checkAnswer(columnType, Row(expectedUpdatedType))
+        }
+      }
+    }
+  }
+
+  testAlterTableNestedFields("struct in map key")(
+    initialColumnType = "map<struct<a: int>, int>",
+    fieldToAdd = "key.b" -> "string",
+    updatedColumnType = "map<struct<a: int, b: string>, int>")
+
+  testAlterTableNestedFields("struct in map value")(
+    initialColumnType = "map<int, struct<a: int>>",
+    fieldToAdd = "value.b" -> "string",
+    updatedColumnType = "map<int, struct<a: int, b: string>>")
+
+  testAlterTableNestedFields("struct in array")(
+    initialColumnType = "array<struct<a: int>>",
+    fieldToAdd = "element.b" -> "string",
+    updatedColumnType = "array<struct<a: int, b: string>>")
+
+  testAlterTableNestedFields("struct in nested map keys")(
+    initialColumnType = "map<map<struct<a: int>, int>, int>",
+    fieldToAdd = "key.key.b" -> "string",
+    updatedColumnType = "map<map<struct<a: int, b: string>, int>, int>")
+
+  testAlterTableNestedFields("struct in nested map values")(
+    initialColumnType = "map<int, map<int, struct<a: int>>>",
+    fieldToAdd = "value.value.b" -> "string",
+    updatedColumnType = "map<int, map<int, struct<a: int, b: string>>>")
+
+  testAlterTableNestedFields("struct in nested arrays")(
+    initialColumnType = "array<array<struct<a: int>>>",
+    fieldToAdd = "element.element.b" -> "string",
+    updatedColumnType = "array<array<struct<a: int, b: string>>>")
+
+  testAlterTableNestedFields("struct in nested array and map")(
+    initialColumnType = "array<map<int, struct<a: int>>>",
+    fieldToAdd = "element.value.b" -> "string",
+    updatedColumnType = "array<map<int, struct<a: int, b: string>>>")
+
+  testAlterTableNestedFields("struct in nested map key and array")(
+    initialColumnType = "map<array<struct<a: int>>, int>",
+    fieldToAdd = "key.element.b" -> "string",
+    updatedColumnType = "map<array<struct<a: int, b: string>>, int>")
+
+  testAlterTableNestedFields("struct in nested map value and array")(
+    initialColumnType = "map<int, array<struct<a: int>>>",
+    fieldToAdd = "value.element.b" -> "string",
+    updatedColumnType = "map<int, array<struct<a: int, b: string>>>")
+
+  test("ALTER TABLE CHANGE COLUMN with nullability change in struct type - not supported") {
+    withTempDir { dir =>
+      val tableName = "not_supported_delta_test"
+      withTable(tableName) {
+        sql(s"""
+               |CREATE TABLE $tableName
+               |(x struct<a: LONG, b: String>, y LONG)
+               |USING delta
+               |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin)
+        val expectedSchema = new StructType()
+          .add("x", new StructType()
+            .add("a", LongType)
+            .add("b", StringType))
+          .add("y", LongType, nullable = true)
+        assert(spark.table(tableName).schema === expectedSchema)
+
+        val e1 = intercept[AnalysisException]
{ + sql( + s""" + |ALTER TABLE $tableName + |CHANGE COLUMN x x struct""".stripMargin) + } + assert(e1.getMessage.contains("Cannot update")) + val e2 = intercept[AnalysisException] { + sql( + s""" + |ALTER TABLE $tableName + |CHANGE COLUMN x.b b String NOT NULL""".stripMargin) // this syntax may change + } + verifyNullabilityFailure(e2) + } + } + } + + test("ALTER TABLE CHANGE COLUMN with nullability change in struct type - relaxed") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + withTempDir { dir => + val tblName = "delta_test2" + withTable(tblName) { + sql( + s""" + |CREATE TABLE $tblName + |(x struct NOT NULL, y LONG) + |USING delta + |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin) + val expectedSchema = new StructType() + .add("x", new StructType() + .add("a", LongType) + .add("b", StringType, nullable = false), nullable = false) + .add("y", LongType) + assert(spark.table(tblName).schema === expectedSchema) + sql(s"INSERT INTO $tblName SELECT (1, 'a'), 1") + checkAnswer( + sql(s"SELECT * FROM $tblName"), + Seq(Row(Row(1L, "a"), 1))) + + sql( + s""" + |ALTER TABLE $tblName + |ALTER COLUMN x.b DROP NOT NULL""".stripMargin) // relax nullability + sql(s"INSERT INTO $tblName SELECT (2, null), null") + checkAnswer( + sql(s"SELECT * FROM $tblName"), + Seq( + Row(Row(1L, "a"), 1), + Row(Row(2L, null), null))) + + sql( + s""" + |ALTER TABLE $tblName + |ALTER COLUMN x DROP NOT NULL""".stripMargin) + sql(s"INSERT INTO $tblName SELECT null, 3") + checkAnswer( + sql(s"SELECT * FROM $tblName"), + Seq( + Row(Row(1L, "a"), 1), + Row(Row(2L, null), null), + Row(null, 3))) + } + } + } + } + + private def verifyInvariantViolationException(e: InvariantViolationException): Unit = { + if (e == null) { + fail("Didn't receive a InvariantViolationException.") + } + assert(e.getMessage.contains("NOT NULL constraint violated for column")) + } + + test("ALTER TABLE RENAME TO") { + withTable("tbl", "newTbl") { + sql(s""" + |CREATE TABLE tbl + |USING delta + |AS SELECT 1 as a, 'a' as b + """.stripMargin) + sql(s"ALTER TABLE tbl RENAME TO newTbl") + checkDatasetUnorderly(sql("SELECT * FROM newTbl").as[(Long, String)], 1L -> "a") + } + } + + + /** + * Although Spark 3.2 adds the support for SHOW CREATE TABLE for v2 tables, it doesn't work + * properly for Delta. For example, table properties, constraints and generated columns are not + * showed properly. 
+ * + * TODO Implement Delta's own ShowCreateTableCommand to show the Delta table definition correctly + */ + test("SHOW CREATE TABLE is not supported") { + withTable("delta_test") { + sql( + s""" + |CREATE TABLE delta_test(a LONG, b String) + |USING delta + """.stripMargin) + + val e = intercept[AnalysisException] { + sql("SHOW CREATE TABLE delta_test").collect()(0).getString(0) + } + assert(e.message.contains("`SHOW CREATE TABLE` is not supported for Delta table")) + } + + withTempDir { dir => + withTable("delta_test") { + val path = dir.getCanonicalPath() + sql( + s""" + |CREATE TABLE delta_test(a LONG, b String) + |USING delta + |LOCATION '$path' + """.stripMargin) + + val e = intercept[AnalysisException] { + sql("SHOW CREATE TABLE delta_test").collect()(0).getString(0) + } + assert(e.message.contains("`SHOW CREATE TABLE` is not supported for Delta table")) + } + } + } + + + test("DESCRIBE TABLE for partitioned table") { + withTempDir { dir => + withTable("delta_test") { + val path = dir.getCanonicalPath() + + val df = Seq( + (1, "IT", "Alice"), + (2, "CS", "Bob"), + (3, "IT", "Carol")).toDF("id", "dept", "name") + df.write.format("delta").partitionBy("name", "dept").save(path) + + sql(s"CREATE TABLE delta_test USING delta LOCATION '$path'") + + verifyDescribeTable("delta_test") + verifyDescribeTable(s"delta.`$path`") + + assert(sql("DESCRIBE EXTENDED delta_test").collect().length > 0) + } + } + } + + test("snapshot returned after a dropped managed table should be empty") { + withTable("delta_test") { + sql("CREATE TABLE delta_test USING delta AS SELECT 'foo' as a") + val tableLocation = sql("DESC DETAIL delta_test").select("location").as[String].head() + val snapshotBefore = getDeltaLog(tableLocation).update() + sql("DROP TABLE delta_test") + val snapshotAfter = getDeltaLog(tableLocation).update() + assert(snapshotBefore ne snapshotAfter) + assert(snapshotAfter.version === -1) + } + } + + test("snapshot returned after renaming a managed table should be empty") { + val oldTableName = "oldTableName" + val newTableName = "newTableName" + withTable(oldTableName, newTableName) { + sql(s"CREATE TABLE $oldTableName USING delta AS SELECT 'foo' as a") + val tableLocation = sql(s"DESC DETAIL $oldTableName").select("location").as[String].head() + val snapshotBefore = getDeltaLog(tableLocation).update() + sql(s"ALTER TABLE $oldTableName RENAME TO $newTableName") + val snapshotAfter = getDeltaLog(tableLocation).update() + assert(snapshotBefore ne snapshotAfter) + assert(snapshotAfter.version === -1) + } + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDDLUsingPathSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDDLUsingPathSuite.scala new file mode 100644 index 00000000000..6b28baaf392 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDDLUsingPathSuite.scala @@ -0,0 +1,300 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.actions.Protocol +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.hadoop.fs.Path +import org.scalatest.Tag + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogV2Util, TableCatalog} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.Utils + +trait DeltaDDLUsingPathTests extends QueryTest + with SharedSparkSession with DeltaColumnMappingTestUtils { + + import testImplicits._ + + protected def catalogName: String = { + CatalogManager.SESSION_CATALOG_NAME + } + + protected def testUsingPath(command: String, tags: Tag*)(f: (String, String) => Unit): Unit = { + test(s"$command - using path", tags: _*) { + withTempDir { tempDir => + withTable("delta_test") { + val path = tempDir.getCanonicalPath + Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", + struct((col("v1") * 10).as("x"), concat(col("v2"), col("v2")).as("y"))) + .write + .format("delta") + .partitionBy("v1") + .option("path", path) + .saveAsTable("delta_test") + f("`delta_test`", path) + } + } + } + test(s"$command - using path in 'delta' database", tags: _*) { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + + withDatabase("delta") { + sql("CREATE DATABASE delta") + + withTable("delta.delta_test") { + Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .withColumn("struct", + struct((col("v1") * 10).as("x"), concat(col("v2"), col("v2")).as("y"))) + .write + .format("delta") + .partitionBy("v1") + .option("path", path) + .saveAsTable("delta.delta_test") + f("`delta`.`delta_test`", path) + } + } + } + } + } + + protected def toQualifiedPath(path: String): String = { + val hadoopPath = new Path(path) + // scalastyle:off deltahadoopconfiguration + val fs = hadoopPath.getFileSystem(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + fs.makeQualified(hadoopPath).toString + } + + protected def checkDescribe(describe: String, keyvalues: (String, String)*): Unit = { + val result = sql(describe).collect() + keyvalues.foreach { case (key, value) => + val row = result.find(_.getString(0) == key) + assert(row.isDefined) + if (key == "Location") { + assert(toQualifiedPath(row.get.getString(1)) === toQualifiedPath(value)) + } else { + assert(row.get.getString(1) === value) + } + } + } + + private def errorContains(errMsg: String, str: String): Unit = { + assert(errMsg.contains(str)) + } + + testUsingPath("SELECT") { (table, path) => + Seq(table, s"delta.`$path`").foreach { tableOrPath => + checkDatasetUnorderly( + sql(s"SELECT * FROM $tableOrPath").as[(Int, String, (Int, String))], + (1, "a", (10, "aa")), (2, "b", (20, "bb"))) + checkDatasetUnorderly( + spark.table(tableOrPath).as[(Int, String, (Int, String))], + (1, "a", (10, "aa")), (2, "b", (20, "bb"))) + } + + val ex = intercept[AnalysisException] { + spark.table(s"delta.`/path/to/delta`") + } + assert(ex.getMessage.matches( + ".*Path does not exist: (file:)?/path/to/delta.?.*"), + "Found: " + ex.getMessage) + + withSQLConf(SQLConf.RUN_SQL_ON_FILES.key -> "false") { + val ex = 
intercept[AnalysisException] { + spark.table(s"delta.`/path/to/delta`") + } + assert(ex.getMessage.contains("Table or view not found: delta.`/path/to/delta`") || + ex.getMessage.contains("table or view `delta`.`/path/to/delta` cannot be found")) + } + } + + testUsingPath("DESCRIBE TABLE") { (table, path) => + val qualifiedPath = toQualifiedPath(path) + + Seq(table, s"delta.`$path`").foreach { tableOrPath => + checkDescribe(s"DESCRIBE $tableOrPath", + "v1" -> "int", + "v2" -> "string", + "struct" -> "struct<x:int,y:string>") + + checkDescribe(s"DESCRIBE EXTENDED $tableOrPath", + "v1" -> "int", + "v2" -> "string", + "struct" -> "struct<x:int,y:string>", + "Provider" -> "delta", + "Location" -> qualifiedPath) + } + } + + testUsingPath("SHOW TBLPROPERTIES") { (table, path) => + sql(s"ALTER TABLE $table SET TBLPROPERTIES " + + "('delta.logRetentionDuration' = '2 weeks', 'key' = 'value')") + + val metadata = loadDeltaLog(path).snapshot.metadata + + Seq(table, s"delta.`$path`").foreach { tableOrPath => + checkDatasetUnorderly( + dropColumnMappingConfigurations( + sql(s"SHOW TBLPROPERTIES $tableOrPath('delta.logRetentionDuration')") + .as[(String, String)]), + "delta.logRetentionDuration" -> "2 weeks") + checkDatasetUnorderly( + dropColumnMappingConfigurations( + sql(s"SHOW TBLPROPERTIES $tableOrPath('key')").as[(String, String)]), + "key" -> "value") + } + + checkDatasetUnorderly( + dropColumnMappingConfigurations( + sql(s"SHOW TBLPROPERTIES $table").as[(String, String)]), + "delta.logRetentionDuration" -> "2 weeks", + "delta.minReaderVersion" -> + Protocol.forNewTable(spark, Some(metadata)).minReaderVersion.toString, + "delta.minWriterVersion" -> + Protocol.forNewTable(spark, Some(metadata)).minWriterVersion.toString, + "key" -> "value") + + checkDatasetUnorderly( + dropColumnMappingConfigurations( + sql(s"SHOW TBLPROPERTIES delta.`$path`").as[(String, String)]), + "delta.logRetentionDuration" -> "2 weeks", + "delta.minReaderVersion" -> + Protocol.forNewTable(spark, Some(metadata)).minReaderVersion.toString, + "delta.minWriterVersion" -> + Protocol.forNewTable(spark, Some(metadata)).minWriterVersion.toString, + "key" -> "value") + + if (table == "`delta_test`") { + val tableName = s"$catalogName.default.delta_test" + checkDatasetUnorderly( + dropColumnMappingConfigurations( + sql(s"SHOW TBLPROPERTIES $table('dEltA.lOgrEteNtiOndURaTion')").as[(String, String)]), + "dEltA.lOgrEteNtiOndURaTion" -> + s"Table $tableName does not have property: dEltA.lOgrEteNtiOndURaTion") + checkDatasetUnorderly( + dropColumnMappingConfigurations( + sql(s"SHOW TBLPROPERTIES $table('kEy')").as[(String, String)]), + "kEy" -> s"Table $tableName does not have property: kEy") + } else { + checkDatasetUnorderly( + dropColumnMappingConfigurations( + sql(s"SHOW TBLPROPERTIES $table('kEy')").as[(String, String)]), + "kEy" -> s"Table $catalogName.delta.delta_test does not have property: kEy") + } + checkDatasetUnorderly( + dropColumnMappingConfigurations( + sql(s"SHOW TBLPROPERTIES delta.`$path`('dEltA.lOgrEteNtiOndURaTion')") + .as[(String, String)]), + "dEltA.lOgrEteNtiOndURaTion" -> + s"Table $catalogName.delta.`$path` does not have property: dEltA.lOgrEteNtiOndURaTion") + checkDatasetUnorderly( + dropColumnMappingConfigurations( + sql(s"SHOW TBLPROPERTIES delta.`$path`('kEy')").as[(String, String)]), + "kEy" -> + s"Table $catalogName.delta.`$path` does not have property: kEy") + + val e = intercept[AnalysisException] { + sql(s"SHOW TBLPROPERTIES delta.`/path/to/delta`").as[(String, String)] + } + assert(e.getMessage.contains(s"not a Delta table")) + } + 
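+  // Minimal illustrative sketch of the testUsingPath pattern, not exercised elsewhere in this
+  // patch: the helper registers each check twice, once against the named table and once against
+  // the 'delta' database variant, handing the body both the table identifier and its filesystem
+  // path. DESCRIBE DETAIL is used here only as a plausible example command.
+  testUsingPath("DESCRIBE DETAIL") { (table, path) =>
+    Seq(table, s"delta.`$path`").foreach { tableOrPath =>
+      // Every resolution form should report the Delta format in its detail output.
+      assert(sql(s"DESCRIBE DETAIL $tableOrPath").select("format").head().getString(0) === "delta")
+    }
+  }
+ 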
+ testUsingPath("SHOW COLUMNS") { (table, path) => + Seq(table, s"delta.`$path`").foreach { tableOrPath => + checkDatasetUnorderly( + sql(s"SHOW COLUMNS IN $tableOrPath").as[String], + "v1", "v2", "struct") + } + if (table == "`delta_test`") { + checkDatasetUnorderly( + sql(s"SHOW COLUMNS IN $table").as[String], + "v1", "v2", "struct") + } else { + checkDatasetUnorderly( + sql(s"SHOW COLUMNS IN $table IN delta").as[String], + "v1", "v2", "struct") + } + checkDatasetUnorderly( + sql(s"SHOW COLUMNS IN `$path` IN delta").as[String], + "v1", "v2", "struct") + checkDatasetUnorderly( + sql(s"SHOW COLUMNS IN delta.`$path` IN delta").as[String], + "v1", "v2", "struct") + val e = intercept[AnalysisException] { + sql("SHOW COLUMNS IN delta.`/path/to/delta`") + } + assert(e.getMessage.contains(s"not a Delta table")) + } + + testUsingPath("DESCRIBE COLUMN") { (table, path) => + Seq(table, s"delta.`$path`").foreach { tableOrPath => + checkDatasetUnorderly( + sql(s"DESCRIBE $tableOrPath v1").as[(String, String)], + "col_name" -> "v1", + "data_type" -> "int", + "comment" -> "NULL") + checkDatasetUnorderly( + sql(s"DESCRIBE $tableOrPath struct").as[(String, String)], + "col_name" -> "struct", + "data_type" -> "struct", + "comment" -> "NULL") + checkDatasetUnorderly( + sql(s"DESCRIBE EXTENDED $tableOrPath v1").as[(String, String)], + "col_name" -> "v1", + "data_type" -> "int", + "comment" -> "NULL" + ) + val ex1 = intercept[AnalysisException] { + sql(s"DESCRIBE $tableOrPath unknown") + } + assert(ex1.getErrorClass() === "UNRESOLVED_COLUMN.WITH_SUGGESTION") + val ex2 = intercept[AnalysisException] { + sql(s"DESCRIBE $tableOrPath struct.x") + } + assert(ex2.getMessage.contains("DESC TABLE COLUMN does not support nested column: struct.x")) + } + val ex = intercept[AnalysisException] { + sql("DESCRIBE delta.`/path/to/delta` v1") + } + assert(ex.getMessage.contains("not a Delta table"), s"Original message: ${ex.getMessage()}") + } +} + +class DeltaDDLUsingPathSuite extends DeltaDDLUsingPathTests with DeltaSQLCommandTest { +} + + +class DeltaDDLUsingPathNameColumnMappingSuite extends DeltaDDLUsingPathSuite + with DeltaColumnMappingEnableNameMode { + + override protected def runOnlyTests = Seq( + "create table with NOT NULL - check violation through file writing", + "ALTER TABLE CHANGE COLUMN with nullability change in struct type - relaxed" + ) +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDataFrameHadoopOptionsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDataFrameHadoopOptionsSuite.scala new file mode 100644 index 00000000000..b199eb9b354 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDataFrameHadoopOptionsSuite.scala @@ -0,0 +1,169 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.storage.LocalLogStore +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + +class DeltaDataFrameHadoopOptionsSuite extends QueryTest with SQLTestUtils with SharedSparkSession + with DeltaSQLCommandTest { + + protected override def sparkConf = + super.sparkConf.set("spark.delta.logStore.fake.impl", classOf[LocalLogStore].getName) + + /** + * Create Hadoop file system options for `FakeFileSystem`. If Delta doesn't pick them up, + * it won't be able to read/write any files using `fake://`. + */ + private def fakeFileSystemOptions: Map[String, String] = { + Map( + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true" + ) + } + + /** Create a fake file system path to test from the dir path. */ + private def fakeFileSystemPath(dir: File): String = s"fake://${dir.getCanonicalPath}" + + /** Clear cache to make sure we don't reuse the cached snapshot */ + private def clearCachedDeltaLogToForceReload(): Unit = { + DeltaLog.clearCache() + } + + // read/write parquet format check cache + test("SC-86916: " + + "read/write Delta paths using DataFrame should pick up Hadoop file system options") { + withTempPath { dir => + val path = fakeFileSystemPath(dir) + spark.range(1, 10) + .write + .format("delta") + .options(fakeFileSystemOptions) + .save(path) + clearCachedDeltaLogToForceReload() + spark.read.format("delta").options(fakeFileSystemOptions).load(path).foreach(_ => {}) + // Test time travel + clearCachedDeltaLogToForceReload() + spark.read.format("delta").options(fakeFileSystemOptions).load(path + "@v0").foreach(_ => {}) + clearCachedDeltaLogToForceReload() + spark.read.format("delta").options(fakeFileSystemOptions).option("versionAsOf", 0) + .load(path).foreach(_ => {}) + + } + } + + testQuietly("SC-86916: disabling the conf should not pick up Hadoop file system options") { + withSQLConf(DeltaSQLConf.LOAD_FILE_SYSTEM_CONFIGS_FROM_DATAFRAME_OPTIONS.key -> "false") { + withTempPath { dir => + val path = fakeFileSystemPath(dir) + intercept[Exception] { + spark.read.format("delta").options(fakeFileSystemOptions).load(path) + } + } + } + } + + test("SC-86916: checkpoint should pick up Hadoop file system options") { + withSQLConf(DeltaConfigs.CHECKPOINT_INTERVAL.defaultTablePropertyKey -> "1") { + withTempPath { dir => + val path = fakeFileSystemPath(dir) + spark.range(1, 10).write.format("delta") + .options(fakeFileSystemOptions) + .mode("append") + .save(path) + spark.range(1, 10).write.format("delta") + .options(fakeFileSystemOptions) + .mode("append") + .save(path) + // Ensure we did write the checkpoint and read it back + val deltaLog = DeltaLog.forTable(spark, new Path(path), fakeFileSystemOptions) + assert(deltaLog.readLastCheckpointFile().get.version == 1) + } + } + } + + test("SC-86916: invalidateCache should invalidate all DeltaLogs of the given path") { + withTempPath { dir => + val pathStr = fakeFileSystemPath(dir) + val path = new Path(pathStr) + spark.range(1, 10).write.format("delta") + .options(fakeFileSystemOptions) + .mode("append") + .save(pathStr) + val deltaLog = DeltaLog.forTable(spark, path, fakeFileSystemOptions) + spark.range(1, 10).write.format("delta") + 
.options(fakeFileSystemOptions) + .mode("append") + .save(pathStr) + val cachedDeltaLog = DeltaLog.forTable(spark, path, fakeFileSystemOptions) + assert(deltaLog eq cachedDeltaLog) + withSQLConf(fakeFileSystemOptions.toSeq: _*) { + DeltaLog.invalidateCache(spark, path) + } + spark.range(1, 10).write.format("delta") + .options(fakeFileSystemOptions) + .mode("append") + .save(pathStr) + val newDeltaLog = DeltaLog.forTable(spark, path, fakeFileSystemOptions) + assert(deltaLog ne newDeltaLog) + } + } + + test("SC-86916: Delta log cache should respect options") { + withTempPath { dir => + val path = fakeFileSystemPath(dir) + DeltaLog.clearCache() + spark.range(1, 10).write.format("delta") + .options(fakeFileSystemOptions) + .mode("append") + .save(path) + assert(DeltaLog.cacheSize == 1) + + // Accessing the same table should not create a new entry in the cache + spark.read.format("delta").options(fakeFileSystemOptions).load(path).foreach(_ => {}) + assert(DeltaLog.cacheSize == 1) + + // Accessing the table with different options should create a new entry + spark.read.format("delta") + .options(fakeFileSystemOptions ++ Map("fs.foo" -> "foo")).load(path).foreach(_ => {}) + assert(DeltaLog.cacheSize == 2) + + // Accessing the table without options should create a new entry + withSQLConf(fakeFileSystemOptions.toSeq: _*) { + spark.read.format("delta").load(path).foreach(_ => {}) + } + assert(DeltaLog.cacheSize == 3) + + // Make sure we don't break existing cache logic + DeltaLog.clearCache() + withSQLConf(fakeFileSystemOptions.toSeq: _*) { + spark.read.format("delta").load(path).foreach(_ => {}) + spark.read.format("delta").load(path).foreach(_ => {}) + } + assert(DeltaLog.cacheSize == 1) + } + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDataFrameWriterV2Suite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDataFrameWriterV2Suite.scala new file mode 100644 index 00000000000..1147e8f2510 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDataFrameWriterV2Suite.scala @@ -0,0 +1,759 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.actions.{Protocol, TableFeatureProtocolUtils} +import org.apache.spark.sql.delta.catalog.{DeltaCatalog, DeltaTableV2} +import org.apache.spark.sql.delta.test.{DeltaColumnMappingSelectedTestMixin, DeltaSQLCommandTest} +import org.scalatest.BeforeAndAfter + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, CreateTableWriter, Dataset, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, TableAlreadyExistsException} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogV2Util, Identifier, Table, TableCatalog} +import org.apache.spark.sql.connector.expressions._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{LongType, StringType, StructType} +import org.apache.spark.util.Utils + +// These tests are copied from Apache Spark (minus partition by expressions) and should work exactly +// the same with Delta minus some writer options +trait OpenSourceDataFrameWriterV2Tests + extends QueryTest + with SharedSparkSession + with BeforeAndAfter { + + import testImplicits._ + + before { + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + val df2 = spark.createDataFrame(Seq((4L, "d"), (5L, "e"), (6L, "f"))).toDF("id", "data") + df2.createOrReplaceTempView("source2") + } + + after { + spark.sessionState.catalog.listTables("default").foreach { ti => + spark.sessionState.catalog.dropTable(ti, ignoreIfNotExists = false, purge = false) + } + } + + def catalog: TableCatalog = { + spark.sessionState.catalogManager.currentCatalog.asInstanceOf[TableCatalog] + } + + protected def catalogPrefix: String = { + s"${CatalogManager.SESSION_CATALOG_NAME}." 
+ } + + protected def getProperties(table: Table): Map[String, String] = { + table.properties().asScala.toMap + .filterKeys(!CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(_)) + .filterKeys(!TableFeatureProtocolUtils.isTableProtocolProperty(_)) + .toMap + } + + test("Append: basic append") { + spark.sql("CREATE TABLE table_name (id bigint, data string) USING delta") + + checkAnswer(spark.table("table_name"), Seq.empty) + + spark.table("source").writeTo("table_name").append() + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + spark.table("source2").writeTo("table_name").append() + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"), Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + } + + test("Append: by name not position") { + spark.sql("CREATE TABLE table_name (id bigint, data string) USING delta") + + checkAnswer(spark.table("table_name"), Seq.empty) + + val exc = intercept[AnalysisException] { + spark.table("source").withColumnRenamed("data", "d").writeTo("table_name").append() + } + + assert(exc.getMessage.contains("schema mismatch")) + + checkAnswer( + spark.table("table_name"), + Seq()) + } + + test("Append: fail if table does not exist") { + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("table_name").append() + } + + assert(exc.getMessage.contains("table_name")) + } + + test("Overwrite: overwrite by expression: true") { + spark.sql( + "CREATE TABLE table_name (id bigint, data string) USING delta PARTITIONED BY (id)") + + checkAnswer(spark.table("table_name"), Seq.empty) + + spark.table("source").writeTo("table_name").append() + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + spark.table("source2").writeTo("table_name").overwrite(lit(true)) + + checkAnswer( + spark.table("table_name"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + } + + test("Overwrite: overwrite by expression: id = 3") { + spark.sql( + "CREATE TABLE table_name (id bigint, data string) USING delta PARTITIONED BY (id)") + + checkAnswer(spark.table("table_name"), Seq.empty) + + spark.table("source").writeTo("table_name").append() + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val e = intercept[AnalysisException] { + spark.table("source2").writeTo("table_name").overwrite($"id" === 3) + } + assert(e.getErrorClass == "DELTA_REPLACE_WHERE_MISMATCH") + assert(e.getMessage.startsWith( + "[DELTA_REPLACE_WHERE_MISMATCH] Written data does not conform to partial table overwrite " + + "condition or constraint")) + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + } + + test("Overwrite: by name not position") { + spark.sql("CREATE TABLE table_name (id bigint, data string) USING delta") + + checkAnswer(spark.table("table_name"), Seq.empty) + + val exc = intercept[AnalysisException] { + spark.table("source").withColumnRenamed("data", "d") + .writeTo("table_name").overwrite(lit(true)) + } + + assert(exc.getMessage.contains("schema mismatch")) + + checkAnswer( + spark.table("table_name"), + Seq()) + } + + test("Overwrite: fail if table does not exist") { + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("table_name").overwrite(lit(true)) + } + + assert(exc.getMessage.contains("table_name")) + } + + test("OverwritePartitions: overwrite conflicting partitions") { + spark.sql( + "CREATE TABLE table_name (id bigint, data string) USING delta PARTITIONED BY 
(id)") + + checkAnswer(spark.table("table_name"), Seq.empty) + + spark.table("source").writeTo("table_name").append() + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + spark.table("source2").withColumn("id", $"id" - 2) + .writeTo("table_name").overwritePartitions() + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "d"), Row(3L, "e"), Row(4L, "f"))) + } + + test("OverwritePartitions: overwrite all rows if not partitioned") { + spark.sql("CREATE TABLE table_name (id bigint, data string) USING delta") + + checkAnswer(spark.table("table_name"), Seq.empty) + + spark.table("source").writeTo("table_name").append() + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + spark.table("source2").writeTo("table_name").overwritePartitions() + + checkAnswer( + spark.table("table_name"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + } + + test("OverwritePartitions: by name not position") { + spark.sql("CREATE TABLE table_name (id bigint, data string) USING delta") + + checkAnswer(spark.table("table_name"), Seq.empty) + + val e = intercept[AnalysisException] { + spark.table("source").withColumnRenamed("data", "d") + .writeTo("table_name").overwritePartitions() + } + + assert(e.getMessage.contains("schema mismatch")) + + checkAnswer( + spark.table("table_name"), + Seq()) + } + + test("OverwritePartitions: fail if table does not exist") { + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("table_name").overwritePartitions() + } + + assert(exc.getMessage.contains("table_name")) + } + + test("Create: basic behavior") { + spark.table("source").writeTo("table_name").using("delta").create() + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + assert(table.name === s"${catalogPrefix}default.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning.isEmpty) + assert(getProperties(table).isEmpty) + } + + test("Create: with using") { + spark.table("source").writeTo("table_name").using("delta").create() + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + assert(table.name === s"${catalogPrefix}default.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning.isEmpty) + assert(getProperties(table).isEmpty) + } + + test("Create: with property") { + spark.table("source").writeTo("table_name") + .tableProperty("prop", "value").using("delta").create() + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + assert(table.name === s"${catalogPrefix}default.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning.isEmpty) + assert(getProperties(table) === Map("prop" -> "value")) + } + + test("Create: identity partitioned table") { + spark.table("source").writeTo("table_name").using("delta").partitionedBy($"id").create() + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + 
assert(table.name === s"${catalogPrefix}default.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(getProperties(table).isEmpty) + } + + test("Create: fail if table already exists") { + spark.sql( + "CREATE TABLE table_name (id bigint, data string) USING delta PARTITIONED BY (id)") + + val exc = intercept[TableAlreadyExistsException] { + spark.table("source").writeTo("table_name").using("delta").create() + } + + assert(exc.getMessage.contains("table_name")) + + val table = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + // table should not have been changed + assert(table.name === s"${catalogPrefix}default.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(getProperties(table).isEmpty) + } + + test("Replace: basic behavior") { + spark.sql( + "CREATE TABLE table_name (id bigint, data string) USING delta PARTITIONED BY (id)") + spark.sql("INSERT INTO TABLE table_name SELECT * FROM source") + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + // validate the initial table + assert(table.name === s"${catalogPrefix}default.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(getProperties(table).isEmpty) + + spark.table("source2") + .withColumn("even_or_odd", when(($"id" % 2) === 0, "even").otherwise("odd")) + .writeTo("table_name").using("delta") + .tableProperty("deLta.aPpeNdonly", "true").replace() + + checkAnswer( + spark.table("table_name"), + Seq(Row(4L, "d", "even"), Row(5L, "e", "odd"), Row(6L, "f", "even"))) + + val replaced = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + // validate the replacement table + assert(replaced.name === s"${catalogPrefix}default.table_name") + assert(replaced.schema === new StructType() + .add("id", LongType) + .add("data", StringType) + .add("even_or_odd", StringType)) + assert(replaced.partitioning.isEmpty) + assert(getProperties(replaced) === Map("delta.appendOnly" -> "true")) + } + + test("Replace: partitioned table") { + spark.sql("CREATE TABLE table_name (id bigint, data string) USING delta") + spark.sql("INSERT INTO TABLE table_name SELECT * FROM source") + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + // validate the initial table + assert(table.name === s"${catalogPrefix}default.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning.isEmpty) + assert(getProperties(table).isEmpty) + + spark.table("source2") + .withColumn("even_or_odd", when(($"id" % 2) === 0, "even").otherwise("odd")) + .writeTo("table_name").using("delta") + .partitionedBy($"id") + .replace() + + checkAnswer( + spark.table("table_name"), + Seq(Row(4L, "d", "even"), Row(5L, "e", "odd"), Row(6L, "f", "even"))) + + val replaced = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + // validate the replacement table + assert(replaced.name === s"${catalogPrefix}default.table_name") + 
assert(replaced.schema === new StructType() + .add("id", LongType) + .add("data", StringType) + .add("even_or_odd", StringType)) + assert(replaced.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(getProperties(replaced).isEmpty) + } + + test("Replace: fail if table does not exist") { + val exc = intercept[CannotReplaceMissingTableException] { + spark.table("source").writeTo("table_name").using("delta").replace() + } + + assert(exc.getMessage.contains("table_name")) + } + + test("CreateOrReplace: table does not exist") { + spark.table("source2").writeTo("table_name").using("delta").createOrReplace() + + checkAnswer( + spark.table("table_name"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + + val replaced = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + // validate the replacement table + assert(replaced.name === s"${catalogPrefix}default.table_name") + assert(replaced.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(replaced.partitioning.isEmpty) + assert(getProperties(replaced).isEmpty) + } + + test("CreateOrReplace: table exists") { + spark.sql( + "CREATE TABLE table_name (id bigint, data string) USING delta PARTITIONED BY (id)") + spark.sql("INSERT INTO TABLE table_name SELECT * FROM source") + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + // validate the initial table + assert(table.name === s"${catalogPrefix}default.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(getProperties(table).isEmpty) + + spark.table("source2") + .withColumn("even_or_odd", when(($"id" % 2) === 0, "even").otherwise("odd")) + .writeTo("table_name").using("delta").createOrReplace() + + checkAnswer( + spark.table("table_name"), + Seq(Row(4L, "d", "even"), Row(5L, "e", "odd"), Row(6L, "f", "even"))) + + val replaced = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + // validate the replacement table + assert(replaced.name === s"${catalogPrefix}default.table_name") + assert(replaced.schema === new StructType() + .add("id", LongType) + .add("data", StringType) + .add("even_or_odd", StringType)) + assert(replaced.partitioning.isEmpty) + assert(getProperties(replaced).isEmpty) + } + + test("Create: partitioned by years(ts) - not supported") { + val e = intercept[AnalysisException] { + spark.table("source") + .withColumn("ts", lit("2019-06-01 10:00:00.000000").cast("timestamp")) + .writeTo("table_name") + .partitionedBy(years($"ts")) + .using("delta") + .create() + } + assert(e.getMessage.contains("Partitioning by expressions")) + } + + test("Create: partitioned by months(ts) - not supported") { + val e = intercept[AnalysisException] { + spark.table("source") + .withColumn("ts", lit("2019-06-01 10:00:00.000000").cast("timestamp")) + .writeTo("table_name") + .partitionedBy(months($"ts")) + .using("delta") + .create() + } + assert(e.getMessage.contains("Partitioning by expressions")) + } + + test("Create: partitioned by days(ts) - not supported") { + val e = intercept[AnalysisException] { + spark.table("source") + .withColumn("ts", lit("2019-06-01 10:00:00.000000").cast("timestamp")) + .writeTo("table_name") + .partitionedBy(days($"ts")) + .using("delta") + .create() + } + assert(e.getMessage.contains("Partitioning by expressions")) + } + + test("Create: 
partitioned by hours(ts) - not supported") { + val e = intercept[AnalysisException] { + spark.table("source") + .withColumn("ts", lit("2019-06-01 10:00:00.000000").cast("timestamp")) + .writeTo("table_name") + .partitionedBy(hours($"ts")) + .using("delta") + .create() + } + assert(e.getMessage.contains("Partitioning by expressions")) + } + + test("Create: partitioned by bucket(4, id) - not supported") { + val e = intercept[AnalysisException] { + spark.table("source") + .writeTo("table_name") + .partitionedBy(bucket(4, $"id")) + .using("delta") + .create() + } + assert(e.getMessage.contains("is not supported for Delta tables")) + } +} + +class DeltaDataFrameWriterV2Suite + extends OpenSourceDataFrameWriterV2Tests + with DeltaSQLCommandTest { + + import testImplicits._ + + test("Append: basic append by path") { + spark.sql("CREATE TABLE table_name (id bigint, data string) USING delta") + + checkAnswer(spark.table("table_name"), Seq.empty) + val location = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + .asInstanceOf[DeltaTableV2].path + + spark.table("source").writeTo(s"delta.`$location`").append() + + checkAnswer( + spark.table(s"delta.`$location`"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + // allows missing columns + Seq(4L).toDF("id").writeTo(s"delta.`$location`").append() + checkAnswer( + spark.table(s"delta.`$location`"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"), Row(4L, null))) + } + + test("Create: basic behavior by path") { + withTempDir { tempDir => + val dir = tempDir.getCanonicalPath + spark.table("source").writeTo(s"delta.`$dir`").using("delta").create() + + checkAnswer( + spark.read.format("delta").load(dir), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog.loadTable(Identifier.of(Array("delta"), dir)) + + assert(table.name === s"delta.`file:$dir`") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning.isEmpty) + assert(getProperties(table).isEmpty) + } + } + + test("Create: using empty dataframe") { + spark.table("source").where("false") + .writeTo("table_name").using("delta") + .tableProperty("delta.appendOnly", "true") + .partitionedBy($"id").create() + + checkAnswer(spark.table("table_name"), Seq.empty[Row]) + + val table = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + assert(table.name === s"${catalogPrefix}default.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(getProperties(table) === Map("delta.appendOnly" -> "true")) + } + + test("Replace: basic behavior using empty df") { + spark.sql( + "CREATE TABLE table_name (id bigint, data string) USING delta PARTITIONED BY (id)") + spark.sql("INSERT INTO TABLE table_name SELECT * FROM source") + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + // validate the initial table + assert(table.name === s"${catalogPrefix}default.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(getProperties(table).isEmpty) + + spark.table("source2").where("false") + .withColumn("even_or_odd", when(($"id" % 2) === 0, "even").otherwise("odd")) + .writeTo("table_name").using("delta") + 
.tableProperty("deLta.aPpeNdonly", "true").replace() + + checkAnswer( + spark.table("table_name"), + Seq.empty[Row]) + + val replaced = catalog.loadTable(Identifier.of(Array("default"), "table_name")) + + // validate the replacement table + assert(replaced.name === s"${catalogPrefix}default.table_name") + assert(replaced.schema === new StructType() + .add("id", LongType) + .add("data", StringType) + .add("even_or_odd", StringType)) + assert(replaced.partitioning.isEmpty) + assert(getProperties(replaced) === Map("delta.appendOnly" -> "true")) + } + + test("throw error with createOrReplace and Replace if overwriteSchema=false") { + spark.sql( + "CREATE TABLE table_name (id bigint, data string) USING delta PARTITIONED BY (id)") + spark.sql("INSERT INTO TABLE table_name SELECT * FROM source") + + checkAnswer( + spark.table("table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + def checkFailure( + df: Dataset[_], + errorMsg: String)( + f: CreateTableWriter[_] => CreateTableWriter[_]): Unit = { + val e = intercept[IllegalArgumentException] { + val dfwV2 = df.writeTo("table_name") + .using("delta") + .option("overwriteSchema", "false") + f(dfwV2).replace() + } + assert(e.getMessage.contains(errorMsg)) + + val e2 = intercept[IllegalArgumentException] { + val dfwV2 = df.writeTo("table_name") + .using("delta") + .option("overwriteSchema", "false") + f(dfwV2).createOrReplace() + } + assert(e2.getMessage.contains(errorMsg)) + } + + // schema changes + checkFailure( + spark.table("table_name").withColumn("id2", 'id + 1), + "overwriteSchema is not allowed when replacing")(a => a.partitionedBy($"id")) + + // partitioning changes + // did not specify partitioning + checkFailure(spark.table("table_name"), + "overwriteSchema is not allowed when replacing")(a => a) + + // different partitioning column + checkFailure(spark.table("table_name"), + "overwriteSchema is not allowed when replacing")(a => a.partitionedBy($"data")) + + // different table Properties + checkFailure(spark.table("table_name"), "overwriteSchema is not allowed when replacing")(a => + a.partitionedBy($"id").tableProperty("delta.appendOnly", "true")) + } + + test("append or overwrite mode should not do implicit casting") { + val table = "not_implicit_casting" + withTable(table) { + spark.sql(s"CREATE TABLE $table(id bigint, p int) USING delta PARTITIONED BY (p)") + def verifyNotImplicitCasting(f: => Unit): Unit = { + val e = intercept[AnalysisException](f).getMessage + assert(e.contains("Failed to merge incompatible data types LongType and IntegerType")) + } + verifyNotImplicitCasting { + Seq(1 -> 1).toDF("id", "p").write.mode("append").format("delta").saveAsTable(table) + } + verifyNotImplicitCasting { + Seq(1 -> 1).toDF("id", "p").write.mode("overwrite").format("delta").saveAsTable(table) + } + verifyNotImplicitCasting { + Seq(1 -> 1).toDF("id", "p").writeTo(table).append() + } + verifyNotImplicitCasting { + Seq(1 -> 1).toDF("id", "p").writeTo(table).overwrite($"p" === 1) + } + verifyNotImplicitCasting { + Seq(1 -> 1).toDF("id", "p").writeTo(table).overwritePartitions() + } + } + } + + test("append or overwrite mode allows missing columns") { + val table = "allow_missing_columns" + withTable(table) { + spark.sql( + s"CREATE TABLE $table(col1 int, col2 int, col3 int) USING delta PARTITIONED BY (col3)") + + // append + Seq((0, 10)).toDF("col1", "col3").writeTo(table).append() + checkAnswer( + spark.table(table), + Seq(Row(0, null, 10)) + ) + + // overwrite by expression + Seq((1, 11)).toDF("col1", 
"col3").writeTo(table).overwrite($"col3" === 11) + checkAnswer( + spark.table(table), + Seq(Row(0, null, 10), Row(1, null, 11)) + ) + + // dynamic partition overwrite + Seq((2, 10)).toDF("col1", "col3").writeTo(table).overwritePartitions() + checkAnswer( + spark.table(table), + Seq(Row(2, null, 10), Row(1, null, 11)) + ) + } + + } +} + +trait DeltaDataFrameWriterV2ColumnMappingSuiteBase extends DeltaColumnMappingSelectedTestMixin { + override protected def runOnlyTests = Seq( + "Append: basic append", + "Create: with using", + "Overwrite: overwrite by expression: true", + "Replace: partitioned table" + ) +} + +class DeltaDataFrameWriterV2IdColumnMappingSuite extends DeltaDataFrameWriterV2Suite + with DeltaColumnMappingEnableIdMode + with DeltaDataFrameWriterV2ColumnMappingSuiteBase { + + override protected def getProperties(table: Table): Map[String, String] = { + // ignore column mapping configurations + dropColumnMappingConfigurations(super.getProperties(table)) + } + +} + +class DeltaDataFrameWriterV2NameColumnMappingSuite extends DeltaDataFrameWriterV2Suite + with DeltaColumnMappingEnableNameMode + with DeltaDataFrameWriterV2ColumnMappingSuiteBase { + + override protected def getProperties(table: Table): Map[String, String] = { + // ignore column mapping configurations + dropColumnMappingConfigurations(super.getProperties(table)) + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDropColumnSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDropColumnSuite.scala new file mode 100644 index 00000000000..fb0d07b692a --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaDropColumnSuite.scala @@ -0,0 +1,440 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.types.{ArrayType, IntegerType, MapType, StringType, StructType} + +class DeltaDropColumnSuite extends QueryTest + with DeltaArbitraryColumnNameSuiteBase { + + override protected val sparkConf: SparkConf = + super.sparkConf.set(DeltaSQLConf.DELTA_ALTER_TABLE_DROP_COLUMN_ENABLED.key, "true") + + protected def dropTest( + testName: String, + testTags: org.scalatest.Tag*)( + f: ((String, Seq[String]) => Unit) => Unit): Unit = { + test(testName, testTags: _*) { + def drop(table: String, columns: Seq[String]): Unit = + sql(s"alter table $table drop column (${columns.mkString(",")})") + f(drop) + + } + } + + dropTest("drop column disallowed with sql flag off") { drop => + withSQLConf(DeltaSQLConf.DELTA_ALTER_TABLE_DROP_COLUMN_ENABLED.key -> "false") { + withTable("t1") { + createTableWithSQLAPI("t1", + simpleNestedData, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name")) + + assertException("DROP COLUMN is not supported for your Delta table") { + drop("t1", "arr" :: Nil) + } + } + } + } + + dropTest("drop column disallowed with no mapping mode") { drop => + withTable("t1") { + createTableWithSQLAPI("t1", simpleNestedData) + + assertException("DROP COLUMN is not supported for your Delta table") { + drop("t1", "arr" :: Nil) + } + } + } + + dropTest("drop column - basic") { drop => + withTable("t1") { + createTableWithSQLAPI("t1", + simpleNestedData, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name")) + + // drop single column + drop("t1", "arr" :: Nil) + checkAnswer(spark.table("t1"), simpleNestedData.drop("arr")) + + // drop multiple columns + drop("t1", "a" :: "b.c" :: Nil) + checkAnswer(spark.table("t1"), + Seq( + Row(Row(1), Map("k1" -> "v1")), + Row(Row(2), Map("k2" -> "v2")))) + + // check delta history + checkAnswer( + spark.sql("describe history t1") + .select("operation", "operationParameters") + .where("version = 3"), + Seq(Row("DROP COLUMNS", Map("columns" -> """["a","b.c"]""")))) + } + } + + dropTest("drop column - basic - path based table") { drop => + withTempDir { dir => + simpleNestedData.write.mode("overwrite").format("delta").save(dir.getCanonicalPath) + alterTableWithProps(s"delta.`${dir.getCanonicalPath}`", Map( + DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name", + DeltaConfigs.MIN_READER_VERSION.key -> "2", + DeltaConfigs.MIN_WRITER_VERSION.key -> "5")) + + // drop single column + drop(s"delta.`${dir.getCanonicalPath}`", "arr" :: Nil) + checkAnswer(spark.read.format("delta").load(dir.getCanonicalPath), + simpleNestedData.drop("arr")) + } + } + + dropTest("dropped columns can no longer be queried") { drop => + withTable("t1") { + createTableWithSQLAPI("t1", + simpleNestedData, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name")) + + drop("t1", "a" :: "b.c" :: "arr" :: Nil) + + // dropped column cannot be queried anymore + val err1 = intercept[AnalysisException] { + spark.table("t1").where("a = 'str1'").collect() + }.getMessage + assert( + err1.contains("cannot be resolved") || + err1.contains("Column 'a' does not exist") || + err1.contains("cannot resolve")) + + val err2 = intercept[AnalysisException] { + spark.table("t1").select("min(a)").collect() + }.getMessage + assert( + 
err2.contains("cannot be resolved") || + err2.contains("Column '`min(a)`' does not exist") || + err2.contains("cannot resolve")) + } + } + + dropTest("drop column - corner cases") { drop => + withTable("t1") { + createTableWithSQLAPI("t1", + simpleNestedData, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name")) + + drop("t1", "a" :: "b.c" :: "arr" :: Nil) + + // cannot drop the last nested field + val e = intercept[AnalysisException] { + drop("t1", "b.d" :: Nil) + } + assert(e.getMessage.contains("Cannot drop column from a struct type with a single field")) + + // can drop the parent column + drop("t1", "b" :: Nil) + + // cannot drop the last top-level field + val e2 = intercept[AnalysisException] { + drop("t1", "map" :: Nil) + } + assert(e2.getMessage.contains("Cannot drop column from a struct type with a single field")) + + spark.sql("alter table t1 add column (e struct<e1 int, e2 string>)") + + // can drop a column with arbitrary chars + spark.sql(s"alter table t1 rename column map to `${colName("map")}`") + drop("t1", s"`${colName("map")}`" :: Nil) + + // only column e is left now + assert(spark.table("t1").schema.map(_.name) == Seq("e")) + + // can drop a nested column when the top-level column is the only column + drop("t1", "e.e1" :: Nil) + val resultSchema = spark.table("t1").schema + assert(resultSchema.findNestedField("e" :: "e2" :: Nil).isDefined) + assert(resultSchema.findNestedField("e" :: "e1" :: Nil).isEmpty) + } + } + + dropTest("drop column with constraints") { drop => + withTable("t1") { + val schemaWithNotNull = + simpleNestedData.schema.toDDL.replace("c: STRING", "c: STRING NOT NULL") + + withTable("source") { + spark.sql( + s""" + |CREATE TABLE t1 ($schemaWithNotNull) + |USING DELTA + |${propString(Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name"))} + |""".stripMargin) + simpleNestedData.write.format("delta").mode("append").saveAsTable("t1") + } + + spark.sql("alter table t1 add constraint rangeABC check (concat(a, a) > 'str')") + spark.sql("alter table t1 add constraint rangeBD check (`b`.`d` > 0)") + + spark.sql("alter table t1 add constraint arrValue check (arr[0] > 0)") + + assertException("Cannot drop column a because this column is referenced by") { + drop("t1", "a" :: Nil) + } + + assertException("Cannot drop column arr because this column is referenced by") { + drop("t1", "arr" :: Nil) + } + + + // cannot drop b because its child is referenced + assertException("Cannot drop column b because this column is referenced by") { + drop("t1", "b" :: Nil) + } + + // can still drop b.c because it's only referenced by a NOT NULL constraint, not a CHECK constraint + drop("t1", "b.c" :: Nil) + + // this is a safety flag - it won't error when you turn it off + withSQLConf(DeltaSQLConf.DELTA_ALTER_TABLE_CHANGE_COLUMN_CHECK_EXPRESSIONS.key -> "false") { + drop("t1", "b" :: "arr" :: Nil) + } + } + } + + test("drop column with constraints - map element") { + def drop(table: String, columns: Seq[String]): Unit = + sql(s"alter table $table drop column (${columns.mkString(",")})") + + withTable("t1") { + val schemaWithNotNull = + simpleNestedData.schema.toDDL.replace("c: STRING", "c: STRING NOT NULL") + + withTable("source") { + spark.sql( + s""" + |CREATE TABLE t1 ($schemaWithNotNull) + |USING DELTA + |${propString(Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name"))} + |""".stripMargin) + simpleNestedData.write.format("delta").mode("append").saveAsTable("t1") + } + + spark.sql("alter table t1 add constraint" + + " mapValue check (not array_contains(map_keys(map), 'k1') or map['k1'] = 'v1')") + + assertException("Cannot drop 
column map because this column is referenced by") { + drop("t1", "map" :: Nil) + } + } + } + + dropTest("drop with generated column") { drop => + withTable("t1") { + withSQLConf(DeltaSQLConf.DELTA_ALTER_TABLE_DROP_COLUMN_ENABLED.key -> "true") { + val tableBuilder = io.delta.tables.DeltaTable.create(spark).tableName("t1") + tableBuilder.property("delta.columnMapping.mode", "name") + + // add existing columns + simpleNestedSchema.map(field => (field.name, field.dataType)).foreach(col => { + val (colName, dataType) = col + val columnBuilder = io.delta.tables.DeltaTable.columnBuilder(spark, colName) + columnBuilder.dataType(dataType.sql) + tableBuilder.addColumn(columnBuilder.build()) + }) + + // add generated columns + val genCol1 = io.delta.tables.DeltaTable.columnBuilder(spark, "genCol1") + .dataType("int") + .generatedAlwaysAs("length(a)") + .build() + + val genCol2 = io.delta.tables.DeltaTable.columnBuilder(spark, "genCol2") + .dataType("int") + .generatedAlwaysAs("b.d * 100 + arr[0]") + .build() + + tableBuilder + .addColumn(genCol1) + .addColumn(genCol2) + .execute() + + simpleNestedData.write.format("delta").mode("append").saveAsTable("t1") + + assertException("Cannot drop column a because this column is referenced by") { + drop("t1", "a" :: Nil) + } + + assertException("Cannot drop column b because this column is referenced by") { + drop("t1", "b" :: Nil) + } + + assertException("Cannot drop column b.d because this column is referenced by") { + drop("t1", "b.d" :: Nil) + } + + assertException("Cannot drop column arr because this column is referenced by") { + drop("t1", "arr" :: Nil) + } + + // you can still drop b.c as it has no dependent gen col + drop("t1", "b.c" :: Nil) + + // you can also drop a generated column itself + drop("t1", "genCol1" :: Nil) + + // add new data after dropping + spark.createDataFrame( + Seq(Row("str3", Row(3), Map("k3" -> "v3"), Array(3, 33))).asJava, + new StructType() + .add("a", StringType, true) + .add("b", + new StructType() + .add("d", IntegerType, true)) + .add("map", MapType(StringType, StringType), true) + .add("arr", ArrayType(IntegerType), true)) + .write.format("delta").mode("append").saveAsTable("t1") + + checkAnswer(spark.table("t1"), + Seq( + Row("str1", Row(1), Map("k1" -> "v1"), Array(1, 11), 101), + Row("str2", Row(2), Map("k2" -> "v2"), Array(2, 22), 202), + Row("str3", Row(3), Map("k3" -> "v3"), Array(3, 33), 303))) + + // this is a safety flag - if you turn it off, it will still error but msg is not as helpful + withSQLConf(DeltaSQLConf.DELTA_ALTER_TABLE_CHANGE_COLUMN_CHECK_EXPRESSIONS.key -> "false") { + assertException("A generated column cannot use a non-existent column") { + drop("t1", "arr" :: Nil) + } + } + } + } + } + + dropTest("dropping all columns is not allowed") { drop => + withTable("t1") { + createTableWithSQLAPI("t1", + simpleNestedData, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name") + ) + val e = intercept[AnalysisException] { + drop("t1", "a" :: "b" :: "map" :: "arr" :: Nil) + } + assert(e.getMessage.contains("Cannot drop column")) + } + } + + dropTest("dropping partition columns is not allowed") { drop => + withTable("t1") { + createTableWithSQLAPI("t1", + simpleNestedData, + Map(DeltaConfigs.COLUMN_MAPPING_MODE.key -> "name"), + partCols = Seq("a") + ) + val e = intercept[AnalysisException] { + drop("t1", "a" :: Nil) + } + assert(e.getMessage.contains("Dropping partition columns (a) is not allowed")) + } + } + + + /** + * Covers dropping a nested field using the ALTER TABLE command. 
+ * @param initialColumnType Type of the single column used to create the initial test table. + * @param fieldToDrop Name of the field to drop from the initial column type. + * @param updatedColumnType Expected type of the single column after dropping the nested field. + */ + def testDropNestedField(testName: String)( + initialColumnType: String, + fieldToDrop: String, + updatedColumnType: String): Unit = + testColumnMapping(s"ALTER TABLE DROP COLUMNS - nested $testName") { mode => + withTempDir { dir => + withTable("delta_test") { + sql( + s""" + |CREATE TABLE delta_test (data $initialColumnType) + |USING delta + |TBLPROPERTIES (${DeltaConfigs.COLUMN_MAPPING_MODE.key} = '$mode') + |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin) + + val expectedInitialType = initialColumnType.filterNot(_.isWhitespace) + val expectedUpdatedType = updatedColumnType.filterNot(_.isWhitespace) + val fieldName = s"data.${fieldToDrop}" + + def columnType: DataFrame = + sql("DESCRIBE TABLE delta_test") + .filter("col_name = 'data'") + .select("data_type") + checkAnswer(columnType, Row(expectedInitialType)) + + sql(s"ALTER TABLE delta_test DROP COLUMNS ($fieldName)") + checkAnswer(columnType, Row(expectedUpdatedType)) + } + } + } + + testDropNestedField("struct in map key")( + initialColumnType = "map<struct<a: int, b: int>, int>", + fieldToDrop = "key.b", + updatedColumnType = "map<struct<a: int>, int>") + + testDropNestedField("struct in map value")( + initialColumnType = "map<int, struct<a: int, b: int>>", + fieldToDrop = "value.b", + updatedColumnType = "map<int, struct<a: int>>") + + testDropNestedField("struct in array")( + initialColumnType = "array<struct<a: int, b: int>>", + fieldToDrop = "element.b", + updatedColumnType = "array<struct<a: int>>") + + testDropNestedField("struct in nested map keys")( + initialColumnType = "map<map<struct<a: int, b: int>, int>, int>", + fieldToDrop = "key.key.b", + updatedColumnType = "map<map<struct<a: int>, int>, int>") + + testDropNestedField("struct in nested map values")( + initialColumnType = "map<int, map<int, struct<a: int, b: int>>>", + fieldToDrop = "value.value.b", + updatedColumnType = "map<int, map<int, struct<a: int>>>") + + testDropNestedField("struct in nested arrays")( + initialColumnType = "array<array<struct<a: int, b: int>>>", + fieldToDrop = "element.element.b", + updatedColumnType = "array<array<struct<a: int>>>") + + testDropNestedField("struct in nested array and map")( + initialColumnType = "array<map<int, struct<a: int, b: int>>>", + fieldToDrop = "element.value.b", + updatedColumnType = "array<map<int, struct<a: int>>>") + + testDropNestedField("struct in nested map key and array")( + initialColumnType = "map<array<struct<a: int, b: int>>, int>", + fieldToDrop = "key.element.b", + updatedColumnType = "map<array<struct<a: int>>, int>") + + testDropNestedField("struct in nested map value and array")( + initialColumnType = "map<int, array<struct<a: int, b: int>>>", + fieldToDrop = "value.element.b", + updatedColumnType = "map<int, array<struct<a: int>>>") +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaErrorsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaErrorsSuite.scala new file mode 100644 index 00000000000..1c8c791224c --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaErrorsSuite.scala @@ -0,0 +1,2788 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.{FileNotFoundException, PrintWriter, StringWriter} +import java.net.URI +import java.sql.Timestamp +import java.text.SimpleDateFormat +import java.util.Locale + +import scala.sys.process.Process + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaErrors.generateDocsLink +import org.apache.spark.sql.delta.actions.{Action, Metadata, Protocol} +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils.{TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION} +import org.apache.spark.sql.delta.catalog.DeltaCatalog +import org.apache.spark.sql.delta.constraints.CharVarcharConstraint +import org.apache.spark.sql.delta.constraints.Constraints +import org.apache.spark.sql.delta.constraints.Constraints.NotNull +import org.apache.spark.sql.delta.hooks.AutoCompactType +import org.apache.spark.sql.delta.hooks.PostCommitHook +import org.apache.spark.sql.delta.schema.{DeltaInvariantViolationException, InvariantViolationException, SchemaMergingUtils, SchemaUtils, UnsupportedDataTypeInfo} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import io.delta.sql.DeltaSparkSessionExtension +import org.apache.hadoop.fs.Path +import org.json4s.JString +import org.scalatest.GivenWhenThen + +import org.apache.spark.SparkThrowable +import org.apache.spark.sql.{AnalysisException, QueryTest, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, ExprId, Length, LessThanOrEqual, Literal, SparkVersion} +import org.apache.spark.sql.catalyst.expressions.Uuid +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ +import org.apache.spark.sql.connector.catalog.Identifier +import org.apache.spark.sql.errors.QueryErrorsBase +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types.{CalendarIntervalType, DataTypes, DateType, IntegerType, StringType, StructField, StructType, TimestampNTZType} + +trait DeltaErrorsSuiteBase + extends QueryTest + with SharedSparkSession + with GivenWhenThen + with DeltaSQLCommandTest + with SQLTestUtils + with QueryErrorsBase { + + val MAX_URL_ACCESS_RETRIES = 3 + val path = "/sample/path" + + // Map of error function to the error + // When adding a function... 
+ // (a) if the function is just a message: add the name of the message/function as the key, and an + // error that uses that message as the value + // (b) if the function is an error function: add the name of the function as the key, and the + // value as the error being thrown + def errorsToTest: Map[String, Throwable] = Map( + "createExternalTableWithoutLogException" -> + DeltaErrors.createExternalTableWithoutLogException(new Path(path), "tableName", spark), + "createExternalTableWithoutSchemaException" -> + DeltaErrors.createExternalTableWithoutSchemaException(new Path(path), "tableName", spark), + "createManagedTableWithoutSchemaException" -> + DeltaErrors.createManagedTableWithoutSchemaException("tableName", spark), + "multipleSourceRowMatchingTargetRowInMergeException" -> + DeltaErrors.multipleSourceRowMatchingTargetRowInMergeException(spark), + "concurrentModificationExceptionMsg" -> new ConcurrentWriteException(None), + "incorrectLogStoreImplementationException" -> + DeltaErrors.incorrectLogStoreImplementationException(sparkConf, new Throwable()), + "sourceNotDeterministicInMergeException" -> + DeltaErrors.sourceNotDeterministicInMergeException(spark), + "columnMappingAdviceMessage" -> + DeltaErrors.columnRenameNotSupported, + "icebergClassMissing" -> DeltaErrors.icebergClassMissing(sparkConf, new Throwable()), + "tableFeatureReadRequiresWriteException" -> + DeltaErrors.tableFeatureReadRequiresWriteException(requiredWriterVersion = 7), + "tableFeatureRequiresHigherReaderProtocolVersion" -> + DeltaErrors.tableFeatureRequiresHigherReaderProtocolVersion( + feature = "feature", + currentVersion = 1, + requiredVersion = 7), + "tableFeatureRequiresHigherWriterProtocolVersion" -> + DeltaErrors.tableFeatureRequiresHigherReaderProtocolVersion( + feature = "feature", + currentVersion = 1, + requiredVersion = 7), + "blockStreamingReadsWithIncompatibleColumnMappingSchemaChanges" -> + DeltaErrors.blockStreamingReadsWithIncompatibleColumnMappingSchemaChanges( + spark, + StructType.fromDDL("id int"), + StructType.fromDDL("id2 int"), + detectedDuringStreaming = true) + ) + + def otherMessagesToTest: Map[String, String] = Map( + "ignoreStreamingUpdatesAndDeletesWarning" -> + DeltaErrors.ignoreStreamingUpdatesAndDeletesWarning(spark) + ) + + def errorMessagesToTest: Map[String, String] = + errorsToTest.mapValues(_.getMessage).toMap ++ otherMessagesToTest + + def checkIfValidResponse(url: String, response: String): Boolean = { + response.contains("HTTP/1.1 200 OK") || response.contains("HTTP/2 200") + } + + def getUrlsFromMessage(message: String): List[String] = { + val regexToFindUrl = "https://[^\\s]+".r + regexToFindUrl.findAllIn(message).toList + } + + def testUrls(): Unit = { + errorMessagesToTest.foreach { case (errName, message) => + getUrlsFromMessage(message).foreach { url => + Given(s"*** Checking response for url: $url") + var response = "" + (1 to MAX_URL_ACCESS_RETRIES).foreach { attempt => + if (attempt > 1) Thread.sleep(1000) + response = try { + Process("curl -I " + url).!! + } catch { + case e: RuntimeException => + val sw = new StringWriter + e.printStackTrace(new PrintWriter(sw)) + sw.toString + } + if (!checkIfValidResponse(url, response)) { + fail( + s""" + |A link to the URL: '$url' is broken in the error: $errName, accessing this URL + |does not result in a valid response, received the following response: $response + """.stripMargin) + } + } + } + } + } + + /** + * New testcases should always use this method. 
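+   * A minimal usage sketch (mirroring the DELTA_UNKNOWN_CONFIGURATION check later in this
+   * suite; the expected message is abbreviated here, which is why `startWith = true` is set):
+   * {{{
+   *   val e = intercept[DeltaAnalysisException] {
+   *     throw DeltaErrors.unknownConfigurationKeyException("confKey")
+   *   }
+   *   checkErrorMessage(e, Some("DELTA_UNKNOWN_CONFIGURATION"), None,
+   *     Some("Unknown configuration was specified: confKey"), startWith = true)
+   * }}}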
+ */ + def checkErrorMessage( + e: Exception with DeltaThrowable, + errClassOpt: Option[String] = None, + sqlStateOpt: Option[String] = None, + errMsgOpt: Option[String] = None, + startWith: Boolean = false): Unit = { + val prefix = errClassOpt match { + case Some(exist) => + assert(e.getErrorClass == exist) + exist + case _ => e.getErrorClass + } + sqlStateOpt match { + case Some(sqlState) => assert(e.getSqlState == sqlState) + case _ => + } + (errMsgOpt, startWith) match { + case (Some(errMsg), true) => + assert(e.getMessage.startsWith(s"[${prefix}] ${errMsg}")) + case (Some(errMsg), false) => + assert(e.getMessage == s"[${prefix}] ${errMsg}") + case _ => + } + } + + test("Validate that links to docs in DeltaErrors are correct") { + // verify DeltaErrors.errorsWithDocsLinks is consistent with DeltaErrorsSuite + assert(errorsToTest.keySet ++ otherMessagesToTest.keySet == + DeltaErrors.errorsWithDocsLinks.toSet + ) + testUrls() + } + + protected def multipleSourceRowMatchingTargetRowInMergeUrl: String = + "/delta-update.html#upsert-into-a-table-using-merge" + + test("test DeltaErrors methods -- part 1") { + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.tableAlreadyContainsCDCColumns(Seq("col1", "col2")) + } + checkErrorMessage(e, Some("DELTA_TABLE_ALREADY_CONTAINS_CDC_COLUMNS"), Some("42711"), + Some(s"""Unable to enable Change Data Capture on the table. The table already contains + |reserved columns [col1,col2] that will + |be used internally as metadata for the table's Change Data Feed. To enable + |Change Data Feed on the table rename/drop these columns. + |""".stripMargin)) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.cdcColumnsInData(Seq("col1", "col2")) + } + checkErrorMessage(e, Some("RESERVED_CDC_COLUMNS_ON_WRITE"), Some("42939"), + Some(s""" + |The write contains reserved columns [col1,col2] that are used + |internally as metadata for Change Data Feed. To write to the table either rename/drop + |these columns or disable Change Data Feed on the table by setting + |delta.enableChangeDataFeed to false.""".stripMargin)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.multipleCDCBoundaryException("sample") + } + checkErrorMessage(e, Some("DELTA_MULTIPLE_CDC_BOUNDARY"), Some("42614"), + Some("Multiple sample arguments provided for CDC read. Please provide " + + "one of either sampleTimestamp or sampleVersion.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.failOnCheckpointRename(new Path("path-1"), new Path("path-2")) + } + checkErrorMessage(e, None, None, + Some("Cannot rename path-1 to path-2")) + } + { + val e = intercept[DeltaInvariantViolationException] { + throw DeltaErrors.notNullColumnMissingException(NotNull(Seq("c0", "c1"))) + } + checkErrorMessage(e, Some("DELTA_MISSING_NOT_NULL_COLUMN_VALUE"), Some("23502"), + Some("Column c0.c1, which has a NOT NULL constraint, is missing " + + "from the data being written into the table.")) + } + { + val parent = "parent" + val nested = IntegerType + val nestType = "nestType" + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.nestedNotNullConstraint(parent, nested, nestType) + } + checkErrorMessage(e, Some("DELTA_NESTED_NOT_NULL_CONSTRAINT"), Some("0AKDC"), + Some(s"The $nestType type of the field $parent contains a NOT NULL " + + s"constraint. Delta does not support NOT NULL constraints nested within arrays or maps. 
" + + s"To suppress this error and silently ignore the specified constraints, set " + + s"${DeltaSQLConf.ALLOW_UNENFORCED_NOT_NULL_CONSTRAINTS.key} = true.\n" + + s"Parsed $nestType type:\n${nested.prettyJson}")) + } + { + val e = intercept[DeltaInvariantViolationException] { + throw DeltaInvariantViolationException(Constraints.NotNull(Seq("col1"))) + } + checkErrorMessage(e, Some("DELTA_NOT_NULL_CONSTRAINT_VIOLATED"), Some("23502"), + Some("NOT NULL constraint violated for column: col1.\n")) + } + { + val expr = CatalystSqlParser.parseExpression("concat(\"hello \", \"world\")") + val e = intercept[DeltaInvariantViolationException] { + throw DeltaInvariantViolationException( + Constraints.Check(CharVarcharConstraint.INVARIANT_NAME, + LessThanOrEqual(Length(expr), Literal(5))), + Map.empty[String, Any]) + } + checkErrorMessage(e, Some("DELTA_EXCEED_CHAR_VARCHAR_LIMIT"), Some("22001"), + Some("Exceeds char/varchar type length limitation. " + + "Failed check: (length('concat(hello , world)) <= 5).")) + } + { + val e = intercept[DeltaInvariantViolationException] { + throw DeltaInvariantViolationException( + Constraints.Check("__dummy__", + CatalystSqlParser.parseExpression("id < 0")), + Map("a" -> "b")) + } + checkErrorMessage(e, Some("DELTA_VIOLATE_CONSTRAINT_WITH_VALUES"), Some("23001"), + Some("CHECK constraint __dummy__ (id < 0) violated " + + "by row with values:\n - a : b")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.notADeltaTableException(DeltaTableIdentifier(Some("path"))) + } + checkErrorMessage(e, None, None, + Some("`path` is not a Delta table.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.notADeltaTableException( + operation = "delete", + DeltaTableIdentifier(Some("path"))) + } + checkErrorMessage(e, None, None, + Some("`path` is not a Delta table. delete is only supported for Delta tables.")) + } + { + val table = TableIdentifier("table") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cannotWriteIntoView(table) + } + checkErrorMessage(e, Some("DELTA_CANNOT_WRITE_INTO_VIEW"), Some("0A000"), + Some(s"$table is a view. Writes to a view are not supported.")) + } + { + val sourceType = IntegerType + val targetType = DateType + val columnName = "column_name" + val e = intercept[DeltaArithmeticException] { + throw DeltaErrors.castingCauseOverflowErrorInTableWrite(sourceType, targetType, columnName) + } + checkErrorMessage(e, Some("DELTA_CAST_OVERFLOW_IN_TABLE_WRITE"), Some("22003"), None) + assert(e.getMessageParameters.get("sourceType") == toSQLType(sourceType)) + assert(e.getMessageParameters.get("targetType") == toSQLType(targetType)) + assert(e.getMessageParameters.get("columnName") == toSQLId(columnName)) + assert(e.getMessageParameters.get("storeAssignmentPolicyFlag") + == SQLConf.STORE_ASSIGNMENT_POLICY.key) + assert(e.getMessageParameters.get("updateAndMergeCastingFollowsAnsiEnabledFlag") + == DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key) + assert(e.getMessageParameters.get("ansiEnabledFlag") == SQLConf.ANSI_ENABLED.key) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.invalidColumnName(name = "col-1") + } + checkErrorMessage(e, None, None, + Some("Attribute name \"col-1\" contains invalid character(s) " + + "among \" ,;{}()\\\\n\\\\t=\". 
Please use alias to rename it.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.updateSetColumnNotFoundException(col = "c0", colList = Seq("c1", "c2")) + } + checkErrorMessage(e, None, None, + Some("SET column `c0` not found given columns: [`c1`, `c2`].")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.updateSetConflictException(cols = Seq("c1", "c2")) + } + checkErrorMessage(e, None, None, + Some("There is a conflict from these SET columns: [`c1`, `c2`].")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.bloomFilterOnNestedColumnNotSupportedException("c0") + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_NESTED_COLUMN_IN_BLOOM_FILTER"), Some("0AKDC"), + Some("Creating a bloom filer index on a nested " + + "column is currently unsupported: c0")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.bloomFilterOnPartitionColumnNotSupportedException("c0") + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_PARTITION_COLUMN_IN_BLOOM_FILTER"), + Some("0AKDC"), + Some("Creating a bloom filter index on a partitioning column " + + "is unsupported: c0")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.bloomFilterDropOnNonIndexedColumnException("c0") + } + checkErrorMessage(e, None, None, + Some("Cannot drop bloom filter index on a non indexed column: c0")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.cannotRenamePath("a", "b") + } + checkErrorMessage(e, None, None, + Some("Cannot rename a to b")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.cannotSpecifyBothFileListAndPatternString() + } + checkErrorMessage(e, None, None, + Some("Cannot specify both file list and pattern string.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cannotUpdateArrayField("t", "f") + } + checkErrorMessage(e, None, None, + Some("Cannot update t field f type: update the element by updating f.element")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cannotUpdateMapField("t", "f") + } + checkErrorMessage(e, None, None, + Some("Cannot update t field f type: update a map by updating f.key or f.value")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cannotUpdateStructField("t", "f") + } + checkErrorMessage(e, None, None, + Some("Cannot update t field f type: update struct by adding, deleting, " + + "or updating its fields")) + } + { + val tableName = "table" + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cannotUpdateOtherField(tableName, IntegerType) + } + checkErrorMessage(e, Some("DELTA_CANNOT_UPDATE_OTHER_FIELD"), Some("429BQ"), + Some(s"Cannot update $tableName field of type ${IntegerType}")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.duplicateColumnsOnUpdateTable(originalException = new Exception("123")) + } + checkErrorMessage(e, None, None, + Some("123\nPlease remove duplicate columns before you update your table.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.maxCommitRetriesExceededException(0, 1, 2, 3, 4) + } + checkErrorMessage(e, None, None, + Some(s"""This commit has failed as it has been tried 0 times but did not succeed. + |This can be caused by the Delta table being committed continuously by many concurrent + |commits. 
+ | + |Commit started at version: 2 + |Commit failed at version: 1 + |Number of actions attempted to commit: 3 + |Total time spent attempting this commit: 4 ms""".stripMargin)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.missingColumnsInInsertInto("c") + } + checkErrorMessage(e, None, None, + Some("Column c is not specified in INSERT")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.invalidAutoCompactType("invalid") + } + val allowed = AutoCompactType.ALLOWED_VALUES.mkString("(", ",", ")") + checkErrorMessage(e, None, None, + Some(s"Invalid auto-compact type: invalid. Allowed values are: $allowed.")) + } + { + val table = DeltaTableIdentifier(Some("path")) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.nonExistentDeltaTable(table) + } + checkErrorMessage(e, None, None, + Some(s"Delta table $table doesn't exist.")) + } + checkError( + exception = intercept[DeltaIllegalStateException] { + throw DeltaErrors.differentDeltaTableReadByStreamingSource( + newTableId = "027fb01c-94aa-4cab-87cb-5aab6aec6d17", + oldTableId = "2edf2c02-bb63-44e9-a84c-517fad0db296") + }, + errorClass = "DIFFERENT_DELTA_TABLE_READ_BY_STREAMING_SOURCE", + parameters = Map( + "oldTableId" -> "2edf2c02-bb63-44e9-a84c-517fad0db296", + "newTableId" -> "027fb01c-94aa-4cab-87cb-5aab6aec6d17") + ) + + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.nonExistentColumnInSchema("c", "s") + } + checkErrorMessage(e, None, None, + Some("Couldn't find column c in:\ns")) + } + { + val ident = Identifier.of(Array("namespace"), "name") + val e = intercept[DeltaNoSuchTableException] { + throw DeltaErrors.noRelationTable(ident) + } + checkErrorMessage(e, Some("DELTA_NO_RELATION_TABLE"), Some("42P01"), + Some(s"Table ${ident.quoted} not found")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.notADeltaTable("t") + } + checkErrorMessage(e, None, None, + Some("t is not a Delta table. Please drop this table first if you would " + + "like to recreate it with Delta Lake.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.notFoundFileToBeRewritten("f", Seq("a", "b")) + } + checkErrorMessage(e, None, None, + Some("File (f) to be rewritten not found among candidate files:\na\nb")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.unsetNonExistentProperty("k", "t") + } + checkErrorMessage(e, None, None, + Some("Attempted to unset non-existent property 'k' in table t")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.generatedColumnsReferToWrongColumns( + new AnalysisException("analysis exception")) + } + checkErrorMessage(e, None, None, + Some("A generated column cannot use a non-existent column or " + + "another generated column")) + } + { + val current = StructField("c0", IntegerType) + val update = StructField("c0", StringType) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.generatedColumnsUpdateColumnType(current, update) + } + checkErrorMessage(e, Some("DELTA_GENERATED_COLUMN_UPDATE_TYPE_MISMATCH"), Some("42K09"), + Some( + s"Column ${current.name} is a generated column or a column used by a generated column. 
" + + s"The data type is ${current.dataType.sql} and cannot be converted to data type " + + s"${update.dataType.sql}")) + } + { + val e = intercept[DeltaColumnMappingUnsupportedException] { + throw DeltaErrors.changeColumnMappingModeNotSupported(oldMode = "old", newMode = "new") + } + checkErrorMessage(e, None, None, + Some("Changing column mapping mode from 'old' to 'new' is not supported.")) + } + { + val e = intercept[DeltaColumnMappingUnsupportedException] { + throw DeltaErrors.generateManifestWithColumnMappingNotSupported + } + checkErrorMessage(e, None, None, + Some("Manifest generation is not supported for tables that leverage " + + "column mapping, as external readers cannot read these Delta tables. See Delta " + + "documentation for more details.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.convertToDeltaNoPartitionFound("testTable") + } + checkErrorMessage(e, Some("DELTA_CONVERSION_NO_PARTITION_FOUND"), Some("42KD6"), + Some("Found no partition information in the catalog for table testTable." + + " Have you run \"MSCK REPAIR TABLE\" on your table to discover partitions?")) + } + { + val e = intercept[DeltaColumnMappingUnsupportedException] { + throw DeltaErrors.convertToDeltaWithColumnMappingNotSupported(IdMapping) + } + checkErrorMessage(e, None, None, + Some("The configuration " + + "'spark.databricks.delta.properties.defaults.columnMapping.mode' cannot be set to `id` " + + "when using CONVERT TO DELTA.")) + } + { + val oldAndNew = Seq( + (Protocol(2, 4), ColumnMappingTableFeature.minProtocolVersion), + ( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION), + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(ColumnMappingTableFeature))) + for ((oldProtocol, newProtocol) <- oldAndNew) { + val e = intercept[DeltaColumnMappingUnsupportedException] { + throw DeltaErrors.changeColumnMappingModeOnOldProtocol(oldProtocol) + } + // scalastyle:off line.size.limit + checkErrorMessage(e, None, None, + Some( + s""" + |Your current table protocol version does not support changing column mapping modes + |using delta.columnMapping.mode. + | + |Required Delta protocol version for column mapping: + |${newProtocol.toString} + |Your table's current Delta protocol version: + |${oldProtocol.toString} + | + |Please enable Column Mapping on your Delta table with mapping mode 'name'. + |You can use one of the following commands. + | + |If your table is already on the required protocol version: + |ALTER TABLE table_name SET TBLPROPERTIES ('delta.columnMapping.mode' = 'name') + | + |If your table is not on the required protocol version and requires a protocol upgrade: + |ALTER TABLE table_name SET TBLPROPERTIES ( + | 'delta.columnMapping.mode' = 'name', + | 'delta.minReaderVersion' = '${newProtocol.minReaderVersion}', + | 'delta.minWriterVersion' = '${newProtocol.minWriterVersion}') + |""".stripMargin) + ) + // scalastyle:off line.size.limit + } + } + { + val e = intercept[DeltaColumnMappingUnsupportedException] { + throw DeltaErrors.schemaChangeDuringMappingModeChangeNotSupported( + StructType(Seq(StructField("c0", IntegerType))), + StructType(Seq(StructField("c1", IntegerType)))) + } + checkErrorMessage(e, None, None, + Some(""" + |Schema change is detected: + | + |old schema: + |root + | |-- c0: integer (nullable = true) + | + | + |new schema: + |root + | |-- c1: integer (nullable = true) + | + | + |Schema changes are not allowed during the change of column mapping mode. 
+ | + |""".stripMargin)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.notEnoughColumnsInInsert( + "table", 1, 2, Some("nestedField")) + } + checkErrorMessage(e, None, None, + Some("Cannot write to 'table', not enough nested fields in nestedField; " + + s"target table has 2 column(s) but the inserted data has " + + s"1 column(s)")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cannotInsertIntoColumn( + "tableName", "source", "target", "targetType") + } + checkErrorMessage(e, None, None, + Some("Struct column source cannot be inserted into a " + + "targetType field target in tableName.")) + } + { + val colName = "col1" + val schema = Seq(UnresolvedAttribute("col2")) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.partitionColumnNotFoundException(colName, schema) + } + checkErrorMessage(e, Some("DELTA_PARTITION_COLUMN_NOT_FOUND"), Some("42703"), + Some(s"Partition column ${DeltaErrors.formatColumn(colName)} not found in schema " + + s"[${schema.map(_.name).mkString(", ")}]")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.partitionPathParseException("fragment") + } + checkErrorMessage(e, None, None, + Some("A partition path fragment should be the form like " + + "`part1=foo/part2=bar`. The partition path: fragment")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.replaceWhereMismatchException("replaceWhere", + new InvariantViolationException("Invariant violated.")) + } + checkErrorMessage(e, Some("DELTA_REPLACE_WHERE_MISMATCH"), Some("44000"), + Some("""Written data does not conform to partial table overwrite condition or constraint 'replaceWhere'. + |Invariant violated.""".stripMargin)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.replaceWhereMismatchException("replaceWhere", "badPartitions") + } + checkErrorMessage(e, Some("DELTA_REPLACE_WHERE_MISMATCH"), Some("44000"), + Some("""Written data does not conform to partial table overwrite condition or constraint 'replaceWhere'. + |Invalid data would be written to partitions badPartitions.""".stripMargin)) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.actionNotFoundException("action", 0) + } + val msg = s"""The action of your Delta table could not be recovered while Reconstructing + |version: 0. Did you manually delete files in the _delta_log directory?""".stripMargin + checkErrorMessage(e, None, None, + Some(msg)) + } + { + val oldSchema = StructType(Seq(StructField("c0", IntegerType))) + val newSchema = StructType(Seq(StructField("c0", StringType))) + for (retryable <- DeltaTestUtils.BOOLEAN_DOMAIN) { + val expectedClass: Class[_] = classOf[DeltaIllegalStateException] + + var e = intercept[Exception with SparkThrowable] { + throw DeltaErrors.schemaChangedException(oldSchema, newSchema, retryable, None, false) + } + assert(expectedClass.isAssignableFrom(e.getClass)) + assert(e.getErrorClass == "DELTA_SCHEMA_CHANGED") + assert(e.getSqlState == "KD007") + // Use '#' as stripMargin interpolator to get around formatSchema having '|' in it + var msg = + s"""Detected schema change: + #streaming source schema: ${DeltaErrors.formatSchema(oldSchema)} + # + #data file schema: ${DeltaErrors.formatSchema(newSchema)} + # + #Please try restarting the query. If this issue repeats across query restarts without + #making progress, you have made an incompatible schema change and need to start your + #query from scratch using a new checkpoint directory. 
+ #""".stripMargin('#') + // [StreamingRetryableException] is a SparkThrowable + // but uses DeltaThrowableHelper to format its message. + // It does not contain a parameter map, so we cannot use [checkError] + // It is not a DeltaThrowable so we cannot use [checkErrorMessage] + // Directly compare the error message here. + assert(e.getMessage == s"[DELTA_SCHEMA_CHANGED] ${msg}") + + // Check the error message with version information + e = intercept[Exception with SparkThrowable] { + throw DeltaErrors.schemaChangedException(oldSchema, newSchema, retryable, Some(10), false) + } + assert(expectedClass.isAssignableFrom(e.getClass)) + assert(e.getErrorClass == "DELTA_SCHEMA_CHANGED_WITH_VERSION") + assert(e.getSqlState == "KD007") + // Use '#' as stripMargin interpolator to get around formatSchema having '|' in it + msg = + s"""Detected schema change in version 10: + #streaming source schema: ${DeltaErrors.formatSchema(oldSchema)} + # + #data file schema: ${DeltaErrors.formatSchema(newSchema)} + # + #Please try restarting the query. If this issue repeats across query restarts without + #making progress, you have made an incompatible schema change and need to start your + #query from scratch using a new checkpoint directory. + #""".stripMargin('#') + assert(e.getMessage == s"[DELTA_SCHEMA_CHANGED_WITH_VERSION] $msg") + + // Check the error message with startingVersion/Timestamp error message + e = intercept[Exception with SparkThrowable] { + throw DeltaErrors.schemaChangedException(oldSchema, newSchema, retryable, Some(10), true) + } + assert(expectedClass.isAssignableFrom(e.getClass)) + assert(e.getErrorClass == "DELTA_SCHEMA_CHANGED_WITH_STARTING_OPTIONS") + assert(e.getSqlState == "KD007") + // Use '#' as stripMargin interpolator to get around formatSchema having '|' in it + msg = + s"""Detected schema change in version 10: + #streaming source schema: ${DeltaErrors.formatSchema(oldSchema)} + # + #data file schema: ${DeltaErrors.formatSchema(newSchema)} + # + #Please try restarting the query. If this issue repeats across query restarts without + #making progress, you have made an incompatible schema change and need to start your + #query from scratch using a new checkpoint directory. If the issue persists after + #changing to a new checkpoint directory, you may need to change the existing + #'startingVersion' or 'startingTimestamp' option to start from a version newer than + #10 with a new checkpoint directory. + #""".stripMargin('#') + assert(e.getMessage == s"[DELTA_SCHEMA_CHANGED_WITH_STARTING_OPTIONS] $msg") + } + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.restoreVersionNotExistException(0, 0, 0) + } + checkErrorMessage(e, None, None, + Some("Cannot restore table to version 0. " + + "Available versions: [0, 0].")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.unsupportedGenerateModeException("modeName") + } + import org.apache.spark.sql.delta.commands.DeltaGenerateCommand + val supportedModes = DeltaGenerateCommand.modeNameToGenerationFunc.keys.toSeq.mkString(", ") + checkErrorMessage(e, None, None, + Some(s"Specified mode 'modeName' is not supported. 
" + + s"Supported modes are: $supportedModes")) + } + { + import org.apache.spark.sql.delta.DeltaOptions.EXCLUDE_REGEX_OPTION + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.excludeRegexOptionException(EXCLUDE_REGEX_OPTION) + } + checkErrorMessage(e, None, None, + Some(s"Please recheck your syntax for '$EXCLUDE_REGEX_OPTION'")) + } + { + val e = intercept[DeltaFileNotFoundException] { + throw DeltaErrors.fileNotFoundException("path") + } + checkErrorMessage(e, None, None, + Some(s"File path path")) + } + { + val ex = new FileNotFoundException("reason") + val e = intercept[DeltaFileNotFoundException] { + throw DeltaErrors.logFileNotFoundExceptionForStreamingSource(ex) + } + checkErrorMessage(e, Some("DELTA_LOG_FILE_NOT_FOUND_FOR_STREAMING_SOURCE"), Some("42K03"), None) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.invalidIsolationLevelException("level") + } + checkErrorMessage(e, None, None, + Some("invalid isolation level 'level'")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.columnNameNotFoundException("a", "b") + } + checkErrorMessage(e, None, None, + Some("Unable to find the column `a` given [b]")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.addColumnAtIndexLessThanZeroException("1", "a") + } + checkErrorMessage(e, None, None, + Some("Index 1 to add column a is lower than 0")) + } + { + val pos = -1 + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.dropColumnAtIndexLessThanZeroException(-1) + } + checkErrorMessage(e, Some("DELTA_DROP_COLUMN_AT_INDEX_LESS_THAN_ZERO"), Some("42KD8"), + Some(s"Index $pos to drop column is lower than 0")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.incorrectArrayAccess() + } + checkErrorMessage(e, None, None, + Some(s"""Incorrectly accessing an ArrayType. 
Use arrayname.element.elementname position to + |add to an array.""".stripMargin)) + } + { + val e = intercept[DeltaRuntimeException] { + throw DeltaErrors.partitionColumnCastFailed("Value", "Type", "Name") + } + checkErrorMessage(e, None, None, + Some("Failed to cast value `Value` to `Type` for partition column `Name`")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.invalidTimestampFormat("ts", "format") + } + checkErrorMessage(e, None, None, + Some("The provided timestamp ts does not match the expected syntax format.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cannotChangeDataType("example message") + } + checkErrorMessage(e, Some("DELTA_CANNOT_CHANGE_DATA_TYPE"), Some("429BQ"), + Some("Cannot change data type: example message")) + } + { + val table = CatalogTable(TableIdentifier("my table"), null, null, null) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.tableAlreadyExists(table) + } + checkErrorMessage(e, Some("DELTA_TABLE_ALREADY_EXISTS"), Some("42P07"), + Some("Table `my table` already exists.")) + } + { + val storage1 = + CatalogStorageFormat(Option(new URI("loc1")), null, null, null, false, Map.empty) + val storage2 = + CatalogStorageFormat(Option(new URI("loc2")), null, null, null, false, Map.empty) + val table = CatalogTable(TableIdentifier("table"), null, storage1, null) + val existingTable = CatalogTable(TableIdentifier("existing table"), null, storage2, null) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.tableLocationMismatch(table, existingTable) + } + checkErrorMessage(e, Some("DELTA_TABLE_LOCATION_MISMATCH"), Some("42613"), + Some(s"The location of the existing table ${table.identifier.quotedString} is " + + s"`${existingTable.location}`. It doesn't match the specified location " + + s"`${table.location}`.")) + } + { + val ident = "ident" + val e = intercept[DeltaNoSuchTableException] { + throw DeltaErrors.nonSinglePartNamespaceForCatalog(ident) + } + checkErrorMessage(e, Some("DELTA_NON_SINGLE_PART_NAMESPACE_FOR_CATALOG"), Some("42K05"), + Some(s"Delta catalog requires a single-part namespace, but $ident is multi-part.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.targetTableFinalSchemaEmptyException() + } + checkErrorMessage(e, Some("DELTA_TARGET_TABLE_FINAL_SCHEMA_EMPTY"), Some("428GU"), + Some("Target table final schema is empty.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.nonDeterministicNotSupportedException("op", Uuid()) + } + checkErrorMessage(e, Some("DELTA_NON_DETERMINISTIC_FUNCTION_NOT_SUPPORTED"), Some("0AKDC"), + Some("Non-deterministic functions " + + "are not supported in the op (condition = uuid()).")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.tableNotSupportedException("someOp") + } + checkErrorMessage(e, Some("DELTA_TABLE_NOT_SUPPORTED_IN_OP"), Some("42809"), + Some("Table is not supported in someOp. 
Please use a path instead.")) + } + { + val e = intercept[DeltaRuntimeException] { + throw DeltaErrors.postCommitHookFailedException(new PostCommitHook() { + override val name: String = "DummyPostCommitHook" + override def run( + spark: SparkSession, txn: OptimisticTransactionImpl, committedVersion: Long, + postCommitSnapshot: Snapshot, committedActions: Seq[Action]): Unit = {} + }, 0, "msg", null) + } + checkErrorMessage(e, Some("DELTA_POST_COMMIT_HOOK_FAILED"), Some("2DKD0"), + Some("Committing to the Delta table version 0 " + + "succeeded but error while executing post-commit hook DummyPostCommitHook: msg")) + } + { + val e = intercept[DeltaRuntimeException] { + throw DeltaErrors.postCommitHookFailedException(new PostCommitHook() { + override val name: String = "DummyPostCommitHook" + override def run( + spark: SparkSession, txn: OptimisticTransactionImpl, committedVersion: Long, + postCommitSnapshot: Snapshot, committedActions: Seq[Action]): Unit = {} + }, 0, null, null) + } + checkErrorMessage(e, Some("DELTA_POST_COMMIT_HOOK_FAILED"), Some("2DKD0"), + Some("Committing to the Delta table version 0 " + + "succeeded but error while executing post-commit hook DummyPostCommitHook")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.indexLargerThanStruct(1, StructField("col1", IntegerType), 1) + } + checkErrorMessage(e, Some("DELTA_INDEX_LARGER_THAN_STRUCT"), Some("42KD8"), + Some("Index 1 to add column StructField(col1,IntegerType,true) is larger " + + "than struct length: 1")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.indexLargerOrEqualThanStruct(1, 1) + } + checkErrorMessage(e, Some("DELTA_INDEX_LARGER_OR_EQUAL_THAN_STRUCT"), Some("42KD8"), + Some("Index 1 to drop column equals to or is larger " + + "than struct length: 1")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.invalidV1TableCall("v1Table", "DeltaTableV2") + } + checkErrorMessage(e, Some("DELTA_INVALID_V1_TABLE_CALL"), Some("XXKDS"), + Some("v1Table call is not expected with path based DeltaTableV2")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.cannotGenerateUpdateExpressions() + } + checkErrorMessage(e, Some("DELTA_CANNOT_GENERATE_UPDATE_EXPRESSIONS"), Some("XXKDS"), + Some("Calling without generated columns should always return a update " + + "expression for each column")) + } + { + val e = intercept[AnalysisException] { + val s1 = StructType(Seq(StructField("c0", IntegerType))) + val s2 = StructType(Seq(StructField("c0", StringType))) + SchemaMergingUtils.mergeSchemas(s1, s2) + } + assert(e.getMessage == "Failed to merge fields 'c0' and 'c0'. Failed to merge " + + "incompatible data types IntegerType and StringType") + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.describeViewHistory + } + checkErrorMessage(e, Some("DELTA_CANNOT_DESCRIBE_VIEW_HISTORY"), Some("42809"), + Some("Cannot describe the history of a view.")) + } + { + val e = intercept[DeltaUnsupportedOperationException] { + throw DeltaErrors.unrecognizedInvariant() + } + checkErrorMessage(e, Some("DELTA_UNRECOGNIZED_INVARIANT"), Some("56038"), + Some("Unrecognized invariant. 
Please upgrade your Spark version.")) + } + { + val baseSchema = StructType(Seq(StructField("c0", StringType))) + val field = StructField("id", IntegerType) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cannotResolveColumn(field.name, baseSchema) + } + checkErrorMessage(e, Some("DELTA_CANNOT_RESOLVE_COLUMN"), Some("42703"), + Some("""Can't resolve column id in root + | |-- c0: string (nullable = true) + |""".stripMargin + )) + } + { + val s1 = StructType(Seq(StructField("c0", IntegerType))) + val s2 = StructType(Seq(StructField("c0", StringType))) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.alterTableReplaceColumnsException(s1, s2, "incompatible") + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_ALTER_TABLE_REPLACE_COL_OP"), Some("0AKDC"), + Some("""Unsupported ALTER TABLE REPLACE COLUMNS operation. Reason: incompatible + | + |Failed to change schema from: + |root + | |-- c0: integer (nullable = true) + | + |to: + |root + | |-- c0: string (nullable = true) + |""".stripMargin + )) + } + { + val e = intercept[DeltaAnalysisException] { + val classConf = Seq(("classKey", "classVal")) + val schemeConf = Seq(("schemeKey", "schemeVal")) + throw DeltaErrors.logStoreConfConflicts(classConf, schemeConf) + } + checkErrorMessage(e, Some("DELTA_INVALID_LOGSTORE_CONF"), Some("F0000"), + Some("(`classKey`) and (`schemeKey`) cannot " + + "be set at the same time. Please set only one group of them.")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + val schemeConf = Seq(("key", "val")) + throw DeltaErrors.inconsistentLogStoreConfs( + Seq(("delta.key", "value1"), ("spark.delta.key", "value2"))) + } + checkErrorMessage(e, Some("DELTA_INCONSISTENT_LOGSTORE_CONFS"), Some("F0000"), + Some("(delta.key = value1, spark.delta.key = value2) cannot be set to " + + "different values. Please only set one of them, or set them to the same value.")) + } + { + val e = intercept[DeltaSparkException] { + throw DeltaErrors.failedMergeSchemaFile("file", "schema", null) + } + checkErrorMessage(e, Some("DELTA_FAILED_MERGE_SCHEMA_FILE"), None, + Some("Failed to merge schema of file file:\nschema")) + } + { + val id = TableIdentifier("id") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.operationNotSupportedException("op", id) + } + checkErrorMessage(e, Some("DELTA_OPERATION_NOT_ALLOWED_DETAIL"), None, + Some(s"Operation not allowed: `op` is not supported " + + s"for Delta tables: $id")) + } + { + val e = intercept[DeltaFileNotFoundException] { + throw DeltaErrors.fileOrDirectoryNotFoundException("path") + } + checkErrorMessage(e, Some("DELTA_FILE_OR_DIR_NOT_FOUND"), None, + Some("No such file or directory: path")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.invalidPartitionColumn("col", "tbl") + } + checkErrorMessage(e, Some("DELTA_INVALID_PARTITION_COLUMN"), None, + Some("col is not a valid partition column in table tbl.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.cannotFindSourceVersionException("json") + } + checkErrorMessage(e, Some("DELTA_CANNOT_FIND_VERSION"), None, + Some("Cannot find 'sourceVersion' in json")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.unknownConfigurationKeyException("confKey") + } + var msg = "Unknown configuration was specified: confKey\nTo disable this check, set " + + "spark.databricks.delta.allowArbitraryProperties.enabled=true in the Spark session " + + "configuration." 
+ checkErrorMessage(e, Some("DELTA_UNKNOWN_CONFIGURATION"), None, + Some(msg)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.pathNotExistsException("path") + } + checkErrorMessage(e, Some("DELTA_PATH_DOES_NOT_EXIST"), None, + Some("path doesn't exist"), startWith = true) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.failRelativizePath("path") + } + var msg = + """Failed to relativize the path (path). This can happen when absolute paths make + |it into the transaction log, which start with the scheme + |s3://, wasbs:// or adls://. + | + |If this table is NOT USED IN PRODUCTION, you can set the SQL configuration + |spark.databricks.delta.vacuum.relativize.ignoreError to true. + |Using this SQL configuration could lead to accidental data loss, therefore we do + |not recommend the use of this flag unless this is for testing purposes.""".stripMargin + checkErrorMessage(e, Some("DELTA_FAIL_RELATIVIZE_PATH"), Some("XXKDS"), + Some(msg)) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.illegalFilesFound("file") + } + checkErrorMessage(e, Some("DELTA_ILLEGAL_FILE_FOUND"), None, + Some("Illegal files found in a dataChange = false transaction. Files: file")) + } + { + val name = "name" + val input = "input" + val explain = "explain" + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.illegalDeltaOptionException(name, input, explain) + } + checkErrorMessage(e, Some("DELTA_ILLEGAL_OPTION"), Some("42616"), + Some(s"Invalid value '$input' for option '$name', $explain")) + } + { + val version = "version" + val timestamp = "timestamp" + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.startingVersionAndTimestampBothSetException(version, timestamp) + } + checkErrorMessage(e, Some("DELTA_STARTING_VERSION_AND_TIMESTAMP_BOTH_SET"), Some("42613"), + Some(s"Please either provide '$version' or '$timestamp'")) + } + { + val path = new Path("parent", "child") + val specifiedSchema = StructType(Seq(StructField("a", IntegerType))) + val existingSchema = StructType(Seq(StructField("b", StringType))) + val diffs = Seq("a", "b") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.createTableWithDifferentSchemaException( + path, specifiedSchema, existingSchema, diffs) + } + checkErrorMessage(e, Some("DELTA_CREATE_TABLE_SCHEME_MISMATCH"), None, None) + } + { + val path = new Path("parent", "child") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.noHistoryFound(path) + } + checkErrorMessage(e, Some("DELTA_NO_COMMITS_FOUND"), Some("KD006"), + Some(s"No commits found at $path")) + } + { + val path = new Path("parent", "child") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.noRecreatableHistoryFound(path) + } + checkErrorMessage(e, Some("DELTA_NO_RECREATABLE_HISTORY_FOUND"), Some("KD006"), + Some(s"No recreatable commits found at $path")) + } + { + val e = intercept[DeltaRuntimeException] { + throw DeltaErrors.castPartitionValueException("partitionValue", StringType) + } + checkErrorMessage(e, Some("DELTA_FAILED_CAST_PARTITION_VALUE"), None, + Some(s"Failed to cast partition value `partitionValue` to $StringType")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.sparkSessionNotSetException() + } + checkErrorMessage(e, Some("DELTA_SPARK_SESSION_NOT_SET"), None, + Some("Active SparkSession not set.")) + } + { + val id = Identifier.of(Array("namespace"), "name") + val e = intercept[DeltaAnalysisException] { + throw 
DeltaErrors.cannotReplaceMissingTableException(id) + } + checkErrorMessage(e, Some("DELTA_CANNOT_REPLACE_MISSING_TABLE"), None, + Some(s"Table $id cannot be replaced as it does not exist. " + + s"Use CREATE OR REPLACE TABLE to create the table.")) + } + { + val e = intercept[DeltaIOException] { + throw DeltaErrors.cannotCreateLogPathException("logPath") + } + checkErrorMessage(e, Some("DELTA_CANNOT_CREATE_LOG_PATH"), None, + Some("Cannot create logPath")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.protocolPropNotIntException("key", "value") + } + checkErrorMessage(e, Some("DELTA_PROTOCOL_PROPERTY_NOT_INT"), None, + Some("Protocol property key needs to be an integer. Found value")) + } + { + val path = new Path("parent", "child") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.createExternalTableWithoutLogException(path, "tableName", spark) + } + val msg = s""" + |You are trying to create an external table tableName + |from `$path` using Delta, but there is no transaction log present at + |`$path/_delta_log`. Check the upstream job to make sure that it is writing using + |format("delta") and that the path is the root of the table.""".stripMargin + checkErrorMessage(e, Some("DELTA_CREATE_EXTERNAL_TABLE_WITHOUT_TXN_LOG"), None, Some(msg), true) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.ambiguousPathsInCreateTableException("loc1", "loc2") + } + checkErrorMessage(e, Some("DELTA_AMBIGUOUS_PATHS_IN_CREATE_TABLE"), Some("42613"), + Some(s"""CREATE TABLE contains two different locations: loc1 and loc2. + |You can remove the LOCATION clause from the CREATE TABLE statement, or set + |${DeltaSQLConf.DELTA_LEGACY_ALLOW_AMBIGUOUS_PATHS.key} to true to skip this check. + |""".stripMargin)) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.illegalUsageException("overwriteSchema", "replacing") + } + checkErrorMessage(e, Some("DELTA_ILLEGAL_USAGE"), Some("42601"), + Some("The usage of overwriteSchema is not allowed when replacing a Delta table.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.expressionsNotFoundInGeneratedColumn("col1") + } + checkErrorMessage(e, Some("DELTA_EXPRESSIONS_NOT_FOUND_IN_GENERATED_COLUMN"), Some("XXKDS"), + Some("Cannot find the expressions in the generated column col1")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.activeSparkSessionNotFound() + } + checkErrorMessage(e, Some("DELTA_ACTIVE_SPARK_SESSION_NOT_FOUND"), Some("08003"), + Some("Could not find active SparkSession")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.operationOnTempViewWithGenerateColsNotSupported("UPDATE") + } + checkErrorMessage(e, Some("DELTA_OPERATION_ON_TEMP_VIEW_WITH_GENERATED_COLS_NOT_SUPPORTED"), Some("0A000"), + Some("UPDATE command on a temp view referring to a Delta table that " + + "contains generated columns is not supported. 
Please run the UPDATE command on the Delta " + + "table directly")) + } + { + val property = "prop" + val e = intercept[DeltaUnsupportedOperationException] { + throw DeltaErrors.cannotModifyTableProperty(property) + } + checkErrorMessage(e, Some("DELTA_CANNOT_MODIFY_TABLE_PROPERTY"), Some("42939"), + Some(s"The Delta table configuration $property cannot be specified by the user")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.missingProviderForConvertException("parquet_path") + } + checkErrorMessage(e, Some("DELTA_MISSING_PROVIDER_FOR_CONVERT"), Some("0AKDC"), + Some("CONVERT TO DELTA only supports parquet tables. Please rewrite your " + + "target as parquet.`parquet_path` if it's a parquet directory.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.iteratorAlreadyClosed() + } + checkErrorMessage(e, Some("DELTA_ITERATOR_ALREADY_CLOSED"), Some("XXKDS"), + Some("Iterator is closed")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.activeTransactionAlreadySet() + } + checkErrorMessage(e, Some("DELTA_ACTIVE_TRANSACTION_ALREADY_SET"), Some("0B000"), + Some("Cannot set a new txn as active when one is already active")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.bloomFilterMultipleConfForSingleColumnException("col1") + } + checkErrorMessage(e, Some("DELTA_MULTIPLE_CONF_FOR_SINGLE_COLUMN_IN_BLOOM_FILTER"), Some("42614"), + Some("Multiple bloom filter index configurations passed to " + + "command for column: col1")) + } + { + val e = intercept[DeltaIOException] { + throw DeltaErrors.incorrectLogStoreImplementationException(sparkConf, null) + } + val docsLink = DeltaErrors.generateDocsLink( + sparkConf, "/delta-storage.html", skipValidation = true) + checkErrorMessage(e, Some("DELTA_INCORRECT_LOG_STORE_IMPLEMENTATION"), Some("0AKDC"), + Some(s"""The error typically occurs when the default LogStore implementation, that + |is, HDFSLogStore, is used to write into a Delta table on a non-HDFS storage system. + |In order to get the transactional ACID guarantees on table updates, you have to use the + |correct implementation of LogStore that is appropriate for your storage system. + |See $docsLink for details. + |""".stripMargin)) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.invalidSourceVersion("xyz") + } + checkErrorMessage(e, Some("DELTA_INVALID_SOURCE_VERSION"), Some("XXKDS"), + Some("sourceVersion(xyz) is invalid")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.invalidSourceOffsetFormat() + } + checkErrorMessage(e, Some("DELTA_INVALID_SOURCE_OFFSET_FORMAT"), Some("XXKDS"), + Some("The stored source offset format is invalid")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.invalidCommittedVersion(1L, 2L) + } + checkErrorMessage(e, Some("DELTA_INVALID_COMMITTED_VERSION"), Some("XXKDS"), + Some("The committed version is 1 but the current version is 2." + )) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.nonPartitionColumnReference("col1", Seq("col2", "col3")) + } + checkErrorMessage(e, Some("DELTA_NON_PARTITION_COLUMN_REFERENCE"), Some("42P10"), + Some("Predicate references non-partition column 'col1'. 
Only the " + + "partition columns may be referenced: [col2, col3]")) + } + { + val e = intercept[DeltaAnalysisException] { + val attr = UnresolvedAttribute("col1") + val attrs = Seq(UnresolvedAttribute("col2"), UnresolvedAttribute("col3")) + throw DeltaErrors.missingColumn(attr, attrs) + } + checkErrorMessage(e, Some("DELTA_MISSING_COLUMN"), Some("42703"), + Some("Cannot find col1 in table columns: col2, col3")) + } + { + val e = intercept[DeltaAnalysisException] { + val schema = StructType(Seq(StructField("c0", IntegerType))) + throw DeltaErrors.missingPartitionColumn("c1", schema.catalogString) + } + checkErrorMessage(e, Some("DELTA_MISSING_PARTITION_COLUMN"), Some("42KD6"), + Some("Partition column `c1` not found in schema struct" + )) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.aggsNotSupportedException("op", SparkVersion()) + } + checkErrorMessage(e, Some("DELTA_AGGREGATION_NOT_SUPPORTED"), Some("42903"), + Some("Aggregate functions are not supported in the op " + + "(condition = version())..")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cannotChangeProvider() + } + checkErrorMessage(e, Some("DELTA_CANNOT_CHANGE_PROVIDER"), Some("42939"), + Some("'provider' is a reserved table property, and cannot be altered.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.noNewAttributeId(AttributeReference("attr1", IntegerType)()) + } + checkErrorMessage(e, Some("DELTA_NO_NEW_ATTRIBUTE_ID"), Some("XXKDS"), + Some("Could not find a new attribute ID for column attr1. This " + + "should have been checked earlier.")) + } + { + val e = intercept[ProtocolDowngradeException] { + val p1 = Protocol(1, 1) + val p2 = Protocol(2, 2) + throw new ProtocolDowngradeException(p1, p2) + } + checkErrorMessage(e, Some("DELTA_INVALID_PROTOCOL_DOWNGRADE"), Some("KD004"), + Some("Protocol version cannot be downgraded from (1,1) to (2,2)")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.generatedColumnsTypeMismatch("col1", IntegerType, StringType) + } + checkErrorMessage(e, Some("DELTA_GENERATED_COLUMNS_EXPR_TYPE_MISMATCH"), Some("42K09"), + Some("The expression type of the generated column col1 is STRING, " + + "but the column type is INT")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.nonGeneratedColumnMissingUpdateExpression( + AttributeReference("attr1", IntegerType)(ExprId(1234567L))) + } + val msg = "attr1#1234567 is not a generated column but is missing " + + "its update expression" + checkErrorMessage(e, Some("DELTA_NON_GENERATED_COLUMN_MISSING_UPDATE_EXPR"), Some("XXKDS"), + Some(msg)) + } + { + val e = intercept[DeltaAnalysisException] { + val s1 = StructType(Seq(StructField("c0", IntegerType, true))) + val s2 = StructType(Seq(StructField("c0", StringType, false))) + SchemaMergingUtils.mergeSchemas(s1, s2, false, false, Set("c0")) + } + checkErrorMessage(e, Some("DELTA_GENERATED_COLUMNS_DATA_TYPE_MISMATCH"), Some("42K09"), + Some("Column c0 is a generated column or a column used by a generated " + + "column. The data type is INT. It doesn't accept data type STRING")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.useSetLocation() + } + checkErrorMessage(e, Some("DELTA_CANNOT_CHANGE_LOCATION"), Some("42601"), + Some("Cannot change the 'location' of the Delta table using SET " + + "TBLPROPERTIES. 
Please use ALTER TABLE SET LOCATION instead.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.nonPartitionColumnAbsentException(false) + } + checkErrorMessage(e, Some("DELTA_NON_PARTITION_COLUMN_ABSENT"), Some("KD005"), + Some("Data written into Delta needs to contain at least " + + "one non-partitioned column.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.nonPartitionColumnAbsentException(true) + } + checkErrorMessage(e, Some("DELTA_NON_PARTITION_COLUMN_ABSENT"), Some("KD005"), + Some("Data written into Delta needs to contain at least " + + "one non-partitioned column. Columns which are of NullType have been dropped.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.constraintAlreadyExists("name", "oldExpr") + } + checkErrorMessage(e, Some("DELTA_CONSTRAINT_ALREADY_EXISTS"), Some("42710"), + Some("Constraint 'name' already exists. Please " + + "delete the old constraint first.\nOld constraint:\noldExpr")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.timeTravelNotSupportedException + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_TIME_TRAVEL_VIEWS"), Some("0AKDC"), + Some("Cannot time travel views, subqueries, streams or change data feed queries.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.addFilePartitioningMismatchException(Seq("col3"), Seq("col2")) + } + checkErrorMessage(e, Some("DELTA_INVALID_PARTITIONING_SCHEMA"), Some("XXKDS"), + Some(""" + |The AddFile contains partitioning schema different from the table's partitioning schema + |expected: [`col2`] + |actual: [`col3`] + |To disable this check set """.stripMargin + + "spark.databricks.delta.commitValidation.enabled to \"false\"")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.emptyCalendarInterval + } + checkErrorMessage(e, Some("DELTA_INVALID_CALENDAR_INTERVAL_EMPTY"), Some("2200P"), + Some("Interval cannot be null or blank.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.createManagedTableWithoutSchemaException("table-1", spark) + } + checkErrorMessage(e, Some("DELTA_INVALID_MANAGED_TABLE_SYNTAX_NO_SCHEMA"), Some("42000"), + Some(s""" + |You are trying to create a managed table table-1 + |using Delta, but the schema is not specified. + | + |To learn more about Delta, see ${generateDocsLink(spark.sparkContext.getConf, + "/index.html", skipValidation = true)}""".stripMargin)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.generatedColumnsUnsupportedExpression("someExp".expr) + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_EXPRESSION_GENERATED_COLUMN"), Some("42621"), + Some("'someExp' cannot be used in a generated column")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.unsupportedExpression("Merge", DataTypes.DateType, Seq("Integer", "Long")) + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_EXPRESSION"), Some("0A000"), + Some("Unsupported expression type(DateType) for Merge. " + + "The supported types are [Integer,Long].")) + } + { + val expr = "someExp" + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.generatedColumnsUDF(expr.expr) + } + checkErrorMessage(e, Some("DELTA_UDF_IN_GENERATED_COLUMN"), Some("42621"), + Some(s"Found ${expr.sql}. 
A generated column cannot use a user-defined function")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.bloomFilterOnColumnTypeNotSupportedException("col1", DateType) + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_COLUMN_TYPE_IN_BLOOM_FILTER"), Some("0AKDC"), + Some("Creating a bloom filter index on a column with type date is " + + "unsupported: col1")) + } + { + val e = intercept[DeltaTableFeatureException] { + throw DeltaErrors.tableFeatureDropHistoryTruncationNotAllowed() + } + checkErrorMessage(e, Some("DELTA_FEATURE_DROP_HISTORY_TRUNCATION_NOT_ALLOWED"), + Some("0AKDE"), Some("History truncation is only relevant for reader features.")) + } + { + val logRetention = DeltaConfigs.LOG_RETENTION + val e = intercept[DeltaTableFeatureException] { + throw DeltaErrors.dropTableFeatureWaitForRetentionPeriod( + "test_feature", + Metadata(configuration = Map(logRetention.key -> "30 days"))) + } + + val expectedMessage = + """Dropping test_feature was partially successful. + | + |The feature is now no longer used in the current version of the table. However, the feature + |is still present in historical versions of the table. The table feature cannot be dropped + |from the table protocol until these historical versions have expired. + | + |To drop the table feature from the protocol, please wait for the historical versions to + |expire, and then repeat this command. The retention period for historical versions is + |currently configured as delta.logRetentionDuration=30 days. + | + |Alternatively, please wait for the TRUNCATE HISTORY retention period to expire (24 hours) + |and then run: + | ALTER TABLE table_name DROP FEATURE feature_name TRUNCATE HISTORY""".stripMargin + checkErrorMessage(e, Some("DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD"), Some("0AKDE"), + Some(expectedMessage)) + } + } + + test("test DeltaErrors methods -- part 2") { + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.unsupportedDataTypes( + UnsupportedDataTypeInfo("foo", CalendarIntervalType), + UnsupportedDataTypeInfo("bar", TimestampNTZType)) + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_DATA_TYPES"), Some("0AKDC"), + Some("Found columns using unsupported data types: " + + "[foo: CalendarIntervalType, bar: TimestampNTZType]. " + + "You can set 'spark.databricks.delta.schema.typeCheck.enabled' to 'false' " + + "to disable the type check. Disabling this type check may allow users to create " + + "unsupported Delta tables and should only be used when trying to read/write legacy tables.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.failOnDataLossException(12, 10) + } + checkErrorMessage(e, Some("DELTA_MISSING_FILES_UNEXPECTED_VERSION"), Some("XXKDS"), + Some(s"""The stream from your Delta table was expecting process data from version 12, + |but the earliest available version in the _delta_log directory is 10. The files + |in the transaction log may have been deleted due to log cleanup. In order to avoid losing + |data, we recommend that you restart your stream with a new checkpoint location and to + |increase your delta.logRetentionDuration setting, if you have explicitly set it below 30 + |days. 
+ |If you would like to ignore the missed data and continue your stream from where it left + |off, you can set the .option("failOnDataLoss", "false") as part + |of your readStream statement.""".stripMargin)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.nestedFieldNotSupported("INSERT clause of MERGE operation", "col1") + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_NESTED_FIELD_IN_OPERATION"), Some("0AKDC"), + Some("Nested field is not supported in the INSERT clause of MERGE " + + "operation (field = col1).")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.newCheckConstraintViolated(10, "table-1", "sample") + } + checkErrorMessage(e, Some("DELTA_NEW_CHECK_CONSTRAINT_VIOLATION"), Some("23512"), + Some("10 rows in table-1 violate the new CHECK constraint (sample)")) + } + { + val e = intercept[DeltaRuntimeException] { + throw DeltaErrors.failedInferSchema + } + checkErrorMessage(e, Some("DELTA_FAILED_INFER_SCHEMA"), Some("42KD9"), + Some("Failed to infer schema from the given list of files.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.unexpectedPartialScan(new Path("path-1")) + } + checkErrorMessage(e, Some("DELTA_UNEXPECTED_PARTIAL_SCAN"), Some("KD00A"), + Some("Expect a full scan of Delta sources, but found a partial scan. " + + "path:path-1")) + } + { + val e = intercept[DeltaUnsupportedOperationException] { + throw DeltaErrors.unrecognizedLogFile(new Path("path-1")) + } + checkErrorMessage(e, Some("DELTA_UNRECOGNIZED_LOGFILE"), Some("KD00B"), + Some("Unrecognized log file path-1")) + } + { + val e = intercept[DeltaUnsupportedOperationException] { + throw DeltaErrors.unsupportedAbsPathAddFile("path-1") + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_ABS_PATH_ADD_FILE"), Some("0AKDC"), + Some("path-1 does not support adding files with an absolute path")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.outputModeNotSupportedException("source1", "sample") + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_OUTPUT_MODE"), Some("0AKDC"), + Some("Data source source1 does not support sample output mode")) + } + { + val e = intercept[DeltaAnalysisException] { + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm", Locale.US) + throw DeltaErrors.timestampGreaterThanLatestCommit( + new Timestamp(sdf.parse("2022-02-28 10:30:00").getTime), + new Timestamp(sdf.parse("2022-02-28 10:00:00").getTime), "2022-02-28 10:00:00") + } + checkErrorMessage(e, Some("DELTA_TIMESTAMP_GREATER_THAN_COMMIT"), Some("42816"), + Some("""The provided timestamp (2022-02-28 10:30:00.0) is after the latest version available to this + |table (2022-02-28 10:00:00.0). 
Please use a timestamp before or """.stripMargin + + "at 2022-02-28 10:00:00.")) + } + { + val expr = "1".expr + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.timestampInvalid(expr) + } + checkErrorMessage(e, Some("DELTA_TIMESTAMP_INVALID"), Some("42816"), + Some(s"The provided timestamp (${expr.sql}) cannot be converted to a valid timestamp.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.notADeltaSourceException("sample") + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_SOURCE"), Some("0AKDD"), + Some("sample destination only supports Delta sources.\n")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.restoreTimestampGreaterThanLatestException("2022-02-02 12:12:12", + "2022-02-02 12:12:10") + } + checkErrorMessage(e, Some("DELTA_CANNOT_RESTORE_TIMESTAMP_GREATER"), Some("22003"), + Some("Cannot restore table to timestamp (2022-02-02 12:12:12) as it is " + + "after the latest version available. Please use a timestamp before (2022-02-02 12:12:10)")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.addColumnStructNotFoundException("pos1") + } + checkErrorMessage(e, Some("DELTA_ADD_COLUMN_STRUCT_NOT_FOUND"), Some("42KD3"), + Some("Struct not found at position pos1")) + } + { + val column = StructField("c0", IntegerType) + val other = IntegerType + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.addColumnParentNotStructException(column, other) + } + checkErrorMessage(e, Some("DELTA_ADD_COLUMN_PARENT_NOT_STRUCT"), Some("42KD3"), + Some(s"Cannot add ${column.name} because its parent is not a " + + s"StructType. Found $other")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.updateNonStructTypeFieldNotSupportedException("col1", DataTypes.DateType) + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_FIELD_UPDATE_NON_STRUCT"), Some("0AKDC"), + Some("Updating nested fields is only supported for StructType, but you " + + "are trying to update a field of `col1`, which is of type: DateType.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.extractReferencesFieldNotFound("struct1", + DeltaErrors.updateSchemaMismatchExpression( + StructType(Seq(StructField("c0", IntegerType))), + StructType(Seq(StructField("c1", IntegerType))) + )) + } + checkErrorMessage(e, Some("DELTA_EXTRACT_REFERENCES_FIELD_NOT_FOUND"), Some("XXKDS"), + Some("Field struct1 could not be found when extracting references.")) + } + { + val e = intercept[DeltaIndexOutOfBoundsException] { + throw DeltaErrors.notNullColumnNotFoundInStruct("struct1") + } + checkErrorMessage(e, Some("DELTA_NOT_NULL_COLUMN_NOT_FOUND_IN_STRUCT"), Some("42K09"), + Some("Not nullable column not found in struct: struct1")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.invalidIdempotentWritesOptionsException("reason") + } + checkErrorMessage(e, Some("DELTA_INVALID_IDEMPOTENT_WRITES_OPTIONS"), Some("42616"), + Some("Invalid options for idempotent Dataframe writes: reason")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.operationNotSupportedException("dummyOp") + } + checkErrorMessage(e, Some("DELTA_OPERATION_NOT_ALLOWED"), Some("0AKDC"), + Some("Operation not allowed: `dummyOp` is not supported for Delta tables")) + } + { + val e = intercept[DeltaAnalysisException] { + val s1 = StructType(Seq(StructField("c0", IntegerType))) + val s2 = StructType(Seq(StructField("c0", StringType))) + throw 
DeltaErrors.alterTableSetLocationSchemaMismatchException(s1, s2) + } + checkErrorMessage(e, Some("DELTA_SET_LOCATION_SCHEMA_MISMATCH"), Some("42KD7"), + Some(s""" + |The schema of the new Delta location is different than the current table schema. + |original schema: + |root + | |-- c0: integer (nullable = true) + | + |destination schema: + |root + | |-- c0: string (nullable = true) + | + | + |If this is an intended change, you may turn this check off by running: + |%sql set ${DeltaSQLConf.DELTA_ALTER_LOCATION_BYPASS_SCHEMA_CHECK.key}""".stripMargin + + " = true")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.foundDuplicateColumnsException("integer", "col1") + } + checkErrorMessage(e, Some("DELTA_DUPLICATE_COLUMNS_FOUND"), Some("42711"), + Some("Found duplicate column(s) integer: col1")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.subqueryNotSupportedException("dummyOp", "col1") + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_SUBQUERY"), Some("0AKDC"), + Some("Subqueries are not supported in the dummyOp (condition = 'col1').")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.foundMapTypeColumnException("dummyKey", "dummyVal") + } + checkErrorMessage(e, Some("DELTA_FOUND_MAP_TYPE_COLUMN"), Some("KD003"), + Some("""A MapType was found. In order to access the key or value of a MapType, specify one + |of: + |dummyKey or + |dummyVal + |followed by the name of the column (only if that column is a struct type). + |e.g. mymap.key.mykey + |If the column is a basic type, mymap.key or mymap.value is sufficient.""".stripMargin)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.columnOfTargetTableNotFoundInMergeException("target", "dummyCol") + } + checkErrorMessage(e, Some("DELTA_COLUMN_NOT_FOUND_IN_MERGE"), Some("42703"), + Some("Unable to find the column 'target' of the target table from " + + "the INSERT columns: dummyCol. " + + "INSERT clause must specify value for all the columns of the target table." + )) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.multiColumnInPredicateNotSupportedException("dummyOp") + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_MULTI_COL_IN_PREDICATE"), Some("0AKDC"), + Some("Multi-column In predicates are not supported in the dummyOp condition.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.newNotNullViolated(10L, "table1", UnresolvedAttribute("col1")) + } + checkErrorMessage(e, Some("DELTA_NEW_NOT_NULL_VIOLATION"), Some("23512"), + Some("10 rows in table1 violate the new NOT NULL constraint on col1")) + } + { + val e = intercept[DeltaUnsupportedOperationException] { + throw DeltaErrors.modifyAppendOnlyTableException("dummyTable") + } + checkErrorMessage(e, Some("DELTA_CANNOT_MODIFY_APPEND_ONLY"), Some("42809"), + Some("This table is configured to only allow appends. 
If you would like to permit " + + "updates or deletes, use 'ALTER TABLE dummyTable SET TBLPROPERTIES " + + s"(${DeltaConfigs.IS_APPEND_ONLY.key}=false)'.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.schemaNotConsistentWithTarget("dummySchema", "targetAttr") + } + checkErrorMessage(e, Some("DELTA_SCHEMA_NOT_CONSISTENT_WITH_TARGET"), Some("XXKDS"), + Some("The table schema dummySchema is not consistent with " + + "the target attributes: targetAttr")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.sparkTaskThreadNotFound + } + checkErrorMessage(e, Some("DELTA_SPARK_THREAD_NOT_FOUND"), Some("XXKDS"), + Some("Not running on a Spark task thread")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.staticPartitionsNotSupportedException + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_STATIC_PARTITIONS"), Some("0AKDD"), + Some("Specifying static partitions in the partition spec is" + + " currently not supported during inserts")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.unsupportedWriteStagedTable("table1") + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_WRITES_STAGED_TABLE"), Some("42807"), + Some("Table implementation does not support writes: table1")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.vacuumBasePathMissingException(new Path("path-1")) + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_VACUUM_SPECIFIC_PARTITION"), Some("0AKDC"), + Some("Please provide the base path (path-1) when Vacuuming Delta tables. " + + "Vacuuming specific partitions is currently not supported.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.bloomFilterCreateOnNonExistingColumnsException(Seq("col1", "col2")) + } + checkErrorMessage(e, Some("DELTA_CANNOT_CREATE_BLOOM_FILTER_NON_EXISTING_COL"), Some("42703"), + Some("Cannot create bloom filter indices for the following non-existent column(s): col1, col2")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.zOrderingColumnDoesNotExistException("colName") + } + checkErrorMessage(e, None, None, + Some("Z-Ordering column colName does not exist in data schema.")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.zOrderingOnPartitionColumnException("column1") + } + checkErrorMessage(e, Some("DELTA_ZORDERING_ON_PARTITION_COLUMN"), Some("42P10"), + Some("column1 is a partition column. Z-Ordering can only be performed on data columns")) + } + { + val colNames = Seq("col1", "col2") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.zOrderingOnColumnWithNoStatsException(colNames, spark) + } + checkErrorMessage(e, Some("DELTA_ZORDERING_ON_COLUMN_WITHOUT_STATS"), Some("KD00D"), None) + } + { + checkError( + exception = intercept[DeltaIllegalStateException] { + throw MaterializedRowId.missingMetadataException("table_name") + }, + errorClass = "DELTA_MATERIALIZED_ROW_TRACKING_COLUMN_NAME_MISSING", + parameters = Map( + "rowTrackingColumn" -> "Row ID", + "tableName" -> "table_name" + ) + ) + } + { + checkError( + exception = intercept[DeltaIllegalStateException] { + throw MaterializedRowCommitVersion.missingMetadataException("table_name") + }, + errorClass = "DELTA_MATERIALIZED_ROW_TRACKING_COLUMN_NAME_MISSING", + parameters = Map( + "rowTrackingColumn" -> "Row Commit Version", + "tableName" -> "table_name" + ) + ) + } + } + + // The compiler complains the lambda function is too large if we put all tests in one lambda. 
+ test("test DeltaErrors OSS methods more") { + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.schemaNotSetException + } + checkErrorMessage(e, Some("DELTA_SCHEMA_NOT_SET"), None, + Some("Table schema is not set. Write data into it or use CREATE TABLE to set the schema.")) + checkErrorMessage(e, None, Some("KD008"), None) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.schemaNotProvidedException + } + checkErrorMessage(e, Some("DELTA_SCHEMA_NOT_PROVIDED"), None, + Some("Table schema is not provided. Please provide the schema (column definition) " + + "of the table when using REPLACE table and an AS SELECT query is not provided.")) + checkErrorMessage(e, None, Some("42908"), None) + } + { + val st1 = StructType(Seq(StructField("a0", IntegerType))) + val st2 = StructType(Seq(StructField("b0", IntegerType))) + val schemaDiff = SchemaUtils.reportDifferences(st1, st2) + .map(_.replace("Specified", "Latest")) + + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.schemaChangedSinceAnalysis(st1, st2) + } + + val msg = + s"""The schema of your Delta table has changed in an incompatible way since your DataFrame + |or DeltaTable object was created. Please redefine your DataFrame or DeltaTable object. + |Changes: + |${schemaDiff.mkString("\n")}""".stripMargin + checkErrorMessage(e, Some("DELTA_SCHEMA_CHANGE_SINCE_ANALYSIS"), Some("KD007"), + Some(msg)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.generatedColumnsAggregateExpression("1".expr) + } + + checkErrorMessage(e, Some("DELTA_AGGREGATE_IN_GENERATED_COLUMN"), Some("42621"), + Some(s"Found ${"1".expr.sql}. " + + "A generated column cannot use an aggregate expression")) + } + { + val path = new Path("path") + val specifiedColumns = Seq("col1", "col2") + val existingColumns = Seq("col3", "col4") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.createTableWithDifferentPartitioningException( + path, specifiedColumns, existingColumns) + } + + val msg = + s"""The specified partitioning does not match the existing partitioning at $path. + | + |== Specified == + |${specifiedColumns.mkString(", ")} + | + |== Existing == + |${existingColumns.mkString(", ")} + |""".stripMargin + checkErrorMessage(e, Some("DELTA_CREATE_TABLE_WITH_DIFFERENT_PARTITIONING"), Some("42KD7"), + Some(msg)) + } + { + val path = new Path("a/b") + val smaps = Map("abc" -> "xyz") + val emaps = Map("def" -> "hjk") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.createTableWithDifferentPropertiesException(path, smaps, emaps) + } + + val msg = + s"""The specified properties do not match the existing properties at $path. 
+ | + |== Specified == + |${smaps.map { case (k, v) => s"$k=$v" }.mkString("\n")} + | + |== Existing == + |${emaps.map { case (k, v) => s"$k=$v" }.mkString("\n")} + |""".stripMargin + checkErrorMessage(e, Some("DELTA_CREATE_TABLE_WITH_DIFFERENT_PROPERTY"), Some("42KD7"), + Some(msg)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.unsupportSubqueryInPartitionPredicates() + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_SUBQUERY_IN_PARTITION_PREDICATES"), + Some("0AKDC"), Some("Subquery is not supported in partition predicates.")) + } + { + val e = intercept[DeltaFileNotFoundException] { + throw DeltaErrors.emptyDirectoryException("dir") + } + checkErrorMessage(e, Some("DELTA_EMPTY_DIRECTORY"), None, + Some("No file found in the directory: dir.")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.replaceWhereUsedWithDynamicPartitionOverwrite() + } + checkErrorMessage(e, Some("DELTA_REPLACE_WHERE_WITH_DYNAMIC_PARTITION_OVERWRITE"), None, + Some("A 'replaceWhere' expression and 'partitionOverwriteMode'='dynamic' " + + "cannot both be set in the DataFrameWriter options.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.replaceWhereUsedInOverwrite() + } + checkErrorMessage(e, Some("DELTA_REPLACE_WHERE_IN_OVERWRITE"), Some("42613"), + Some("You can't use replaceWhere in conjunction with an overwrite by filter")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.incorrectArrayAccessByName("rightName", "wrongName") + } + + val msg = + s"""An ArrayType was found. In order to access elements of an ArrayType, specify + |rightName + |Instead of wrongName + |""".stripMargin + checkErrorMessage(e, Some("DELTA_INCORRECT_ARRAY_ACCESS_BY_NAME"), Some("KD003"), + Some(msg)) + } + { + val columnPath = "colPath" + val other = IntegerType + val column = Seq("col1", "col2") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.columnPathNotNested(columnPath, other, column) + } + val msg = + s"""Expected $columnPath to be a nested data type, but found $other. Was looking for the + |index of ${SchemaUtils.prettyFieldName(column)} in a nested field + |""".stripMargin + checkErrorMessage(e, Some("DELTA_COLUMN_PATH_NOT_NESTED"), Some("42704"), + Some(msg)) + } + { + val e = intercept[DeltaUnsupportedOperationException] { + throw DeltaErrors.multipleSourceRowMatchingTargetRowInMergeException(spark) + } + + val docLink = generateDocsLink( + spark.sparkContext.getConf, + multipleSourceRowMatchingTargetRowInMergeUrl, + skipValidation = true) + val msg = + s"""Cannot perform Merge as multiple source rows matched and attempted to modify the same + |target row in the Delta table in possibly conflicting ways. By SQL semantics of Merge, + |when multiple source rows match on the same target row, the result may be ambiguous + |as it is unclear which source row should be used to update or delete the matching + |target row. You can preprocess the source table to eliminate the possibility of + |multiple matches. 
Please refer to + |${docLink}""".stripMargin + checkErrorMessage(e, Some("DELTA_MULTIPLE_SOURCE_ROW_MATCHING_TARGET_ROW_IN_MERGE"), Some("21506"), + Some(msg)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.showPartitionInNotPartitionedTable("table") + } + checkErrorMessage(e, Some("DELTA_SHOW_PARTITION_IN_NON_PARTITIONED_TABLE"), Some("42809"), + Some("SHOW PARTITIONS is not allowed on a table that is not partitioned: table")) + } + { + val badColumns = Set("col1", "col2") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.showPartitionInNotPartitionedColumn(badColumns) + } + checkErrorMessage(e, Some("DELTA_SHOW_PARTITION_IN_NON_PARTITIONED_COLUMN"), Some("42P10"), + Some(s"Non-partitioning column(s) ${badColumns.mkString("[", ", ", "]")}" + + " are specified for SHOW PARTITIONS")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.duplicateColumnOnInsert() + } + checkErrorMessage(e, Some("DELTA_DUPLICATE_COLUMNS_ON_INSERT"), Some("42701"), + Some("Duplicate column names in INSERT clause")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.timeTravelInvalidBeginValue("key", new Throwable) + } + checkErrorMessage(e, Some("DELTA_TIME_TRAVEL_INVALID_BEGIN_VALUE"), Some("42604"), + Some("key needs to be a valid begin value.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.metadataAbsentException() + } + checkErrorMessage(e, Some("DELTA_METADATA_ABSENT"), Some("XXKDS"), + Some("Couldn't find Metadata while committing the first version of the " + + "Delta table.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw new DeltaAnalysisException(errorClass = "DELTA_CANNOT_USE_ALL_COLUMNS_FOR_PARTITION", + Array.empty) + } + checkErrorMessage(e, Some("DELTA_CANNOT_USE_ALL_COLUMNS_FOR_PARTITION"), Some("428FT"), + Some("Cannot use all columns for partition columns")) + } + { + val e = intercept[DeltaIOException] { + throw DeltaErrors.failedReadFileFooter("test.txt", null) + } + checkErrorMessage(e, Some("DELTA_FAILED_READ_FILE_FOOTER"), Some("KD001"), + Some("Could not read footer for file: test.txt")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.failedScanWithHistoricalVersion(123) + } + checkErrorMessage(e, Some("DELTA_FAILED_SCAN_WITH_HISTORICAL_VERSION"), Some("KD002"), + Some("Expect a full scan of the latest version of the Delta source, " + + "but found a historical scan of version 123")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.failedRecognizePredicate("select ALL", new Throwable()) + } + checkErrorMessage(e, Some("DELTA_FAILED_RECOGNIZE_PREDICATE"), Some("42601"), + Some("Cannot recognize the predicate 'select ALL'")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.failedFindAttributeInOutputColumns("col1", + "col2,col3,col4") + } + + val msg = "Could not find col1 among the existing target output col2,col3,col4" + checkErrorMessage(e, Some("DELTA_FAILED_FIND_ATTRIBUTE_IN_OUTPUT_COLUMNS"), Some("42703"), + Some(msg)) + } + { + val col = "col1" + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.failedFindPartitionColumnInOutputPlan(col) + } + checkErrorMessage(e, Some("DELTA_FAILED_FIND_PARTITION_COLUMN_IN_OUTPUT_PLAN"), Some("XXKDS"), + Some(s"Could not find $col in output plan.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.deltaTableFoundInExecutor() + } + checkErrorMessage(e, Some("DELTA_TABLE_FOUND_IN_EXECUTOR"), 
Some("XXKDS"), + Some("DeltaTable cannot be used in executors")) + } + { + val e = intercept[DeltaFileAlreadyExistsException] { + throw DeltaErrors.fileAlreadyExists("file.txt") + } + checkErrorMessage(e, Some("DELTA_FILE_ALREADY_EXISTS"), Some("42K04"), + Some("Existing file path file.txt")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.configureSparkSessionWithExtensionAndCatalog(Some(new Throwable())) + } + + val catalogImplConfig = SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key + val msg = + s"""This Delta operation requires the SparkSession to be configured with the + |DeltaSparkSessionExtension and the DeltaCatalog. Please set the necessary + |configurations when creating the SparkSession as shown below. + | + | SparkSession.builder() + | .config("spark.sql.extensions", "${classOf[DeltaSparkSessionExtension].getName}") + | .config("$catalogImplConfig", "${classOf[DeltaCatalog].getName}") + | ... + | .getOrCreate() + |""".stripMargin + + "\nIf you are using spark-shell/pyspark/spark-submit, you can add the required " + + "configurations to the command as show below:\n" + + s"--conf spark.sql.extensions=${classOf[DeltaSparkSessionExtension].getName} " + + s"--conf ${catalogImplConfig}=${classOf[DeltaCatalog].getName}\n" + checkErrorMessage(e, Some("DELTA_CONFIGURE_SPARK_SESSION_WITH_EXTENSION_AND_CATALOG"), Some("56038"), + Some(msg)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cdcNotAllowedInThisVersion() + } + checkErrorMessage(e, Some("DELTA_CDC_NOT_ALLOWED_IN_THIS_VERSION"), Some("0AKDC"), + Some("Configuration delta.enableChangeDataFeed cannot be set." + + " Change data feed from Delta is not yet available.")) + } + { + val ident = TableIdentifier("view1") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.convertNonParquetTablesException(ident, "source1") + } + checkErrorMessage(e, Some("DELTA_CONVERT_NON_PARQUET_TABLE"), Some("0AKDC"), + Some("CONVERT TO DELTA only supports parquet tables, but you are trying to " + + s"convert a source1 source: $ident")) + } + { + val from = StructType(Seq(StructField("c0", IntegerType))) + val to = StructType(Seq(StructField("c1", IntegerType))) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.updateSchemaMismatchExpression(from, to) + } + checkErrorMessage(e, Some("DELTA_UPDATE_SCHEMA_MISMATCH_EXPRESSION"), Some("42846"), + Some(s"Cannot cast ${from.catalogString} to ${to.catalogString}. All nested " + + "columns must match.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.removeFileCDCMissingExtendedMetadata("file") + } + checkErrorMessage(e, Some("DELTA_REMOVE_FILE_CDC_MISSING_EXTENDED_METADATA"), Some("XXKDS"), + Some("""RemoveFile created without extended metadata is ineligible for CDC: + |file""".stripMargin)) + } + { + val columnName = "c0" + val colMatches = Seq(StructField("c0", IntegerType)) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.ambiguousPartitionColumnException(columnName, colMatches) + } + + val msg = + s"Ambiguous partition column ${DeltaErrors.formatColumn(columnName)} can be" + + s" ${DeltaErrors.formatColumnList(colMatches.map(_.name))}." 
+ checkErrorMessage(e, Some("DELTA_AMBIGUOUS_PARTITION_COLUMN"), Some("42702"), + Some(msg)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.truncateTablePartitionNotSupportedException + } + checkErrorMessage(e, Some("DELTA_TRUNCATE_TABLE_PARTITION_NOT_SUPPORTED"), Some("0AKDC"), + Some("Operation not allowed: TRUNCATE TABLE on Delta tables does not support" + + " partition predicates; use DELETE to delete specific partitions or rows.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.invalidFormatFromSourceVersion(100, 10) + } + checkErrorMessage(e, Some("DELTA_INVALID_FORMAT_FROM_SOURCE_VERSION"), Some("XXKDS"), + Some("Unsupported format. Expected version should be smaller than or equal to 10 but was 100. " + + "Please upgrade to newer version of Delta.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.emptyDataException + } + checkErrorMessage(e, Some("DELTA_EMPTY_DATA"), Some("428GU"), + Some("Data used in creating the Delta table doesn't have any columns.")) + } + { + val path = "path" + val parsedCol = "col1" + val expectedCol = "col2" + + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.unexpectedPartitionColumnFromFileNameException(path, parsedCol, + expectedCol) + } + + val msg = + s"Expecting partition column ${DeltaErrors.formatColumn(expectedCol)}, but" + + s" found partition column ${DeltaErrors.formatColumn(parsedCol)}" + + s" from parsing the file name: $path" + checkErrorMessage(e, Some("DELTA_UNEXPECTED_PARTITION_COLUMN_FROM_FILE_NAME"), Some("KD009"), + Some(msg)) + } + { + val path = "path" + val parsedCols = Seq("col1", "col2") + val expectedCols = Seq("col3", "col4") + + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.unexpectedNumPartitionColumnsFromFileNameException(path, parsedCols, + expectedCols) + } + + val msg = + s"Expecting ${expectedCols.size} partition column(s): " + + s"${DeltaErrors.formatColumnList(expectedCols)}," + + s" but found ${parsedCols.size} partition column(s): " + + s"${DeltaErrors.formatColumnList(parsedCols)} from parsing the file name: $path" + checkErrorMessage(e, Some("DELTA_UNEXPECTED_NUM_PARTITION_COLUMNS_FROM_FILE_NAME"), Some("KD009"), + Some(msg)) + } + { + val version = 100L + val removedFile = "file" + val dataPath = "tablePath" + val e = intercept[DeltaUnsupportedOperationException] { + throw DeltaErrors.deltaSourceIgnoreDeleteError(version, removedFile, dataPath) + } + + val msg = + s"Detected deleted data (for example $removedFile) from streaming source at " + + s"version $version. This is currently not supported. If you'd like to ignore deletes, " + + "set the option 'ignoreDeletes' to 'true'. The source table can be found " + + s"at path $dataPath." + checkErrorMessage(e, Some("DELTA_SOURCE_IGNORE_DELETE"), Some("0A000"), + Some(msg)) + } + { + val tableId = "tableId" + val tableLocation = "path" + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.createTableWithNonEmptyLocation(tableId, tableLocation) + } + + val msg = + s"Cannot create table ('${tableId}')." + + s" The associated location ('${tableLocation}') is not empty and " + + "also not a Delta table." 
+ checkErrorMessage(e, Some("DELTA_CREATE_TABLE_WITH_NON_EMPTY_LOCATION"), Some("42601"), + Some(msg)) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.maxArraySizeExceeded() + } + checkErrorMessage(e, Some("DELTA_MAX_ARRAY_SIZE_EXCEEDED"), Some("42000"), + Some("Please use a limit less than Int.MaxValue - 8.")) + } + { + val unknownColumns = Seq("col1", "col2") + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.bloomFilterDropOnNonExistingColumnsException(unknownColumns) + } + checkErrorMessage(e, Some("DELTA_BLOOM_FILTER_DROP_ON_NON_EXISTING_COLUMNS"), Some("42703"), + Some("Cannot drop bloom filter indices for the following non-existent column(s): " + + unknownColumns.mkString(", "))) + } + { + val dataFilters = "filters" + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.replaceWhereWithFilterDataChangeUnset(dataFilters) + } + + val msg = + "'replaceWhere' cannot be used with data filters when " + + s"'dataChange' is set to false. Filters: ${dataFilters}" + checkErrorMessage(e, Some("DELTA_REPLACE_WHERE_WITH_FILTER_DATA_CHANGE_UNSET"), Some("42613"), + Some(msg)) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.missingTableIdentifierException("read") + } + checkErrorMessage(e, Some("DELTA_OPERATION_MISSING_PATH"), Some("42601"), + Some("Please provide the path or table identifier for read.")) + } + { + val column = StructField("c0", IntegerType) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cannotUseDataTypeForPartitionColumnError(column) + } + checkErrorMessage(e, Some("DELTA_INVALID_PARTITION_COLUMN_TYPE"), Some("42996"), + Some("Using column c0 of type IntegerType as a partition column is not supported.")) + } + { + val catalogPartitionSchema = StructType(Seq(StructField("a", IntegerType))) + val userPartitionSchema = StructType(Seq(StructField("b", StringType))) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.unexpectedPartitionSchemaFromUserException(catalogPartitionSchema, + userPartitionSchema) + } + checkErrorMessage(e, Some("DELTA_UNEXPECTED_PARTITION_SCHEMA_FROM_USER"), Some("KD009"), + Some("CONVERT TO DELTA was called with a partition schema different from the partition " + + "schema inferred from the catalog, please avoid providing the schema so that the " + + "partition schema can be chosen from the catalog.\n" + + s"\ncatalog partition schema:\n${catalogPartitionSchema.treeString}" + + s"\nprovided partition schema:\n${userPartitionSchema.treeString}")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.invalidInterval("interval1") + } + checkErrorMessage(e, Some("DELTA_INVALID_INTERVAL"), Some("22006"), + Some("interval1 is not a valid INTERVAL.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.cdcWriteNotAllowedInThisVersion + } + checkErrorMessage(e, Some("DELTA_CHANGE_TABLE_FEED_DISABLED"), Some("42807"), + Some("Cannot write to table with delta.enableChangeDataFeed set. 
" + + "Change data feed from Delta is not available.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.specifySchemaAtReadTimeException + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_SCHEMA_DURING_READ"), Some("0AKDC"), + Some("Delta does not support specifying the schema at read time.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.unexpectedDataChangeException("operation1") + } + checkErrorMessage(e, Some("DELTA_DATA_CHANGE_FALSE"), Some("0AKDE"), + Some("Cannot change table metadata because the 'dataChange' option is " + + "set to false. Attempted operation: 'operation1'.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.noStartVersionForCDC + } + checkErrorMessage(e, Some("DELTA_NO_START_FOR_CDC_READ"), Some("42601"), + Some("No startingVersion or startingTimestamp provided for CDC read.")) + } + { + val e = intercept[DeltaUnsupportedOperationException] { + throw DeltaErrors.unrecognizedColumnChange("change1") + } + checkErrorMessage(e, Some("DELTA_UNRECOGNIZED_COLUMN_CHANGE"), Some("42601"), + Some("Unrecognized column change change1. You may be running an out-of-date Delta Lake version.")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.endBeforeStartVersionInCDC(2, 1) + } + checkErrorMessage(e, Some("DELTA_INVALID_CDC_RANGE"), Some("22003"), + Some("CDC range from start 2 to end 1 was invalid. End cannot be before start.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.unexpectedChangeFilesFound("a.parquet") + } + checkErrorMessage(e, Some("DELTA_UNEXPECTED_CHANGE_FILES_FOUND"), Some("XXKDS"), + Some("""Change files found in a dataChange = false transaction. Files: + |a.parquet""".stripMargin)) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.logFailedIntegrityCheck(2, "option1") + } + checkErrorMessage(e, Some("DELTA_TXN_LOG_FAILED_INTEGRITY"), Some("XXKDS"), + Some("The transaction log has failed integrity checks. Failed " + + "verification at version 2 of:\noption1")) + } + { + val path = new Path("parent", "child") + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.checkpointNonExistTable(path) + } + checkErrorMessage(e, Some("DELTA_CHECKPOINT_NON_EXIST_TABLE"), Some("42K03"), + Some(s"Cannot checkpoint a non-existing table $path. " + + "Did you manually delete files in the _delta_log directory?")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.viewInDescribeDetailException(TableIdentifier("customer")) + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_DESCRIBE_DETAIL_VIEW"), Some("42809"), + Some("`customer` is a view. 
DESCRIBE DETAIL is only supported for tables.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.pathAlreadyExistsException(new Path(path)) + } + checkErrorMessage(e, Some("DELTA_PATH_EXISTS"), Some("42K04"), + Some("Cannot write to already existent path /sample/path without setting OVERWRITE = 'true'.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw new DeltaAnalysisException( + errorClass = "DELTA_MERGE_MISSING_WHEN", + messageParameters = Array.empty + ) + } + checkErrorMessage(e, Some("DELTA_MERGE_MISSING_WHEN"), Some("42601"), + Some("There must be at least one WHEN clause in a MERGE statement.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.unrecognizedFileAction("invalidAction", "invalidClass") + } + checkErrorMessage(e, Some("DELTA_UNRECOGNIZED_FILE_ACTION"), Some("XXKDS"), + Some("Unrecognized file action invalidAction with type invalidClass.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.streamWriteNullTypeException + } + checkErrorMessage(e, Some("DELTA_NULL_SCHEMA_IN_STREAMING_WRITE"), Some("42P18"), + Some("Delta doesn't accept NullTypes in the schema for streaming writes.")) + } + { + val expr = "1".expr + val e = intercept[DeltaIllegalArgumentException] { + throw new DeltaIllegalArgumentException( + errorClass = "DELTA_UNEXPECTED_ACTION_EXPRESSION", + messageParameters = Array(s"$expr")) + } + checkErrorMessage(e, Some("DELTA_UNEXPECTED_ACTION_EXPRESSION"), Some("42601"), + Some(s"Unexpected action expression $expr.")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.unexpectedAlias("alias1") + } + checkErrorMessage(e, Some("DELTA_UNEXPECTED_ALIAS"), Some("XXKDS"), + Some("Expected Alias but got alias1")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.unexpectedProject("project1") + } + checkErrorMessage(e, Some("DELTA_UNEXPECTED_PROJECT"), Some("XXKDS"), + Some("Expected Project but got project1")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.nullableParentWithNotNullNestedField + } + checkErrorMessage(e, Some("DELTA_NOT_NULL_NESTED_FIELD"), Some("0A000"), + Some("A non-nullable nested field can't be added to a nullable parent. " + + "Please set the nullability of the parent column accordingly.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.useAddConstraints + } + checkErrorMessage(e, Some("DELTA_ADD_CONSTRAINTS"), Some("0A000"), + Some("Please use ALTER TABLE ADD CONSTRAINT to add CHECK constraints.")) + } + { + val e = intercept[DeltaUnsupportedOperationException] { + throw DeltaErrors.deltaSourceIgnoreChangesError(10, "removedFile", "tablePath") + } + checkErrorMessage(e, Some("DELTA_SOURCE_TABLE_IGNORE_CHANGES"), Some("0A000"), + Some("Detected a data update (for example removedFile) in the source table at version " + + "10. This is currently not supported. If you'd like to ignore updates, set the " + + "option 'skipChangeCommits' to 'true'. If you would like the data update to be reflected, " + + "please restart this query with a fresh checkpoint directory. 
The source table can be " + + "found at path tablePath.")) + } + { + val limit = "limit" + val e = intercept[DeltaUnsupportedOperationException] { + throw DeltaErrors.unknownReadLimit(limit) + } + checkErrorMessage(e, Some("DELTA_UNKNOWN_READ_LIMIT"), Some("42601"), + Some(s"Unknown ReadLimit: $limit")) + } + { + val privilege = "unknown" + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.unknownPrivilege(privilege) + } + checkErrorMessage(e, Some("DELTA_UNKNOWN_PRIVILEGE"), Some("42601"), + Some(s"Unknown privilege: $privilege")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.deltaLogAlreadyExistsException("path") + } + checkErrorMessage(e, Some("DELTA_LOG_ALREADY_EXISTS"), Some("42K04"), + Some("A Delta log already exists at path")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.missingPartFilesException(10L, new FileNotFoundException("reason")) + } + checkErrorMessage(e, Some("DELTA_MISSING_PART_FILES"), Some("42KD6"), + Some("Couldn't find all part files of the checkpoint version: 10")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.checkConstraintNotBoolean("name1", "expr1") + } + checkErrorMessage(e, Some("DELTA_NON_BOOLEAN_CHECK_CONSTRAINT"), Some("42621"), + Some("CHECK constraint 'name1' (expr1) should be a boolean expression.")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.checkpointMismatchWithSnapshot + } + checkErrorMessage(e, Some("DELTA_CHECKPOINT_SNAPSHOT_MISMATCH"), Some("XXKDS"), + Some("State of the checkpoint doesn't match that of the snapshot.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.notADeltaTableException("operation1") + } + checkErrorMessage(e, Some("DELTA_ONLY_OPERATION"), Some("0AKDD"), + Some("operation1 is only supported for Delta tables.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.dropNestedColumnsFromNonStructTypeException(StringType) + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_DROP_NESTED_COLUMN_FROM_NON_STRUCT_TYPE"), Some("0AKDC"), + Some(s"Can only drop nested columns from StructType. Found $StringType")) + } + { + val columnsThatNeedRename = Set("c0", "c1") + val schema = StructType(Seq(StructField("schema1", StringType))) + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.nestedFieldsNeedRename(columnsThatNeedRename, schema) + } + checkErrorMessage(e, Some("DELTA_NESTED_FIELDS_NEED_RENAME"), Some("42K05"), + Some("Nested fields need renaming to avoid data loss. Fields:\n[c0, c1].\n" + + s"Original schema:\n${schema.treeString}")) + } + { + val locations = Seq("location1", "location2") + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.cannotSetLocationMultipleTimes(locations) + } + checkErrorMessage(e, Some("DELTA_CANNOT_SET_LOCATION_MULTIPLE_TIMES"), Some("XXKDS"), + Some(s"Can't set location multiple times. Found ${locations}")) + } + { + val e = intercept[DeltaIllegalStateException] { + throw DeltaErrors.metadataAbsentForExistingCatalogTable("tblName", "file://path/to/table") + } + checkErrorMessage( + e, + Some("DELTA_METADATA_ABSENT_EXISTING_CATALOG_TABLE"), + Some("XXKDS"), + Some( + "The table tblName already exists in the catalog but no metadata could be found for the table at the path file://path/to/table.\n" + + "Did you manually delete files from the _delta_log directory? If so, then you should be able to recreate it as follows. First, drop the table by running `DROP TABLE tblName`. 
Then, recreate it by running the current command again." + ) + ) + } + { + val e = intercept[DeltaStreamingColumnMappingSchemaIncompatibleException] { + throw DeltaErrors.blockStreamingReadsWithIncompatibleColumnMappingSchemaChanges( + spark, + StructType.fromDDL("id int"), + StructType.fromDDL("id2 int"), + detectedDuringStreaming = true + ) + } + checkErrorMessage(e, Some("DELTA_STREAMING_INCOMPATIBLE_SCHEMA_CHANGE_USE_SCHEMA_LOG"), Some("42KD4"), None) + assert(e.readSchema == StructType.fromDDL("id int")) + assert(e.incompatibleSchema == StructType.fromDDL("id2 int")) + assert(e.additionalProperties("detectedDuringStreaming").toBoolean) + } + { + val e = intercept[DeltaStreamingColumnMappingSchemaIncompatibleException] { + throw DeltaErrors.blockStreamingReadsWithIncompatibleColumnMappingSchemaChanges( + spark, + StructType.fromDDL("id int"), + StructType.fromDDL("id2 int"), + detectedDuringStreaming = false + ) + } + checkErrorMessage(e, Some("DELTA_STREAMING_INCOMPATIBLE_SCHEMA_CHANGE_USE_SCHEMA_LOG"), Some("42KD4"), None) + assert(e.readSchema == StructType.fromDDL("id int")) + assert(e.incompatibleSchema == StructType.fromDDL("id2 int")) + assert(!e.additionalProperties("detectedDuringStreaming").toBoolean) + } + { + val e = intercept[DeltaUnsupportedOperationException] { + throw DeltaErrors.blockColumnMappingAndCdcOperation(DeltaOperations.ManualUpdate) + } + checkErrorMessage(e, Some("DELTA_BLOCK_COLUMN_MAPPING_AND_CDC_OPERATION"), Some("42KD4"), + Some("Operation \"Manual Update\" is not allowed when the table has " + + "enabled change data feed (CDF) and has undergone schema changes using DROP COLUMN or " + + "RENAME COLUMN.")) + } + { + val options = Map( + "foo" -> "1", + "bar" -> "2" + ) + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.unsupportedDeltaTableForPathHadoopConf(options) + } + val prefixStr = DeltaTableUtils.validDeltaTableHadoopPrefixes.mkString("[", ",", "]") + checkErrorMessage(e, Some("DELTA_TABLE_FOR_PATH_UNSUPPORTED_HADOOP_CONF"), Some("0AKDC"), + Some("Currently DeltaTable.forPath only supports hadoop configuration " + + s"keys starting with $prefixStr but got ${options.mkString(",")}")) + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.cloneOnRelativePath("path") + } + checkErrorMessage(e, None, None, + Some("""The target location for CLONE needs to be an absolute path or table name. Use an + |absolute path instead of path.""".stripMargin)) + } + { + val e = intercept[AnalysisException] { + throw DeltaErrors.cloneFromUnsupportedSource( "table-0", "CSV") + } + assert(e.getErrorClass == "DELTA_CLONE_UNSUPPORTED_SOURCE") + assert(e.getSqlState == "0AKDC") + assert(e.getMessage == s"[DELTA_CLONE_UNSUPPORTED_SOURCE] Unsupported clone " + + s"source 'table-0', whose format is CSV.\n" + + "The supported formats are 'delta', 'iceberg' and 'parquet'.") + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.cloneReplaceUnsupported(TableIdentifier("customer")) + } + checkErrorMessage(e, Some("DELTA_UNSUPPORTED_CLONE_REPLACE_SAME_TABLE"), Some("0AKDC"), + Some(s""" + |You tried to REPLACE an existing table (`customer`) with CLONE. This operation is + |unsupported. Try a different target for CLONE or delete the table at the current target. 
+ |""".stripMargin)) + + } + { + val e = intercept[DeltaIllegalArgumentException] { + throw DeltaErrors.cloneAmbiguousTarget("external-location", TableIdentifier("table1")) + } + checkErrorMessage(e, Some("DELTA_CLONE_AMBIGUOUS_TARGET"), Some("42613"), + Some(s""" + |Two paths were provided as the CLONE target so it is ambiguous which to use. An external + |location for CLONE was provided at external-location at the same time as the path + |`table1`.""".stripMargin)) + } + { + DeltaTableValueFunctions.supportedFnNames.foreach { fnName => + { + val e = intercept[AnalysisException] { + sql(s"SELECT * FROM ${fnName}()").collect() + } + assert(e.getErrorClass == "INCORRECT_NUMBER_OF_ARGUMENTS") + assert(e.getMessage.contains( + s"not enough args, $fnName requires at least 2 arguments " + + "and at most 3 arguments.")) + } + { + val e = intercept[AnalysisException] { + sql(s"SELECT * FROM ${fnName}(1, 2, 3, 4, 5)").collect() + } + assert(e.getErrorClass == "INCORRECT_NUMBER_OF_ARGUMENTS") + assert(e.getMessage.contains( + s"too many args, $fnName requires at least 2 arguments " + + "and at most 3 arguments.")) + } + } + } + { + val e = intercept[DeltaAnalysisException] { + throw DeltaErrors.invalidTableValueFunction("invalid1") + } + checkErrorMessage(e, Some("DELTA_INVALID_TABLE_VALUE_FUNCTION"), Some("22000"), + Some("Function invalid1 is an unsupported table valued function for CDC reads.")) + } + { + val e = intercept[DeltaAnalysisException] { + throw new DeltaAnalysisException( + errorClass = "WRONG_COLUMN_DEFAULTS_FOR_DELTA_FEATURE_NOT_ENABLED", + messageParameters = Array("ALTER TABLE")) + } + checkErrorMessage( + e, Some("WRONG_COLUMN_DEFAULTS_FOR_DELTA_FEATURE_NOT_ENABLED"), + Some("0AKDE"), + Some( + s"""Failed to execute ALTER TABLE command because it assigned a column DEFAULT value, + |but the corresponding table feature was not enabled. Please retry the command again + |after executing ALTER TABLE tableName SET + |TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'supported').""".stripMargin)) + } + { + val e = intercept[DeltaAnalysisException] { + throw new DeltaAnalysisException( + errorClass = "WRONG_COLUMN_DEFAULTS_FOR_DELTA_ALTER_TABLE_ADD_COLUMN_NOT_SUPPORTED", + messageParameters = Array.empty) + } + checkErrorMessage( + e, Some("WRONG_COLUMN_DEFAULTS_FOR_DELTA_ALTER_TABLE_ADD_COLUMN_NOT_SUPPORTED"), + Some("0AKDC"), + Some( + s"""Failed to execute the command because DEFAULT values are not supported when adding new + |columns to previously existing Delta tables; please add the column without a default + |value first, then run a second ALTER TABLE ALTER COLUMN SET DEFAULT command to apply + |for future inserted rows instead.""".stripMargin)) + } + } +} + +class DeltaErrorsSuite + extends DeltaErrorsSuiteBase diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala new file mode 100644 index 00000000000..21dff34d472 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala @@ -0,0 +1,850 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.net.URI + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaOperations.Delete +import org.apache.spark.sql.delta.commands.DeltaGenerateCommand +import org.apache.spark.sql.delta.hooks.GenerateSymlinkManifest +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.DeltaFileOperations +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs._ +import org.apache.hadoop.fs.permission.FsPermission +import org.apache.hadoop.util.Progressable + +import org.apache.spark.SparkThrowable +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SharedSparkSession +// scalastyle:on import.ordering.noEmptyLine + +class DeltaGenerateSymlinkManifestSuite + extends DeltaGenerateSymlinkManifestSuiteBase + with DeltaSQLCommandTest + +trait DeltaGenerateSymlinkManifestSuiteBase extends QueryTest + with SharedSparkSession + with DeletionVectorsTestUtils + with DeltaTestUtilsForTempViews { + + import testImplicits._ + + test("basic case: SQL command - path-based table") { + withTempDir { tablePath => + tablePath.delete() + + spark.createDataset(spark.sparkContext.parallelize(1 to 100, 7)) + .write.format("delta").mode("overwrite").save(tablePath.toString) + + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 0) + + // Create a Delta table and call the scala api for generating manifest files + spark.sql(s"GENERATE symlink_ForMat_Manifest FOR TABLE delta.`${tablePath.getAbsolutePath}`") + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 7) + } + } + + test("basic case: SQL command - name-based table") { + withTable("deltaTable") { + spark.createDataset(spark.sparkContext.parallelize(1 to 100, 7)) + .write.format("delta").saveAsTable("deltaTable") + + val tableId = TableIdentifier("deltaTable") + val tablePath = new File(spark.sessionState.catalog.getTableMetadata(tableId).location) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 0) + + spark.sql(s"GENERATE symlink_ForMat_Manifest FOR TABLE deltaTable") + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 7) + } + } + + test("basic case: SQL command - throw error on bad tables") { + var e: Exception = intercept[AnalysisException] { + spark.sql("GENERATE symlink_format_manifest FOR TABLE nonExistentTable") + } + assert(e.getMessage.contains("not found") || e.getMessage.contains("cannot be found")) + + withTable("nonDeltaTable") { + spark.range(2).write.format("parquet").saveAsTable("nonDeltaTable") + e = intercept[AnalysisException] { + spark.sql("GENERATE symlink_format_manifest FOR TABLE nonDeltaTable") + } + assert(e.getMessage.contains("only supported for Delta")) + } + } + + test("basic case: SQL command - throw error on non delta table paths") { + withTempDir { dir => + 
var e = intercept[AnalysisException] { + spark.sql(s"GENERATE symlink_format_manifest FOR TABLE delta.`$dir`") + } + + assert(e.getMessage.contains("not found") || + e.getMessage.contains("only supported for Delta")) + + spark.range(2).write.format("parquet").mode("overwrite").save(dir.toString) + + e = intercept[AnalysisException] { + spark.sql(s"GENERATE symlink_format_manifest FOR TABLE delta.`$dir`") + } + assert(e.getMessage.contains("table not found") || + e.getMessage.contains("only supported for Delta")) + + e = intercept[AnalysisException] { + spark.sql(s"GENERATE symlink_format_manifest FOR TABLE parquet.`$dir`") + } + assert(e.getMessage.contains("not found") || e.getMessage.contains("cannot be found")) + } + } + + testWithTempView("basic case: SQL command - throw error on temp views") { isSQLTempView => + withTable("t1") { + spark.range(2).write.format("delta").saveAsTable("t1") + createTempViewFromTable("t1", isSQLTempView) + val e = intercept[AnalysisException] { + spark.sql(s"GENERATE symlink_format_manifest FOR TABLE v") + } + assert(e.getMessage.contains("not found") || e.getMessage.contains("cannot be found")) + } + } + + test("basic case: SQL command - throw error on unsupported mode") { + withTempDir { tablePath => + spark.range(2).write.format("delta").save(tablePath.getAbsolutePath) + val e = intercept[IllegalArgumentException] { + spark.sql(s"GENERATE xyz FOR TABLE delta.`${tablePath.getAbsolutePath}`") + } + assert(e.toString.contains("not supported")) + DeltaGenerateCommand.modeNameToGenerationFunc.keys.foreach { modeName => + assert(e.toString.contains(modeName)) + } + } + } + + test("basic case: Scala API - path-based table") { + withTempDir { tablePath => + tablePath.delete() + + spark.createDataset(spark.sparkContext.parallelize(1 to 100, 7)) + .write.format("delta").mode("overwrite").save(tablePath.toString) + + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 0) + + // Create a Delta table and call the scala api for generating manifest files + val deltaTable = io.delta.tables.DeltaTable.forPath(tablePath.getAbsolutePath) + deltaTable.generate("symlink_format_manifest") + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 7) + } + } + + test("basic case: Scala API - name-based table") { + withTable("deltaTable") { + spark.createDataset(spark.sparkContext.parallelize(1 to 100, 7)) + .write.format("delta").saveAsTable("deltaTable") + + val tableId = TableIdentifier("deltaTable") + val tablePath = new File(spark.sessionState.catalog.getTableMetadata(tableId).location) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 0) + + val deltaTable = io.delta.tables.DeltaTable.forName("deltaTable") + deltaTable.generate("symlink_format_manifest") + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 7) + } + } + + + test ("full manifest: non-partitioned table") { + withTempDir { tablePath => + tablePath.delete() + + def write(parallelism: Int): Unit = { + spark.createDataset(spark.sparkContext.parallelize(1 to 100, parallelism)) + .write.format("delta").mode("overwrite").save(tablePath.toString) + } + + write(7) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 0) + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 7) + + // Reduce files + write(5) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 7) + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles 
= true, expectedNumFiles = 5) + + // Remove all data + spark.emptyDataset[Int].write.format("delta").mode("overwrite").save(tablePath.toString) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 5) + generateSymlinkManifest(tablePath.toString) + assertManifest( + tablePath, expectSameFiles = true, expectedNumFiles = 0) + assert(spark.read.format("delta").load(tablePath.toString).count() == 0) + + // delete all data + write(5) + assertManifest( + tablePath, expectSameFiles = false, expectedNumFiles = 0) + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tablePath.toString) + deltaTable.delete() + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 0) + assert(spark.read.format("delta").load(tablePath.toString).count() == 0) + } + } + + test("full manifest: partitioned table") { + withTempDir { tablePath => + tablePath.delete() + + def write(parallelism: Int, partitions1: Int, partitions2: Int): Unit = { + spark.createDataset(spark.sparkContext.parallelize(1 to 100, parallelism)).toDF("value") + .withColumn("part1", $"value" % partitions1) + .withColumn("part2", $"value" % partitions2) + .write.format("delta").partitionBy("part1", "part2") + .mode("overwrite").save(tablePath.toString) + } + + write(10, 10, 10) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 0) + generateSymlinkManifest(tablePath.toString) + // 10 files each in ../part1=X/part2=X/ for X = 0 to 9 + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 100) + + // Reduce # partitions on both dimensions + write(1, 1, 1) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 100) + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 1) + + // Increase # partitions on both dimensions + write(5, 5, 5) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 1) + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 25) + + // Increase # partitions on only one dimension + write(5, 10, 5) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 25) + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 50) + + // Remove all data + spark.emptyDataset[Int].toDF("value") + .withColumn("part1", $"value" % 10) + .withColumn("part2", $"value" % 10) + .write.format("delta").mode("overwrite").save(tablePath.toString) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 50) + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 0) + assert(spark.read.format("delta").load(tablePath.toString).count() == 0) + + // delete all data + write(5, 5, 5) + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 25) + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tablePath.toString) + deltaTable.delete() + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 0) + assert(spark.read.format("delta").load(tablePath.toString).count() == 0) + } + } + + test("incremental manifest: table property controls post commit manifest generation") { + withTempDir { tablePath => + tablePath.delete() + + def writeWithIncrementalManifest(enabled: Boolean, numFiles: Int): Unit = { + 
withIncrementalManifest(tablePath, enabled) { + spark.createDataset(spark.sparkContext.parallelize(1 to 100, numFiles)) + .write.format("delta").mode("overwrite").save(tablePath.toString) + } + } + + writeWithIncrementalManifest(enabled = false, numFiles = 1) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 0) + + // Enabling it should automatically generate manifest files + writeWithIncrementalManifest(enabled = true, numFiles = 2) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 2) + + // Disabling it should stop updating existing manifest files + writeWithIncrementalManifest(enabled = false, numFiles = 3) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 2) + } + } + + test("incremental manifest: unpartitioned table") { + withTempDir { tablePath => + tablePath.delete() + + def write(numFiles: Int): Unit = withIncrementalManifest(tablePath, enabled = true) { + spark.createDataset(spark.sparkContext.parallelize(1 to 100, numFiles)) + .write.format("delta").mode("overwrite").save(tablePath.toString) + } + + write(1) + // The first write won't generate a manifest automatically, as the property is enabled only after the first write + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 0) + + // Increase files + write(7) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 7) + + // Reduce files + write(5) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 5) + + // Remove all data + spark.emptyDataset[Int].write.format("delta").mode("overwrite").save(tablePath.toString) + assert(spark.read.format("delta").load(tablePath.toString).count() == 0) + assertManifest( + tablePath, expectSameFiles = true, expectedNumFiles = 0) + } + } + + + + test("incremental manifest: partitioned table") { + withTempDir { tablePath => + tablePath.delete() + + def writePartitioned(parallelism: Int, numPartitions1: Int, numPartitions2: Int): Unit = { + withIncrementalManifest(tablePath, enabled = true) { + val input = + if (parallelism == 0) spark.emptyDataset[Int] + else spark.createDataset(spark.sparkContext.parallelize(1 to 100, parallelism)) + input.toDF("value") + .withColumn("part1", $"value" % numPartitions1) + .withColumn("part2", $"value" % numPartitions2) + .write.format("delta").partitionBy("part1", "part2") + .mode("overwrite").save(tablePath.toString) + } + } + + writePartitioned(1, 1, 1) + // Manifests won't be generated in the first write because `withIncrementalManifest` will + enable manifest generation only after the first write defines the table log. 
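+ // (expectedNumFiles below counts the data-file entries across all manifest files; for a
+ // partitioned table the manifests presumably sit in one subdirectory per partition under
+ // <table>/_symlink_format_manifest/part1=X/part2=Y/, mirroring the data layout.)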
+ assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 0) + + writePartitioned(10, 10, 10) + // 10 files each in ../part1=X/part2=X/ for X = 0 to 9 (so only 10 subdirectories) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 100) + + // Update such that 1 file is removed and 1 file is added in another partition + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tablePath.toString) + deltaTable.updateExpr("value = 1", Map("part1" -> "0", "value" -> "-1")) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 100) + + // Delete such that 1 file is removed + deltaTable.delete("value = -1") + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 99) + + // Reduce # partitions on both dimensions + writePartitioned(1, 1, 1) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 1) + + // Increase # partitions on both dimensions + writePartitioned(5, 5, 5) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 25) + + // Increase # partitions on only one dimension + writePartitioned(5, 10, 5) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 50) + + // Remove all data + writePartitioned(0, 1, 1) + assert(spark.read.format("delta").load(tablePath.toString).count() == 0) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 0) + } + } + + test("incremental manifest: generate full manifest if manifest did not exist") { + withTempDir { tablePath => + + def write(numPartitions: Int): Unit = { + spark.range(0, 100, 1, 1).toDF("value").withColumn("part", $"value" % numPartitions) + .write.format("delta").partitionBy("part").mode("append").save(tablePath.toString) + } + + write(10) + assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 0) + + withIncrementalManifest(tablePath, enabled = true) { + write(1) // update only one partition + } + // Manifests should be generated for all partitions + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 11) + } + } + + test("incremental manifest: failure to generate manifest throws exception") { + withTempDir { tablePath => + tablePath.delete() + + import SymlinkManifestFailureTestFileSystem._ + + withSQLConf( + s"fs.$SCHEME.impl" -> classOf[SymlinkManifestFailureTestFileSystem].getName, + s"fs.$SCHEME.impl.disable.cache" -> "true", + s"fs.AbstractFileSystem.$SCHEME.impl" -> + classOf[SymlinkManifestFailureTestAbstractFileSystem].getName, + s"fs.AbstractFileSystem.$SCHEME.impl.disable.cache" -> "true") { + def write(numFiles: Int): Unit = withIncrementalManifest(tablePath, enabled = true) { + spark.createDataset(spark.sparkContext.parallelize(1 to 100, numFiles)) + .write.format("delta").mode("overwrite").save(s"$SCHEME://$tablePath") + } + + val manifestPath = new File(tablePath, GenerateSymlinkManifest.MANIFEST_LOCATION) + require(!manifestPath.exists()) + write(1) // first write enables the property does not write any file + require(!manifestPath.exists()) + + val ex = catalyst.util.quietly { + intercept[RuntimeException] { write(2) } + } + + assert(ex.getMessage().contains(GenerateSymlinkManifest.name)) + assert(ex.getCause().toString.contains("Test exception")) + } + } + } + + test("special partition column names") { + + def assertColNames(inputStr: String): Unit = withClue(s"input: $inputStr") { + withTempDir { tablePath => + tablePath.delete() + val inputLines = inputStr.trim.stripMargin.trim.split("\n").toSeq + require(inputLines.size > 0) + val input = 
spark.read.json(inputLines.toDS) + val partitionCols = input.schema.fieldNames + val inputWithValue = input.withColumn("value", lit(1)) + + inputWithValue.write.format("delta").partitionBy(partitionCols: _*).save(tablePath.toString) + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = inputLines.size) + } + } + + intercept[AnalysisException] { + assertColNames("""{ " " : 0 }""") + } + assertColNames("""{ "%" : 0 }""") + assertColNames("""{ "a.b." : 0 }""") + assertColNames("""{ "a/b." : 0 }""") + assertColNames("""{ "a_b" : 0 }""") + intercept[AnalysisException] { + assertColNames("""{ "a b" : 0 }""") + } + } + + test("special partition column values") { + withTempDir { tablePath => + tablePath.delete() + val inputStr = """ + |{ "part1" : 1, "part2": "$0$", "value" : 1 } + |{ "part1" : null, "part2": "_1_", "value" : 1 } + |{ "part1" : 1, "part2": "", "value" : 1 } + |{ "part1" : null, "part2": " ", "value" : 1 } + |{ "part1" : 1, "part2": " ", "value" : 1 } + |{ "part1" : null, "part2": "/", "value" : 1 } + |{ "part1" : 1, "part2": null, "value" : 1 } + |""" + val input = spark.read.json(inputStr.trim.stripMargin.trim.split("\n").toSeq.toDS) + input.write.format("delta").partitionBy("part1", "part2").save(tablePath.toString) + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 7) + } + } + + test("root table path with escapable chars like space") { + withTempDir { p => + val tablePath = new File(p.toString, "path with space") + spark.createDataset(spark.sparkContext.parallelize(1 to 100, 1)).toDF("value") + .withColumn("part", $"value" % 2) + .write.format("delta").partitionBy("part").save(tablePath.toString) + + generateSymlinkManifest(tablePath.toString) + assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 2) + } + } + + test("block manifest generation with persistent DVs") { + withDeletionVectorsEnabled() { + val rowsToBeRemoved = Seq(1L, 42L, 43L) + + withTempDir { dir => + val tablePath = dir.getAbsolutePath + // Write in 2 files. + spark.range(end = 50L).toDF("id").coalesce(1) + .write.format("delta").mode("overwrite").save(tablePath) + spark.range(start = 50L, end = 100L).toDF("id").coalesce(1) + .write.format("delta").mode("append").save(tablePath) + val deltaLog = DeltaLog.forTable(spark, tablePath) + assert(deltaLog.snapshot.allFiles.count() === 2L) + + // Step 1: Make sure generation works on DV enabled tables without a DV in the snapshot. + // Delete an entire file, which can't produce DVs. + spark.sql(s"""DELETE FROM delta.`$tablePath` WHERE id BETWEEN 0 and 49""") + val remainingFiles = deltaLog.snapshot.allFiles.collect() + assert(remainingFiles.size === 1L) + assert(remainingFiles(0).deletionVector === null) + // Should work fine, since the snapshot doesn't contain DVs. + spark.sql(s"""GENERATE symlink_format_manifest FOR TABLE delta.`$tablePath`""") + + // Step 2: Make sure generation fails if there are DVs in the snapshot. + + // This is needed to make the manual commit work correctly, since we are not actually + // running a command that produces metrics. 
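+ // (The commit below applies the DV actions directly through txn.commit rather than through
+ // a SQL command, so no operation metrics are recorded; disabling the history metrics check
+ // is presumably what keeps such a bare commit from being rejected.)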
+ withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "false") { + val txn = deltaLog.startTransaction() + assert(txn.snapshot.allFiles.count() === 1) + val file = txn.snapshot.allFiles.collect().head + val actions = removeRowsFromFileUsingDV(deltaLog, file, rowIds = rowsToBeRemoved) + txn.commit(actions, Delete(predicate = Seq.empty)) + } + val e = intercept[DeltaCommandUnsupportedWithDeletionVectorsException] { + spark.sql(s"""GENERATE symlink_format_manifest FOR TABLE delta.`$tablePath`""") + } + checkErrorHelper( + exception = e, + errorClass = "DELTA_UNSUPPORTED_GENERATE_WITH_DELETION_VECTORS") + } + } + } + + private def setEnabledIncrementalManifest(tablePath: String, enabled: Boolean): Unit = { + spark.sql(s"ALTER TABLE delta.`$tablePath` " + + s"SET TBLPROPERTIES('${DeltaConfigs.SYMLINK_FORMAT_MANIFEST_ENABLED.key}'='$enabled')") + } + + test("block incremental manifest generation with persistent DVs") { + import DeltaTablePropertyValidationFailedSubClass._ + + def expectConstraintViolation(subClass: DeltaTablePropertyValidationFailedSubClass) + (thunk: => Unit): Unit = { + val e = intercept[DeltaTablePropertyValidationFailedException] { + thunk + } + checkErrorHelper( + exception = e, + errorClass = "DELTA_VIOLATE_TABLE_PROPERTY_VALIDATION_FAILED." + subClass.tag + ) + } + + withDeletionVectorsEnabled() { + val rowsToBeRemoved = Seq(1L, 42L, 43L) + + withTempDir { dir => + val tablePath = dir.getAbsolutePath + spark.range(end = 100L).toDF("id").coalesce(1) + .write.format("delta").mode("overwrite").save(tablePath) + val deltaLog = DeltaLog.forTable(spark, tablePath) + + // Make sure both properties can't be enabled together. + enableDeletionVectorsInTable(new Path(tablePath), enable = true) + expectConstraintViolation( + subClass = PersistentDeletionVectorsWithIncrementalManifestGeneration) { + setEnabledIncrementalManifest(tablePath, enabled = true) + } + // Or in the other order. + enableDeletionVectorsInTable(new Path(tablePath), enable = false) + setEnabledIncrementalManifest(tablePath, enabled = true) + expectConstraintViolation( + subClass = PersistentDeletionVectorsWithIncrementalManifestGeneration) { + enableDeletionVectorsInTable(new Path(tablePath), enable = true) + } + setEnabledIncrementalManifest(tablePath, enabled = false) + // Or both at once. + expectConstraintViolation( + subClass = PersistentDeletionVectorsWithIncrementalManifestGeneration) { + spark.sql(s"ALTER TABLE delta.`$tablePath` " + + s"SET TBLPROPERTIES('${DeltaConfigs.SYMLINK_FORMAT_MANIFEST_ENABLED.key}'='true'," + + s" '${DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key}' = 'true')") + } + + // If DVs were allowed at some point and are still present in the table, + // enabling incremental manifest generation must still fail. 
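+ // Scenario: enable DVs, commit a deletion that attaches a DV to a file, then disable the
+ // DV property again. The existing DV should still block enabling incremental manifest
+ // generation until the table is purged with REORG ... APPLY (PURGE).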
+ enableDeletionVectorsInTable(new Path(tablePath), enable = true) + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "false") { + val txn = deltaLog.startTransaction() + assert(txn.snapshot.allFiles.count() === 1) + val file = txn.snapshot.allFiles.collect().head + val actions = removeRowsFromFileUsingDV(deltaLog, file, rowIds = rowsToBeRemoved) + txn.commit(actions, Delete(predicate = Seq.empty)) + } + assert(getFilesWithDeletionVectors(deltaLog).nonEmpty) + enableDeletionVectorsInTable(new Path(tablePath), enable = false) + expectConstraintViolation( + subClass = ExistingDeletionVectorsWithIncrementalManifestGeneration) { + setEnabledIncrementalManifest(tablePath, enabled = true) + } + // Purge + spark.sql(s"REORG TABLE delta.`$tablePath` APPLY (PURGE)") + assert(getFilesWithDeletionVectors(deltaLog).isEmpty) + // Now it should work. + setEnabledIncrementalManifest(tablePath, enabled = true) + + // As a last fallback, in case some other writer put the table into an illegal state, + // we still need to fail the manifest generation if there are DVs. + // Reset table. + setEnabledIncrementalManifest(tablePath, enabled = false) + enableDeletionVectorsInTable(new Path(tablePath), enable = false) + spark.range(end = 100L).toDF("id").coalesce(1) + .write.format("delta").mode("overwrite").save(tablePath) + // Add DVs + enableDeletionVectorsInTable(new Path(tablePath), enable = true) + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "false") { + val txn = deltaLog.startTransaction() + assert(txn.snapshot.allFiles.count() === 1) + val file = txn.snapshot.allFiles.collect().head + val actions = removeRowsFromFileUsingDV(deltaLog, file, rowIds = rowsToBeRemoved) + txn.commit(actions, Delete(predicate = Seq.empty)) + } + // Force enable manifest generation. + withSQLConf(DeltaSQLConf.DELTA_TABLE_PROPERTY_CONSTRAINTS_CHECK_ENABLED.key -> "false") { + setEnabledIncrementalManifest(tablePath, enabled = true) + } + val e2 = intercept[DeltaCommandUnsupportedWithDeletionVectorsException] { + spark.range(10).write.format("delta").mode("append").save(tablePath) + } + checkErrorHelper( + exception = e2, + errorClass = "DELTA_UNSUPPORTED_GENERATE_WITH_DELETION_VECTORS") + // This is fine, since the new snapshot won't contain DVs. + spark.range(10).write.format("delta").mode("overwrite").save(tablePath) + + // Make sure we can get the table back into a consistent state, as well + setEnabledIncrementalManifest(tablePath, enabled = false) + // No more exception. + spark.range(10).write.format("delta").mode("append").save(tablePath) + } + } + } + + private def checkErrorHelper( + exception: SparkThrowable, + errorClass: String + ): Unit = { + assert(exception.getErrorClass === errorClass, + s"Expected errorClass $errorClass, but got $exception") + } + + Seq(true, false).foreach { useIncremental => + test(s"delete partition column with special char - incremental=$useIncremental") { + + def writePartition(dir: File, partName: String): Unit = { + spark.range(10) + .withColumn("part", lit(partName)) + .repartition(1) + .write + .format("delta") + .mode("append") + .partitionBy("part") + .save(dir.toString) + } + + withTempDir { dir => + // create table and write first manifest + writePartition(dir, "noSpace") + generateSymlinkManifest(dir.toString) + + withIncrementalManifest(dir, useIncremental) { + // 1. 
test paths with spaces + writePartition(dir, "yes space") + + if (!useIncremental) { generateSymlinkManifest(dir.toString) } + assertManifest(dir, expectSameFiles = true, expectedNumFiles = 2) + + // delete partition + sql(s"""DELETE FROM delta.`${dir.toString}` WHERE part="yes space";""") + + if (!useIncremental) { generateSymlinkManifest(dir.toString) } + assertManifest(dir, expectSameFiles = true, expectedNumFiles = 1) + + // 2. test special characters + // scalastyle:off nonascii + writePartition(dir, "库尔 勒") + if (!useIncremental) { generateSymlinkManifest(dir.toString) } + assertManifest(dir, expectSameFiles = true, expectedNumFiles = 2) + + // delete partition + sql(s"""DELETE FROM delta.`${dir.toString}` WHERE part="库尔 勒";""") + // scalastyle:on nonascii + + if (!useIncremental) { generateSymlinkManifest(dir.toString) } + assertManifest(dir, expectSameFiles = true, expectedNumFiles = 1) + } + } + } + } + + /** + * Assert that the manifest files in the table meet the expectations. + * @param tablePath Path of the Delta table + * @param expectSameFiles Expect that the manifest files contain the same data files + * as the latest version of the table + * @param expectedNumFiles Expected number of manifest files + */ + def assertManifest( + tablePath: File, + expectSameFiles: Boolean, + expectedNumFiles: Int): Unit = { + val deltaSnapshot = DeltaLog.forTable(spark, tablePath.toString).update() + val manifestPath = new File(tablePath, GenerateSymlinkManifest.MANIFEST_LOCATION) + + if (!manifestPath.exists) { + assert(expectedNumFiles == 0 && !expectSameFiles) + return + } + + // Validate the expected number of files are present in the manifest + val filesInManifest = spark.read.text(manifestPath.toString).select("value").as[String] + .map { _.stripPrefix("file:") }.toDF("file") + assert(filesInManifest.count() == expectedNumFiles) + + // Validate that files in the latest version of DeltaLog is same as those in the manifest + val filesInLog = deltaSnapshot.allFiles.map { addFile => + // Note: this unescapes the relative path in `addFile` + DeltaFileOperations.absolutePath(tablePath.toString, addFile.path).toString + }.toDF("file") + if (expectSameFiles) { + checkAnswer(filesInManifest, filesInLog.toDF()) + + // Validate that each file in the manifest is actually present in table. This mainly checks + // whether the file names in manifest are not escaped and therefore are readable directly + // by Hadoop APIs. + val fs = new Path(manifestPath.toString) + .getFileSystem(deltaSnapshot.deltaLog.newDeltaHadoopConf()) + spark.read.text(manifestPath.toString).select("value").as[String].collect().foreach { p => + assert(fs.exists(new Path(p)), s"path $p in manifest not found in file system") + } + } else { + assert(filesInManifest.as[String].collect().toSet != filesInLog.as[String].collect().toSet) + } + + // If there are partitioned files, make sure the partitions values read from them are the + // same as those in the table. 
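+ // (The manifest files live under Hive-style part=value subdirectories, so reading the
+ // manifest location as a text source presumably lets Spark's partition discovery attach
+ // the partition columns that are selected below.)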
+ val partitionCols = deltaSnapshot.metadata.partitionColumns.map(x => s"`$x`") + if (partitionCols.nonEmpty && expectSameFiles && expectedNumFiles > 0) { + val partitionsInManifest = spark.read.text(manifestPath.toString) + .selectExpr(partitionCols: _*).distinct() + val partitionsInData = spark.read.format("delta").load(tablePath.toString) + .selectExpr(partitionCols: _*).distinct() + checkAnswer(partitionsInManifest, partitionsInData) + } + } + + protected def withIncrementalManifest(tablePath: File, enabled: Boolean)(func: => Unit): Unit = { + if (tablePath.exists()) { + val latestMetadata = DeltaLog.forTable(spark, tablePath).update().metadata + if (DeltaConfigs.SYMLINK_FORMAT_MANIFEST_ENABLED.fromMetaData(latestMetadata) != enabled) { + spark.sql(s"ALTER TABLE delta.`$tablePath` " + + s"SET TBLPROPERTIES(${DeltaConfigs.SYMLINK_FORMAT_MANIFEST_ENABLED.key}=$enabled)") + } + } + func + } + + protected def generateSymlinkManifest(tablePath: String): Unit = { + val deltaLog = DeltaLog.forTable(spark, tablePath) + GenerateSymlinkManifest.generateFullManifest(spark, deltaLog) + } +} + +class SymlinkManifestFailureTestAbstractFileSystem( + uri: URI, + conf: org.apache.hadoop.conf.Configuration) + extends org.apache.hadoop.fs.DelegateToFileSystem( + uri, + new SymlinkManifestFailureTestFileSystem, + conf, + SymlinkManifestFailureTestFileSystem.SCHEME, + false) { + + // Implementation copied from RawLocalFs + import org.apache.hadoop.fs.local.LocalConfigKeys + import org.apache.hadoop.fs._ + + override def getUriDefaultPort(): Int = -1 + override def getServerDefaults(): FsServerDefaults = LocalConfigKeys.getServerDefaults() + override def isValidName(src: String): Boolean = true +} + + +class SymlinkManifestFailureTestFileSystem extends RawLocalFileSystem { + + private var uri: URI = _ + override def getScheme: String = SymlinkManifestFailureTestFileSystem.SCHEME + + override def initialize(name: URI, conf: Configuration): Unit = { + uri = URI.create(name.getScheme + ":///") + super.initialize(name, conf) + } + + override def getUri(): URI = if (uri == null) { + // RawLocalFileSystem's constructor will call this one before `initialize` is called. + // Just return the super's URI to avoid NPE. + super.getUri + } else { + uri + } + + // Override both create() method defined in RawLocalFileSystem such that any file creation + // throws error. 
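+ // (Note that only paths under the manifest location throw; _delta_log and data file writes
+ // still go through RawLocalFileSystem, so the commit itself presumably succeeds and only
+ // the post-commit manifest generation fails.)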
+ + override def create( + path: Path, + overwrite: Boolean, + bufferSize: Int, + replication: Short, + blockSize: Long, + progress: Progressable): FSDataOutputStream = { + if (path.toString.contains(GenerateSymlinkManifest.MANIFEST_LOCATION)) { + throw new RuntimeException("Test exception") + } + super.create(path, overwrite, bufferSize, replication, blockSize, null) + } + + override def create( + path: Path, + permission: FsPermission, + overwrite: Boolean, + bufferSize: Int, + replication: Short, + blockSize: Long, + progress: Progressable): FSDataOutputStream = { + if (path.toString.contains(GenerateSymlinkManifest.MANIFEST_LOCATION)) { + throw new RuntimeException("Test exception") + } + super.create(path, permission, overwrite, bufferSize, replication, blockSize, progress) + } +} + +object SymlinkManifestFailureTestFileSystem { + val SCHEME = "testScheme" +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaHistoryManagerSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaHistoryManagerSuite.scala new file mode 100644 index 00000000000..1c37bd595ba --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaHistoryManagerSuite.scala @@ -0,0 +1,614 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.{File, FileNotFoundException} +import java.net.URI +import java.sql.Timestamp +import java.text.SimpleDateFormat +import java.util.{Date, Locale} + +import scala.concurrent.duration._ +import scala.language.implicitConversions + +import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.StatsUtils +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames +import org.scalatest.GivenWhenThen + +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +/** A set of tests which we can open source after Spark 3.0 is released. 
*/ +trait DeltaTimeTravelTests extends QueryTest + with SharedSparkSession + with GivenWhenThen + with DeltaSQLCommandTest + with StatsUtils { + protected implicit def durationToLong(duration: FiniteDuration): Long = { + duration.toMillis + } + + protected implicit def longToTimestamp(ts: Long): Timestamp = new Timestamp(ts) + + protected val timeFormatter = new SimpleDateFormat("yyyyMMddHHmmssSSS") + + protected def modifyCommitTimestamp(deltaLog: DeltaLog, version: Long, ts: Long): Unit = { + val file = new File(FileNames.deltaFile(deltaLog.logPath, version).toUri) + file.setLastModified(ts) + val crc = new File(FileNames.checksumFile(deltaLog.logPath, version).toUri) + if (crc.exists()) { + crc.setLastModified(ts) + } + } + + protected def versionAsOf(table: String, version: Long): String = { + s"$table version as of $version" + } + + protected def timestampAsOf(table: String, expr: String): String = { + s"$table timestamp as of $expr" + } + + protected def verifyLogging( + tableVersion: Long, + queriedVersion: Long, + accessType: String, + apiUsed: String)(f: => Unit): Unit = { + // TODO: would be great to verify our logging metrics + } + + protected def getTableLocation(table: String): String = { + spark.sessionState.catalog.getTableMetadata(TableIdentifier(table)).location.toString + } + + /** Generate commits with the given timestamp in millis. */ + protected def generateCommitsCheap( + deltaLog: DeltaLog, commits: Long*): Unit = { + var startVersion = deltaLog.snapshot.version + 1 + commits.foreach { ts => + val action = createTestAddFile(path = startVersion.toString, modificationTime = startVersion) + deltaLog.startTransaction().commitManually(action) + modifyCommitTimestamp(deltaLog, startVersion, ts) + startVersion += 1 + } + } + + protected def generateCommitsAtPath(table: String, path: String, commits: Long*): Unit = { + generateCommitsBase(table, Some(path), commits: _*) + } + + /** Generate commits with the given timestamp in millis. 
*/ + protected def generateCommits(table: String, commits: Long*): Unit = { + generateCommitsBase(table, None, commits: _*) + } + + private def generateCommitsBase(table: String, path: Option[String], commits: Long*): Unit = { + var commitList = commits.toSeq + if (commitList.isEmpty) return + if (!spark.sessionState.catalog.tableExists(TableIdentifier(table))) { + if (path.isDefined) { + spark.range(0, 10).write.format("delta") + .mode("append") + .option("path", path.get) + .saveAsTable(table) + } else { + spark.range(0, 10).write.format("delta") + .mode("append") + .saveAsTable(table) + } + val deltaLog = DeltaLog.forTable(spark, new TableIdentifier(table)) + val file = new File(FileNames.deltaFile(deltaLog.logPath, 0).toUri) + file.setLastModified(commitList.head) + commitList = commits.slice(1, commits.length) // we already wrote the first commit here + var startVersion = deltaLog.snapshot.version + 1 + commitList.foreach { ts => + val rangeStart = startVersion * 10 + val rangeEnd = rangeStart + 10 + spark.range(rangeStart, rangeEnd).write.format("delta").mode("append").saveAsTable(table) + val file = new File(FileNames.deltaFile(deltaLog.logPath, startVersion).toUri) + file.setLastModified(ts) + startVersion += 1 + } + } + } + + /** Alternate for `withTables` as we leave some tables in an unusable state for clean up */ + protected def withTable(tableName: String, dir: String)(f: => Unit): Unit = { + try f finally { + try { + Utils.deleteRecursively(new File(dir.toString)) + } catch { + case _: Throwable => + Nil // do nothing, this can fail if the table was deleted by the test. + } finally { + try { + sql(s"DROP TABLE IF EXISTS $tableName") + } catch { + case _: Throwable => + // There is one test that fails the drop table as well + // we ignore this exception as that test uses a path based location. 
+ Nil + } + } + } + } + + protected implicit def longToTimestampExpr(value: Long): String = { + s"cast($value / 1000 as timestamp)" + } + + import testImplicits._ + + test("time travel with partition changes and data skipping - should instantiate old schema") { + withTempDir { dir => + val tblLoc = dir.getCanonicalPath + val v0 = spark.range(10).withColumn("part5", 'id % 5) + + v0.write.format("delta").partitionBy("part5").mode("append").save(tblLoc) + val deltaLog = DeltaLog.forTable(spark, tblLoc) + + val schemaString = spark.range(10, 20).withColumn("part2", 'id % 2).schema.json + deltaLog.startTransaction().commit( + Seq(deltaLog.snapshot.metadata.copy( + schemaString = schemaString, + partitionColumns = Seq("part2"))), + DeltaOperations.ManualUpdate + ) + checkAnswer( + spark.read.option("versionAsOf", 0).format("delta").load(tblLoc).where("part5 = 1"), + v0.where("part5 = 1")) + } + } + + test("can't provide both version and timestamp in DataFrameReader") { + val e = intercept[IllegalArgumentException] { + spark.read.option("versionaSof", 1) + .option("timestampAsOF", "fake").format("delta").load("/some/fake") + } + assert(e.getMessage.contains("either provide 'timestampAsOf' or 'versionAsOf'")) + } + + + test("don't time travel a valid non-delta path with @ syntax") { + val format = "json" + withTempDir { dir => + val path = new File(dir, "base@v0").getCanonicalPath + spark.range(10).write.format(format).mode("append").save(path) + spark.range(10).write.format(format).mode("append").save(path) + + checkAnswer( + spark.read.format(format).load(path), + spark.range(10).union(spark.range(10)).toDF() + ) + + checkAnswer( + spark.table(s"$format.`$path`"), + spark.range(10).union(spark.range(10)).toDF() + ) + + intercept[AnalysisException] { + spark.read.format(format).load(path + "@v0").count() + } + + intercept[AnalysisException] { + spark.table(s"$format.`$path@v0`").count() + } + } + } + + /////////////////////////// + // Time Travel SQL Tests // + /////////////////////////// + + test("AS OF support does not impact non-delta tables") { + withTable("t1") { + spark.range(10).write.format("parquet").mode("append").saveAsTable("t1") + spark.range(10, 20).write.format("parquet").mode("append").saveAsTable("t1") + + // We should still use the default, non-delta code paths for a non-delta table. 
+ // For parquet, that means to fail with QueryCompilationErrors::tableNotSupportTimeTravelError + val e = intercept[Exception] { + spark.sql("SELECT * FROM t1 VERSION AS OF 0") + }.getMessage + assert(e.contains("does not support time travel") || + e.contains("The feature is not supported: Time travel on the relation")) + } + } + + // scalastyle:off line.size.limit + test("as of timestamp in between commits should use commit before timestamp") { + // scalastyle:off line.size.limit + val tblName = "delta_table" + withTable(tblName) { + val start = 1540415658000L + generateCommits(tblName, start, start + 20.minutes, start + 40.minutes) + + verifyLogging(2L, 0L, "timestamp", "sql") { + checkAnswer( + sql(s"select count(*) from ${timestampAsOf(tblName, start + 10.minutes)}"), + Row(10L) + ) + } + + + verifyLogging(2L, 0L, "timestamp", "sql") { + checkAnswer( + sql("select count(*) from " + + s"${timestampAsOf(s"delta.`${getTableLocation(tblName)}`", start + 10.minutes)}"), + Row(10L) + ) + } + + + checkAnswer( + sql(s"select count(*) from ${timestampAsOf(tblName, start + 30.minutes)}"), + Row(20L) + ) + } + } + + test("as of timestamp on exact timestamp") { + val tblName = "delta_table" + withTable(tblName) { + val start = 1540415658000L + generateCommits(tblName, start, start + 20.minutes) + + // Simulate getting the timestamp directly from Spark SQL + val ts = Seq(new Timestamp(start), new Timestamp(start + 20.minutes)).toDF("ts") + .select($"ts".cast("string")).as[String].collect() + .map(i => s"'$i'") + + checkAnswer( + sql(s"select count(*) from ${timestampAsOf(tblName, ts(0))}"), + Row(10L) + ) + checkAnswer( + sql(s"select count(*) from ${timestampAsOf(tblName, start)}"), + Row(10L) + ) + + + checkAnswer( + sql(s"select count(*) from ${timestampAsOf(tblName, start + 20.minutes)}"), + Row(20L) + ) + + checkAnswer( + sql(s"select count(*) from ${timestampAsOf(tblName, ts(1))}"), + Row(20L) + ) + } + } + + test("as of with versions") { + val tblName = s"delta_table" + withTempDir { dir => + withTable(tblName, dir.toString) { + val start = 1540415658000L + generateCommitsAtPath(tblName, dir.toString, start, start + 20.minutes, start + 40.minutes) + verifyLogging(2L, 0L, "version", "sql") { + checkAnswer( + sql(s"select count(*) from ${versionAsOf(tblName, 0)}"), + Row(10L) + ) + } + + + verifyLogging(2L, 0L, "version", "dfReader") { + checkAnswer( + spark.read.format("delta").option("versionAsOf", "0") + .load(getTableLocation(tblName)).groupBy().count(), + Row(10) + ) + } + checkAnswer( + sql(s"select count(*) from ${versionAsOf(tblName, 1)}"), + Row(20L) + ) + checkAnswer( + spark.read.format("delta").option("versionAsOf", 1) + .load(getTableLocation(tblName)).groupBy().count(), + Row(20) + ) + checkAnswer( + sql(s"select count(*) from ${versionAsOf(tblName, 2)}"), + Row(30L) + ) + val e1 = intercept[AnalysisException] { + sql(s"select count(*) from ${versionAsOf(tblName, 3)}").collect() + } + assert(e1.getMessage.contains("[0, 2]")) + + val deltaLog = DeltaLog.forTable(spark, getTableLocation(tblName)) + new File(FileNames.deltaFile(deltaLog.logPath, 0).toUri).delete() + // Delta Lake will create a DeltaTableV2 explicitly with time travel options in the catalog. + // These options will be verified by DeltaHistoryManager, which will throw an + // AnalysisException. 
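+ // (Deleting the version-0 commit file above leaves no commit to reconstruct that snapshot
+ // from, which is presumably why the error below complains about "No recreatable commits
+ // found".)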
+ val e2 = intercept[AnalysisException] { + sql(s"select count(*) from ${versionAsOf(tblName, 0)}").collect() + } + assert(e2.getMessage.contains("No recreatable commits found at")) + } + } + } + + test("as of exact timestamp after last commit should fail") { + val tblName = "delta_table" + withTable(tblName) { + val start = 1540415658000L + generateCommits(tblName, start) + + // Simulate getting the timestamp directly from Spark SQL + val ts = Seq(new Timestamp(start + 10.minutes)).toDF("ts") + .select($"ts".cast("string")).as[String].collect() + .map(i => s"'$i'") + + val e1 = intercept[AnalysisException] { + sql(s"select count(*) from ${timestampAsOf(tblName, ts(0))}").collect() + } + assert(e1.getMessage.contains("VERSION AS OF 0")) + assert(e1.getMessage.contains("TIMESTAMP AS OF '2018-10-24 14:14:18'")) + + val e2 = intercept[AnalysisException] { + sql(s"select count(*) from ${timestampAsOf(tblName, start + 10.minutes)}").collect() + } + assert(e2.getMessage.contains("VERSION AS OF 0")) + assert(e2.getMessage.contains("TIMESTAMP AS OF '2018-10-24 14:14:18'")) + + checkAnswer( + sql(s"select count(*) from ${timestampAsOf(tblName, "'2018-10-24 14:14:18'")}"), + Row(10) + ) + + verifyLogging(0L, 0L, "timestamp", "dfReader") { + checkAnswer( + spark.read.format("delta").option("timestampAsOf", "2018-10-24 14:14:18") + .load(getTableLocation(tblName)).groupBy().count(), + Row(10) + ) + } + } + } + + test("time travelling with adjusted timestamps") { + val tblName = "delta_table" + withTable(tblName) { + val start = 1540415658000L + generateCommits(tblName, start, start - 5.seconds, start + 3.minutes) + + checkAnswer( + sql(s"select count(*) from ${timestampAsOf(tblName, start)}"), + Row(10L) + ) + + checkAnswer( + sql(s"select count(*) from ${timestampAsOf(tblName, start + 1.milli)}"), + Row(20L) + ) + + checkAnswer( + sql(s"select count(*) from ${timestampAsOf(tblName, start + 119.seconds)}"), + Row(20L) + ) + + val e = intercept[AnalysisException] { + sql(s"select count(*) from ${timestampAsOf(tblName, start - 3.seconds)}").collect() + } + assert(e.getMessage.contains("before the earliest version")) + } + } + + test("Time travel with schema changes") { + val tblName = "delta_table" + withTable(tblName) { + spark.range(10).write.format("delta").mode("append").saveAsTable(tblName) + sql(s"ALTER TABLE $tblName ADD COLUMNS (part bigint)") + spark.range(10, 20).withColumn("part", 'id) + .write.format("delta").mode("append").saveAsTable(tblName) + + val tableLoc = getTableLocation(tblName) + checkAnswer( + sql(s"select * from ${versionAsOf(tblName, 0)}"), + spark.range(10).toDF()) + + checkAnswer( + sql(s"select * from ${versionAsOf(s"delta.`$tableLoc`", 0)}"), + spark.range(10).toDF()) + + checkAnswer( + spark.read.option("versionAsOf", 0).format("delta").load(tableLoc), + spark.range(10).toDF()) + + } + } + + test("data skipping still works with time travel") { + val tblName = "delta_table" + withTable(tblName) { + val start = 1540415658000L + generateCommits(tblName, start, start + 20.minutes) + + def testScan(df: DataFrame): Unit = { + val scan = getStats(df) + assert(scan.scanned.bytesCompressed.get < scan.total.bytesCompressed.get) + } + + testScan(sql(s"select * from ${versionAsOf(tblName, 0)} where id = 2")) + + testScan(spark.read.format("delta").option("versionAsOf", 0).load(getTableLocation(tblName)) + .where("id = 2")) + + } + } + + test("fail to time travel a different relation than Delta") { + withTempDir { output => + val dir = output.getCanonicalPath + 
spark.range(10).write.mode("append").parquet(dir) + spark.range(10).write.mode("append").parquet(dir) + def assertFormatFailure(f: => Unit): Unit = { + val e = intercept[AnalysisException] { + f + } + assert( + e.getMessage.contains("path-based tables") || + e.message.contains("[UNSUPPORTED_FEATURE.TIME_TRAVEL] The feature is not supported"), + s"Returned instead:\n$e") + } + + assertFormatFailure { + sql(s"select * from ${versionAsOf(s"parquet.`$dir`", 0)}").collect() + } + + assertFormatFailure { + sql(s"select * from ${versionAsOf(s"parquet.`$dir`", 0)}").collect() + } + + + checkAnswer( + spark.read.option("versionAsOf", 0).parquet(dir), // do not time travel other relations + spark.range(10).union(spark.range(10)).toDF() + ) + + checkAnswer( + // do not time travel other relations + spark.read.option("timestampAsOf", "2018-10-12 01:01:01").parquet(dir), + spark.range(10).union(spark.range(10)).toDF() + ) + + val tblName = "parq_table" + withTable(tblName) { + sql(s"create table $tblName using parquet as select * from parquet.`$dir`") + val e = intercept[Exception] { + sql(s"select * from ${versionAsOf(tblName, 0)}").collect() + } + val catalogName = CatalogManager.SESSION_CATALOG_NAME + val catalogPrefix = catalogName + "." + assert(e.getMessage.contains( + s"Table ${catalogPrefix}default.parq_table does not support time travel") || + e.getMessage.contains(s"Time travel on the relation: `$catalogName`.`default`.`parq_table`")) + } + + val viewName = "parq_view" + assertFormatFailure { + sql(s"create temp view $viewName as select * from ${versionAsOf(s"parquet.`$dir`", 0)}") + } + } + } +} + +abstract class DeltaHistoryManagerBase extends DeltaTimeTravelTests + { + test("cannot time travel target tables of insert/delete/update/merge") { + val tblName = "delta_table" + withTable(tblName) { + val start = 1540415658000L + generateCommits(tblName, start, start + 20.minutes) + + // These all actually fail parsing + intercept[ParseException] { + sql(s"insert into ${versionAsOf(tblName, 0)} values (11, 12, 13)") + } + + intercept[ParseException] { + sql(s"update ${versionAsOf(tblName, 0)} set id = id - 1 where id < 10") + } + + intercept[ParseException] { + sql(s"delete from ${versionAsOf(tblName, 0)} id < 10") + } + + intercept[ParseException] { + sql(s"""merge into ${versionAsOf(tblName, 0)} old + |using $tblName new + |on old.id = new.id + |when not matched then insert * + """.stripMargin) + } + } + } + + test("vacuumed version") { + quietly { + val tblName = "delta_table" + withTable(tblName) { + val start = 1540415658000L + generateCommits(tblName, start, start + 20.minutes) + sql(s"optimize $tblName") + + withSQLConf( + // Disable query rewrite or else the parquet files are not scanned. 
+ DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> "false", + DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED.key -> "false") { + sql(s"vacuum $tblName retain 0 hours") + intercept[SparkException] { + sql(s"select * from ${versionAsOf(tblName, 0)}").collect() + } + intercept[SparkException] { + sql(s"select count(*) from ${versionAsOf(tblName, 1)}").collect() + } + } + } + } + } + + + test("as of with table API") { + val tblName = "delta_table" + withTable(tblName) { + val start = 1540415658000L + generateCommits(tblName, start, start + 20.minutes, start + 40.minutes) + + assert(spark.read.format("delta").option("versionAsOf", "0").table(tblName).count() == 10) + assert(spark.read.format("delta").option("versionAsOf", 1).table(tblName).count() == 20) + assert(spark.read.format("delta").option("versionAsOf", 2).table(tblName).count() == 30) + val e1 = intercept[AnalysisException] { + spark.read.format("delta").option("versionAsOf", 3).table(tblName).collect() + } + assert(e1.getMessage.contains("[0, 2]")) + + val e2 = intercept[IllegalArgumentException] { + spark.read.format("delta") + .option("versionAsOf", 3) + .option("timestampAsOf", "2020-10-22 23:20:11") + .table(tblName).collect() + } + assert(e2.getMessage.contains("either provide 'timestampAsOf' or 'versionAsOf'")) + + } + } + +} + +/** Uses V2 resolution code paths */ +class DeltaHistoryManagerSuite extends DeltaHistoryManagerBase { + override protected def sparkConf: SparkConf = { + super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST.key, "parquet,json") + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaImplicitsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaImplicitsSuite.scala new file mode 100644 index 00000000000..c9df8c9fd37 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaImplicitsSuite.scala @@ -0,0 +1,59 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.actions.AddFile + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.test.SharedSparkSession + +class DeltaImplicitsSuite extends SparkFunSuite with SharedSparkSession { + + private def testImplict(name: String, func: => Unit): Unit = { + test(name) { + func + } + } + + import org.apache.spark.sql.delta.implicits._ + + testImplict("int", intEncoder) + testImplict("long", longEncoder) + testImplict("string", stringEncoder) + testImplict("longLong", longLongEncoder) + testImplict("stringLong", stringLongEncoder) + testImplict("stringString", stringStringEncoder) + testImplict("javaLong", javaLongEncoder) + testImplict("singleAction", singleActionEncoder) + testImplict("addFile", addFileEncoder) + testImplict("removeFile", removeFileEncoder) + testImplict("serializableFileStatus", serializableFileStatusEncoder) + testImplict("indexedFile", indexedFileEncoder) + testImplict("addFileWithIndex", addFileWithIndexEncoder) + testImplict("addFileWithSourcePath", addFileWithSourcePathEncoder) + testImplict("deltaHistoryEncoder", deltaHistoryEncoder) + testImplict("historyCommitEncoder", historyCommitEncoder) + testImplict("snapshotStateEncoder", snapshotStateEncoder) + + testImplict("RichAddFileSeq: toDF", Seq(AddFile("foo", Map.empty, 0, 0, true)).toDF(spark)) + testImplict("RichAddFileSeq: toDS", Seq(AddFile("foo", Map.empty, 0, 0, true)).toDS(spark)) + testImplict("RichStringSeq: toDF", Seq("foo").toDF(spark)) + testImplict("RichStringSeq: toDF(col)", Seq("foo").toDF(spark, "str")) + testImplict("RichIntSeq: toDF", Seq(1).toDF(spark)) + testImplict("RichIntSeq: toDF(col)", Seq(1).toDF(spark, "int")) +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala new file mode 100644 index 00000000000..b51458b99a6 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala @@ -0,0 +1,1575 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.File + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.schema.InvariantViolationException +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.{DeltaColumnMappingSelectedTestMixin, DeltaSQLCommandTest} +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.scalatest.BeforeAndAfter + +import org.apache.spark.{SparkConf, SparkContext, SparkException, SparkThrowable} +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row, SaveMode} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.functions.{lit, struct} +import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} +import org.apache.spark.sql.internal.SQLConf.{LEAF_NODE_DEFAULT_PARALLELISM, PARTITION_OVERWRITE_MODE, PartitionOverwriteMode} +import org.apache.spark.sql.test.{SharedSparkSession, TestSparkSession} +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + +class DeltaInsertIntoSQLSuite + extends DeltaInsertIntoTestsWithTempViews( + supportsDynamicOverwrite = true, + includeSQLOnlyTests = true) + with DeltaSQLCommandTest { + + import testImplicits._ + + override protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode): Unit = { + val tmpView = "tmp_view" + withTempView(tmpView) { + insert.createOrReplaceTempView(tmpView) + val overwrite = if (mode == SaveMode.Overwrite) "OVERWRITE" else "INTO" + sql(s"INSERT $overwrite TABLE $tableName SELECT * FROM $tmpView") + } + } + + test("insert overwrite should work with selecting constants") { + withTable("t1") { + sql("CREATE TABLE t1 (a int, b int, c int) USING delta PARTITIONED BY (b, c)") + sql("INSERT OVERWRITE TABLE t1 PARTITION (c=3) SELECT 1, 2") + checkAnswer( + sql("SELECT * FROM t1"), + Row(1, 2, 3) :: Nil + ) + sql("INSERT OVERWRITE TABLE t1 PARTITION (b=2, c=3) SELECT 1") + checkAnswer( + sql("SELECT * FROM t1"), + Row(1, 2, 3) :: Nil + ) + sql("INSERT OVERWRITE TABLE t1 PARTITION (b=2, c) SELECT 1, 3") + checkAnswer( + sql("SELECT * FROM t1"), + Row(1, 2, 3) :: Nil + ) + } + } + + test("insertInto: append by name") { + import testImplicits._ + val t1 = "tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + sql(s"INSERT INTO $t1(id, data) VALUES(1L, 'a')") + // Can be in a different order + sql(s"INSERT INTO $t1(data, id) VALUES('b', 2L)") + // Can be casted automatically + sql(s"INSERT INTO $t1(data, id) VALUES('c', 3)") + verifyTable(t1, df) + withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> "false") { + // Missing columns + assert(intercept[AnalysisException] { + sql(s"INSERT INTO $t1(data) VALUES(4)") + }.getMessage.contains("Column id is not specified in INSERT")) + // Missing columns with matching dataType + assert(intercept[AnalysisException] { + sql(s"INSERT INTO $t1(data) VALUES('b')") + }.getMessage.contains("Column id is not specified in INSERT")) + } + // Duplicate columns + assert(intercept[AnalysisException]( + sql(s"INSERT INTO $t1(data, data) VALUES(5)")).getMessage.nonEmpty) + } + } + + test("insertInto: overwrite by name") { + import testImplicits._ + val t1 = "tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data 
string) USING $v2Format") + sql(s"INSERT OVERWRITE $t1(id, data) VALUES(1L, 'a')") + verifyTable(t1, Seq((1L, "a")).toDF("id", "data")) + // Can be in a different order + sql(s"INSERT OVERWRITE $t1(data, id) VALUES('b', 2L)") + verifyTable(t1, Seq((2L, "b")).toDF("id", "data")) + // Can be casted automatically + sql(s"INSERT OVERWRITE $t1(data, id) VALUES('c', 3)") + verifyTable(t1, Seq((3L, "c")).toDF("id", "data")) + withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> "false") { + // Missing columns + assert(intercept[AnalysisException] { + sql(s"INSERT OVERWRITE $t1(data) VALUES(4)") + }.getMessage.contains("Column id is not specified in INSERT")) + // Missing columns with matching datatype + assert(intercept[AnalysisException] { + sql(s"INSERT OVERWRITE $t1(data) VALUES(4L)") + }.getMessage.contains("Column id is not specified in INSERT")) + } + // Duplicate columns + assert(intercept[AnalysisException]( + sql(s"INSERT OVERWRITE $t1(data, data) VALUES(5)")).getMessage.nonEmpty) + } + } + + dynamicOverwriteTest("insertInto: dynamic overwrite by name") { + import testImplicits._ + val t1 = "tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string, data2 string) " + + s"USING $v2Format PARTITIONED BY (id)") + sql(s"INSERT OVERWRITE $t1(id, data, data2) VALUES(1L, 'a', 'b')") + verifyTable(t1, Seq((1L, "a", "b")).toDF("id", "data", "data2")) + // Can be in a different order + sql(s"INSERT OVERWRITE $t1(data, data2, id) VALUES('b', 'd', 2L)") + verifyTable(t1, Seq((1L, "a", "b"), (2L, "b", "d")).toDF("id", "data", "data2")) + // Can be casted automatically + sql(s"INSERT OVERWRITE $t1(data, data2, id) VALUES('c', 'e', 1)") + verifyTable(t1, Seq((1L, "c", "e"), (2L, "b", "d")).toDF("id", "data", "data2")) + withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> "false") { + // Missing columns + assert(intercept[AnalysisException] { + sql(s"INSERT OVERWRITE $t1(data, id) VALUES('c', 1)") + }.getMessage.contains("Column data2 is not specified in INSERT")) + // Missing columns with matching datatype + assert(intercept[AnalysisException] { + sql(s"INSERT OVERWRITE $t1(data, id) VALUES('c', 1L)") + }.getMessage.contains("Column data2 is not specified in INSERT")) + } + // Duplicate columns + assert(intercept[AnalysisException]( + sql(s"INSERT OVERWRITE $t1(data, data) VALUES(5)")).getMessage.nonEmpty) + } + } + + test("insertInto: static partition column name should not be used in the column list") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c string) USING $v2Format PARTITIONED BY (c)") + checkError( + exception = intercept[AnalysisException] { + sql("INSERT OVERWRITE t PARTITION (c='1') (c) VALUES ('2')") + }, + errorClass = "STATIC_PARTITION_COLUMN_IN_INSERT_COLUMN_LIST", + parameters = Map("staticName" -> "c")) + } + } + + + Seq(("ordinal", ""), ("name", "(id, col2, col)")).foreach { case (testName, values) => + test(s"INSERT OVERWRITE schema evolution works for array struct types - $testName") { + val sourceSchema = "id INT, col2 STRING, col ARRAY>" + val sourceRecord = "1, '2022-11-01', array(struct('s1', 's2', DATE'2022-11-01'))" + val targetSchema = "id INT, col2 DATE, col ARRAY>" + val targetRecord = "1, DATE'2022-11-02', array(struct('t1', 't2'))" + + runInsertOverwrite(sourceSchema, sourceRecord, targetSchema, targetRecord) { + (sourceTable, targetTable) => + sql(s"INSERT OVERWRITE $targetTable $values SELECT * FROM $sourceTable") + + // make sure table is still writeable + sql(s"""INSERT INTO $targetTable VALUES (2, 
DATE'2022-11-02', + | array(struct('s3', 's4', DATE'2022-11-02')))""".stripMargin) + sql(s"""INSERT INTO $targetTable VALUES (3, DATE'2022-11-03', + |array(struct('s5', 's6', NULL)))""".stripMargin) + val df = spark.sql( + """SELECT 1 as id, DATE'2022-11-01' as col2, + | array(struct('s1', 's2', DATE'2022-11-01')) as col UNION + | SELECT 2 as id, DATE'2022-11-02' as col2, + | array(struct('s3', 's4', DATE'2022-11-02')) as col UNION + | SELECT 3 as id, DATE'2022-11-03' as col2, + | array(struct('s5', 's6', NULL)) as col""".stripMargin) + verifyTable(targetTable, df) + } + } + } + + Seq(("ordinal", ""), ("name", "(id, col2, col)")).foreach { case (testName, values) => + test(s"INSERT OVERWRITE schema evolution works for array nested types - $testName") { + val sourceSchema = "id INT, col2 STRING, " + + "col ARRAY, f3: STRUCT>>" + val sourceRecord = "1, '2022-11-01', " + + "array(struct(1, struct('s1', DATE'2022-11-01'), struct('s1')))" + val targetSchema = "id INT, col2 DATE, col ARRAY>>" + val targetRecord = "2, DATE'2022-11-02', array(struct(2, struct('s2')))" + + runInsertOverwrite(sourceSchema, sourceRecord, targetSchema, targetRecord) { + (sourceTable, targetTable) => + sql(s"INSERT OVERWRITE $targetTable $values SELECT * FROM $sourceTable") + + // make sure table is still writeable + sql(s"""INSERT INTO $targetTable VALUES (2, DATE'2022-11-02', + | array(struct(2, struct('s2', DATE'2022-11-02'), struct('s2'))))""".stripMargin) + sql(s"""INSERT INTO $targetTable VALUES (3, DATE'2022-11-03', + | array(struct(3, struct('s3', NULL), struct(NULL))))""".stripMargin) + val df = spark.sql( + """SELECT 1 as id, DATE'2022-11-01' as col2, + | array(struct(1, struct('s1', DATE'2022-11-01'), struct('s1'))) as col UNION + | SELECT 2 as id, DATE'2022-11-02' as col2, + | array(struct(2, struct('s2', DATE'2022-11-02'), struct('s2'))) as col UNION + | SELECT 3 as id, DATE'2022-11-03' as col2, + | array(struct(3, struct('s3', NULL), struct(NULL))) as col + |""".stripMargin) + verifyTable(targetTable, df) + } + } + } + + def runInsertOverwrite( + sourceSchema: String, + sourceRecord: String, + targetSchema: String, + targetRecord: String)( + runAndVerify: (String, String) => Unit): Unit = { + val sourceTable = "source" + val targetTable = "target" + withTable(sourceTable) { + withTable(targetTable) { + withSQLConf("spark.databricks.delta.schema.autoMerge.enabled" -> "true") { + // prepare source table + sql(s"""CREATE TABLE $sourceTable ($sourceSchema) + | USING DELTA""".stripMargin) + sql(s"INSERT INTO $sourceTable VALUES ($sourceRecord)") + // prepare target table + sql(s"""CREATE TABLE $targetTable ($targetSchema) + | USING DELTA""".stripMargin) + sql(s"INSERT INTO $targetTable VALUES ($targetRecord)") + runAndVerify(sourceTable, targetTable) + } + } + } + } +} + +class DeltaInsertIntoSQLByPathSuite + extends DeltaInsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = true) + with DeltaSQLCommandTest { + override protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode): Unit = { + val tmpView = "tmp_view" + withTempView(tmpView) { + insert.createOrReplaceTempView(tmpView) + val overwrite = if (mode == SaveMode.Overwrite) "OVERWRITE" else "INTO" + val ident = spark.sessionState.sqlParser.parseTableIdentifier(tableName) + val catalogTable = spark.sessionState.catalog.getTableMetadata(ident) + sql(s"INSERT $overwrite TABLE delta.`${catalogTable.location}` SELECT * FROM $tmpView") + } + } + + testQuietly("insertInto: cannot insert into a table that doesn't exist") { + 
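+ // (Two shapes of "missing table" are exercised below: no _delta_log directory at all, and
+ // an empty _delta_log directory created part-way through; both should be rejected.)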
import testImplicits._ + Seq(SaveMode.Append, SaveMode.Overwrite).foreach { mode => + withTempDir { dir => + val t1 = s"delta.`${dir.getCanonicalPath}`" + val tmpView = "tmp_view" + withTempView(tmpView) { + val overwrite = if (mode == SaveMode.Overwrite) "OVERWRITE" else "INTO" + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + df.createOrReplaceTempView(tmpView) + + intercept[AnalysisException] { + sql(s"INSERT $overwrite TABLE $t1 SELECT * FROM $tmpView") + } + + assert(new File(dir, "_delta_log").mkdirs(), "Failed to create a _delta_log directory") + intercept[AnalysisException] { + sql(s"INSERT $overwrite TABLE $t1 SELECT * FROM $tmpView") + } + } + } + } + } +} + +class DeltaInsertIntoDataFrameSuite + extends DeltaInsertIntoTestsWithTempViews( + supportsDynamicOverwrite = true, + includeSQLOnlyTests = false) + with DeltaSQLCommandTest { + override protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode): Unit = { + val dfw = insert.write.format(v2Format) + if (mode != null) { + dfw.mode(mode) + } + dfw.insertInto(tableName) + } +} + +class DeltaInsertIntoDataFrameByPathSuite + extends DeltaInsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = false) + with DeltaSQLCommandTest { + override protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode): Unit = { + val dfw = insert.write.format(v2Format) + if (mode != null) { + dfw.mode(mode) + } + val ident = spark.sessionState.sqlParser.parseTableIdentifier(tableName) + val catalogTable = spark.sessionState.catalog.getTableMetadata(ident) + dfw.insertInto(s"delta.`${catalogTable.location}`") + } + + testQuietly("insertInto: cannot insert into a table that doesn't exist") { + import testImplicits._ + Seq(SaveMode.Append, SaveMode.Overwrite).foreach { mode => + withTempDir { dir => + val t1 = s"delta.`${dir.getCanonicalPath}`" + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + + intercept[AnalysisException] { + df.write.mode(mode).insertInto(t1) + } + + assert(new File(dir, "_delta_log").mkdirs(), "Failed to create a _delta_log directory") + intercept[AnalysisException] { + df.write.mode(mode).insertInto(t1) + } + + // Test DataFrameWriterV2 as well + val dfW2 = df.writeTo(t1) + if (mode == SaveMode.Append) { + intercept[AnalysisException] { + dfW2.append() + } + } else { + intercept[AnalysisException] { + dfW2.overwrite(lit(true)) + } + } + } + } + } +} + + +trait DeltaInsertIntoColumnMappingSelectedTests extends DeltaColumnMappingSelectedTestMixin { + override protected def runOnlyTests = Seq( + "InsertInto: overwrite - mixed clause reordered - static mode", + "InsertInto: overwrite - multiple static partitions - dynamic mode" + ) +} + +class DeltaInsertIntoSQLNameColumnMappingSuite extends DeltaInsertIntoSQLSuite + with DeltaColumnMappingEnableNameMode + with DeltaInsertIntoColumnMappingSelectedTests { + override protected def runOnlyTests: Seq[String] = super.runOnlyTests :+ + "insert overwrite should work with selecting constants" +} + +class DeltaInsertIntoSQLByPathNameColumnMappingSuite extends DeltaInsertIntoSQLByPathSuite + with DeltaColumnMappingEnableNameMode + with DeltaInsertIntoColumnMappingSelectedTests + +class DeltaInsertIntoDataFrameNameColumnMappingSuite extends DeltaInsertIntoDataFrameSuite + with DeltaColumnMappingEnableNameMode + with DeltaInsertIntoColumnMappingSelectedTests + +class DeltaInsertIntoDataFrameByPathNameColumnMappingSuite + extends DeltaInsertIntoDataFrameByPathSuite + with DeltaColumnMappingEnableNameMode + with 
DeltaInsertIntoColumnMappingSelectedTests + +abstract class DeltaInsertIntoTestsWithTempViews( + supportsDynamicOverwrite: Boolean, + includeSQLOnlyTests: Boolean) + extends DeltaInsertIntoTests(supportsDynamicOverwrite, includeSQLOnlyTests) + with DeltaTestUtilsForTempViews { + protected def testComplexTempViews(name: String)(text: String, expectedResult: Seq[Row]): Unit = { + testWithTempView(s"insertInto a temp view created on top of a table - $name") { isSQLTempView => + import testImplicits._ + val t1 = "tbl" + sql(s"CREATE TABLE $t1 (key int, value int) USING $v2Format") + Seq(SaveMode.Append, SaveMode.Overwrite).foreach { mode => + createTempViewFromSelect(text, isSQLTempView) + val df = Seq((0, 3), (1, 2)).toDF("key", "value") + try { + doInsert("v", df, mode) + checkAnswer(spark.table("v"), expectedResult) + } catch { + case e: AnalysisException => + assert(e.getMessage.contains("Inserting into a view is not allowed") || + e.getMessage.contains("Inserting into an RDD-based table is not allowed") || + e.getMessage.contains("Table default.v not found") || + e.getMessage.contains("Table or view 'v' not found in database 'default'") || + e.getMessage.contains("The table or view `default`.`v` cannot be found") || + e.getMessage.contains("[UNSUPPORTED_INSERT.RDD_BASED] Can't insert into the target.")) + } + } + } + } + + testComplexTempViews("basic") ( + "SELECT * FROM tbl", + Seq(Row(0, 3), Row(1, 2)) + ) + + testComplexTempViews("subset cols")( + "SELECT key FROM tbl", + Seq(Row(0), Row(1)) + ) + + testComplexTempViews("superset cols")( + "SELECT key, value, 1 FROM tbl", + Seq(Row(0, 3, 1), Row(1, 2, 1)) + ) + + testComplexTempViews("nontrivial projection")( + "SELECT value as key, key as value FROM tbl", + Seq(Row(3, 0), Row(2, 1)) + ) + + testComplexTempViews("view with too many internal aliases")( + "SELECT * FROM (SELECT * FROM tbl AS t1) AS t2", + Seq(Row(0, 3), Row(1, 2)) + ) + +} + +class DeltaColumnDefaultsInsertSuite extends InsertIntoSQLOnlyTests with DeltaSQLCommandTest { + + import testImplicits._ + + override val supportsDynamicOverwrite = true + override val includeSQLOnlyTests = true + + val tblPropertiesAllowDefaults = + """tblproperties ( + | 'delta.feature.allowColumnDefaults' = 'enabled', + | 'delta.columnMapping.mode' = 'name' + |)""".stripMargin + + test("Column DEFAULT value support with Delta Lake, positive tests") { + Seq( + PartitionOverwriteMode.STATIC.toString, + PartitionOverwriteMode.DYNAMIC.toString + ).foreach { partitionOverwriteMode => + withSQLConf( + SQLConf.ENABLE_DEFAULT_COLUMNS.key -> "true", + SQLConf.PARTITION_OVERWRITE_MODE.key -> partitionOverwriteMode, + // Set these configs to allow writing test values like timestamps of Jan. 1, year 1, etc. + SQLConf.PARQUET_REBASE_MODE_IN_WRITE.key -> LegacyBehaviorPolicy.LEGACY.toString, + SQLConf.PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> LegacyBehaviorPolicy.LEGACY.toString) { + withTable("t1", "t2", "t3", "t4") { + // Positive tests: + // Create some columns with default values and then insert into them. 
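+ // A DEFAULT reference in a VALUES list resolves against the column default that is current at
+ // the time of the INSERT, so the rows below pick up 'abc', then 'def' after the
+ // ALTER ... SET DEFAULT, and finally NULL once the default has been dropped (see checkAnswer).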
+ sql("create table t1(" + + s"a int default 42, b boolean default true, c string default 'abc') using $v2Format " + + s"partitioned by (a) $tblPropertiesAllowDefaults") + sql("insert into t1 values (1, false, default)") + sql("insert into t1 values (1, default, default)") + sql("alter table t1 alter column c set default 'def'") + sql("insert into t1 values (default, default, default)") + sql("alter table t1 alter column c drop default") + // Exercise INSERT INTO commands with VALUES lists mapping columns positionally. + sql("insert into t1 values (default, default, default)") + // Write the data in the table 't1' to new table 't4' and then perform an INSERT OVERWRITE + // back to 't1' here, to exercise static and dynamic partition overwrites. + sql(f"create table t4(a int, b boolean, c string) using $v2Format " + + s"partitioned by (a) $tblPropertiesAllowDefaults") + // Exercise INSERT INTO commands with SELECT queries mapping columns by name. + sql("insert into t4(a, b, c) select a, b, c from t1") + sql("insert overwrite table t1 select * from t4") + checkAnswer(spark.table("t1"), Seq( + Row(1, false, "abc"), + Row(1, true, "abc"), + Row(42, true, "def"), + Row(42, true, null) + )) + // Insert default values with all supported types. + sql("create table t2(" + + "s boolean default true, " + + "t byte default cast(null as byte), " + + "u short default cast(42 as short), " + + "v float default 0, " + + "w double default 0, " + + "x date default date'0000', " + + "y timestamp default timestamp'0000', " + + "z decimal(5, 2) default 123.45," + + "a1 bigint default 43," + + "a2 smallint default cast(5 as smallint)," + + s"a3 tinyint default cast(6 as tinyint)) using $v2Format " + + tblPropertiesAllowDefaults) + sql("insert into t2 values (default, default, default, default, default, default, " + + "default, default, default, default, default)") + val result: Array[Row] = spark.table("t2").collect() + assert(result.length == 1) + val row: Row = result(0) + assert(row.length == 11) + assert(row(0) == true) + assert(row(1) == null) + assert(row(2) == 42) + assert(row(3) == 0.0f) + assert(row(4) == 0.0d) + assert(row(5).toString == "0001-01-01") + assert(row(6).toString == "0001-01-01 00:00:00.0") + assert(row(7).toString == "123.45") + assert(row(8) == 43L) + assert(row(9) == 5) + assert(row(10) == 6) + } + withTable("t3") { + // Set a default value for a partitioning column. + sql(s"create table t3(i boolean, s bigint, q int default 42) using $v2Format " + + s"partitioned by (i) $tblPropertiesAllowDefaults") + sql("alter table t3 alter column i set default true") + sql("insert into t3(i, s, q) values (default, default, default)") + checkAnswer(spark.table("t3"), Seq( + Row(true, null, 42))) + // Drop the column and add it again without the default. Querying the column now returns + // NULL. + sql("alter table t3 drop column q") + sql("alter table t3 add column q int") + checkAnswer(spark.table("t3"), Seq( + Row(true, null, null))) + } + } + } + } + + test("Column DEFAULT value support with Delta Lake, negative tests") { + withSQLConf(SQLConf.ENABLE_DEFAULT_COLUMNS.key -> "true") { + // The table feature is not enabled via TBLPROPERTIES. 
+ withTable("createTableWithDefaultFeatureNotEnabled") { + checkError( + exception = intercept[DeltaAnalysisException] { + sql(s"create table createTableWithDefaultFeatureNotEnabled(" + + s"i boolean, s bigint, q int default 42) using $v2Format " + + "partitioned by (i)") + }, + errorClass = "WRONG_COLUMN_DEFAULTS_FOR_DELTA_FEATURE_NOT_ENABLED", + parameters = Map("commandType" -> "CREATE TABLE") + ) + } + withTable("alterTableSetDefaultFeatureNotEnabled") { + sql(s"create table alterTableSetDefaultFeatureNotEnabled(a int) using $v2Format") + checkError( + exception = intercept[DeltaAnalysisException] { + sql("alter table alterTableSetDefaultFeatureNotEnabled alter column a set default 42") + }, + errorClass = "WRONG_COLUMN_DEFAULTS_FOR_DELTA_FEATURE_NOT_ENABLED", + parameters = Map("commandType" -> "ALTER TABLE") + ) + } + // Adding a new column with a default value to an existing table is not allowed. + withTable("alterTableTest") { + sql(s"create table alterTableTest(i boolean, s bigint, q int default 42) using $v2Format " + + s"partitioned by (i) $tblPropertiesAllowDefaults") + checkError( + exception = intercept[DeltaAnalysisException] { + sql("alter table alterTableTest add column z int default 42") + }, + errorClass = "WRONG_COLUMN_DEFAULTS_FOR_DELTA_ALTER_TABLE_ADD_COLUMN_NOT_SUPPORTED" + ) + } + // The default value fails to analyze. + checkError( + exception = intercept[AnalysisException] { + sql(s"create table t4 (s int default badvalue) using $v2Format " + + s"$tblPropertiesAllowDefaults") + }, + errorClass = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION", + parameters = Map( + "statement" -> "CREATE TABLE", + "colName" -> "`s`", + "defaultValue" -> "badvalue")) + + // The default value analyzes to a table not in the catalog. + // The error message reports that we failed to execute the command because subquery + // expressions are not allowed in DEFAULT values. + checkError( + exception = intercept[AnalysisException] { + sql(s"create table t4 (s int default (select min(x) from badtable)) using $v2Format " + + tblPropertiesAllowDefaults) + }, + errorClass = "INVALID_DEFAULT_VALUE.SUBQUERY_EXPRESSION", + parameters = Map( + "statement" -> "CREATE TABLE", + "colName" -> "`s`", + "defaultValue" -> "(select min(x) from badtable)")) + // The default value has an explicit alias. It fails to evaluate when inlined into the + // VALUES list at the INSERT INTO time. + // The error message reports that we failed to execute the command because subquery + // expressions are not allowed in DEFAULT values. + checkError( + exception = intercept[AnalysisException] { + sql(s"create table t4 (s int default (select 42 as alias)) using $v2Format " + + tblPropertiesAllowDefaults) + }, + errorClass = "INVALID_DEFAULT_VALUE.SUBQUERY_EXPRESSION", + parameters = Map( + "statement" -> "CREATE TABLE", + "colName" -> "`s`", + "defaultValue" -> "(select 42 as alias)")) + // The default value parses but the type is not coercible. + checkError( + exception = intercept[AnalysisException] { + sql(s"create table t4 (s bigint default false) " + + s"using $v2Format $tblPropertiesAllowDefaults") + }, + errorClass = "INVALID_DEFAULT_VALUE.DATA_TYPE", + parameters = Map( + "statement" -> "CREATE TABLE", + "colName" -> "`s`", + "expectedType" -> "\"BIGINT\"", + "actualType" -> "\"BOOLEAN\"", + "defaultValue" -> "false")) + // It is possible to create a table with NOT NULL constraint and a DEFAULT value of NULL. + // However, future inserts into that table will fail. 
+ withTable("t4") { + sql(s"create table t4(i boolean, s bigint, q int default null not null) using $v2Format " + + s"partitioned by (i) $tblPropertiesAllowDefaults") + // The InvariantViolationException is not a SparkThrowable, so just check we receive one. + assert(intercept[InvariantViolationException] { + sql("insert into t4 values (default, default, default)") + }.getMessage.nonEmpty) + } + // It is possible to create a table with a check constraint and a DEFAULT value that does not + // conform. However, future inserts into that table will fail. + withTable("t4") { + sql(s"create table t4(i boolean, s bigint, q int default 42) using $v2Format " + + s"partitioned by (i) $tblPropertiesAllowDefaults") + sql("alter table t4 add constraint smallq check (q < 10)") + assert(intercept[InvariantViolationException] { + sql("insert into t4 values (default, default, default)") + }.getMessage.nonEmpty) + } + } + // Column default values are disabled per configuration in general. + withSQLConf(SQLConf.ENABLE_DEFAULT_COLUMNS.key -> "false") { + checkError( + exception = intercept[ParseException] { + sql(s"create table t4 (s int default 41 + 1) using $v2Format " + + tblPropertiesAllowDefaults) + }, + errorClass = "UNSUPPORTED_DEFAULT_VALUE.WITH_SUGGESTION", + parameters = Map.empty, + context = ExpectedContext(fragment = "s int default 41 + 1", start = 17, stop = 36)) + } + } + + test("Exercise column defaults with dataframe writes") { + // There are three column types exercising various combinations of implicit and explicit + // default column value references in the 'insert into' statements. Note these tests depend on + // enabling the configuration to use NULLs for missing DEFAULT column values. + withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> "true") { + for (useDataFrames <- Seq(false, true)) { + withTable("t1", "t2") { + sql(s"create table t1(j int, s bigint default 42, x bigint default 43) using $v2Format " + + tblPropertiesAllowDefaults) + if (useDataFrames) { + // Use 'saveAsTable' to exercise mapping columns by name. Note that we have to specify + // values for all columns of the target table here whether we use 'saveAsTable' or + // 'insertInto', since the DataFrame generates a LogicalPlan equivalent to a SQL INSERT + // INTO command without any explicit user-specified column list. For example, if we + // used Seq((1)).toDF("j", "s", "x").write.mode("append") here instead, it would + // generate an unresolved LogicalPlan equivalent to the SQL query + // "INSERT INTO t1 VALUES (1)". This would fail with an error reporting the VALUES + // list is not long enough, since the analyzer would consider this equivalent to + // "INSERT INTO t1 (j, s, x) VALUES (1)". 
+ Seq((1, 42L, 43L)).toDF("j", "s", "x").write.mode("append") + .format("delta").saveAsTable("t1") + Seq((2, 42L, 43L)).toDF("j", "s", "x").write.mode("append") + .format("delta").saveAsTable("t1") + Seq((3, 42L, 43L)).toDF("j", "s", "x").write.mode("append") + .format("delta").saveAsTable("t1") + Seq((4, 44L, 43L)).toDF("j", "s", "x").write.mode("append") + .format("delta").saveAsTable("t1") + Seq((5, 44L, 45L)).toDF("j", "s", "x") + .write.mode("append").format("delta").saveAsTable("t1") + } else { + sql("insert into t1(j) values(1)") + sql("insert into t1(j, s) values(2, default)") + sql("insert into t1(j, s, x) values(3, default, default)") + sql("insert into t1(j, s) values(4, 44)") + sql("insert into t1(j, s, x) values(5, 44, 45)") + } + sql(s"create table t2(j int, s bigint default 42, x bigint default 43) using $v2Format " + + tblPropertiesAllowDefaults) + if (useDataFrames) { + // Use 'insertInto' to exercise mapping columns positionally. + spark.table("t1").where("j = 1").write.insertInto("t2") + spark.table("t1").where("j = 2").write.insertInto("t2") + spark.table("t1").where("j = 3").write.insertInto("t2") + spark.table("t1").where("j = 4").write.insertInto("t2") + spark.table("t1").where("j = 5").write.insertInto("t2") + } else { + sql("insert into t2(j) select j from t1 where j = 1") + sql("insert into t2(j, s) select j, default from t1 where j = 2") + sql("insert into t2(j, s, x) select j, default, default from t1 where j = 3") + sql("insert into t2(j, s) select j, s from t1 where j = 4") + sql("insert into t2(j, s, x) select j, s, 45L from t1 where j = 5") + } + checkAnswer( + spark.table("t2"), + Row(1, 42L, 43L) :: + Row(2, 42L, 43L) :: + Row(3, 42L, 43L) :: + Row(4, 44L, 43L) :: + Row(5, 44L, 45L) :: Nil) + // Also exercise schema evolution with DataFrames. 
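+ // The extra column "y" appended below via the mergeSchema option has no declared default, so
+ // the previously written rows are expected to read back with NULL for it.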
+ if (useDataFrames) { + Seq((5, 44L, 45L, 46L)).toDF("j", "s", "x", "y") + .write.mode("append").format("delta").option("mergeSchema", "true") + .saveAsTable("t2") + checkAnswer( + spark.table("t2"), + Row(1, 42L, 43L, null) :: + Row(2, 42L, 43L, null) :: + Row(3, 42L, 43L, null) :: + Row(4, 44L, 43L, null) :: + Row(5, 44L, 45L, null) :: + Row(5, 44L, 45L, 46L) :: Nil) + } + } + } + } + } + + test("ReplaceWhere with column defaults with dataframe writes") { + withTable("t1", "t2", "t3") { + sql(s"create table t1(j int, s bigint default 42, x bigint default 43) using $v2Format " + + tblPropertiesAllowDefaults) + Seq((1, 42L, 43L)).toDF.write.insertInto("t1") + Seq((2, 42L, 43L)).toDF.write.insertInto("t1") + Seq((3, 42L, 43L)).toDF.write.insertInto("t1") + Seq((4, 44L, 43L)).toDF.write.insertInto("t1") + Seq((5, 44L, 45L)).toDF.write.insertInto("t1") + spark.table("t1") + .write.format("delta") + .mode("overwrite") + .option("replaceWhere", "j = default and s = default and x = default") + .saveAsTable("t2") + Seq("t1", "t2").foreach { t => + checkAnswer( + spark.table(t), + Row(1, 42L, 43L) :: + Row(2, 42L, 43L) :: + Row(3, 42L, 43L) :: + Row(4, 44L, 43L) :: + Row(5, 44L, 45L) :: Nil) + } + } + } + + test("DESCRIBE and SHOW CREATE TABLE with column defaults") { + withTable("t") { + spark.sql(s"CREATE TABLE t (id bigint default 42) " + + s"using $v2Format $tblPropertiesAllowDefaults") + val descriptionDf = spark.sql(s"DESCRIBE TABLE EXTENDED t") + assert(descriptionDf.schema.map { field => + (field.name, field.dataType) + } === Seq( + ("col_name", StringType), + ("data_type", StringType), + ("comment", StringType))) + QueryTest.checkAnswer( + descriptionDf.filter( + "!(col_name in ('Catalog', 'Created Time', 'Created By', 'Database', " + + "'index', 'Is_managed_location', 'Location', 'Name', 'Owner', 'Partition Provider'," + + "'Provider', 'Table', 'Table Properties', 'Type', '_partition', 'Last Access', " + + "'Statistics', ''))"), + Seq( + Row("# Column Default Values", "", ""), + Row("# Detailed Table Information", "", ""), + Row("id", "bigint", "42"), + Row("id", "bigint", null) + )) + } + withTable("t") { + sql( + s""" + |CREATE TABLE t ( + | a bigint NOT NULL, + | b bigint DEFAULT 42, + | c string DEFAULT 'abc, "def"' COMMENT 'comment' + |) + |USING parquet + |COMMENT 'This is a comment' + |$tblPropertiesAllowDefaults + """.stripMargin) + val currentCatalog = spark.sessionState.catalogManager.currentCatalog.name() + QueryTest.checkAnswer(sql("SHOW CREATE TABLE T"), + Seq( + Row( + s"""CREATE TABLE ${currentCatalog}.default.T ( + | a BIGINT, + | b BIGINT DEFAULT 42, + | c STRING DEFAULT 'abc, "def"' COMMENT 'comment') + |USING parquet + |COMMENT 'This is a comment' + |TBLPROPERTIES ( + | 'delta.columnMapping.mode' = 'name', + | 'delta.feature.allowColumnDefaults' = 'enabled') + |""".stripMargin))) + } + } +} + +/** These tests come from Apache Spark with some modifications to match Delta behavior. */ +abstract class DeltaInsertIntoTests( + override protected val supportsDynamicOverwrite: Boolean, + override protected val includeSQLOnlyTests: Boolean) + extends InsertIntoSQLOnlyTests { + + import testImplicits._ + + override def afterEach(): Unit = { + spark.catalog.listTables().collect().foreach(t => + sql(s"drop table ${t.name}")) + super.afterEach() + } + + // START Apache Spark tests + + /** + * Insert data into a table using the insertInto statement. Implementations can be in SQL + * ("INSERT") or using the DataFrameWriter (`df.write.insertInto`). 
Insertions will be + * by column ordinal and not by column name. + */ + protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode = null): Unit + + test("insertInto: append") { + val t1 = "tbl" + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + doInsert(t1, df) + verifyTable(t1, df) + } + + test("insertInto: append by position") { + val t1 = "tbl" + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + val dfr = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("data", "id") + + doInsert(t1, dfr) + verifyTable(t1, df) + } + + test("insertInto: append cast automatically") { + val t1 = "tbl" + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") + val df = Seq((1, "a"), (2, "b"), (3, "c")).toDF("id", "data") + doInsert(t1, df) + verifyTable(t1, df) + } + + + test("insertInto: append partitioned table") { + val t1 = "tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + doInsert(t1, df) + verifyTable(t1, df) + } + } + + test("insertInto: overwrite non-partitioned table") { + val t1 = "tbl" + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + val df2 = Seq((4L, "d"), (5L, "e"), (6L, "f")).toDF("id", "data") + doInsert(t1, df) + doInsert(t1, df2, SaveMode.Overwrite) + verifyTable(t1, df2) + } + + test("insertInto: overwrite partitioned table in static mode") { + withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { + val t1 = "tbl" + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + val init = Seq((2L, "dummy"), (4L, "keep")).toDF("id", "data") + doInsert(t1, init) + + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + doInsert(t1, df, SaveMode.Overwrite) + verifyTable(t1, df) + } + } + + + test("insertInto: overwrite by position") { + withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { + val t1 = "tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + val init = Seq((2L, "dummy"), (4L, "keep")).toDF("id", "data") + doInsert(t1, init) + + val dfr = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("data", "id") + doInsert(t1, dfr, SaveMode.Overwrite) + + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + verifyTable(t1, df) + } + } + } + + test("insertInto: overwrite cast automatically") { + val t1 = "tbl" + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + val df2 = Seq((4L, "d"), (5L, "e"), (6L, "f")).toDF("id", "data") + val df2c = Seq((4, "d"), (5, "e"), (6, "f")).toDF("id", "data") + doInsert(t1, df) + doInsert(t1, df2c, SaveMode.Overwrite) + verifyTable(t1, df2) + } + + test("insertInto: fails when missing a column") { + val t1 = "tbl" + sql(s"CREATE TABLE $t1 (id bigint, data string, missing string) USING $v2Format") + val df1 = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + // mismatched datatype + val df2 = Seq((1, "a"), (2, "b"), (3, "c")).toDF("id", "data") + for (df <- Seq(df1, df2)) { + val exc = intercept[AnalysisException] { + doInsert(t1, df) + } + verifyTable(t1, Seq.empty[(Long, String, String)].toDF("id", 
"data", "missing")) + assert(exc.getMessage.contains("not enough data columns")) + } + } + + test("insertInto: overwrite fails when missing a column") { + val t1 = "tbl" + sql(s"CREATE TABLE $t1 (id bigint, data string, missing string) USING $v2Format") + val df1 = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + // mismatched datatype + val df2 = Seq((1, "a"), (2, "b"), (3, "c")).toDF("id", "data") + for (df <- Seq(df1, df2)) { + val exc = intercept[AnalysisException] { + doInsert(t1, df, SaveMode.Overwrite) + } + verifyTable(t1, Seq.empty[(Long, String, String)].toDF("id", "data", "missing")) + assert(exc.getMessage.contains("not enough data columns")) + } + } + + // This behavior is specific to Delta + test("insertInto: fails when an extra column is present but can evolve schema") { + val t1 = "tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") + val df = Seq((1L, "a", "mango")).toDF("id", "data", "fruit") + val exc = intercept[AnalysisException] { + doInsert(t1, df) + } + + verifyTable(t1, Seq.empty[(Long, String)].toDF("id", "data")) + assert(exc.getMessage.contains(s"mergeSchema")) + + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> "true") { + doInsert(t1, df) + } + verifyTable(t1, Seq((1L, "a", "mango")).toDF("id", "data", "fruit")) + } + } + + // This behavior is specific to Delta + testQuietly("insertInto: schema enforcement") { + val t1 = "tbl" + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") + val df = Seq(("a", 1L)).toDF("id", "data") // reverse order + + def getDF(rows: Row*): DataFrame = { + spark.createDataFrame(spark.sparkContext.parallelize(rows), spark.table(t1).schema) + } + + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> "strict") { + intercept[AnalysisException] { + doInsert(t1, df, SaveMode.Overwrite) + } + + verifyTable(t1, Seq.empty[(Long, String)].toDF("id", "data")) + + intercept[AnalysisException] { + doInsert(t1, df) + } + + verifyTable(t1, Seq.empty[(Long, String)].toDF("id", "data")) + } + + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> "ansi") { + intercept[SparkException] { + doInsert(t1, df, SaveMode.Overwrite) + } + + verifyTable(t1, Seq.empty[(Long, String)].toDF("id", "data")) + + intercept[SparkException] { + doInsert(t1, df) + } + + verifyTable(t1, Seq.empty[(Long, String)].toDF("id", "data")) + } + + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> "legacy") { + doInsert(t1, df, SaveMode.Overwrite) + verifyTable( + t1, + getDF(Row(null, "1"))) + + doInsert(t1, df) + + verifyTable( + t1, + getDF(Row(null, "1"), Row(null, "1"))) + } + } + + testQuietly("insertInto: struct types and schema enforcement") { + val t1 = "tbl" + withTable(t1) { + sql( + s"""CREATE TABLE $t1 ( + | id bigint, + | point struct + |) + |USING delta""".stripMargin) + val init = Seq((1L, (0.0, 1.0))).toDF("id", "point") + doInsert(t1, init) + + doInsert(t1, Seq((2L, (1.0, 0.0))).toDF("col1", "col2")) // naming doesn't matter + + // can handle null types + doInsert(t1, Seq((3L, (1.0, null))).toDF("col1", "col2")) + doInsert(t1, Seq((4L, (null, 1.0))).toDF("col1", "col2")) + + val expected = Seq( + Row(1L, Row(0.0, 1.0)), + Row(2L, Row(1.0, 0.0)), + Row(3L, Row(1.0, null)), + Row(4L, Row(null, 1.0))) + verifyTable( + t1, + spark.createDataFrame(expected.asJava, spark.table(t1).schema)) + + // schema enforcement + val complexSchema = Seq((5L, (0.5, 0.5), (2.5, 2.5, 1.0), "a", (0.5, "b"))) + .toDF("long", "struct", "newstruct", "string", "badstruct") + .select( + $"long", + $"struct", + struct( + 
$"newstruct._1".as("x"), + $"newstruct._2".as("y"), + $"newstruct._3".as("z")) as "newstruct", + $"string", + $"badstruct") + + // new column in root + intercept[AnalysisException] { + doInsert(t1, complexSchema.select("long", "struct", "string")) + } + + // new column in struct not accepted + intercept[AnalysisException] { + doInsert(t1, complexSchema.select("long", "newstruct")) + } + + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> "strict") { + // bad data type not accepted + intercept[AnalysisException] { + doInsert(t1, complexSchema.select("string", "struct")) + } + + // nested bad data type in struct not accepted + intercept[AnalysisException] { + doInsert(t1, complexSchema.select("long", "badstruct")) + } + } + + // missing column in struct + intercept[AnalysisException] { + doInsert(t1, complexSchema.select($"long", struct(lit(0.1)))) + } + + // wrong ordering + intercept[AnalysisException] { + doInsert(t1, complexSchema.select("struct", "long")) + } + + // schema evolution + withSQLConf( + DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> "true", + SQLConf.STORE_ASSIGNMENT_POLICY.key -> "strict") { + // ordering should still match + intercept[AnalysisException] { + doInsert(t1, complexSchema.select("struct", "long")) + } + + intercept[AnalysisException] { + doInsert(t1, complexSchema.select("struct", "long", "string")) + } + + // new column to the end works + doInsert(t1, complexSchema.select($"long", $"struct", $"string".as("letter"))) + + // still cannot insert missing column + intercept[AnalysisException] { + doInsert(t1, complexSchema.select("long", "struct")) + } + + intercept[AnalysisException] { + doInsert(t1, complexSchema.select($"long", struct(lit(0.1)), $"string")) + } + + // still perform nested data type checks + intercept[AnalysisException] { + doInsert(t1, complexSchema.select("long", "badstruct", "string")) + } + + // bad column within struct + intercept[AnalysisException] { + doInsert(t1, complexSchema.select( + $"long", struct(lit(0.1), lit("a"), lit(0.2)), $"string")) + } + + // Add column to nested field + doInsert(t1, complexSchema.select($"long", $"newstruct", lit(null))) + + // cannot insert missing field into struct now + intercept[AnalysisException] { + doInsert(t1, complexSchema.select("long", "struct", "string")) + } + } + + val expected2 = Seq( + Row(1L, Row(0.0, 1.0, null), null), + Row(2L, Row(1.0, 0.0, null), null), + Row(3L, Row(1.0, null, null), null), + Row(4L, Row(null, 1.0, null), null), + Row(5L, Row(0.5, 0.5, null), "a"), + Row(5L, Row(2.5, 2.5, 1.0), null)) + verifyTable( + t1, + spark.createDataFrame(expected2.asJava, spark.table(t1).schema)) + + val expectedSchema = new StructType() + .add("id", LongType) + .add("point", new StructType() + .add("x", DoubleType) + .add("y", DoubleType) + .add("z", DoubleType)) + .add("letter", StringType) + val diff = SchemaUtils.reportDifferences(spark.table(t1).schema, expectedSchema) + if (diff.nonEmpty) { + fail(diff.mkString("\n")) + } + } + } + + dynamicOverwriteTest("insertInto: overwrite partitioned table in dynamic mode") { + val t1 = "tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + val init = Seq((2L, "dummy"), (4L, "keep")).toDF("id", "data") + doInsert(t1, init) + + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + doInsert(t1, df, SaveMode.Overwrite) + + verifyTable(t1, df.union(sql("SELECT 4L, 'keep'"))) + } + } + + dynamicOverwriteTest("insertInto: overwrite partitioned table in dynamic mode by position") { + 
val t1 = "tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + val init = Seq((2L, "dummy"), (4L, "keep")).toDF("id", "data") + doInsert(t1, init) + + val dfr = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("data", "id") + doInsert(t1, dfr, SaveMode.Overwrite) + + val df = Seq((1L, "a"), (2L, "b"), (3L, "c"), (4L, "keep")).toDF("id", "data") + verifyTable(t1, df) + } + } + + dynamicOverwriteTest( + "insertInto: overwrite partitioned table in dynamic mode automatic casting") { + val t1 = "tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + val init = Seq((2L, "dummy"), (4L, "keep")).toDF("id", "data") + doInsert(t1, init) + + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + val dfc = Seq((1, "a"), (2, "b"), (3, "c")).toDF("id", "data") + doInsert(t1, df, SaveMode.Overwrite) + + verifyTable(t1, df.union(sql("SELECT 4L, 'keep'"))) + } + } + + dynamicOverwriteTest("insertInto: overwrite fails when missing a column in dynamic mode") { + val t1 = "tbl" + sql(s"CREATE TABLE $t1 (id bigint, data string, missing string) USING $v2Format") + val df1 = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + // mismatched datatype + val df2 = Seq((1, "a"), (2, "b"), (3, "c")).toDF("id", "data") + for (df <- Seq(df1, df2)) { + val exc = intercept[AnalysisException] { + doInsert(t1, df, SaveMode.Overwrite) + } + verifyTable(t1, Seq.empty[(Long, String, String)].toDF("id", "data", "missing")) + assert(exc.getMessage.contains("not enough data columns")) + } + } + + test("insert nested struct from view into delta") { + withTable("testNestedStruct") { + sql(s"CREATE TABLE testNestedStruct " + + s" (num INT, text STRING, s STRUCT, b:STRING>)" + + s" USING DELTA") + val data = sql(s"SELECT 1, 'a', struct('a', struct('c', 'd'), 'b')") + doInsert("testNestedStruct", data) + verifyTable("testNestedStruct", + sql(s"SELECT 1 AS num, 'a' AS text, struct('a', struct('c', 'd') AS s2, 'b') AS s")) + } + } +} + +trait InsertIntoSQLOnlyTests + extends QueryTest + with SharedSparkSession + with BeforeAndAfter { + + import testImplicits._ + + /** Check that the results in `tableName` match the `expected` DataFrame. */ + protected def verifyTable(tableName: String, expected: DataFrame): Unit = { + checkAnswer(spark.table(tableName), expected) + } + + protected val v2Format: String = "delta" + + /** + * Whether dynamic partition overwrites are supported by the `Table` definitions used in the + * test suites. Tables that leverage the V1 Write interface do not support dynamic partition + * overwrites. + */ + protected val supportsDynamicOverwrite: Boolean + + /** Whether to include the SQL specific tests in this trait within the extending test suite. 
*/ + protected val includeSQLOnlyTests: Boolean + + private def withTableAndData(tableName: String)(testFn: String => Unit): Unit = { + withTable(tableName) { + val viewName = "tmp_view" + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView(viewName) + withTempView(viewName) { + testFn(viewName) + } + } + } + + protected def dynamicOverwriteTest(testName: String)(f: => Unit): Unit = { + test(testName) { + try { + withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.DYNAMIC.toString) { + f + } + if (!supportsDynamicOverwrite) { + fail("Expected failure from test, because the table doesn't support dynamic overwrites") + } + } catch { + case a: AnalysisException if !supportsDynamicOverwrite => + assert(a.getMessage.contains("does not support dynamic overwrite")) + } + } + } + + if (includeSQLOnlyTests) { + test("InsertInto: when the table doesn't exist") { + val t1 = "tbl" + val t2 = "tbl2" + withTableAndData(t1) { _ => + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") + val e = intercept[AnalysisException] { + sql(s"INSERT INTO $t2 VALUES (2L, 'dummy')") + } + assert(e.getMessage.contains(t2)) + assert(e.getMessage.contains("Table not found") || + e.getMessage.contains(s"table or view `$t2` cannot be found") + ) + } + } + + test("InsertInto: append to partitioned table - static clause") { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + sql(s"INSERT INTO $t1 PARTITION (id = 23) SELECT data FROM $view") + verifyTable(t1, sql(s"SELECT 23, data FROM $view")) + } + } + + test("InsertInto: static PARTITION clause fails with non-partition column") { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (data)") + + val exc = intercept[AnalysisException] { + sql(s"INSERT INTO TABLE $t1 PARTITION (id=1) SELECT data FROM $view") + } + + verifyTable(t1, spark.emptyDataFrame) + assert( + exc.getMessage.contains("PARTITION clause cannot contain a non-partition column") || + exc.getMessage.contains("PARTITION clause cannot contain the non-partition column") || + exc.getMessage.contains( + "[NON_PARTITION_COLUMN] PARTITION clause cannot contain the non-partition column")) + assert(exc.getMessage.contains("id")) + } + } + + test("InsertInto: dynamic PARTITION clause fails with non-partition column") { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + + val exc = intercept[AnalysisException] { + sql(s"INSERT INTO TABLE $t1 PARTITION (data) SELECT * FROM $view") + } + + verifyTable(t1, spark.emptyDataFrame) + assert( + exc.getMessage.contains("PARTITION clause cannot contain a non-partition column") || + exc.getMessage.contains("PARTITION clause cannot contain the non-partition column") || + exc.getMessage.contains( + "[NON_PARTITION_COLUMN] PARTITION clause cannot contain the non-partition column")) + assert(exc.getMessage.contains("data")) + } + } + + test("InsertInto: overwrite - dynamic clause - static mode") { + withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + sql(s"INSERT INTO $t1 VALUES (2L, 'dummy'), (4L, 'also-deleted')") + sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (id) SELECT 
* FROM $view") + verifyTable(t1, Seq( + (1, "a"), + (2, "b"), + (3, "c")).toDF()) + } + } + } + + dynamicOverwriteTest("InsertInto: overwrite - dynamic clause - dynamic mode") { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + sql(s"INSERT INTO $t1 VALUES (2L, 'dummy'), (4L, 'keep')") + sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (id) SELECT * FROM $view") + verifyTable(t1, Seq( + (1, "a"), + (2, "b"), + (3, "c"), + (4, "keep")).toDF("id", "data")) + } + } + + test("InsertInto: overwrite - missing clause - static mode") { + withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + sql(s"INSERT INTO $t1 VALUES (2L, 'dummy'), (4L, 'also-deleted')") + sql(s"INSERT OVERWRITE TABLE $t1 SELECT * FROM $view") + verifyTable(t1, Seq( + (1, "a"), + (2, "b"), + (3, "c")).toDF("id", "data")) + } + } + } + + dynamicOverwriteTest("InsertInto: overwrite - missing clause - dynamic mode") { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + sql(s"INSERT INTO $t1 VALUES (2L, 'dummy'), (4L, 'keep')") + sql(s"INSERT OVERWRITE TABLE $t1 SELECT * FROM $view") + verifyTable(t1, Seq( + (1, "a"), + (2, "b"), + (3, "c"), + (4, "keep")).toDF("id", "data")) + } + } + + test("InsertInto: overwrite - static clause") { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string, p1 int) " + + s"USING $v2Format PARTITIONED BY (p1)") + sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 23), (4L, 'keep', 2)") + verifyTable(t1, Seq( + (2L, "dummy", 23), + (4L, "keep", 2)).toDF("id", "data", "p1")) + sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (p1 = 23) SELECT * FROM $view") + verifyTable(t1, Seq( + (1, "a", 23), + (2, "b", 23), + (3, "c", 23), + (4, "keep", 2)).toDF("id", "data", "p1")) + } + } + + test("InsertInto: overwrite - mixed clause - static mode") { + withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " + + s"USING $v2Format PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'also-deleted', 2)") + sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (id, p = 2) SELECT * FROM $view") + verifyTable(t1, Seq( + (1, "a", 2), + (2, "b", 2), + (3, "c", 2)).toDF("id", "data", "p")) + } + } + } + + test("InsertInto: overwrite - mixed clause reordered - static mode") { + withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " + + s"USING $v2Format PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'also-deleted', 2)") + sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (p = 2, id) SELECT * FROM $view") + verifyTable(t1, Seq( + (1, "a", 2), + (2, "b", 2), + (3, "c", 2)).toDF("id", "data", "p")) + } + } + } + + test("InsertInto: overwrite - implicit dynamic partition - static mode") { + withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " + + s"USING $v2Format PARTITIONED BY (id, p)") + sql(s"INSERT INTO 
$t1 VALUES (2L, 'dummy', 2), (4L, 'also-deleted', 2)") + sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (p = 2) SELECT * FROM $view") + verifyTable(t1, Seq( + (1, "a", 2), + (2, "b", 2), + (3, "c", 2)).toDF("id", "data", "p")) + } + } + } + + dynamicOverwriteTest("InsertInto: overwrite - mixed clause - dynamic mode") { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " + + s"USING $v2Format PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'keep', 2)") + sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (p = 2, id) SELECT * FROM $view") + verifyTable(t1, Seq( + (1, "a", 2), + (2, "b", 2), + (3, "c", 2), + (4, "keep", 2)).toDF("id", "data", "p")) + } + } + + dynamicOverwriteTest("InsertInto: overwrite - mixed clause reordered - dynamic mode") { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " + + s"USING $v2Format PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'keep', 2)") + sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (id, p = 2) SELECT * FROM $view") + verifyTable(t1, Seq( + (1, "a", 2), + (2, "b", 2), + (3, "c", 2), + (4, "keep", 2)).toDF("id", "data", "p")) + } + } + + dynamicOverwriteTest("InsertInto: overwrite - implicit dynamic partition - dynamic mode") { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " + + s"USING $v2Format PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'keep', 2)") + sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (p = 2) SELECT * FROM $view") + verifyTable(t1, Seq( + (1, "a", 2), + (2, "b", 2), + (3, "c", 2), + (4, "keep", 2)).toDF("id", "data", "p")) + } + } + + test("insert nested struct literal into delta") { + withTable("insertNestedTest") { + sql(s"CREATE TABLE insertNestedTest " + + s" (num INT, text STRING, s STRUCT, b:STRING>)" + + s" USING DELTA") + sql(s"INSERT INTO insertNestedTest VALUES (1, 'a', struct('a', struct('c', 'd'), 'b'))") + } + } + + dynamicOverwriteTest("InsertInto: overwrite - multiple static partitions - dynamic mode") { + val t1 = "tbl" + withTableAndData(t1) { view => + sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " + + s"USING $v2Format PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'keep', 2)") + sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (id = 2, p = 2) SELECT data FROM $view") + verifyTable(t1, Seq( + (2, "a", 2), + (2, "b", 2), + (2, "c", 2), + (4, "keep", 2)).toDF("id", "data", "p")) + } + } + + test("InsertInto: overwrite - dot in column names - static mode") { + import testImplicits._ + val t1 = "tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 (`a.b` string, `c.d` string) USING $v2Format PARTITIONED BY (`a.b`)") + sql(s"INSERT OVERWRITE $t1 PARTITION (`a.b` = 'a') VALUES('b')") + verifyTable(t1, Seq("a" -> "b").toDF("id", "data")) + } + } + } + + // END Apache Spark tests +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaLimitPushDownSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaLimitPushDownSuite.scala new file mode 100644 index 00000000000..ea2e6dfb1ad --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaLimitPushDownSuite.scala @@ -0,0 +1,295 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import com.databricks.spark.util.DatabricksLogging +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.StatsUtils +import org.apache.spark.sql.delta.test.{DeltaSQLCommandTest, ScanReportHelper} +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +trait DeltaLimitPushDownTests extends QueryTest + with SharedSparkSession + with DatabricksLogging + with ScanReportHelper + with DeletionVectorsTestUtils + with StatsUtils + with DeltaSQLCommandTest { + + import testImplicits._ + + + test("no filter or projection") { + val dir = Utils.createTempDir() + val ds = Seq(1, 1, 2, 2, 3, 3).toDS().repartition(5, $"value") + ds.write.format("delta").save(dir.toString) + + val Seq(deltaScan, deltaScanWithLimit) = getScanReport { + spark.read.format("delta").load(dir.toString).collect() + val res = spark.read.format("delta").load(dir.toString).limit(3).collect() + assert(res.size == 3) + } + + assert(deltaScan.size("total").bytesCompressed === + deltaScanWithLimit.size("total").bytesCompressed) + + assert(deltaScan.size("scanned").bytesCompressed != + deltaScanWithLimit.size("scanned").bytesCompressed) + + assert(deltaScanWithLimit.size("scanned").rows === Some(4L)) + } + + test("limit larger than total") { + val dir = Utils.createTempDir() + val data = Seq(1, 1, 2, 2) + val ds = data.toDS().repartition($"value") + ds.write.format("delta").save(dir.toString) + + val Seq(deltaScan, deltaScanWithLimit) = getScanReport { + spark.read.format("delta").load(dir.toString).collect() + checkAnswer(spark.read.format("delta").load(dir.toString).limit(5), data.toDF()) + } + + assert(deltaScan.size("total").bytesCompressed === + deltaScanWithLimit.size("total").bytesCompressed) + + assert(deltaScan.size("scanned").bytesCompressed === + deltaScanWithLimit.size("scanned").bytesCompressed) + } + + test("limit 0") { + val records = getScanReport { + val dir = Utils.createTempDir() + val ds = Seq(1, 1, 2, 2, 3, 3).toDS().repartition($"value") + ds.write.format("delta").save(dir.toString) + val res = spark.read.format("delta") + .load(dir.toString) + .limit(0) + + checkAnswer(res, Seq()) + } + } + + test("insufficient rows have stats") { + val tempDir = Utils.createTempDir() + val deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + val file = Seq(1, 2).toDS().coalesce(1) + + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + file.write.format("delta").mode("append").save(deltaLog.dataPath.toString) + } + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + file.write.format("delta").mode("append").save(deltaLog.dataPath.toString) + } + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "true") { + 
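+ // Only this third file is written with stats, and its 2 recorded rows cannot cover limit = 3
+ // on their own, so the scan below is expected to fall back to reading every file
+ // (scanned bytes equal total bytes).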
file.write.format("delta").mode("append").save(deltaLog.dataPath.toString) + } + + val deltaScan = deltaLog.snapshot.filesForScan(limit = 3, partitionFilters = Seq.empty) + + assert(deltaScan.scanned.bytesCompressed === deltaScan.total.bytesCompressed) + } + + test("sufficient rows have stats") { + val tempDir = Utils.createTempDir() + val deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + val file = Seq(1, 2).toDS().coalesce(1) + + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + file.write.format("delta").mode("append").save(deltaLog.dataPath.toString) + } + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "true") { + file.write.format("delta").mode("append").save(deltaLog.dataPath.toString) + } + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "true") { + file.write.format("delta").mode("append").save(deltaLog.dataPath.toString) + } + + val deltaScan = deltaLog.snapshot.filesForScan(limit = 3, partitionFilters = Seq.empty) + + assert(deltaScan.scanned.rows === Some(4)) + assert(deltaScan.scanned.bytesCompressed != deltaScan.total.bytesCompressed) + } + + test("with projection only") { + val dir = Utils.createTempDir() + val ds = Seq((1, 1), (2, 1), (3, 1)).toDF("key", "value").as[(Int, Int)] + ds.write.format("delta").partitionBy("key").save(dir.toString) + + val Seq(deltaScan) = getScanReport { + val res = spark.read.format("delta").load(dir.toString).select("value").limit(1).collect() + assert(res === Seq(Row(1))) + } + + assert(deltaScan.size("scanned").rows === Some(1L)) + } + + test("with partition filter only") { + val dir = Utils.createTempDir() + val ds = Seq((1, 4), (2, 5), (3, 6)).toDF("key", "value").as[(Int, Int)] + ds.write.format("delta").partitionBy("key").save(dir.toString) + + val Seq(deltaScan, deltaScanWithLimit, deltaScanWithLimit2) = getScanReport { + spark.read.format("delta").load(dir.toString).where("key > 1").collect() + val res1 = spark.read.format("delta").load(dir.toString).where("key > 1").limit(1).collect() + assert(res1 === Seq(Row(2, 5)) || res1 === Seq(Row(3, 6))) + val res2 = spark.read.format("delta").load(dir.toString).where("key == 1").limit(2).collect() + assert(res2 === Seq(Row(1, 4))) + } + + assert(deltaScan.size("total").bytesCompressed === + deltaScanWithLimit.size("total").bytesCompressed) + + assert(deltaScan.size("scanned").bytesCompressed != + deltaScanWithLimit.size("scanned").bytesCompressed) + + assert(deltaScan.size("scanned").bytesCompressed.get < + deltaScan.size("total").bytesCompressed.get) + assert(deltaScanWithLimit.size("scanned").rows === Some(1L)) + assert(deltaScanWithLimit2.size("scanned").rows === Some(1L)) + } + + test("with non-partition filter") { + val dir = Utils.createTempDir() + val ds = Seq((1, 4), (2, 5), (3, 6)).toDF("key", "value").as[(Int, Int)] + ds.write.format("delta").partitionBy("key").save(dir.toString) + + val Seq(deltaScan) = getScanReport { // this query should not trigger limit push-down + spark.read.format("delta").load(dir.toString) + .where("key > 1") + .where("value > 4") + .limit(1) + .collect() + } + assert(deltaScan.size("scanned").rows === Some(2L)) + } + + test("limit push-down flag") { + val dir = Utils.createTempDir() + val ds = Seq((1, 4), (2, 5), (3, 6)).toDF("key", "value").as[(Int, Int)] + ds.write.format("delta").partitionBy("key").save(dir.toString) + + val Seq(baseline, scan, scan2) = getScanReport { + withSQLConf(DeltaSQLConf.DELTA_LIMIT_PUSHDOWN_ENABLED.key -> "true") { + spark.read.format("delta").load(dir.toString).where("key > 
1").limit(1).collect() + } + withSQLConf(DeltaSQLConf.DELTA_LIMIT_PUSHDOWN_ENABLED.key -> "false") { + spark.read.format("delta").load(dir.toString).where("key > 1").limit(1).collect() + spark.read.format("delta").load(dir.toString).limit(2).collect() + } + } + assert(scan.size("scanned").bytesCompressed.get > baseline.size("scanned").bytesCompressed.get) + assert(scan2.size("scanned").bytesCompressed === scan2.size("total").bytesCompressed) + } + + test("GlobalLimit should be kept") { + val dir = Utils.createTempDir() + (1 to 10).toDF.repartition(5).write.format("delta").save(dir.toString) + assert(spark.read.format("delta").load(dir.toString).limit(5).collect().size == 5) + } + + test("Works with union") { + val dir = Utils.createTempDir() + (1 to 10).toDF.repartition(5).write.format("delta").save(dir.toString) + val t1 = spark.read.format("delta").load(dir.toString) + val t2 = spark.read.format("delta").load(dir.toString) + val union = t1.union(t2) + + withSQLConf(DeltaSQLConf.DELTA_LIMIT_PUSHDOWN_ENABLED.key -> "true") { + val Seq(scanFull1, scanFull2) = getScanReport { + union.collect() + } + val Seq(scanLimit1, scanLimit2) = getScanReport { + union.limit(1).collect() + } + + assert(scanFull1.size("scanned").bytesCompressed.get > + scanLimit1.size("scanned").bytesCompressed.get) + assert(scanFull2.size("scanned").bytesCompressed.get > + scanLimit2.size("scanned").bytesCompressed.get) + } + } + + private def withDVSettings(thunk: => Unit): Unit = { + withSQLConf( + DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> "false" + ) { + withDeletionVectorsEnabled() { + thunk + } + } + } + + test(s"Verify limit correctness in the presence of DVs") { + withDVSettings { + val targetDF = spark.range(start = 0, end = 100, step = 1, numPartitions = 2) + .withColumn("value", col("id")) + + withTempDeltaTable(targetDF) { (targetTable, targetLog) => + removeRowsFromAllFilesInLog(targetLog, numRowsToRemovePerFile = 10) + verifyDVsExist(targetLog, 2) + + val targetDF = targetTable().toDF + + // We have 2 files 50 rows each. We deleted 10 rows from the first file. The first file + // now contains 50 physical rows and 40 logical. Failing to take into account the DVs in + // the first file results into prematurely terminating the scan and returning an + // incorrect result. Note, the corner case in terms of correctness is when the limit is + // set to 50. When statistics collection is disabled, we read both files. + val limitToExpectedNumberOfFilesReadSeq = Range(10, 90, 10) + .map(n => (n, if (n < 50) 1 else 2)) + + for ((limit, expectedNumberOfFilesRead) <- limitToExpectedNumberOfFilesReadSeq) { + val df = targetDF.limit(limit) + + // Assess correctness. + assert(df.count === limit) + + val scanStats = getStats(df) + + // Check we do not read more files than needed. + assert(scanStats.scanned.files === Some(expectedNumberOfFilesRead)) + + // Verify physical and logical rows are updated correctly. 
+ val numDeletedRows = 10 + val numPhysicalRowsPerFile = 50 + val numTotalPhysicalRows = numPhysicalRowsPerFile * expectedNumberOfFilesRead + val numTotalLogicalRows = numTotalPhysicalRows - + (numDeletedRows * expectedNumberOfFilesRead) + val expectedNumTotalPhysicalRows = Some(numTotalPhysicalRows) + val expectedNumTotalLogicalRows = Some(numTotalLogicalRows) + + assert(scanStats.scanned.rows === expectedNumTotalPhysicalRows) + assert(scanStats.scanned.logicalRows === expectedNumTotalLogicalRows) + } + } + } + } +} + +class DeltaLimitPushDownV1Suite extends DeltaLimitPushDownTests + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaLogMinorCompactionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaLogMinorCompactionSuite.scala new file mode 100644 index 00000000000..33a848cad94 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaLogMinorCompactionSuite.scala @@ -0,0 +1,436 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.DeltaOperations.ManualUpdate +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.util.FileNames +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql._ +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + +// scalastyle:off: removeFile +class DeltaLogMinorCompactionSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest + with SQLTestUtils { + + /** Helper method to do minor compaction of [[DeltaLog]] from [startVersion, endVersion] */ + private def minorCompactDeltaLog( + tablePath: String, + startVersion: Long, + endVersion: Long): Unit = { + val deltaLog = DeltaLog.forTable(spark, tablePath) + val logReplay = new InMemoryLogReplay( + minFileRetentionTimestamp = 0, + minSetTransactionRetentionTimestamp = None) + val hadoopConf = deltaLog.newDeltaHadoopConf() + + (startVersion to endVersion).foreach { versionToRead => + val file = FileNames.deltaFile(deltaLog.logPath, versionToRead) + val actionsIterator = deltaLog.store.readAsIterator(file, hadoopConf).map(Action.fromJson) + logReplay.append(versionToRead, actionsIterator) + } + deltaLog.store.write( + path = FileNames.compactedDeltaFile(deltaLog.logPath, startVersion, endVersion), + actions = logReplay.checkpoint.map(_.json).toIterator, + overwrite = true, + hadoopConf = hadoopConf) + } + + // Helper method to validate a commit. 
+ protected def validateCommit( + log: DeltaLog, + version: Long, + numAdds: Int = 0, + numRemoves: Int = 0, + numMetadata: Int = 0): Unit = { + assert(log.update().version === version) + val filePath = FileNames.deltaFile(log.logPath, version) + val actions = log.store.read(filePath, log.newDeltaHadoopConf()).map(Action.fromJson) + assert(actions.head.isInstanceOf[CommitInfo]) + assert(actions.tail.count(_.isInstanceOf[AddFile]) === numAdds) + assert(actions.tail.count(_.isInstanceOf[RemoveFile]) === numRemoves) + assert(actions.tail.count(_.isInstanceOf[Metadata]) === numMetadata) + } + + // Helper method to validate a compacted delta. + private def validateCompactedDelta( + log: DeltaLog, + filePath: Path, + expectedCompactedDelta: CompactedDelta): Unit = { + val actions = log.store.read(filePath, log.newDeltaHadoopConf()).map(Action.fromJson) + val observedCompactedDelta = CompactedDelta( + versionWindow = FileNames.compactedDeltaVersions(filePath), + numAdds = actions.count(_.isInstanceOf[AddFile]), + numRemoves = actions.count(_.isInstanceOf[RemoveFile]), + numMetadata = actions.count(_.isInstanceOf[Metadata]) + ) + assert(expectedCompactedDelta === observedCompactedDelta) + } + + + case class CompactedDelta( + versionWindow: (Long, Long), + numAdds: Int = 0, + numRemoves: Int = 0, + numMetadata: Int = 0) + + def createTestAddFile( + path: String = "foo", + partitionValues: Map[String, String] = Map.empty, + size: Long = 1L, + modificationTime: Long = 1L, + dataChange: Boolean = true, + stats: String = "{\"numRecords\": 1}"): AddFile = { + AddFile(path, partitionValues, size, modificationTime, dataChange, stats) + } + + def generateData(tableDir: String, checkpoints: Set[Int]): Unit = { + val files = (1 to 21).map( index => createTestAddFile(s"f${index}")) + // commit version 0 - AddFile: 4 + val deltaLog = DeltaLog.forTable(spark, tableDir) + import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + val metadata = Metadata() + val tableMetadata = metadata.copy( + configuration = DeltaConfigs.mergeGlobalConfigs(conf, metadata.configuration)) + deltaLog.startTransaction().commitManually( + files(1), files(2), files(3), files(4), tableMetadata) + validateCommit(deltaLog, version = 0, numAdds = 4, numRemoves = 0, numMetadata = 1) + if (checkpoints.contains(0)) deltaLog.checkpoint() + // commit version 1 - AddFile: 1 + deltaLog.startTransaction().commit(files(5) :: Nil, ManualUpdate) + validateCommit(deltaLog, version = 1, numAdds = 1, numRemoves = 0) + if (checkpoints.contains(1)) deltaLog.checkpoint() + // commit version 2 - RemoveFile: 1, AddFile: 1 + deltaLog.startTransaction().commit(Seq(files(5).remove, files(6)), ManualUpdate) + validateCommit(deltaLog, version = 2, numAdds = 1, numRemoves = 1) + if (checkpoints.contains(2)) deltaLog.checkpoint() + // commit version 3 - empty commit + deltaLog.startTransaction().commit(Seq(), ManualUpdate) + validateCommit(deltaLog, version = 3, numAdds = 0, numRemoves = 0) + if (checkpoints.contains(3)) deltaLog.checkpoint() + // commit version 4 - empty commit + deltaLog.startTransaction().commit(Seq(), ManualUpdate) + validateCommit(deltaLog, version = 4, numAdds = 0, numRemoves = 0) + if (checkpoints.contains(4)) deltaLog.checkpoint() + // commit version 5 - AddFile: 1, RemoveFile: 5 + deltaLog.startTransaction().commit( + (1 to 4).map(i => files(i).remove) ++ Seq(files(6).remove, files(7)), + ManualUpdate) + validateCommit(deltaLog, version = 5, numAdds = 1, numRemoves = 5) + if (checkpoints.contains(5)) deltaLog.checkpoint() + // commit 
version 6 - AddFile: 10, RemoveFile: 0 + deltaLog.startTransaction().commit((8 to 17).map(i => files(i)), ManualUpdate) + validateCommit(deltaLog, version = 6, numAdds = 10, numRemoves = 0) + if (checkpoints.contains(6)) deltaLog.checkpoint() + // commit version 7 - AddFile: 2, RemoveFile: 6 + deltaLog.startTransaction().commit( + (10 to 15).map(i => files(i).remove) ++ Seq(files(18), files(19)), + ManualUpdate) + validateCommit(deltaLog, version = 7, numAdds = 2, numRemoves = 6) + if (checkpoints.contains(7)) deltaLog.checkpoint() + // commit version 8 - Metadata: 1 + deltaLog.startTransaction().commit(Seq(deltaLog.unsafeVolatileSnapshot.metadata), ManualUpdate) + validateCommit(deltaLog, version = 8, numMetadata = 1) + if (checkpoints.contains(8)) deltaLog.checkpoint() + // commit version 9 - AddFile: 7 + deltaLog.startTransaction().commit( + Seq(files(16), files(17), files(18), files(19), files(7), files(8), files(9)) + .map(af => af.copy(dataChange = false)), + ManualUpdate) + validateCommit(deltaLog, version = 9, numAdds = 7) + if (checkpoints.contains(9)) deltaLog.checkpoint() + // commit version 10 - AddFile: 1 + deltaLog.startTransaction().commit(files(20) :: Nil, ManualUpdate) + validateCommit(deltaLog, version = 10, numAdds = 1, numRemoves = 0) + } + + /** + * This test creates a Delta table with 11 commits (0, 1, ..., 10) and also creates compacted + * deltas based on the provided `compactionWindows` tuples. + * + * At the end, we create a Snapshot and see if the Snapshot is initialized properly using the + * right compacted delta files instead of regular delta files. We also compare the + * `computeState`, `stateDF`, `allFiles` of this compacted delta backed Snapshot against a + * regular Snapshot backed by single delta files. + */ + def testSnapshotCreation( + compactionWindows: Seq[(Long, Long)], + checkpoints: Set[Int] = Set.empty, + postSetupFunc: Option[(DeltaLog => Unit)] = None, + expectedCompactedDeltas: Seq[CompactedDelta], + expectedDeltas: Seq[Long], + expectedCheckpoint: Long = -1L, + expectError: Boolean = false, + additionalConfs: Seq[(String, String)] = Seq.empty): Unit = { + + val confs = Seq( + DeltaSQLConf.DELTALOG_MINOR_COMPACTION_USE_FOR_READS.key -> "true", + DeltaSQLConf.DELTA_SKIP_RECORDING_EMPTY_COMMITS.key -> "false", + // Set CHECKPOINT_INTERVAL to a high number so that we can checkpoint whenever we need as per + // the test setup.
+ DeltaConfigs.CHECKPOINT_INTERVAL.defaultTablePropertyKey -> "1000" + ) ++ additionalConfs + + withSQLConf(confs: _*) { + withTempDir { tmpDir => + val tableDir = tmpDir.getAbsolutePath + generateData(tableDir, checkpoints) + val deltaLog = DeltaLog.forTable(spark, tableDir) + compactionWindows.foreach { case (startV, endV) => + minorCompactDeltaLog(tableDir, startV, endV) + } + + // Setup complete - run the post setup function + postSetupFunc.foreach(_.apply(deltaLog)) + + DeltaLog.clearCache() + if (expectError) { + intercept[DeltaIllegalStateException] { + DeltaLog.forTable(spark, tableDir).unsafeVolatileSnapshot + } + return + } + val snapshot1 = DeltaLog.forTable(spark, tableDir).unsafeVolatileSnapshot + val (compactedDeltas1, deltas1) = + snapshot1.logSegment.deltas.map(_.getPath).partition(FileNames.isCompactedDeltaFile) + assert(compactedDeltas1.size === expectedCompactedDeltas.size) + compactedDeltas1.sorted + .zip(expectedCompactedDeltas.sortBy(_.versionWindow)) + .foreach { case (compactedDeltaPath, expectedCompactedDelta) => + validateCompactedDelta(deltaLog, compactedDeltaPath, expectedCompactedDelta) + } + assert(deltas1.sorted.map(FileNames.deltaVersion) === expectedDeltas) + assert(snapshot1.logSegment.checkpointProvider.version === expectedCheckpoint) + + // Disable the conf and create a new Snapshot. The new snapshot should not use the compacted + // deltas. + withSQLConf(DeltaSQLConf.DELTALOG_MINOR_COMPACTION_USE_FOR_READS.key -> "false") { + DeltaLog.clearCache() + val snapshot2 = DeltaLog.forTable(spark, tableDir).unsafeVolatileSnapshot + val (compactedDeltas2, _) = + snapshot2.logSegment.deltas.map(_.getPath).partition(FileNames.isCompactedDeltaFile) + assert(compactedDeltas2.isEmpty) + + // Compare the checksum and state reconstruction results of these two snapshots.
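+ // Compacted deltas are only a read optimization, so both snapshots must describe the identical table state.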
+ assert(snapshot2.computeChecksum === snapshot1.computeChecksum) + checkAnswer(snapshot2.stateDF, snapshot1.stateDF) + checkAnswer(snapshot2.allFiles.toDF(), snapshot1.allFiles.toDF()) + } + } + } + } + + /////////////////////// + // Without Checkpoints + ////////////////////// + + test("smallest interval is chosen first for Snapshot creation") { + testSnapshotCreation( + compactionWindows = Seq((1, 3), (2, 3), (3, 8)), + expectedCompactedDeltas = Seq(CompactedDelta((1, 3), numAdds = 1, numRemoves = 1)), + expectedDeltas = Seq(0, 4, 5, 6, 7, 8, 9, 10) + ) + } + + test("Snapshot backed by single compacted delta") { + testSnapshotCreation( + compactionWindows = Seq((0, 10)), + expectedCompactedDeltas = + Seq(CompactedDelta((0, 10), numAdds = 8, numRemoves = 12, numMetadata = 1)), + expectedDeltas = Seq() + ) + } + + test("empty compacted delta, compacted delta covers the beginning part") { + testSnapshotCreation( + compactionWindows = Seq((0, 2), (3, 4), (4, 5)), + expectedCompactedDeltas = Seq( + CompactedDelta((0, 2), numAdds = 5, numRemoves = 1, numMetadata = 1), + CompactedDelta((3, 4), numAdds = 0, numRemoves = 0) // empty compacted delta + ), + expectedDeltas = Seq(5, 6, 7, 8, 9, 10) + ) + } + + test("compacted delta covers the end part of LogSegment") { + testSnapshotCreation( + compactionWindows = Seq((7, 10), (8, 10)), + expectedCompactedDeltas = Seq( + CompactedDelta((7, 10), numAdds = 8, numRemoves = 6, numMetadata = 1) + ), + expectedDeltas = Seq(0, 1, 2, 3, 4, 5, 6) + ) + } + + test("multiple compacted delta covers full LogSegment") { + testSnapshotCreation( + compactionWindows = Seq((0, 2), (3, 5), (6, 8), (9, 10)), + expectedCompactedDeltas = Seq( + CompactedDelta((0, 2), numAdds = 5, numRemoves = 1, numMetadata = 1), + CompactedDelta((3, 5), numAdds = 1, numRemoves = 5, numMetadata = 0), + CompactedDelta((6, 8), numAdds = 6, numRemoves = 6, numMetadata = 1), + CompactedDelta((9, 10), numAdds = 8, numRemoves = 0, numMetadata = 0) + ), + expectedDeltas = Seq() + ) + } + + + /////////////////////// + // With Checkpoints + ////////////////////// + + test("smallest interval after last checkpoint is chosen for Snapshot creation") { + testSnapshotCreation( + compactionWindows = Seq((1, 3), (2, 3), (3, 8), (4, 9), (3, 10)), + checkpoints = Set(0, 2), + expectedCompactedDeltas = + Seq(CompactedDelta((3, 8), numAdds = 7, numRemoves = 11, numMetadata = 1)), + expectedDeltas = Seq(9, 10), + expectedCheckpoint = 2, + // Disable DELTA_CHECKPOINT_V2_ENABLED conf so that we don't forcefully checkpoint at + // commit version 8 where we change `delta.dataSkippingNumIndexedCols` to 0. + additionalConfs = + Seq(DeltaConfigs.CHECKPOINT_WRITE_STATS_AS_STRUCT.defaultTablePropertyKey -> "false") + ) + } + + test("Snapshot backed by single compacted delta after LAST_CHECKPOINT") { + testSnapshotCreation( + compactionWindows = Seq((0, 10), (5, 10)), + checkpoints = Set(2, 4), + expectedCompactedDeltas = + Seq(CompactedDelta((5, 10), numAdds = 8, numRemoves = 11, numMetadata = 1)), + expectedDeltas = Seq(), + expectedCheckpoint = 4, + // Disable DELTA_CHECKPOINT_V2_ENABLED conf so that we don't forcefully checkpoint at + // commit version 8 where we change `delta.dataSkippingNumIndexedCols` to 0. 
+ additionalConfs = + Seq(DeltaConfigs.CHECKPOINT_WRITE_STATS_AS_STRUCT.defaultTablePropertyKey -> "false") + + ) + } + + test("empty compacted delta, compacted delta covers the beginning part after LAST_CHECKPOINT") { + testSnapshotCreation( + compactionWindows = Seq((1, 2), (3, 4), (4, 5)), + checkpoints = Set(0), + expectedCompactedDeltas = Seq( + CompactedDelta((1, 2), numAdds = 1, numRemoves = 1), + CompactedDelta((3, 4), numAdds = 0, numRemoves = 0) // empty compacted delta + ), + expectedDeltas = Seq(5, 6, 7, 8, 9, 10), + expectedCheckpoint = 0, + // Disable DELTA_CHECKPOINT_V2_ENABLED conf so that we don't forcefully checkpoint at + // commit version 8 where we change `delta.dataSkippingNumIndexedCols` to 0. + additionalConfs = + Seq(DeltaConfigs.CHECKPOINT_WRITE_STATS_AS_STRUCT.defaultTablePropertyKey -> "false") + ) + } + + test("compacted delta covers the end part of LogSegment (with Checkpoint)") { + testSnapshotCreation( + compactionWindows = Seq((7, 10), (8, 10)), + checkpoints = Set(0, 2, 5), + expectedCompactedDeltas = Seq( + CompactedDelta((7, 10), numAdds = 8, numRemoves = 6, numMetadata = 1) + ), + expectedDeltas = Seq(6), + expectedCheckpoint = 5, + // Disable DELTA_CHECKPOINT_V2_ENABLED conf so that we don't forcefully checkpoint at + // commit version 8 where we change `delta.dataSkippingNumIndexedCols` to 0. + additionalConfs = + Seq(DeltaConfigs.CHECKPOINT_WRITE_STATS_AS_STRUCT.defaultTablePropertyKey -> "false") + ) + } + + test("multiple compacted delta covers full LogSegment (with Checkpoint)") { + testSnapshotCreation( + compactionWindows = Seq((0, 2), (3, 5), (3, 6), (9, 10)), + checkpoints = Set(0, 2), + expectedCompactedDeltas = Seq( + CompactedDelta((3, 5), numAdds = 1, numRemoves = 5, numMetadata = 0), + CompactedDelta((9, 10), numAdds = 8, numRemoves = 0, numMetadata = 0) + ), + expectedDeltas = Seq(6, 7, 8), + expectedCheckpoint = 2, + // Disable DELTA_CHECKPOINT_V2_ENABLED conf so that we don't forcefully checkpoint at + // commit version 8 where we change `delta.dataSkippingNumIndexedCols` to 0. + additionalConfs = + Seq(DeltaConfigs.CHECKPOINT_WRITE_STATS_AS_STRUCT.defaultTablePropertyKey -> "false") + ) + } + + ///////////////////////////////////////////////////// + // negative scenarios where deltaLog is manipulated + ///////////////////////////////////////////////////// + + test("when compacted delta is available till version 11 but actual delta files are" + + " till version 10") { + testSnapshotCreation( + compactionWindows = Seq((0, 2), (3, 5), (3, 6), (9, 10)), + checkpoints = Set(0, 2), + postSetupFunc = Some( + (deltaLog: DeltaLog) => { + val logPath = deltaLog.logPath + val fromName = FileNames.compactedDeltaFile(logPath, fromVersion = 9, toVersion = 10) + val toName = FileNames.compactedDeltaFile(logPath, fromVersion = 9, toVersion = 11) + logPath.getFileSystem(deltaLog.newDeltaHadoopConf()).rename(fromName, toName) + } + ), + expectedCompactedDeltas = Seq( + CompactedDelta((3, 5), numAdds = 1, numRemoves = 5, numMetadata = 0) + ), + expectedDeltas = Seq(6, 7, 8, 9, 10), + expectedCheckpoint = 2, + // Disable DELTA_CHECKPOINT_V2_ENABLED conf so that we don't forcefully checkpoint at + // commit version 8 where we change `delta.dataSkippingNumIndexedCols` to 0. 
+ additionalConfs = + Seq(DeltaConfigs.CHECKPOINT_WRITE_STATS_AS_STRUCT.defaultTablePropertyKey -> "false") + ) + } + + + test("compacted deltas should not be used when there are holes in deltas") { + testSnapshotCreation( + compactionWindows = Seq((0, 2), (3, 5), (3, 6)), + checkpoints = Set(0, 2), + postSetupFunc = Some( + (deltaLog: DeltaLog) => { + val logPath = deltaLog.logPath + val deltaFileToDelete = FileNames.deltaFile(logPath, version = 4) + logPath.getFileSystem(deltaLog.newDeltaHadoopConf()).delete(deltaFileToDelete, true) + } + ), + expectError = true, + expectedCompactedDeltas = Seq(), + expectedDeltas = Seq(), + expectedCheckpoint = -1L, + // Disable DELTA_CHECKPOINT_V2_ENABLED conf so that we don't forcefully checkpoint at + // commit version 8 where we change `delta.dataSkippingNumIndexedCols` to 0. + additionalConfs = + Seq(DeltaConfigs.CHECKPOINT_WRITE_STATS_AS_STRUCT.defaultTablePropertyKey -> "false") + ) + } +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaLogSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaLogSuite.scala new file mode 100644 index 00000000000..20aaee6011d --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaLogSuite.scala @@ -0,0 +1,646 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.{BufferedReader, File, InputStreamReader} +import java.nio.charset.StandardCharsets +import java.util.Locale + +import scala.language.postfixOps + +import org.apache.spark.sql.delta.DeltaOperations.Truncate +import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import org.apache.hadoop.fs.Path + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.JsonToStructs +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.Utils + +// scalastyle:off: removeFile +class DeltaLogSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest + with DeltaCheckpointTestUtils + with SQLTestUtils { + + + protected val testOp = Truncate() + + testDifferentCheckpoints("checkpoint", quiet = true) { (_, _) => + val tempDir = Utils.createTempDir() + val log1 = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + (1 to 15).foreach { i => + val txn = log1.startTransaction() + val file = createTestAddFile(path = i.toString) :: Nil + val delete: Seq[Action] = if (i > 1) { + RemoveFile(i - 1 toString, Some(System.currentTimeMillis()), true) :: Nil + } else { + Nil + } + txn.commitManually(delete ++ file: _*) + } + + DeltaLog.clearCache() + val log2 = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + assert(log2.snapshot.version == log1.snapshot.version) + assert(log2.snapshot.allFiles.count == 1) + } + + testDifferentCheckpoints("update deleted directory", quiet = true) { (_, _) => + withTempDir { dir => + val path = new Path(dir.getCanonicalPath) + val log = DeltaLog.forTable(spark, path) + + // Commit data so the in-memory state isn't consistent with an empty log. 
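+ // Deleting the table directory afterwards should force update() to return an empty snapshot at version -1.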
+ val txn = log.startTransaction() + val files = (1 to 10).map(f => createTestAddFile(path = f.toString)) + txn.commitManually(files: _*) + log.checkpoint() + + val fs = path.getFileSystem(log.newDeltaHadoopConf()) + fs.delete(path, true) + + val snapshot = log.update() + assert(snapshot.version === -1) + } + } + + testDifferentCheckpoints( + "checkpoint write should use the correct Hadoop configuration") { (_, _) => + withTempDir { dir => + withSQLConf( + "fs.AbstractFileSystem.fake.impl" -> classOf[FakeAbstractFileSystem].getName, + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") { + val path = s"fake://${dir.getCanonicalPath}" + val log = DeltaLog.forTable(spark, path) + val txn = log.startTransaction() + txn.commitManually(createTestAddFile()) + log.checkpoint() + } + } + } + + testDifferentCheckpoints("update should pick up checkpoints", quiet = true) { (_, _) => + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + val checkpointInterval = log.checkpointInterval() + for (f <- 0 until (checkpointInterval * 2)) { + val txn = log.startTransaction() + txn.commitManually(createTestAddFile(path = f.toString)) + } + + def collectReservoirStateRDD(rdd: RDD[_]): Seq[RDD[_]] = { + if (rdd.name != null && rdd.name.startsWith("Delta Table State")) { + Seq(rdd) ++ rdd.dependencies.flatMap(d => collectReservoirStateRDD(d.rdd)) + } else { + rdd.dependencies.flatMap(d => collectReservoirStateRDD(d.rdd)) + } + } + + val numOfStateRDDs = collectReservoirStateRDD(log.snapshot.stateDS.rdd).size + assert(numOfStateRDDs >= 1, "collectReservoirStateRDD may not work properly") + assert(numOfStateRDDs < checkpointInterval) + } + } + + testDifferentCheckpoints( + "update shouldn't pick up delta files earlier than checkpoint") { (_, _) => + val tempDir = Utils.createTempDir() + val log1 = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + (1 to 5).foreach { i => + val txn = log1.startTransaction() + val file = if (i > 1) { + createTestAddFile(path = i.toString) :: Nil + } else { + Metadata(configuration = Map(DeltaConfigs.CHECKPOINT_INTERVAL.key -> "10")) :: Nil + } + val delete: Seq[Action] = if (i > 1) { + RemoveFile(i - 1 toString, Some(System.currentTimeMillis()), true) :: Nil + } else { + Nil + } + txn.commitManually(delete ++ file: _*) + } + + DeltaLog.clearCache() + val log2 = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + (6 to 15).foreach { i => + val txn = log1.startTransaction() + val file = createTestAddFile(path = i.toString) :: Nil + val delete: Seq[Action] = if (i > 1) { + RemoveFile(i - 1 toString, Some(System.currentTimeMillis()), true) :: Nil + } else { + Nil + } + txn.commitManually(delete ++ file: _*) + } + + // Since log2 is a separate instance, it shouldn't be updated to version 15 + assert(log2.snapshot.version == 4) + val updateLog2 = log2.update() + assert(updateLog2.version == log1.snapshot.version, "Did not update to correct version") + + val deltas = log2.snapshot.logSegment.deltas + assert(deltas.length === 4, "Expected 4 files starting at version 11 to 14") + val versions = deltas.map(FileNames.deltaVersion).sorted + assert(versions === Seq[Long](11, 12, 13, 14), "Received the wrong files for update") + } + + testQuietly("ActionLog cache should use the normalized path as key") { + withTempDir { tempDir => + val dir = tempDir.getAbsolutePath.stripSuffix("/") + assert(dir.startsWith("/")) + // scalastyle:off deltahadoopconfiguration + val fs = new 
Path("/").getFileSystem(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + val samePaths = Seq( + new Path(dir + "/foo"), + new Path(dir + "/foo/"), + new Path(fs.getScheme + ":" + dir + "/foo"), + new Path(fs.getScheme + "://" + dir + "/foo") + ) + val logs = samePaths.map(DeltaLog.forTable(spark, _)) + logs.foreach { log => + assert(log eq logs.head) + } + } + } + + testDifferentCheckpoints( + "handle corrupted '_last_checkpoint' file", quiet = true) { (checkpointPolicy, format) => + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + val checkpointInterval = log.checkpointInterval() + for (f <- 0 to checkpointInterval) { + val txn = log.startTransaction() + txn.commitManually(createTestAddFile(path = f.toString)) + } + val lastCheckpointOpt = log.readLastCheckpointFile() + assert(lastCheckpointOpt.isDefined) + val lastCheckpoint = lastCheckpointOpt.get + import CheckpointInstance.Format._ + val expectedCheckpointFormat = if (checkpointPolicy == CheckpointPolicy.V2) V2 else SINGLE + assert(CheckpointInstance(lastCheckpoint).format === expectedCheckpointFormat) + + // Create an empty "_last_checkpoint" (corrupted) + val fs = log.LAST_CHECKPOINT.getFileSystem(log.newDeltaHadoopConf()) + fs.create(log.LAST_CHECKPOINT, true /* overwrite */).close() + + // Create a new DeltaLog + DeltaLog.clearCache() + val log2 = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + // Make sure we create a new DeltaLog in order to test the loading logic. + assert(log ne log2) + + // We should get the same metadata even if "_last_checkpoint" is corrupted. + assert(CheckpointInstance(log2.readLastCheckpointFile().get) === + CheckpointInstance(lastCheckpoint.version, SINGLE)) + } + } + + testQuietly("paths should be canonicalized") { + Seq("file:", "file://").foreach { scheme => + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir) + assert(new File(log.logPath.toUri).mkdirs()) + val path = "/some/unqualified/absolute/path" + val add = AddFile( + path, Map.empty, 100L, 10L, dataChange = true) + val rm = RemoveFile( + s"$scheme$path", Some(200L), dataChange = false) + + log.store.write( + FileNames.deltaFile(log.logPath, 0L), + Iterator(Action.supportedProtocolVersion(), Metadata(), add) + .map(a => JsonUtils.toJson(a.wrap)), + overwrite = false, + log.newDeltaHadoopConf()) + log.store.write( + FileNames.deltaFile(log.logPath, 1L), + Iterator(JsonUtils.toJson(rm.wrap)), + overwrite = false, + log.newDeltaHadoopConf()) + + assert(log.update().version === 1) + assert(log.snapshot.numOfFiles === 0) + } + } + } + + testQuietly("paths should be canonicalized - special characters") { + Seq("file:", "file://").foreach { scheme => + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir) + assert(new File(log.logPath.toUri).mkdirs()) + val path = new Path("/some/unqualified/with space/p@#h").toUri.toString + val add = AddFile( + path, Map.empty, 100L, 10L, dataChange = true) + val rm = RemoveFile( + s"$scheme$path", Some(200L), dataChange = false) + + log.store.write( + FileNames.deltaFile(log.logPath, 0L), + Iterator(Action.supportedProtocolVersion(), Metadata(), add) + .map(a => JsonUtils.toJson(a.wrap)), + overwrite = false, + log.newDeltaHadoopConf()) + log.store.write( + FileNames.deltaFile(log.logPath, 1L), + Iterator(JsonUtils.toJson(rm.wrap)), + overwrite = false, + log.newDeltaHadoopConf()) + + assert(log.update().version === 1) + assert(log.snapshot.numOfFiles === 0) + } + } + } + + test("Reject read 
from Delta if no path is passed") { + val e = intercept[IllegalArgumentException](spark.read.format("delta").load()).getMessage + assert(e.contains("'path' is not specified")) + } + + test("do not relativize paths in RemoveFiles") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir) + assert(new File(log.logPath.toUri).mkdirs()) + val path = new File(dir, "a/b/c").getCanonicalPath + val rm = RemoveFile(path, Some(System.currentTimeMillis()), dataChange = true) + log.startTransaction().commitManually(rm) + + val committedRemove = log.update(stalenessAcceptable = false).tombstones.collect() + assert(committedRemove.head.path === s"file://$path") + } + } + + test("delete and re-add the same file in different transactions") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir) + assert(new File(log.logPath.toUri).mkdirs()) + + val add1 = createTestAddFile(modificationTime = System.currentTimeMillis()) + log.startTransaction().commitManually(add1) + + val rm = add1.remove + log.startTransaction().commit(rm :: Nil, DeltaOperations.ManualUpdate) + + val add2 = createTestAddFile(modificationTime = System.currentTimeMillis()) + log.startTransaction().commit(add2 :: Nil, DeltaOperations.ManualUpdate) + + // Add a new transaction to replay logs using the previous snapshot. If it contained + // AddFile("foo") and RemoveFile("foo"), "foo" would get removed and fail this test. + val otherAdd = createTestAddFile(path = "bar", modificationTime = System.currentTimeMillis()) + log.startTransaction().commit(otherAdd :: Nil, DeltaOperations.ManualUpdate) + + assert(log.update().allFiles.collect().find(_.path == "foo") + // `dataChange` is set to `false` after replaying logs. + === Some(add2.copy( + dataChange = false, baseRowId = Some(1), defaultRowCommitVersion = Some(2)))) + } + } + + test("error - versions not contiguous") { + withTempDir { dir => + val staleLog = DeltaLog.forTable(spark, dir) + DeltaLog.clearCache() + + val log = DeltaLog.forTable(spark, dir) + assert(new File(log.logPath.toUri).mkdirs()) + + val metadata = Metadata() + val add1 = AddFile("foo", Map.empty, 1L, System.currentTimeMillis(), dataChange = true) + log.startTransaction().commit(metadata :: add1 :: Nil, DeltaOperations.ManualUpdate) + + val add2 = AddFile("bar", Map.empty, 1L, System.currentTimeMillis(), dataChange = true) + log.startTransaction().commit(add2 :: Nil, DeltaOperations.ManualUpdate) + + val add3 = AddFile("baz", Map.empty, 1L, System.currentTimeMillis(), dataChange = true) + log.startTransaction().commit(add3 :: Nil, DeltaOperations.ManualUpdate) + + new File(new Path(log.logPath, "00000000000000000001.json").toUri).delete() + + val ex = intercept[IllegalStateException] { + staleLog.update() + } + assert(ex.getMessage.contains("Versions (0, 2) are not contiguous.")) + } + } + + Seq("protocol", "metadata").foreach { action => + test(s"state reconstruction without $action should fail") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + assert(new File(log.logPath.toUri).mkdirs()) + val selectedAction = if (action == "metadata") { + Protocol() + } else { + Metadata() + } + val file = AddFile("abc", Map.empty, 1, 1, true) + log.store.write( + FileNames.deltaFile(log.logPath, 0L), + Iterator(selectedAction, file).map(a => JsonUtils.toJson(a.wrap)), + overwrite = false, + log.newDeltaHadoopConf()) + val e = intercept[IllegalStateException] { + log.update() + } + assert(e.getMessage === DeltaErrors.actionNotFoundException(action, 0).getMessage) 
+ } + } + } + + Seq("protocol", "metadata").foreach { action => + testDifferentCheckpoints(s"state reconstruction from checkpoint with" + + s" missing $action should fail", quiet = true) { (_, _) => + withTempDir { tempDir => + import testImplicits._ + val staleLog = DeltaLog.forTable(spark, tempDir) + DeltaLog.clearCache() + + val log = DeltaLog.forTable(spark, tempDir) + assert (staleLog != log) + val checkpointInterval = log.checkpointInterval() + // Create a checkpoint regularly + for (f <- 0 to checkpointInterval) { + val txn = log.startTransaction() + val addFile = createTestAddFile(path = f.toString) + if (f == 0) { + txn.commitManually(addFile) + } else { + txn.commit(Seq(addFile), testOp) + } + } + + { + // Create an incomplete checkpoint without the action and overwrite the + // original checkpoint + val checkpointPathOpt = + log.listFrom(log.snapshot.version).find(FileNames.isCheckpointFile).map(_.getPath) + assert(checkpointPathOpt.nonEmpty) + assert(FileNames.checkpointVersion(checkpointPathOpt.get) === log.snapshot.version) + val checkpointPath = checkpointPathOpt.get + def removeActionFromParquetCheckpoint(tmpCheckpoint: File): Unit = { + val takeAction = if (action == "metadata") { + "protocol" + } else { + "metadata" + } + val corruptedCheckpointData = spark.read.schema(SingleAction.encoder.schema) + .parquet(checkpointPath.toString) + .where(s"add is not null or $takeAction is not null") + .as[SingleAction].collect() + + // Keep the add files and also filter by the additional condition + corruptedCheckpointData.toSeq.toDS().coalesce(1).write + .mode("overwrite").parquet(tmpCheckpoint.toString) + val writtenCheckpoint = + tmpCheckpoint.listFiles().toSeq.filter(_.getName.startsWith("part")).head + val checkpointFile = new File(checkpointPath.toUri) + new File(log.logPath.toUri).listFiles().toSeq.foreach { file => + if (file.getName.startsWith(".0")) { + // we need to delete checksum files, otherwise trying to replace our incomplete + // checkpoint file fails due to the LocalFileSystem's checksum checks. + require(file.delete(), "Failed to delete checksum file") + } + } + require(checkpointFile.delete(), "Failed to delete old checkpoint") + require(writtenCheckpoint.renameTo(checkpointFile), + "Failed to rename corrupt checkpoint") + } + if (checkpointPath.getName.endsWith("json")) { + val conf = log.newDeltaHadoopConf() + val filteredActions = log.store + .read(checkpointPath, log.newDeltaHadoopConf()) + .map(Action.fromJson) + .filter { + case _: Protocol => action != "protocol" + case _: Metadata => action != "metadata" + case _ => true + }.map(_.json) + log.store.write(checkpointPath, filteredActions.toIterator, overwrite = true, conf) + } else { + withTempDir(removeActionFromParquetCheckpoint) + } + } + + // Verify if the state reconstruction from the checkpoint fails. 
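+ // The stale DeltaLog reads from the tampered checkpoint, so the missing Protocol/Metadata action must surface as an error.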
+ val e = intercept[IllegalStateException] { + staleLog.update() + } + assert(e.getMessage === + DeltaErrors.actionNotFoundException(action, checkpointInterval).getMessage) + } + } + } + + testDifferentCheckpoints("deleting and recreating a directory should" + + " cause the snapshot to be recomputed", quiet = true) { (_, _) => + withTempDir { dir => + val path = dir.getCanonicalPath + spark.range(10).write.format("delta").mode("append").save(path) + spark.range(10, 20).write.format("delta").mode("append").save(path) + val deltaLog = DeltaLog.forTable(spark, path) + deltaLog.checkpoint() + spark.range(20, 30).write.format("delta").mode("append").save(path) + + // Store these for later usage + val actions = deltaLog.snapshot.stateDS.collect() + val commitTimestamp = deltaLog.snapshot.logSegment.lastCommitTimestamp + + checkAnswer( + spark.read.format("delta").load(path), + spark.range(30).toDF() + ) + + val fs = deltaLog.logPath.getFileSystem(deltaLog.newDeltaHadoopConf()) + + // Now let's delete the last version + deltaLog.store + .listFrom( + FileNames.listingPrefix(deltaLog.logPath, deltaLog.snapshot.version), + deltaLog.newDeltaHadoopConf()) + .filter(!_.getPath.getName.startsWith("_")) + .foreach(f => fs.delete(f.getPath, true)) + + // Should show up to 20 + checkAnswer( + spark.read.format("delta").load(path), + spark.range(20).toDF() + ) + + // Now let's delete the checkpoint and json file for version 1. We will try to list from + // version 1, but since we can't find anything, we should start listing from version 0 + deltaLog.store + .listFrom( + FileNames.listingPrefix(deltaLog.logPath, 1), + deltaLog.newDeltaHadoopConf()) + .filter(!_.getPath.getName.startsWith("_")) + .foreach(f => fs.delete(f.getPath, true)) + + checkAnswer( + spark.read.format("delta").load(path), + spark.range(10).toDF() + ) + + // Now let's delete that commit as well, and write a new first version + deltaLog.listFrom(0) + .filter(!_.getPath.getName.startsWith("_")) + .foreach(f => fs.delete(f.getPath, false)) + + assert(deltaLog.snapshot.version === 0) + + deltaLog.store.write( + FileNames.deltaFile(deltaLog.logPath, 0), + actions.map(_.unwrap.json).iterator, + overwrite = false, + deltaLog.newDeltaHadoopConf()) + + // To avoid flakiness, we manually set the modification timestamp of the file to a later + // second + new File(FileNames.deltaFile(deltaLog.logPath, 0).toUri) + .setLastModified(commitTimestamp + 5000) + + checkAnswer( + spark.read.format("delta").load(path), + spark.range(30).toDF() + ) + } + } + + test("forTableWithSnapshot should always return the latest snapshot") { + withTempDir { dir => + val path = dir.getCanonicalPath + spark.range(10).write.format("delta").mode("append").save(path) + val deltaLog = DeltaLog.forTable(spark, path) + assert(deltaLog.snapshot.version === 0) + + val (_, snapshot) = DeltaLog.withFreshSnapshot { _ => + // This update is necessary to advance the lastUpdatedTs beyond the start time of + // withFreshSnapshot call. + deltaLog.update() + // Manually add a commit. However, the deltaLog should now be fresh enough + // that we don't trigger another update, and thus don't find the commit. 
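+ // Write the commit file for version 1 directly through the LogStore so the cached DeltaLog is not refreshed.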
+ val add = AddFile(path, Map.empty, 100L, 10L, dataChange = true) + deltaLog.store.write( + FileNames.deltaFile(deltaLog.logPath, 1L), + Iterator(JsonUtils.toJson(add.wrap)), + overwrite = false, + deltaLog.newDeltaHadoopConf()) + deltaLog + } + assert(snapshot.version === 0) + + val deltaLog2 = DeltaLog.forTable(spark, path) + assert(deltaLog2.snapshot.version === 0) // This shouldn't update + val (_, snapshot2) = DeltaLog.forTableWithSnapshot(spark, path) + assert(snapshot2.version === 1) // This should get the latest snapshot + } + } + + test("Delta log should handle malformed json") { + val mapper = new ObjectMapper() + mapper.registerModule(DefaultScalaModule) + def testJsonCommitParser( + path: String, func: Map[String, Map[String, String]] => String): Unit = { + spark.range(10).write.format("delta").mode("append").save(path) + spark.range(1).write.format("delta").mode("append").save(path) + + val log = DeltaLog.forTable(spark, path) + val commitFilePath = FileNames.deltaFile(log.logPath, 1L) + val fs = log.logPath.getFileSystem(log.newDeltaHadoopConf()) + val stream = fs.open(commitFilePath) + val reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)) + val commitInfo = reader.readLine() + "\n" + val addFile = reader.readLine() + stream.close() + + val map = mapper.readValue(addFile, classOf[Map[String, Map[String, String]]]) + val output = fs.create(commitFilePath, true) + output.write(commitInfo.getBytes(StandardCharsets.UTF_8)) + output.write(func(map).getBytes(StandardCharsets.UTF_8)) + output.close() + DeltaLog.clearCache() + + val parser = JsonToStructs( + schema = Action.logSchema, + options = DeltaLog.jsonCommitParseOption, + child = null, + timeZoneId = Some(spark.sessionState.conf.sessionLocalTimeZone)) + + val it = log.store.readAsIterator(commitFilePath, log.newDeltaHadoopConf()) + try { + it.foreach { json => + val utf8json = UTF8String.fromString(json) + parser.nullSafeEval(utf8json).asInstanceOf[InternalRow] + } + } finally { + it.close() + } + } + + // Parser should succeed when AddFile in json commit has missing fields + withTempDir { dir => + testJsonCommitParser(dir.toString, (content: Map[String, Map[String, String]]) => { + mapper.writeValueAsString(Map("add" -> content("add").-("path").-("size"))) + "\n" + }) + } + + // Parser should succeed when AddFile in json commit has extra fields + withTempDir { dir => + testJsonCommitParser(dir.toString, (content: Map[String, Map[String, String]]) => { + mapper.writeValueAsString(Map("add" -> content("add"). 
+("random" -> "field"))) + "\n" + }) + } + + // Parser should succeed when AddFile in json commit has mismatched schema + withTempDir { dir => + val json = """{"x": 1, "y": 2, "z": [10, 20]}""" + testJsonCommitParser(dir.toString, (content: Map[String, Map[String, String]]) => { + mapper.writeValueAsString(Map("add" -> content("add").updated("path", json))) + "\n" + }) + } + + // Parser should throw exception when AddFile is a bad json + withTempDir { dir => + val e = intercept[Throwable] { + testJsonCommitParser(dir.toString, (content: Map[String, Map[String, String]]) => { + "bad json{{{" + }) + } + assert(e.getMessage.contains("FAILFAST")) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaMetricsUtils.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaMetricsUtils.scala new file mode 100644 index 00000000000..34d1100592f --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaMetricsUtils.scala @@ -0,0 +1,193 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile, CommitInfo, RemoveFile} +import org.scalatest.Assertions._ + +/** + * Various helper methods to for metric tests. + */ +object DeltaMetricsUtils + { + + /** + * Get operation metrics of the last operation of a table. + * + * @param table The Delta table to query + * @return The operation metrics of the last command. + */ + def getLastOperationMetrics(table: io.delta.tables.DeltaTable): Map[String, Long] = { + table.history().select("operationMetrics").take(1).head.getMap(0) + .asInstanceOf[Map[String, String]].mapValues(_.toLong).toMap + } + + def getLastOperationMetrics(tableName: String): Map[String, Long] = { + getLastOperationMetrics(io.delta.tables.DeltaTable.forName(tableName)) + } + + /** + * Assert that metrics of a Delta operation have the expected values. + * + * @param expectedMetrics The expected metrics the values of which to check. + * @param operationMetrics The operation metrics that were collected from Delta log. + */ + def checkOperationMetrics( + expectedMetrics: Map[String, Long], + operationMetrics: Map[String, Long]): Unit = { + val sep = System.lineSeparator() * 2 + val failMessages = expectedMetrics.flatMap { case (metric, expectedValue) => + // Check missing metrics. + var errMsg = if (!operationMetrics.contains(metric)) { + Some( + s"""The recorded operation metrics does not contain metric: $metric" + | ExpectedMetrics = $expectedMetrics + | ActualMetrics = $operationMetrics + |""".stripMargin) + } else { + None + } + + // Check negative values. + errMsg = errMsg.orElse { + if (operationMetrics(metric) < 0) { + Some(s"Invalid non-positive value for metric $metric: ${operationMetrics(metric)}") + } else { + None + } + } + + // Check unexpected values. 
+ errMsg = errMsg.orElse { + if (expectedValue != operationMetrics(metric)) { + Some( + s"""The recorded metric for $metric does not equal the expected value. + | Expected = ${expectedMetrics(metric)} + | Actual = ${operationMetrics(metric)} + | ExpectedMetrics = $expectedMetrics + | ActualMetrics = $operationMetrics + |""".stripMargin) + } else { + None + } + } + errMsg + }.mkString(sep, sep, sep).trim + assert(failMessages.isEmpty) + } + + /** + * Check that time metrics for a Delta operation are valid. + * + * @param operationMetrics The collected operation metrics from the Delta log. + * @param expectedMetrics The keys of the expected time metrics. Set to None to check for + * common time metrics. + */ + def checkOperationTimeMetrics( + operationMetrics: Map[String, Long], + expectedMetrics: Set[String]): Unit = { + // Validate that all time metrics exist and have a non-negative value. + for (key <- expectedMetrics) { + assert(operationMetrics.contains(key), s"Missing operation metric $key") + val value: Long = operationMetrics(key) + assert(value >= 0, + s"Invalid non-positive value for metric $key: $value") + } + + // Validate that if 'executionTimeMs' exists, is larger than all other time metrics. + if (expectedMetrics.contains("executionTimeMs")) { + val executionTimeMs = operationMetrics("executionTimeMs") + val maxTimeMs = operationMetrics.filterKeys(k => expectedMetrics.contains(k)) + .valuesIterator.max + assert(executionTimeMs == maxTimeMs) + } + } + + /** + * Computes the expected operation metrics from the actions in a Delta commit. + * + * @param deltaLog The Delta log of the table. + * @param version The version of the commit. + * @return A map with the expected operation metrics. + */ + def getOperationMetricsFromCommitActions( + deltaLog: DeltaLog, + version: Long): Map[String, Long] = { + val (_, changes) = deltaLog.getChanges(version).next() + val commitInfo = changes.collect { case ci: CommitInfo => ci }.head + val operationName = commitInfo.operation + + var filesAdded = ArrayBuffer.empty[AddFile] + var filesRemoved = ArrayBuffer.empty[RemoveFile] + val changeFilesAdded = ArrayBuffer.empty[AddCDCFile] + changes.foreach { + case a: AddFile => filesAdded.append(a) + case r: RemoveFile => filesRemoved.append(r) + case c: AddCDCFile => changeFilesAdded.append(c) + case _ => // Nothing + } + + // Filter-out DV updates from files added and removed. 
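+ // A path appearing in both the added and removed sets indicates a deletion vector update on the same data file rather than a rewrite.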
+ val pathsWithDvUpdate = filesAdded.map(_.path).toSet & filesRemoved.map(_.path).toSet + filesAdded = filesAdded.filter(a => !pathsWithDvUpdate.contains(a.path)) + val numFilesAdded = filesAdded.size + val numBytesAdded = filesAdded.map(_.size).sum + + filesRemoved = filesRemoved.filter(r => !pathsWithDvUpdate.contains(r.path)) + val numFilesRemoved = filesRemoved.size + val numBytesRemoved = filesRemoved.map(_.size.getOrElse(0L)).sum + + val numChangeFilesAdded = changeFilesAdded.size + + operationName match { + case "MERGE" => Map( + "numTargetFilesAdded" -> numFilesAdded, + "numTargetFilesRemoved" -> numFilesRemoved, + "numTargetBytesAdded" -> numBytesAdded, + "numTargetBytesRemoved" -> numBytesRemoved, + "numTargetChangeFilesAdded" -> numChangeFilesAdded + ) + case "UPDATE" | "DELETE" => Map( + "numAddedFiles" -> numFilesAdded, + "numRemovedFiles" -> numFilesRemoved, + "numAddedBytes" -> numBytesAdded, + "numRemovedBytes" -> numBytesRemoved, + "numAddedChangeFiles" -> numChangeFilesAdded + ) + case _ => + throw new UnsupportedOperationException(s"Unsupported operation: $operationName") + } + } + + /** + * Checks the provided operation metrics against the actions in a Delta commit. + * + * @param deltaLog The Delta log of the table. + * @param version The version of the commit. + * @param operationMetrics The operation metrics that were collected from Delta log. + */ + def checkOperationMetricsAgainstCommitActions( + deltaLog: DeltaLog, + version: Long, + operationMetrics: Map[String, Long]): Unit = { + checkOperationMetrics( + expectedMetrics = getOperationMetricsFromCommitActions(deltaLog, version), + operationMetrics = operationMetrics) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaNotSupportedDDLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaNotSupportedDDLSuite.scala new file mode 100644 index 00000000000..a759e8b390a --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaNotSupportedDDLSuite.scala @@ -0,0 +1,147 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.util.Locale + +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + + +class DeltaNotSupportedDDLSuite + extends DeltaNotSupportedDDLBase + with SharedSparkSession + with DeltaSQLCommandTest + + +abstract class DeltaNotSupportedDDLBase extends QueryTest + with SQLTestUtils { + + val format = "delta" + + val nonPartitionedTableName = "deltaTbl" + + val partitionedTableName = "partitionedTahoeTbl" + + protected override def beforeAll(): Unit = { + super.beforeAll() + try { + sql(s""" + |CREATE TABLE $nonPartitionedTableName + |USING $format + |AS SELECT 1 as a, 'a' as b + """.stripMargin) + + sql(s""" + |CREATE TABLE $partitionedTableName (a INT, b STRING, p1 INT) + |USING $format + |PARTITIONED BY (p1) + """.stripMargin) + sql(s"INSERT INTO $partitionedTableName SELECT 1, 'A', 2") + } catch { + case NonFatal(e) => + afterAll() + throw e + } + } + + protected override def afterAll(): Unit = { + try { + sql(s"DROP TABLE IF EXISTS $nonPartitionedTableName") + sql(s"DROP TABLE IF EXISTS $partitionedTableName") + } finally { + super.afterAll() + } + } + + + def assertUnsupported(query: String, messages: String*): Unit = { + val allErrMessages = "operation not allowed" +: messages + val e = intercept[AnalysisException] { + sql(query) + } + assert(allErrMessages.exists(err => e.getMessage.toLowerCase(Locale.ROOT).contains(err))) + } + + private def assertIgnored(query: String): Unit = { + val outputStream = new java.io.ByteArrayOutputStream() + Console.withOut(outputStream) { + sql(query) + } + assert(outputStream.toString.contains("The request is ignored")) + } + + test("bucketing is not supported for delta tables") { + withTable("tbl") { + assertUnsupported( + s""" + |CREATE TABLE tbl(a INT, b INT) + |USING $format + |CLUSTERED BY (a) INTO 5 BUCKETS + """.stripMargin) + } + } + + test("ANALYZE TABLE PARTITION") { + assertUnsupported( + s"ANALYZE TABLE $partitionedTableName PARTITION (p1) COMPUTE STATISTICS", + "not supported for v2 tables") + } + + test("ALTER TABLE ADD PARTITION") { + assertUnsupported( + s"ALTER TABLE $partitionedTableName ADD PARTITION (p1=3)", + "does not support partition management") + } + + test("ALTER TABLE DROP PARTITION") { + assertUnsupported( + s"ALTER TABLE $partitionedTableName DROP PARTITION (p1=2)", + "does not support partition management") + } + + test("ALTER TABLE RECOVER PARTITIONS") { + assertUnsupported( + s"ALTER TABLE $partitionedTableName RECOVER PARTITIONS", + "alter table ... recover partitions is not supported for v2 tables") + assertUnsupported( + s"MSCK REPAIR TABLE $partitionedTableName", + "msck repair table is not supported for v2 tables") + } + + test("ALTER TABLE SET SERDEPROPERTIES") { + assertUnsupported( + s"ALTER TABLE $nonPartitionedTableName SET SERDEPROPERTIES (s1=3)", + "alter table ... 
set [serde|serdeproperties] is not supported for v2 tables") + } + + + test("LOAD DATA") { + assertUnsupported( + s"""LOAD DATA LOCAL INPATH '/path/to/home' INTO TABLE $nonPartitionedTableName""", + "not supported for v2 tables") + } + + test("INSERT OVERWRITE DIRECTORY") { + assertUnsupported(s"INSERT OVERWRITE DIRECTORY '/path/to/home' USING $format VALUES (1, 'a')") + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaOptionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaOptionSuite.scala new file mode 100644 index 00000000000..580c86f6660 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaOptionSuite.scala @@ -0,0 +1,432 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.util.Locale + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaOptions.{OVERWRITE_SCHEMA_OPTION, PARTITION_OVERWRITE_MODE_OPTION} +import org.apache.spark.sql.delta.actions.{Action, FileAction} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames +import org.apache.commons.io.FileUtils +import org.apache.parquet.format.CompressionCodec + +import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.internal.SQLConf.PARTITION_OVERWRITE_MODE +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +class DeltaOptionSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + + import testImplicits._ + + + + test("support for setting dataChange to false") { + val tempDir = Utils.createTempDir() + + spark.range(100) + .write + .format("delta") + .save(tempDir.toString) + + val df = spark.read.format("delta").load(tempDir.toString) + + df + .write + .format("delta") + .mode("overwrite") + .option("dataChange", "false") + .save(tempDir.toString) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val version = deltaLog.snapshot.version + val commitActions = deltaLog.store + .read(FileNames.deltaFile(deltaLog.logPath, version), deltaLog.newDeltaHadoopConf()) + .map(Action.fromJson) + val fileActions = commitActions.collect { case a: FileAction => a } + + assert(fileActions.forall(!_.dataChange)) + } + + test("dataChange is by default set to true") { + val tempDir = Utils.createTempDir() + + spark.range(100) + .write + .format("delta") + .save(tempDir.toString) + + val df = spark.read.format("delta").load(tempDir.toString) + + df + .write + .format("delta") + .mode("overwrite") + .save(tempDir.toString) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val version = deltaLog.snapshot.version + val commitActions = deltaLog.store + .read(FileNames.deltaFile(deltaLog.logPath, version), 
deltaLog.newDeltaHadoopConf()) + .map(Action.fromJson) + val fileActions = commitActions.collect { case a: FileAction => a } + + assert(fileActions.forall(_.dataChange)) + } + + test("dataChange is set to false on metadata changing operation") { + withTempDir { tempDir => + // Initialize a table while having dataChange set to false. + val e = intercept[AnalysisException] { + spark.range(100) + .write + .format("delta") + .option("dataChange", "false") + .save(tempDir.getAbsolutePath) + } + assert(e.getMessage === + DeltaErrors.unexpectedDataChangeException("Create a Delta table").getMessage) + spark.range(100) + .write + .format("delta") + .save(tempDir.getAbsolutePath) + + // Adding a new column to the existing table while having dataChange set to false. + val e2 = intercept[AnalysisException] { + val df = spark.read.format("delta").load(tempDir.getAbsolutePath) + df.withColumn("id2", 'id + 1) + .write + .format("delta") + .mode("overwrite") + .option("mergeSchema", "true") + .option("dataChange", "false") + .save(tempDir.getAbsolutePath) + } + assert(e2.getMessage === + DeltaErrors.unexpectedDataChangeException("Change the Delta table schema").getMessage) + + // Overwriting the schema of the existing table while having dataChange as false. + val e3 = intercept[AnalysisException] { + spark.range(50) + .withColumn("id3", 'id + 1) + .write + .format("delta") + .mode("overwrite") + .option("dataChange", "false") + .option("overwriteSchema", "true") + .save(tempDir.getAbsolutePath) + } + assert(e3.getMessage === + DeltaErrors.unexpectedDataChangeException("Overwrite the Delta table schema or " + + "change the partition schema").getMessage) + } + } + + + test("support the maxRecordsPerFile write option: path") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + withTable("maxRecordsPerFile") { + spark.range(100) + .write + .format("delta") + .option("maxRecordsPerFile", 5) + .save(path) + assert(FileUtils.listFiles(tempDir, Array("parquet"), false).size === 20) + } + } + } + + test("support the maxRecordsPerFile write option: external table") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + withTable("maxRecordsPerFile") { + spark.range(100) + .write + .format("delta") + .option("maxRecordsPerFile", 5) + .option("path", path) + .saveAsTable("maxRecordsPerFile") + assert(FileUtils.listFiles(tempDir, Array("parquet"), false).size === 20) + } + } + } + + test("support the maxRecordsPerFile write option: v2 write") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + withTable("maxRecordsPerFile") { + spark.range(100) + .writeTo("maxRecordsPerFile") + .using("delta") + .option("maxRecordsPerFile", 5) + .tableProperty("location", path) + .create() + assert(FileUtils.listFiles(tempDir, Array("parquet"), false).size === 20) + } + } + } + + test("support no compression write option (defaults to snappy)") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + withTable("compression") { + spark.range(100) + .writeTo("compression") + .using("delta") + .tableProperty("location", path) + .create() + assert(FileUtils.listFiles(tempDir, Array("snappy.parquet"), false).size > 0) + } + } + } + + // LZO and BROTLI left out as additional library dependencies needed + val codecsAndSubExtensions = Seq( + CompressionCodec.UNCOMPRESSED -> "", + CompressionCodec.SNAPPY -> "snappy.", + CompressionCodec.GZIP -> "gz.", + CompressionCodec.LZ4 -> "lz4hadoop.", + // CompressionCodec.LZ4_RAW -> "lz4raw.", // Support is not yet available in Spark 3.5 + 
CompressionCodec.ZSTD -> "zstd." + ) + + codecsAndSubExtensions.foreach { case (codec, subExt) => + val codecName = codec.name().toLowerCase(Locale.ROOT) + test(s"support compression codec '$codecName' as write option") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + withTable(s"compression_$codecName") { + spark.range(100) + .writeTo(s"compression_$codecName") + .using("delta") + .option("compression", codecName) + .tableProperty("location", path) + .create() + assert(FileUtils.listFiles(tempDir, Array(s"${subExt}parquet"), false).size > 0) + } + } + } + } + + test("invalid compression write option") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + withTable("compression") { + val e = intercept[IllegalArgumentException] { + spark.range(100) + .writeTo("compression") + .using("delta") + .option("compression", "???") + .tableProperty("location", path) + .create() + } + val expectedMessage = "Codec [???] is not available. Available codecs are " + assert(e.getMessage.startsWith(expectedMessage)) + } + } + } + + test("DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED = true: " + + "partitionOverwriteMode is set to invalid value in options") { + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + val invalidMode = "ADAPTIVE" + val e = intercept[IllegalArgumentException] { + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .option("partitionOverwriteMode", invalidMode) + .save(tempDir.getAbsolutePath) + } + assert(e.getMessage === + DeltaErrors.illegalDeltaOptionException( + PARTITION_OVERWRITE_MODE_OPTION, invalidMode, "must be 'STATIC' or 'DYNAMIC'" + ).getMessage + ) + } + } + } + + test("DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED = false: " + + "partitionOverwriteMode is set to invalid value in options") { + // partitionOverwriteMode is ignored and no error is thrown + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "false") { + withTempDir { tempDir => + val invalidMode = "ADAPTIVE" + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .option("partitionOverwriteMode", invalidMode) + .save(tempDir.getAbsolutePath) + } + } + } + + test("overwriteSchema=true should be invalid with partitionOverwriteMode=dynamic") { + withTempDir { tempDir => + val e = intercept[DeltaIllegalArgumentException] { + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .mode("overwrite") + .format("delta") + .partitionBy("part") + .option(OVERWRITE_SCHEMA_OPTION, "true") + .option(PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getAbsolutePath) + } + } + assert(e.getErrorClass == "DELTA_OVERWRITE_SCHEMA_WITH_DYNAMIC_PARTITION_OVERWRITE") + } + } + + test("overwriteSchema=true should be invalid with partitionOverwriteMode=dynamic, " + + "saveAsTable") { + withTable("temp") { + val e = intercept[DeltaIllegalArgumentException] { + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .mode("overwrite") + .format("delta") + .partitionBy("part") + .option(OVERWRITE_SCHEMA_OPTION, "true") + .option(PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .saveAsTable("temp") + } + } + assert(e.getErrorClass == "DELTA_OVERWRITE_SCHEMA_WITH_DYNAMIC_PARTITION_OVERWRITE") + } + } + + test("Prohibit 
spark.databricks.delta.dynamicPartitionOverwrite.enabled=false in " + + "dynamic partition overwrite mode") { + withTempDir { tempDir => + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "false") { + var e = intercept[DeltaIllegalArgumentException] { + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .option("partitionOverwriteMode", "dynamic") + .save(tempDir.getAbsolutePath) + } + assert(e.getErrorClass == "DELTA_DYNAMIC_PARTITION_OVERWRITE_DISABLED") + withSQLConf(PARTITION_OVERWRITE_MODE.key -> "dynamic") { + e = intercept[DeltaIllegalArgumentException] { + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .save(tempDir.getAbsolutePath) + } + } + assert(e.getErrorClass == "DELTA_DYNAMIC_PARTITION_OVERWRITE_DISABLED") + } + } + } + + for (createOrReplace <- Seq("CREATE OR REPLACE", "REPLACE")) { + test(s"$createOrReplace table command should not respect " + + "dynamic partition overwrite mode") { + withTempDir { tempDir => + Seq(0, 1).toDF + .withColumn("key", $"value" % 2) + .withColumn("stringColumn", lit("string")) + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .save(tempDir.getAbsolutePath) + withSQLConf(PARTITION_OVERWRITE_MODE.key -> "dynamic") { + // Write only to one partition with a different schema type of stringColumn. + sql( + s""" + |$createOrReplace TABLE delta.`${tempDir.getAbsolutePath}` + |USING delta + |PARTITIONED BY (part) + |LOCATION '${tempDir.getAbsolutePath}' + |AS SELECT -1 as value, 0 as part, 0 as stringColumn + |""".stripMargin) + assert(spark.read.format("delta").load(tempDir.getAbsolutePath).count() == 1, + "Table should be fully replaced even with DPO mode enabled") + } + } + } + } + + // Same test as above but using DeltaWriter V2. + test("create or replace table V2 should not respect dynamic partition overwrite mode") { + withTable("temp") { + Seq(0, 1).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .saveAsTable("temp") + withSQLConf(PARTITION_OVERWRITE_MODE.key -> "dynamic") { + // Write to one partition only. + Seq(0).toDF + .withColumn("part", $"value" % 2) + .writeTo("temp") + .using("delta") + .createOrReplace() + assert(spark.read.format("delta").table("temp").count() == 1, + "Table should be fully replaced even with DPO mode enabled") + } + } + } + + // Same test as above but using saveAsTable. + test("saveAsTable with overwrite should respect dynamic partition overwrite mode") { + withTable("temp") { + Seq(0, 1).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .saveAsTable("temp") + // Write to one partition only. + Seq(0).toDF + .withColumn("part", $"value" % 2) + .write + .mode("overwrite") + .option(PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .partitionBy("part") + .format("delta") + .saveAsTable("temp") + assert(spark.read.format("delta").table("temp").count() == 2, + "Table should keep the original partition with DPO mode enabled.") + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaParquetFileFormatSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaParquetFileFormatSuite.scala new file mode 100644 index 00000000000..45578fdc5d5 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaParquetFileFormatSuite.scala @@ -0,0 +1,224 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta
+
+import org.apache.spark.sql.delta.DeltaParquetFileFormat.DeletionVectorDescriptorWithFilterType
+import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN
+import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor
+import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArray, RoaringBitmapArrayFormat}
+import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore
+import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore.pathToEscapedString
+import org.apache.spark.sql.delta.test.DeltaSQLCommandTest
+import org.apache.spark.sql.delta.test.DeltaTestImplicits._
+import org.apache.spark.sql.delta.util.PathWithFileSystem
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
+import org.apache.parquet.format.converter.ParquetMetadataConverter
+import org.apache.parquet.hadoop.ParquetFileReader
+
+import org.apache.spark.sql.{DataFrame, Dataset, QueryTest}
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.util.{SerializableConfiguration, Utils}
+
+class DeltaParquetFileFormatSuite extends QueryTest
+ with SharedSparkSession with DeltaSQLCommandTest {
+ import testImplicits._
+
+ // Reads with deletion vectors take separate code paths depending on whether the vectorized
+ // Parquet reader is enabled. Test both combinations.
+ for {
+ readIsRowDeletedCol <- BOOLEAN_DOMAIN
+ readRowIndexCol <- BOOLEAN_DOMAIN
+ rowIndexFilterType <- Seq(RowIndexFilterType.IF_CONTAINED, RowIndexFilterType.IF_NOT_CONTAINED)
+ // This combination doesn't need to be tested as it is the same as the regular read path without DVs.
+ if readIsRowDeletedCol || readRowIndexCol
+ } {
+ testWithBothParquetReaders(
+ "read DV metadata columns: " +
+ s"with isRowDeletedCol=$readIsRowDeletedCol, " +
+ s"with rowIndexCol=$readRowIndexCol, " +
+ s"with rowIndexFilterType=$rowIndexFilterType") {
+ withTempDir { tempDir =>
+ val tablePath = tempDir.toString
+
+ // Generate a table with one parquet file containing multiple row groups.
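+ // generateData (defined below) writes 20,000 rows into a single data file with a small
+ // Parquet block size, so the file ends up with more than one row group (asserted below).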
+ generateData(tablePath) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val metadata = deltaLog.snapshot.metadata + + // Add additional field that has the deleted row flag to existing data schema + var readingSchema = metadata.schema + if (readIsRowDeletedCol) { + readingSchema = readingSchema.add(DeltaParquetFileFormat.IS_ROW_DELETED_STRUCT_FIELD) + } + if (readRowIndexCol) { + readingSchema = readingSchema.add(DeltaParquetFileFormat.ROW_INDEX_STRUCT_FILED) + } + + // Fetch the only file in the DeltaLog snapshot + val addFile = deltaLog.snapshot.allFiles.collect()(0) + val addFilePath = new Path(tempDir.toString, addFile.path) + assertParquetHasMultipleRowGroups(addFilePath) + + val dv = generateDV(tablePath, 0, 200, 300, 756, 10352, 19999) + + val fs = addFilePath.getFileSystem(hadoopConf) + val broadcastDvMap = spark.sparkContext.broadcast( + Map(fs.getFileStatus(addFilePath).getPath().toUri -> + DeletionVectorDescriptorWithFilterType(dv, rowIndexFilterType)) + ) + + val broadcastHadoopConf = spark.sparkContext.broadcast( + new SerializableConfiguration(hadoopConf)) + + val deltaParquetFormat = new DeltaParquetFileFormat( + deltaLog.snapshot.protocol, + metadata, + isSplittable = false, + disablePushDowns = true, + Some(tablePath), + if (readIsRowDeletedCol) Some(broadcastDvMap) else None, + if (readIsRowDeletedCol) Some(broadcastHadoopConf) else None) + + val fileIndex = DeltaLogFileIndex(deltaParquetFormat, fs, addFilePath :: Nil) + + val relation = HadoopFsRelation( + fileIndex, + fileIndex.partitionSchema, + readingSchema, + bucketSpec = None, + deltaParquetFormat, + options = Map.empty)(spark) + val plan = LogicalRelation(relation) + + if (readIsRowDeletedCol) { + // Select some rows that are deleted and some rows not deleted + // Deleted row `value`: 0, 200, 300, 756, 10352, 19999 + // Not deleted row `value`: 7, 900 + val (deletedColumnValue, notDeletedColumnValue) = rowIndexFilterType match { + case RowIndexFilterType.IF_CONTAINED => (1, 0) + case RowIndexFilterType.IF_NOT_CONTAINED => (0, 1) + case _ => (-1, -1) // Invalid, expecting the test to fail. 
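+ // (IF_CONTAINED expects rows whose index is in the DV to be flagged as deleted;
+ // IF_NOT_CONTAINED expects the complement to be flagged.)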
}
+ checkDatasetUnorderly(
+ Dataset.ofRows(spark, plan)
+ .filter("value in (0, 7, 200, 300, 756, 900, 10352, 19999)")
+ .select("value", DeltaParquetFileFormat.IS_ROW_DELETED_COLUMN_NAME)
+ .as[(Int, Int)],
+ (0, deletedColumnValue),
+ (7, notDeletedColumnValue),
+ (200, deletedColumnValue),
+ (300, deletedColumnValue),
+ (756, deletedColumnValue),
+ (900, notDeletedColumnValue),
+ (10352, deletedColumnValue),
+ (19999, deletedColumnValue))
+ }
+
+ if (readRowIndexCol) {
+ def rowIndexes(df: DataFrame): Set[Long] = {
+ val colIndex = if (readIsRowDeletedCol) 2 else 1
+ df.collect().map(_.getLong(colIndex)).toSet
+ }
+
+ val df = Dataset.ofRows(spark, plan)
+ assert(rowIndexes(df) === Seq.range(0, 20000).toSet)
+
+ assert(
+ rowIndexes(
+ df.filter("value in (0, 7, 200, 300, 756, 900, 10352, 19999)")) ===
+ Seq(0, 7, 200, 300, 756, 900, 10352, 19999).toSet)
+ }
+ }
+ }
+ }
+
+ /** Helper method to run the test with vectorized and non-vectorized Parquet readers */
+ private def testWithBothParquetReaders(name: String)(f: => Any): Unit = {
+ for {
+ enableVectorizedParquetReader <- BOOLEAN_DOMAIN
+ readColumnarBatchAsRows <- BOOLEAN_DOMAIN
+ // Don't run for the combination (vectorizedReader=false, readColumnarBatchAsRows = false)
+ // as the non-vectorized reader always generates and returns rows, unlike the vectorized
+ // reader, which internally generates columnar batches but can return either columnar batches
+ // or rows from the columnar batch depending upon the config.
+ if enableVectorizedParquetReader || readColumnarBatchAsRows
+ } {
+ test(s"$name, with vectorized Parquet reader=$enableVectorizedParquetReader, " +
+ s"with readColumnarBatchAsRows=$readColumnarBatchAsRows") {
+ // Set the max code gen fields to 0 to force the vectorized Parquet reader to generate rows
+ // from columnar batches.
+ val codeGenMaxFields = if (readColumnarBatchAsRows) "0" else "100"
+ withSQLConf(
+ "spark.sql.parquet.enableVectorizedReader" -> enableVectorizedParquetReader.toString,
+ "spark.sql.codegen.maxFields" -> codeGenMaxFields) {
+ f
+ }
+ }
+ }
+ }
+
+ /** Helper method to generate a table with a single Parquet file containing multiple row groups */
+ private def generateData(tablePath: String): Unit = {
+ // This is to generate a Parquet file with two row groups
+ hadoopConf().set("parquet.block.size", (1024 * 50).toString)
+
+ // Keep the number of partitions to 1 to generate a single Parquet data file
+ val df = Seq.range(0, 20000).toDF().repartition(1)
+ df.write.format("delta").mode("append").save(tablePath)
+
+ // Set the DFS block size to be less than the Parquet row group size to allow
+ // the file split logic to kick in; splitting is nevertheless disabled because
+ // DeltaParquetFileFormat turns off file splitting when DVs are present.
+ hadoopConf().set("dfs.block.size", (1024 * 20).toString) + } + + /** Utility method that generates deletion vector based on the given row indexes */ + private def generateDV(tablePath: String, rowIndexes: Long *): DeletionVectorDescriptor = { + val bitmap = RoaringBitmapArray(rowIndexes: _*) + val tableWithFS = PathWithFileSystem.withConf(new Path(tablePath), hadoopConf) + val dvPath = dvStore.generateUniqueNameInTable(tableWithFS) + val serializedBitmap = bitmap.serializeAsByteArray(RoaringBitmapArrayFormat.Portable) + val dvRange = Utils.tryWithResource(dvStore.createWriter(dvPath)) { writer => + writer.write(serializedBitmap) + } + DeletionVectorDescriptor.onDiskWithAbsolutePath( + pathToEscapedString(dvPath.makeQualified().path), + dvRange.length, + rowIndexes.size, + Some(dvRange.offset)) + } + + private def assertParquetHasMultipleRowGroups(filePath: Path): Unit = { + val parquetMetadata = ParquetFileReader.readFooter( + hadoopConf, + filePath, + ParquetMetadataConverter.NO_FILTER) + assert(parquetMetadata.getBlocks.size() > 1) + } + + private def hadoopConf(): Configuration = { + // scalastyle:off hadoopconfiguration + // This is to generate a Parquet file with two row groups + spark.sparkContext.hadoopConfiguration + // scalastyle:on hadoopconfiguration + } + + lazy val dvStore: DeletionVectorStore = DeletionVectorStore.createInstance(hadoopConf) +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaProtocolVersionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaProtocolVersionSuite.scala new file mode 100644 index 00000000000..14b7ca78672 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaProtocolVersionSuite.scala @@ -0,0 +1,3623 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.File +import java.nio.file.{Files, Paths, StandardOpenOption} +import java.util.Locale +import java.util.concurrent.TimeUnit + +import com.databricks.spark.util.{Log4jUsageLogger, MetricDefinitions} +import org.apache.spark.sql.delta.DeltaOperations.ManualUpdate +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils._ +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands.{AlterTableDropFeatureDeltaCommand, AlterTableSetPropertiesDeltaCommand, AlterTableUnsetPropertiesDeltaCommand} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames +import org.apache.spark.sql.delta.util.FileNames.{deltaFile, DeltaFile} +import org.apache.spark.sql.delta.util.JsonUtils + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, QueryTest, SaveMode} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType +import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.util.ManualClock + +trait DeltaProtocolVersionSuiteBase extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + + // `.schema` generates NOT NULL columns which requires writer protocol 2. We convert all to + // NULLable to avoid silent writer protocol version bump. + private lazy val testTableSchema = spark.range(1).schema.asNullable + + // This is solely a test hook. Users cannot create new Delta tables with protocol lower than + // that of their current version. 
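+ // It writes the Protocol and Metadata actions directly into version 0 of the log,
+ // bypassing the commit path, so tests can stage arbitrary protocol versions.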
+ protected def createTableWithProtocol( + protocol: Protocol, + path: File, + schema: StructType = testTableSchema): DeltaLog = { + val log = DeltaLog.forTable(spark, path) + log.ensureLogDirectoryExist() + log.store.write( + deltaFile(log.logPath, 0), + Iterator(Metadata(schemaString = schema.json).json, protocol.json), + overwrite = false, + log.newDeltaHadoopConf()) + log.update() + log + } + + test("protocol for empty folder") { + def testEmptyFolder( + readerVersion: Int, + writerVersion: Int, + features: Iterable[TableFeature] = Seq.empty, + sqlConfs: Iterable[(String, String)] = Seq.empty, + expectedProtocol: Protocol): Unit = { + withTempDir { path => + val configs = Seq( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> readerVersion.toString, + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> writerVersion.toString) ++ + features.map(defaultPropertyKey(_) -> FEATURE_PROP_ENABLED) ++ + sqlConfs + withSQLConf(configs: _*) { + val log = DeltaLog.forTable(spark, path) + assert(log.update().protocol === expectedProtocol) + } + } + } + + testEmptyFolder(1, 1, expectedProtocol = Protocol(1, 1)) + testEmptyFolder(1, 2, expectedProtocol = Protocol(1, 2)) + testEmptyFolder( + readerVersion = 1, + writerVersion = 1, + sqlConfs = Seq((DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true")), + expectedProtocol = Protocol(1, 1).merge(ChangeDataFeedTableFeature.minProtocolVersion)) + testEmptyFolder( + readerVersion = 1, + writerVersion = 1, + features = Seq(TestLegacyReaderWriterFeature), + expectedProtocol = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyReaderWriterFeature)) + testEmptyFolder( + readerVersion = 1, + writerVersion = 1, + features = Seq(TestWriterFeature), + expectedProtocol = Protocol(1, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestWriterFeature)) + testEmptyFolder( + readerVersion = TABLE_FEATURES_MIN_READER_VERSION, + writerVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + features = Seq(TestLegacyReaderWriterFeature), + expectedProtocol = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyReaderWriterFeature)) + testEmptyFolder( + readerVersion = 1, + writerVersion = 1, + features = Seq(TestWriterFeature), + sqlConfs = Seq((DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true")), + expectedProtocol = Protocol( + 1, + TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures(Seq(TestWriterFeature, ChangeDataFeedTableFeature))) + testEmptyFolder( + readerVersion = 1, + writerVersion = 1, + features = Seq(TestLegacyReaderWriterFeature), + sqlConfs = Seq((DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true")), + expectedProtocol = Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures(Seq(TestLegacyReaderWriterFeature, ChangeDataFeedTableFeature))) + testEmptyFolder( + readerVersion = 1, + writerVersion = 1, + features = Seq(TestWriterFeature, TestLegacyReaderWriterFeature), + expectedProtocol = Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures(Seq(TestWriterFeature, TestLegacyReaderWriterFeature))) + } + + test("upgrade to current version") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(1, 1), path) + assert(log.snapshot.protocol === Protocol(1, 1)) + log.upgradeProtocol(Action.supportedProtocolVersion()) + assert(log.snapshot.protocol === Action.supportedProtocolVersion()) + } + } + + test("upgrade to a 
version with DeltaTable API") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(0, 0), path) + assert(log.snapshot.protocol === Protocol(0, 0)) + val table = io.delta.tables.DeltaTable.forPath(spark, path.getCanonicalPath) + table.upgradeTableProtocol(1, 1) + assert(log.snapshot.protocol === Protocol(1, 1)) + table.upgradeTableProtocol(1, 2) + assert(log.snapshot.protocol === Protocol(1, 2)) + table.upgradeTableProtocol(1, 3) + assert(log.snapshot.protocol === Protocol(1, 3)) + intercept[DeltaTableFeatureException] { + table.upgradeTableProtocol( + TABLE_FEATURES_MIN_READER_VERSION, + writerVersion = 1) + } + intercept[IllegalArgumentException] { + table.upgradeTableProtocol( + TABLE_FEATURES_MIN_READER_VERSION + 1, + TABLE_FEATURES_MIN_WRITER_VERSION) + } + intercept[IllegalArgumentException] { + table.upgradeTableProtocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION + 1) + } + } + } + + test("upgrade to support table features - no feature") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(1, 1), path) + assert(log.snapshot.protocol === Protocol(1, 1)) + val table = io.delta.tables.DeltaTable.forPath(spark, path.getCanonicalPath) + table.upgradeTableProtocol(1, TABLE_FEATURES_MIN_WRITER_VERSION) + assert( + log.snapshot.protocol === Protocol( + minReaderVersion = 1, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = None, + writerFeatures = Some(Set()))) + table.upgradeTableProtocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION) + assert( + log.snapshot.protocol === Protocol( + minReaderVersion = TABLE_FEATURES_MIN_READER_VERSION, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = Some(Set()), + writerFeatures = Some(Set()))) + } + } + + test("upgrade to support table features - writer-only feature") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(1, 2), path) + assert(log.snapshot.protocol === Protocol(1, 2)) + val table = io.delta.tables.DeltaTable.forPath(spark, path.getCanonicalPath) + table.upgradeTableProtocol(1, TABLE_FEATURES_MIN_WRITER_VERSION) + assert( + log.snapshot.protocol === Protocol( + minReaderVersion = 1, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = None, + writerFeatures = + Some(Set(AppendOnlyTableFeature, InvariantsTableFeature).map(_.name)))) + table.upgradeTableProtocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION) + assert( + log.snapshot.protocol === Protocol( + minReaderVersion = TABLE_FEATURES_MIN_READER_VERSION, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = Some(Set()), + writerFeatures = + Some(Set(AppendOnlyTableFeature, InvariantsTableFeature).map(_.name)))) + } + } + + test("upgrade to support table features - many features") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(2, 5), path) + assert(log.snapshot.protocol === Protocol(2, 5)) + val table = io.delta.tables.DeltaTable.forPath(spark, path.getCanonicalPath) + table.upgradeTableProtocol(2, TABLE_FEATURES_MIN_WRITER_VERSION) + assert( + log.snapshot.protocol === Protocol( + minReaderVersion = 2, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = None, + writerFeatures = Some(Set( + AppendOnlyTableFeature, + ChangeDataFeedTableFeature, + CheckConstraintsTableFeature, + ColumnMappingTableFeature, + GeneratedColumnsTableFeature, + InvariantsTableFeature, + TestLegacyWriterFeature, + 
TestRemovableLegacyWriterFeature, + TestLegacyReaderWriterFeature, + TestRemovableLegacyReaderWriterFeature) + .map(_.name)))) + spark.sql( + s"ALTER TABLE delta.`${path.getPath}` SET TBLPROPERTIES (" + + s" delta.feature.${TestWriterFeature.name}='enabled'" + + s")") + table.upgradeTableProtocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION) + assert( + log.snapshot.protocol === Protocol( + minReaderVersion = TABLE_FEATURES_MIN_READER_VERSION, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = Some(Set()), + writerFeatures = Some( + Set( + AppendOnlyTableFeature, + ChangeDataFeedTableFeature, + CheckConstraintsTableFeature, + ColumnMappingTableFeature, + GeneratedColumnsTableFeature, + InvariantsTableFeature, + TestLegacyWriterFeature, + TestRemovableLegacyWriterFeature, + TestLegacyReaderWriterFeature, + TestRemovableLegacyReaderWriterFeature, + TestWriterFeature) + .map(_.name)))) + } + } + + test("protocol upgrade using SQL API") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(1, 2), path) + + assert(log.snapshot.protocol === Protocol(1, 2)) + sql( + s"ALTER TABLE delta.`${path.getCanonicalPath}` " + + "SET TBLPROPERTIES (delta.minWriterVersion = 3)") + assert(log.snapshot.protocol === Protocol(1, 3)) + assertPropertiesAndShowTblProperties(log) + sql(s"ALTER TABLE delta.`${path.getCanonicalPath}` " + + s"SET TBLPROPERTIES (delta.minWriterVersion=$TABLE_FEATURES_MIN_WRITER_VERSION)") + assert( + log.snapshot.protocol === Protocol( + minReaderVersion = 1, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = None, + writerFeatures = Some( + Set(AppendOnlyTableFeature, CheckConstraintsTableFeature, InvariantsTableFeature) + .map(_.name)))) + assertPropertiesAndShowTblProperties(log, tableHasFeatures = true) + sql(s"ALTER TABLE delta.`${path.getCanonicalPath}` " + + s"SET TBLPROPERTIES (delta.minReaderVersion=$TABLE_FEATURES_MIN_READER_VERSION)") + assert( + log.snapshot.protocol === Protocol( + minReaderVersion = TABLE_FEATURES_MIN_READER_VERSION, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = Some(Set()), + writerFeatures = Some( + Set(AppendOnlyTableFeature, CheckConstraintsTableFeature, InvariantsTableFeature) + .map(_.name)))) + assertPropertiesAndShowTblProperties(log, tableHasFeatures = true) + } + } + + test("overwrite keeps the same protocol version") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(0, 0), path) + spark.range(1) + .write + .format("delta") + .mode("overwrite") + .save(path.getCanonicalPath) + log.update() + assert(log.snapshot.protocol === Protocol(0, 0)) + } + } + + test("overwrite keeps the same table properties") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(0, 0), path) + spark.sql( + s"ALTER TABLE delta.`${path.getCanonicalPath}` SET TBLPROPERTIES ('myProp'='true')") + spark + .range(1) + .write + .format("delta") + .option("anotherProp", "true") + .mode("overwrite") + .save(path.getCanonicalPath) + log.update() + assert(log.snapshot.metadata.configuration.size === 1) + assert(log.snapshot.metadata.configuration("myProp") === "true") + } + } + + test("overwrite keeps the same protocol version and features") { + withTempDir { path => + val protocol = Protocol(0, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(AppendOnlyTableFeature) + val log = createTableWithProtocol(protocol, path) + spark + .range(1) + .write + .format("delta") + .mode("overwrite") + .save(path.getCanonicalPath) + 
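+ // Refresh the snapshot; the overwrite must keep both the protocol version and the feature.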
log.update() + assert(log.snapshot.protocol === protocol) + } + } + + test("overwrite with additional configs keeps the same protocol version and features") { + withTempDir { path => + val protocol = Protocol(1, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(AppendOnlyTableFeature) + val log = createTableWithProtocol(protocol, path) + spark + .range(1) + .write + .format("delta") + .option("delta.feature.testWriter", "enabled") + .option("delta.feature.testReaderWriter", "enabled") + .mode("overwrite") + .save(path.getCanonicalPath) + log.update() + assert(log.snapshot.protocol === protocol) + } + } + + test("overwrite with additional session defaults keeps the same protocol version and features") { + withTempDir { path => + val protocol = Protocol(1, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(AppendOnlyTableFeature) + val log = createTableWithProtocol(protocol, path) + withSQLConf( + s"$DEFAULT_FEATURE_PROP_PREFIX${TestLegacyWriterFeature.name}" -> "enabled") { + spark + .range(1) + .write + .format("delta") + .option("delta.feature.testWriter", "enabled") + .option("delta.feature.testReaderWriter", "enabled") + .mode("overwrite") + .save(path.getCanonicalPath) + } + log.update() + assert(log.snapshot.protocol === protocol) + } + } + + test("access with protocol too high") { + withTempDir { path => + val log = DeltaLog.forTable(spark, path) + log.ensureLogDirectoryExist() + log.store.write( + deltaFile(log.logPath, 0), + Iterator(Metadata().json, Protocol(Integer.MAX_VALUE, Integer.MAX_VALUE).json), + overwrite = false, + log.newDeltaHadoopConf()) + intercept[InvalidProtocolVersionException] { + spark.range(1).write.format("delta").save(path.getCanonicalPath) + } + } + } + + test("Vacuum checks the write protocol") { + withTempDir { path => + spark.range(10).write.format("delta").save(path.getCanonicalPath) + val log = DeltaLog.forTable(spark, path) + + sql(s"INSERT INTO delta.`${path.getCanonicalPath}` VALUES (10)") + val vacuumCommandsToTry = Seq( + s"vacuum delta.`${path.getCanonicalPath}` RETAIN 10000 HOURS", + s"vacuum delta.`${path.getCanonicalPath}` RETAIN 10000 HOURS DRY RUN" + ) + // Both vacuum and vacuum dry run works as expected + vacuumCommandsToTry.foreach(spark.sql(_).collect()) + + val snapshot = log.update() + val newProtocol = Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION).withWriterFeatures(Seq("newUnsupportedWriterFeature")) + log.store.write( + deltaFile(log.logPath, snapshot.version + 1), + Iterator(Metadata().json, newProtocol.json), + overwrite = false, + log.newDeltaHadoopConf()) + + // Both vacuum and vacuum dry run fails as expected + vacuumCommandsToTry.foreach { command => + intercept[DeltaUnsupportedTableFeatureException] { + spark.sql(command).collect() + } + } + } + } + + test("InvalidProtocolVersionException - error message with protocol too high - table path") { + withTempDir { path => + spark.range(1).write.format("delta").save(path.getCanonicalPath) + val (deltaLog, snapshot) = DeltaLog.forTableWithSnapshot(spark, path.getCanonicalPath) + + var tableReaderVersion = 4 + var tableWriterVersion = 7 + var version = snapshot.version + 1 + untrackedChangeProtocolVersion(deltaLog, version, tableReaderVersion, tableWriterVersion) + + val exceptionRead = intercept[InvalidProtocolVersionException] { + spark.read.format("delta").load(path.getCanonicalPath) + } + + validateInvalidProtocolVersionException( + exceptionRead, + deltaLog.dataPath.toString, + tableReaderVersion, + tableWriterVersion) + + tableReaderVersion = 
3 + tableWriterVersion = 8 + version = version + 1 + untrackedChangeProtocolVersion(deltaLog, version, tableReaderVersion, tableWriterVersion) + + val exceptionWrite = intercept[InvalidProtocolVersionException] { + spark.range(1).write + .mode("append") + .option("mergeSchema", "true") + .format("delta") + .save(path.getCanonicalPath) + } + + validateInvalidProtocolVersionException( + exceptionWrite, + deltaLog.dataPath.toString, + tableReaderVersion, + tableWriterVersion) + } + } + + def testInvalidProtocolErrorMessageWithTableName(warm: Boolean): Unit = { + val protocolTableName = "mytableprotocoltoohigh" + withTable(protocolTableName) { + spark.range(1).write.format("delta").saveAsTable(protocolTableName) + val (deltaLog, snapshot) = DeltaLog.forTableWithSnapshot( + spark, + TableIdentifier(protocolTableName)) + + var tableReaderVersion = 4 + var tableWriterVersion = 7 + var version = snapshot.version + 1 + untrackedChangeProtocolVersion(deltaLog, version, tableReaderVersion, tableWriterVersion) + if (!warm) { + DeltaLog.clearCache() + } + + val exceptionRead = intercept[InvalidProtocolVersionException] { + spark.read.format("delta").table(protocolTableName) + } + + var pathInErrorMessage = "default." + protocolTableName + + validateInvalidProtocolVersionException( + exceptionRead, + pathInErrorMessage, + tableReaderVersion, + tableWriterVersion) + + tableReaderVersion = 3 + tableWriterVersion = 8 + version = version + 1 + untrackedChangeProtocolVersion(deltaLog, version, tableReaderVersion, tableWriterVersion) + if (!warm) { + DeltaLog.clearCache() + } + + val exceptionWrite = intercept[InvalidProtocolVersionException] { + spark.range(1).write + .mode("append") + .option("mergeSchema", "true") + .format("delta") + .saveAsTable(protocolTableName) + } + + validateInvalidProtocolVersionException( + exceptionWrite, + pathInErrorMessage, + tableReaderVersion, + tableWriterVersion) + + // Restore the protocol version or the clean-up fails + version = version + 1 + untrackedChangeProtocolVersion(deltaLog, version, 1, 2) + } + } + + test("InvalidProtocolVersionException - error message with table name - warm") { + testInvalidProtocolErrorMessageWithTableName(true) + } + + test("InvalidProtocolVersionException - error message with table name - cold") { + testInvalidProtocolErrorMessageWithTableName(false) + } + + test("InvalidProtocolVersionException - " + + "incompatible protocol change during the transaction - table name") { + for (incompatibleProtocol <- Seq( + Protocol(minReaderVersion = Int.MaxValue), + Protocol(minWriterVersion = Int.MaxValue), + Protocol(minReaderVersion = Int.MaxValue, minWriterVersion = Int.MaxValue) + )) { + val tableName = "mytableprotocoltoohigh" + withTable(tableName) { + spark.range(0).write.format("delta").saveAsTable(tableName) + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tableName)) + val catalogTable = DeltaTableV2(spark, TableIdentifier(tableName)).catalogTable + val txn = deltaLog.startTransaction(catalogTable) + val currentVersion = txn.snapshot.version + untrackedChangeProtocolVersion(deltaLog, currentVersion + 1, incompatibleProtocol) + + // Should detect the above incompatible protocol change and fail + val exception = intercept[InvalidProtocolVersionException] { + txn.commit(AddFile("test", Map.empty, 1, 1, dataChange = true) :: Nil, ManualUpdate) + } + + var pathInErrorMessage = "default." 
+ tableName + + validateInvalidProtocolVersionException( + exception, + pathInErrorMessage, + incompatibleProtocol.minReaderVersion, + incompatibleProtocol.minWriterVersion) + } + } + } + + private def untrackedChangeProtocolVersion( + log: DeltaLog, + version: Long, + tableProtocolReaderVersion: Int, + tableProtocolWriterVersion: Int) + { + untrackedChangeProtocolVersion( + log, + version, + Protocol(tableProtocolReaderVersion, tableProtocolWriterVersion)) + } + + private def untrackedChangeProtocolVersion( + log: DeltaLog, + version: Long, + protocol: Protocol): Unit = { + log.store.write( + deltaFile(log.logPath, version), + Iterator( + Metadata().json, + protocol.json), + overwrite = false, + log.newDeltaHadoopConf()) + } + + def validateInvalidProtocolVersionException( + exception: InvalidProtocolVersionException, + tableNameOrPath: String, + readerRequiredVersion: Int, + writerRequiredVersion: Int): Unit = { + assert(exception.getErrorClass == "DELTA_INVALID_PROTOCOL_VERSION") + assert(exception.tableNameOrPath == tableNameOrPath) + assert(exception.readerRequiredVersion == readerRequiredVersion) + assert(exception.writerRequiredVersion == writerRequiredVersion) + } + + test("DeltaUnsupportedTableFeatureException - error message - table path") { + withTempDir { path => + spark.range(1).write.format("delta").save(path.getCanonicalPath) + val (deltaLog, snapshot) = DeltaLog.forTableWithSnapshot(spark, path.getCanonicalPath) + + var version = snapshot.version + 1 + val invalidReaderFeatures = Seq("NonExistingReaderFeature1", "NonExistingReaderFeature2") + val protocolReaderFeatures = Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION) + .withReaderFeatures(invalidReaderFeatures) + untrackedChangeProtocolVersion(deltaLog, version, protocolReaderFeatures) + + val exceptionRead = intercept[DeltaUnsupportedTableFeatureException] { + spark.read.format("delta").load(path.getCanonicalPath) + } + + validateUnsupportedTableReadFeatureException( + exceptionRead, + deltaLog.dataPath.toString, + invalidReaderFeatures) + + version = version + 1 + val invalidWriterFeatures = Seq("NonExistingWriterFeature1", "NonExistingWriterFeature2") + val protocolWriterFeatures = Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION) + .withWriterFeatures(invalidWriterFeatures) + untrackedChangeProtocolVersion(deltaLog, version, protocolWriterFeatures) + + val exceptionWrite = intercept[DeltaUnsupportedTableFeatureException] { + spark.range(1).write + .mode("append") + .option("mergeSchema", "true") + .format("delta") + .save(path.getCanonicalPath) + } + + validateUnsupportedTableWriteFeatureException( + exceptionWrite, + deltaLog.dataPath.toString, + invalidWriterFeatures) + } + } + + def testTableFeatureErrorMessageWithTableName(warm: Boolean): Unit = { + val featureTable = "mytablefeaturesnotsupported" + withTable(featureTable) { + spark.range(1).write.format("delta").saveAsTable(featureTable) + val (deltaLog, snapshot) = DeltaLog.forTableWithSnapshot(spark, TableIdentifier(featureTable)) + + var version = snapshot.version + 1 + val invalidReaderFeatures = Seq("NonExistingReaderFeature1", "NonExistingReaderFeature2") + val protocolReaderFeatures = Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION) + .withReaderFeatures(invalidReaderFeatures) + untrackedChangeProtocolVersion(deltaLog, version, protocolReaderFeatures) + if (!warm) { + DeltaLog.clearCache() + } + + val exceptionRead = 
intercept[DeltaUnsupportedTableFeatureException] { + spark.read.format("delta").table(featureTable) + } + val pathInErrorMessage = "default." + featureTable + + validateUnsupportedTableReadFeatureException( + exceptionRead, + pathInErrorMessage, + invalidReaderFeatures) + + version = version + 1 + val invalidWriterFeatures = Seq("NonExistingWriterFeature1", "NonExistingWriterFeature2") + val protocolWriterFeatures = Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION) + .withWriterFeatures(invalidWriterFeatures) + untrackedChangeProtocolVersion(deltaLog, version, protocolWriterFeatures) + if (!warm) { + DeltaLog.clearCache() + } + + val exceptionWrite = intercept[DeltaUnsupportedTableFeatureException] { + spark.range(1).write + .mode("append") + .option("mergeSchema", "true") + .format("delta") + .saveAsTable(featureTable) + } + + validateUnsupportedTableWriteFeatureException( + exceptionWrite, + pathInErrorMessage, + invalidWriterFeatures) + + // Restore the protocol version or the clean-up fails + version = version + 1 + untrackedChangeProtocolVersion(deltaLog, version, 1, 2) + } + } + + test("DeltaUnsupportedTableFeatureException - error message with table name - warm") { + testTableFeatureErrorMessageWithTableName(warm = true) + } + + test("DeltaUnsupportedTableFeatureException - error message with table name - cold") { + testTableFeatureErrorMessageWithTableName(warm = false) + } + + test("DeltaUnsupportedTableFeatureException - " + + "incompatible protocol change during the transaction - table name") { + for ((incompatibleProtocol, read) <- Seq( + (Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION) + .withReaderFeatures(Seq("NonExistingReaderFeature1", "NonExistingReaderFeature2")), + true), + (Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION) + .withWriterFeatures(Seq("NonExistingWriterFeature1", "NonExistingWriterFeature2")), + false) + )) { + val tableName = "mytablefeaturesnotsupported" + withTable(tableName) { + spark.range(0).write.format("delta").saveAsTable(tableName) + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tableName)) + val catalogTable = DeltaTableV2(spark, TableIdentifier(tableName)).catalogTable + val txn = deltaLog.startTransaction(catalogTable) + val currentVersion = txn.snapshot.version + untrackedChangeProtocolVersion(deltaLog, currentVersion + 1, incompatibleProtocol) + + // Should detect the above incompatible feature and fail + val exception = intercept[DeltaUnsupportedTableFeatureException] { + txn.commit(AddFile("test", Map.empty, 1, 1, dataChange = true) :: Nil, ManualUpdate) + } + + var pathInErrorMessage = "default." 
+ tableName + + read match { + case true => + validateUnsupportedTableReadFeatureException( + exception, + pathInErrorMessage, + incompatibleProtocol.readerFeatures.get) + case false => + validateUnsupportedTableWriteFeatureException( + exception, + pathInErrorMessage, + incompatibleProtocol.writerFeatures.get) + } + } + } + } + + def validateUnsupportedTableReadFeatureException( + exception: DeltaUnsupportedTableFeatureException, + tableNameOrPath: String, + unsupportedFeatures: Iterable[String]): Unit = { + validateUnsupportedTableFeatureException( + exception, + "DELTA_UNSUPPORTED_FEATURES_FOR_READ", + tableNameOrPath, + unsupportedFeatures) + } + + def validateUnsupportedTableWriteFeatureException( + exception: DeltaUnsupportedTableFeatureException, + tableNameOrPath: String, + unsupportedFeatures: Iterable[String]): Unit = { + validateUnsupportedTableFeatureException( + exception, + "DELTA_UNSUPPORTED_FEATURES_FOR_WRITE", + tableNameOrPath, + unsupportedFeatures) + } + + def validateUnsupportedTableFeatureException( + exception: DeltaUnsupportedTableFeatureException, + errorClass: String, + tableNameOrPath: String, + unsupportedFeatures: Iterable[String]): Unit = { + assert(exception.getErrorClass == errorClass) + assert(exception.tableNameOrPath == tableNameOrPath) + assert(exception.unsupported.toSeq.sorted == unsupportedFeatures.toSeq.sorted) + } + + test("protocol downgrade is a no-op") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(2, 3), path) + assert(log.update().protocol === Protocol(2, 3)) + + { // DeltaLog API. This API is internal-only and will fail when downgrade. + + val e = intercept[ProtocolDowngradeException] { + log.upgradeProtocol(Protocol(1, 2)) + } + assert(log.update().protocol == Protocol(2, 3)) + assert(e.getErrorClass.contains("DELTA_INVALID_PROTOCOL_DOWNGRADE")) + } + { // DeltaTable API + val table = io.delta.tables.DeltaTable.forPath(spark, path.getCanonicalPath) + val events = Log4jUsageLogger.track { + table.upgradeTableProtocol(1, 2) + } + assert(log.update().protocol == Protocol(2, 3)) + assert(events.count(_.tags.get("opType").contains("delta.protocol.downgradeIgnored")) === 1) + } + { // SQL API + val events = Log4jUsageLogger.track { + sql(s"ALTER TABLE delta.`${path.getCanonicalPath}` " + + "SET TBLPROPERTIES (delta.minWriterVersion = 2)") + } + assert(log.update().protocol == Protocol(2, 3)) + assert(events.count(_.tags.get("opType").contains("delta.protocol.downgradeIgnored")) === 1) + } + } + } + + private case class SessionAndTableConfs(name: String, session: Seq[String], table: Seq[String]) + + for (confs <- Seq( + SessionAndTableConfs( + "session", + session = Seq(DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.defaultTablePropertyKey), + table = Seq.empty[String]), + SessionAndTableConfs( + "table", + session = Seq.empty[String], + table = Seq(DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.key)))) + test(s"CREATE TABLE can ignore protocol defaults, configured in ${confs.name}") { + withTempDir { path => + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "3", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "7", + defaultPropertyKey(ChangeDataFeedTableFeature) -> FEATURE_PROP_SUPPORTED) { + withSQLConf(confs.session.map(_ -> "true"): _*) { + spark + .range(10) + .write + .format("delta") + .options(confs.table.map(_ -> "true").toMap) + .save(path.getCanonicalPath) + } + } + + val snapshot = DeltaLog.forTable(spark, path).update() + assert(snapshot.protocol === Protocol(1, 1)) + 
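+ // The ignore-defaults flag itself must not be persisted in the table configuration.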
assert( + !snapshot.metadata.configuration + .contains(DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.key)) + } + } + + for (ignoreProtocolDefaults <- BOOLEAN_DOMAIN) + for (op <- Seq( + "ALTER TABLE", + "SHALLOW CLONE", + "RESTORE")) { + test(s"$op always ignore protocol defaults (flag = $ignoreProtocolDefaults)" + ) { + withTempDir { path => + val expectedProtocol = if (ignoreProtocolDefaults) { + Protocol(1, 1) + } else { + Protocol( + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION), + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION)) + } + + val cPath = path.getCanonicalPath + spark + .range(10) + .write + .format("delta") + .option( + DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.key, + ignoreProtocolDefaults.toString) + .save(cPath) + val snapshot = DeltaLog.forTable(spark, path).update() + assert(snapshot.protocol === expectedProtocol) + assert( + !snapshot.metadata.configuration + .contains(DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.key)) + + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "3", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "7", + defaultPropertyKey(ChangeDataFeedTableFeature) -> FEATURE_PROP_SUPPORTED) { + val snapshotAfter = op match { + case "ALTER TABLE" => + sql(s"ALTER TABLE delta.`$cPath` ALTER COLUMN id COMMENT 'hallo'") + DeltaLog.forTable(spark, path).update() + case "SHALLOW CLONE" => + var s: Snapshot = null + withTempDir { cloned => + sql( + s"CREATE TABLE delta.`${cloned.getCanonicalPath}` " + + s"SHALLOW CLONE delta.`$cPath`") + s = DeltaLog.forTable(spark, cloned).update() + } + s + case "RESTORE" => + sql(s"INSERT INTO delta.`$cPath` VALUES (99)") // version 2 + sql(s"RESTORE TABLE delta.`$cPath` TO VERSION AS OF 1") + DeltaLog.forTable(spark, path).update() + case _ => + throw new RuntimeException("OP is invalid. Add a match!") + } + assert(snapshotAfter.protocol === expectedProtocol) + assert( + !snapshotAfter.metadata.configuration + .contains(DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.key)) + } + } + } + } + + test("concurrent upgrade") { + withTempDir { path => + val newProtocol = Protocol() + val log = createTableWithProtocol(Protocol(0, 0), path) + + // We have to copy out the internals of upgradeProtocol to induce the concurrency. 
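+ // Start a transaction on the old snapshot, upgrade the protocol behind its back, and
+ // expect the transaction's own commit to fail with ProtocolChangedException.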
+ val txn = log.startTransaction() + log.upgradeProtocol(newProtocol) + intercept[ProtocolChangedException] { + txn.commit(Seq(newProtocol), DeltaOperations.UpgradeProtocol(newProtocol)) + } + } + } + + test("incompatible protocol change during the transaction") { + for (incompatibleProtocol <- Seq( + Protocol(minReaderVersion = Int.MaxValue), + Protocol(minWriterVersion = Int.MaxValue), + Protocol(minReaderVersion = Int.MaxValue, minWriterVersion = Int.MaxValue) + )) { + withTempDir { path => + spark.range(0).write.format("delta").save(path.getCanonicalPath) + val deltaLog = DeltaLog.forTable(spark, path) + val hadoopConf = deltaLog.newDeltaHadoopConf() + val txn = deltaLog.startTransaction() + val currentVersion = txn.snapshot.version + deltaLog.store.write( + deltaFile(deltaLog.logPath, currentVersion + 1), + Iterator(incompatibleProtocol.json), + overwrite = false, + hadoopConf) + + // Should detect the above incompatible protocol change and fail + intercept[InvalidProtocolVersionException] { + txn.commit(AddFile("test", Map.empty, 1, 1, dataChange = true) :: Nil, ManualUpdate) + } + // Make sure we didn't commit anything + val p = deltaFile(deltaLog.logPath, currentVersion + 2) + assert( + !p.getFileSystem(hadoopConf).exists(p), + s"$p should not be committed") + } + } + } + + import testImplicits._ + /** Creates a Delta table and checks the expected protocol version */ + private def testCreation(tableName: String, writerVersion: Int, tableInitialized: Boolean = false) + (fn: String => Unit): Unit = { + withTempDir { dir => + withSQLConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1") { + withTable(tableName) { + fn(dir.getCanonicalPath) + + val deltaLog = DeltaLog.forTable(spark, dir) + assert((deltaLog.snapshot.version != 0) == tableInitialized) + assert(deltaLog.snapshot.protocol.minWriterVersion === writerVersion) + assert(deltaLog.snapshot.protocol.minReaderVersion === 1) + } + } + } + } + + test("can create table using the latest protocol with conf") { + val readerVersion = Action.supportedProtocolVersion().minReaderVersion + val writerVersion = Action.supportedProtocolVersion().minWriterVersion + withTempDir { dir => + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> writerVersion.toString, + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> readerVersion.toString) { + sql(s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta") + val deltaLog = DeltaLog.forTable(spark, dir) + assert(deltaLog.snapshot.protocol === + Action.supportedProtocolVersion(withAllFeatures = false)) + } + } + } + + test("can create table using features configured in session") { + val readerVersion = Action.supportedProtocolVersion().minReaderVersion + val writerVersion = Action.supportedProtocolVersion().minWriterVersion + withTempDir { dir => + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> writerVersion.toString, + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> readerVersion.toString, + s"$DEFAULT_FEATURE_PROP_PREFIX${AppendOnlyTableFeature.name}" -> "enabled", + s"$DEFAULT_FEATURE_PROP_PREFIX${TestReaderWriterFeature.name}" -> "enabled") { + sql(s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta") + val deltaLog = DeltaLog.forTable(spark, dir) + assert( + deltaLog.snapshot.protocol === + Action + .supportedProtocolVersion(withAllFeatures = false) + .withFeatures(Set(AppendOnlyTableFeature, TestReaderWriterFeature))) + } + } + } + + test("can create table using features configured in 
table properties and session") { + withTempDir { dir => + withSQLConf( + s"$DEFAULT_FEATURE_PROP_PREFIX${TestWriterFeature.name}" -> "enabled") { + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (" + + s" delta.feature.${AppendOnlyTableFeature.name}='enabled'," + + s" delta.feature.${TestLegacyReaderWriterFeature.name}='enabled'" + + s")") + val deltaLog = DeltaLog.forTable(spark, dir) + assert( + deltaLog.snapshot.protocol.minReaderVersion === + TABLE_FEATURES_MIN_READER_VERSION, + "reader protocol version should support table features because we used the " + + "'delta.feature.' config.") + assert( + deltaLog.snapshot.protocol.minWriterVersion === + TABLE_FEATURES_MIN_WRITER_VERSION, + "reader protocol version should support table features because we used the " + + "'delta.feature.' config.") + assert( + deltaLog.snapshot.protocol.readerAndWriterFeatureNames === Set( + AppendOnlyTableFeature, + TestLegacyReaderWriterFeature, + TestWriterFeature).map(_.name)) + } + } + } + + test("creating a new table with default protocol") { + val tableName = "delta_test" + + def testTableCreation(fn: String => Unit, tableInitialized: Boolean = false): Unit = { + testCreation(tableName, 1, tableInitialized) { dir => + fn(dir) + } + } + + testTableCreation { dir => spark.range(10).write.format("delta").save(dir) } + testTableCreation { dir => + spark.range(10).write.format("delta").option("path", dir).saveAsTable(tableName) + } + testTableCreation { dir => + spark.range(10).writeTo(tableName).using("delta").tableProperty("location", dir).create() + } + testTableCreation { dir => + sql(s"CREATE TABLE $tableName (id bigint) USING delta LOCATION '$dir'") + } + testTableCreation { dir => + sql(s"CREATE TABLE $tableName USING delta LOCATION '$dir' AS SELECT * FROM range(10)") + } + testTableCreation(dir => { + val stream = MemoryStream[Int] + stream.addData(1 to 10) + val q = stream.toDF().writeStream.format("delta") + .option("checkpointLocation", new File(dir, "_checkpoint").getCanonicalPath) + .start(dir) + q.processAllAvailable() + q.stop() + } + ) + + testTableCreation { dir => + spark.range(10).write.mode("append").parquet(dir) + sql(s"CONVERT TO DELTA parquet.`$dir`") + } + } + + test( + "creating a new table with default protocol - requiring more recent protocol version") { + val tableName = "delta_test" + def testTableCreation(fn: String => Unit, tableInitialized: Boolean = false): Unit = + testCreation(tableName, 2, tableInitialized)(fn) + + testTableCreation { dir => + spark.range(10).writeTo(tableName).using("delta") + .tableProperty("location", dir) + .tableProperty("delta.appendOnly", "true") + .create() + } + testTableCreation { dir => + sql(s"CREATE TABLE $tableName (id bigint) USING delta LOCATION '$dir' " + + s"TBLPROPERTIES (delta.appendOnly = 'true')") + } + testTableCreation { dir => + sql(s"CREATE TABLE $tableName USING delta TBLPROPERTIES (delta.appendOnly = 'true') " + + s"LOCATION '$dir' AS SELECT * FROM range(10)") + } + testTableCreation { dir => + sql(s"CREATE TABLE $tableName (id bigint NOT NULL) USING delta LOCATION '$dir'") + } + + withSQLConf("spark.databricks.delta.properties.defaults.appendOnly" -> "true") { + testTableCreation { dir => spark.range(10).write.format("delta").save(dir) } + testTableCreation { dir => + spark.range(10).write.format("delta").option("path", dir).saveAsTable(tableName) + } + testTableCreation { dir => + spark.range(10).writeTo(tableName).using("delta").tableProperty("location", dir).create() + } + 
testTableCreation { dir => + sql(s"CREATE TABLE $tableName (id bigint) USING delta LOCATION '$dir'") + } + testTableCreation { dir => + sql(s"CREATE TABLE $tableName USING delta LOCATION '$dir' AS SELECT * FROM range(10)") + } + testTableCreation(dir => { + val stream = MemoryStream[Int] + stream.addData(1 to 10) + val q = stream.toDF().writeStream.format("delta") + .option("checkpointLocation", new File(dir, "_checkpoint").getCanonicalPath) + .start(dir) + q.processAllAvailable() + q.stop() + } + ) + + testTableCreation { dir => + spark.range(10).write.mode("append").parquet(dir) + sql(s"CONVERT TO DELTA parquet.`$dir`") + } + } + } + + test("replacing a new table with default protocol") { + withTempDir { dir => + // In this test we go back and forth through protocol versions, testing the various syntaxes + // of replacing tables + val tbl = "delta_test" + withTable(tbl) { + withSQLConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1") { + sql(s"CREATE TABLE $tbl (id bigint) USING delta LOCATION '${dir.getCanonicalPath}'") + } + val deltaLog = DeltaLog.forTable(spark, dir) + assert(deltaLog.snapshot.protocol.minWriterVersion === 1, + "Should've picked up the protocol from the configuration") + + // Replace the table and make sure the config is picked up + withSQLConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "2") { + spark.range(10).writeTo(tbl).using("delta") + .tableProperty("location", dir.getCanonicalPath).replace() + } + assert(deltaLog.snapshot.protocol.minWriterVersion === 2, + "Should've picked up the protocol from the configuration") + + // Will not downgrade without special flag. + withSQLConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1") { + sql(s"REPLACE TABLE $tbl (id bigint) USING delta LOCATION '${dir.getCanonicalPath}'") + assert(deltaLog.snapshot.protocol.minWriterVersion === 2, + "Should not pick up the protocol from the configuration") + } + + // Replace with the old writer again + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1", + DeltaSQLConf.REPLACE_TABLE_PROTOCOL_DOWNGRADE_ALLOWED.key -> "true") { + sql(s"REPLACE TABLE $tbl (id bigint) USING delta LOCATION '${dir.getCanonicalPath}'") + assert(deltaLog.snapshot.protocol.minWriterVersion === 1, + "Should've created a new protocol") + + sql(s"CREATE OR REPLACE TABLE $tbl (id bigint NOT NULL) USING delta " + + s"LOCATION '${dir.getCanonicalPath}'") + assert(deltaLog.snapshot.protocol.minWriterVersion === 2, + "Invariant should require the higher protocol") + + // Go back to version 1 + sql(s"REPLACE TABLE $tbl (id bigint) USING delta LOCATION '${dir.getCanonicalPath}'") + assert(deltaLog.snapshot.protocol.minWriterVersion === 1, + "Should've created a new protocol") + + // Check table properties with different syntax + spark.range(10).writeTo(tbl).tableProperty("location", dir.getCanonicalPath) + .tableProperty("delta.appendOnly", "true").using("delta").createOrReplace() + assert(deltaLog.snapshot.protocol.minWriterVersion === 2, + "appendOnly should require the higher protocol") + } + } + } + } + + test("create a table with no protocol") { + withTempDir { path => + val log = DeltaLog.forTable(spark, path) + log.ensureLogDirectoryExist() + log.store.write( + deltaFile(log.logPath, 0), + Iterator(Metadata().json), + overwrite = false, + log.newDeltaHadoopConf()) + + assert(intercept[DeltaIllegalStateException] { + log.update() + }.getErrorClass == "DELTA_STATE_RECOVER_ERROR") + assert(intercept[DeltaIllegalStateException] { + 
spark.read.format("delta").load(path.getCanonicalPath) + }.getErrorClass == "DELTA_STATE_RECOVER_ERROR") + assert(intercept[DeltaIllegalStateException] { + spark.range(1).write.format("delta").mode(SaveMode.Overwrite).save(path.getCanonicalPath) + }.getErrorClass == "DELTA_STATE_RECOVER_ERROR") + } + } + + test("bad inputs for default protocol versions") { + val readerVersion = Action.supportedProtocolVersion().minReaderVersion + val writerVersion = Action.supportedProtocolVersion().minWriterVersion + withTempDir { path => + val dir = path.getCanonicalPath + Seq("abc", "", "0", (readerVersion + 1).toString).foreach { conf => + val e = intercept[IllegalArgumentException] { + withSQLConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> conf) { + spark.range(10).write.format("delta").save(dir) + } + } + } + Seq("abc", "", "0", (writerVersion + 1).toString).foreach { conf => + intercept[IllegalArgumentException] { + withSQLConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> conf) { + spark.range(10).write.format("delta").save(dir) + } + } + } + } + } + + test("table creation with protocol as table property") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + withSQLConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1") { + sql(s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (delta.minWriterVersion=3)") + + assert(deltaLog.snapshot.protocol.minReaderVersion === 1) + assert(deltaLog.snapshot.protocol.minWriterVersion === 3) + assertPropertiesAndShowTblProperties(deltaLog) + } + } + } + + test("table creation with writer-only features as table property") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (" + + " DeLtA.fEaTurE.APPendONly='eNAbled'," + + " delta.feature.testWriter='enabled'" + + ")") + + assert(deltaLog.snapshot.protocol.minReaderVersion === 1) + assert( + deltaLog.snapshot.protocol.minWriterVersion === TABLE_FEATURES_MIN_WRITER_VERSION) + assert( + deltaLog.snapshot.protocol.readerAndWriterFeatureNames === Set( + AppendOnlyTableFeature, TestWriterFeature).map(_.name)) + assertPropertiesAndShowTblProperties(deltaLog, tableHasFeatures = true) + } + } + + test( + "table creation with legacy reader-writer features as table property") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (DeLtA.fEaTurE.testLEGACYReaderWritER='eNAbled')") + + assert( + deltaLog.snapshot.protocol === Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION).withFeature(TestLegacyReaderWriterFeature)) + } + } + + test("table creation with native writer-only features as table property") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (DeLtA.fEaTurE.testWritER='eNAbled')") + + assert( + deltaLog.snapshot.protocol.minReaderVersion === 1) + assert( + deltaLog.snapshot.protocol.minWriterVersion === + TABLE_FEATURES_MIN_WRITER_VERSION) + assert( + deltaLog.snapshot.protocol.readerAndWriterFeatureNames === + Set(TestWriterFeature.name)) + assertPropertiesAndShowTblProperties(deltaLog, tableHasFeatures = true) + } + } + + test("table creation with reader-writer features as table property") { + withTempDir { dir => + val deltaLog 
= DeltaLog.forTable(spark, dir) + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (" + + " DeLtA.fEaTurE.testLEGACYReaderWritER='eNAbled'," + + " DeLtA.fEaTurE.testReaderWritER='enabled'" + + ")") + + assert( + deltaLog.snapshot.protocol.minReaderVersion === TABLE_FEATURES_MIN_READER_VERSION) + assert( + deltaLog.snapshot.protocol.minWriterVersion === TABLE_FEATURES_MIN_WRITER_VERSION) + assert( + deltaLog.snapshot.protocol.readerAndWriterFeatureNames === Set( + TestLegacyReaderWriterFeature, TestReaderWriterFeature).map(_.name)) + assertPropertiesAndShowTblProperties(deltaLog, tableHasFeatures = true) + } + } + + test("table creation with feature as table property and supported protocol version") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (" + + s" DEltA.MINREADERversion='$TABLE_FEATURES_MIN_READER_VERSION'," + + s" DEltA.MINWRITERversion='$TABLE_FEATURES_MIN_WRITER_VERSION'," + + " DeLtA.fEaTurE.testLEGACYReaderWriter='eNAbled'" + + ")") + + assert( + deltaLog.snapshot.protocol === Protocol( + minReaderVersion = TABLE_FEATURES_MIN_READER_VERSION, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = + Some(Set(TestLegacyReaderWriterFeature.name)), + writerFeatures = + Some(Set(TestLegacyReaderWriterFeature.name)))) + assertPropertiesAndShowTblProperties(deltaLog, tableHasFeatures = true) + } + } + + test("table creation with feature as table property and supported writer protocol version") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + s"TBLPROPERTIES (" + + s" delta.minWriterVersion='$TABLE_FEATURES_MIN_WRITER_VERSION'," + + s" delta.feature.testLegacyWriter='enabled'" + + s")") + + assert( + deltaLog.snapshot.protocol === Protocol( + minReaderVersion = 1, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = None, + writerFeatures = Some(Set(TestLegacyWriterFeature.name)))) + assertPropertiesAndShowTblProperties(deltaLog, tableHasFeatures = true) + } + } + + test("table creation with automatically-enabled features") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta TBLPROPERTIES (" + + s" ${TestReaderWriterMetadataAutoUpdateFeature.TABLE_PROP_KEY}='true'" + + ")") + assert( + deltaLog.snapshot.protocol === Protocol( + minReaderVersion = TABLE_FEATURES_MIN_READER_VERSION, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = Some(Set(TestReaderWriterMetadataAutoUpdateFeature.name)), + writerFeatures = Some(Set(TestReaderWriterMetadataAutoUpdateFeature.name)))) + assertPropertiesAndShowTblProperties(deltaLog, tableHasFeatures = true) + } + } + + test("table creation with automatically-enabled legacy feature and unsupported protocol") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta TBLPROPERTIES (" + + " delta.minReaderVersion='1'," + + " delta.minWriterVersion='2'," + + " delta.enableChangeDataFeed='true'" + + ")") + assert(deltaLog.snapshot.protocol.minReaderVersion === 1) + assert(deltaLog.snapshot.protocol.minWriterVersion === 4) + } + } + + test("table creation with automatically-enabled native feature and unsupported protocol") { + 
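+ // The requested (1, 2) protocol is too low for a native feature, so the protocol is
+ // expected to be bumped to the table-features versions with the feature listed.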
withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta TBLPROPERTIES (" + + " delta.minReaderVersion='1'," + + " delta.minWriterVersion='2'," + + s" ${TestReaderWriterMetadataAutoUpdateFeature.TABLE_PROP_KEY}='true'" + + ")") + assert( + deltaLog.snapshot.protocol === Protocol( + minReaderVersion = TABLE_FEATURES_MIN_READER_VERSION, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = Some(Set(TestReaderWriterMetadataAutoUpdateFeature.name)), + writerFeatures = Some(Set(TestReaderWriterMetadataAutoUpdateFeature.name)))) + assertPropertiesAndShowTblProperties(deltaLog, tableHasFeatures = true) + } + } + + test("table creation with feature as table property and unsupported protocol version") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta TBLPROPERTIES (" + + " delta.minReaderVersion='1'," + + " delta.minWriterVersion='2'," + + " delta.feature.testWriter='enabled'" + + ")") + assert( + deltaLog.snapshot.protocol === Protocol( + minReaderVersion = 1, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = None, + writerFeatures = Some(Set(TestWriterFeature.name)))) + assertPropertiesAndShowTblProperties(deltaLog, tableHasFeatures = true) + } + } + + def testCreateTable( + name: String, + props: Map[String, String], + expectedExceptionClass: Option[String] = None, + expectedFinalProtocol: Option[Protocol] = None): Unit = { + test(s"create table - $name") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir) + + val propString = props.map(kv => s"'${kv._1}'='${kv._2}'").mkString(",") + if (expectedExceptionClass.isDefined) { + assert(intercept[DeltaTableFeatureException] { + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + s"TBLPROPERTIES ($propString)") + }.getErrorClass === expectedExceptionClass.get) + } else { + sql( + s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + s"TBLPROPERTIES ($propString)") + } + expectedFinalProtocol match { + case Some(p) => assert(log.update().protocol === p) + case None => // Do nothing + } + } + } + } + + testCreateTable( + "legacy protocol, legacy feature, metadata", + Map("delta.appendOnly" -> "true"), + expectedFinalProtocol = Some(Protocol(1, 2))) + + testCreateTable( + "legacy protocol, legacy feature, feature property", + Map(s"delta.feature.${TestLegacyReaderWriterFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyReaderWriterFeature))) + + testCreateTable( + "legacy protocol, legacy writer feature, feature property", + Map(s"delta.feature.${TestLegacyWriterFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(1, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyWriterFeature))) + + testCreateTable( + "legacy protocol, native auto-update feature, metadata", + Map(TestReaderWriterMetadataAutoUpdateFeature.TABLE_PROP_KEY -> "true"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataAutoUpdateFeature))) + + testCreateTable( + "legacy protocol, native non-auto-update feature, metadata", + Map(TestReaderWriterMetadataNoAutoUpdateFeature.TABLE_PROP_KEY -> "true"), + expectedFinalProtocol = Some( + 
Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataNoAutoUpdateFeature))) + + testCreateTable( + "legacy protocol, native auto-update feature, feature property", + Map(s"delta.feature.${TestReaderWriterMetadataAutoUpdateFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataAutoUpdateFeature))) + + testCreateTable( + "legacy protocol, native non-auto-update feature, feature property", + Map(s"delta.feature.${TestReaderWriterMetadataNoAutoUpdateFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataNoAutoUpdateFeature))) + + testCreateTable( + "legacy protocol with supported version props, legacy feature, feature property", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> + TestLegacyReaderWriterFeature.minReaderVersion.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> + TestLegacyReaderWriterFeature.minWriterVersion.toString, + s"delta.feature.${TestLegacyReaderWriterFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyReaderWriterFeature))) + + testCreateTable( + "legacy protocol with table feature version props, legacy feature, feature property", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + s"delta.feature.${TestLegacyReaderWriterFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyReaderWriterFeature))) + + testCreateTable( + "legacy protocol with supported version props, native feature, feature property", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + s"delta.feature.${TestReaderWriterMetadataAutoUpdateFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataAutoUpdateFeature))) + + testCreateTable( + "table features protocol, legacy feature, metadata", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + "delta.appendOnly" -> "true"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(AppendOnlyTableFeature))) + + testCreateTable( + "table features protocol, legacy feature, feature property", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + s"delta.feature.${TestLegacyReaderWriterFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyReaderWriterFeature))) + + testCreateTable( + "table features protocol, native auto-update feature, metadata", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> 
TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + TestReaderWriterMetadataAutoUpdateFeature.TABLE_PROP_KEY -> "true"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataAutoUpdateFeature))) + + testCreateTable( + "table features protocol, native non-auto-update feature, metadata", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + TestReaderWriterMetadataNoAutoUpdateFeature.TABLE_PROP_KEY -> "true"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataNoAutoUpdateFeature))) + + testCreateTable( + "table features protocol, native auto-update feature, feature property", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + s"delta.feature.${TestReaderWriterMetadataAutoUpdateFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataAutoUpdateFeature))) + + testCreateTable( + "table features protocol, native non-auto-update feature, feature property", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + s"delta.feature.${TestReaderWriterMetadataNoAutoUpdateFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataNoAutoUpdateFeature))) + + testCreateTable( + name = "feature with a dependency", + props = Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + s"delta.feature.${TestFeatureWithDependency.name}" -> "supported"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures(Seq(TestFeatureWithDependency, TestReaderWriterFeature)))) + + testCreateTable( + name = "feature with a dependency, enabled using a feature property", + props = Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + TestFeatureWithDependency.TABLE_PROP_KEY -> "true"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures(Seq(TestFeatureWithDependency, TestReaderWriterFeature)))) + + testCreateTable( + name = "feature with a dependency that has a dependency", + props = Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + s"delta.feature.${TestFeatureWithTransitiveDependency.name}" -> "supported"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures(Seq( + TestFeatureWithTransitiveDependency, + TestFeatureWithDependency, + 
TestReaderWriterFeature)))) + + def testAlterTable( + name: String, + props: Map[String, String], + expectedExceptionClass: Option[String] = None, + expectedFinalProtocol: Option[Protocol] = None, + tableProtocol: Protocol = Protocol(1, 1)): Unit = { + test(s"alter table - $name") { + withTempDir { dir => + val log = createTableWithProtocol(tableProtocol, dir) + + val propString = props.map(kv => s"'${kv._1}'='${kv._2}'").mkString(",") + if (expectedExceptionClass.isDefined) { + assert(intercept[DeltaTableFeatureException] { + sql(s"ALTER TABLE delta.`${dir.getCanonicalPath}` SET TBLPROPERTIES ($propString)") + }.getErrorClass === expectedExceptionClass.get) + } else { + sql(s"ALTER TABLE delta.`${dir.getCanonicalPath}` SET TBLPROPERTIES ($propString)") + } + expectedFinalProtocol match { + case Some(p) => assert(log.update().protocol === p) + case None => // Do nothing + } + } + } + } + + testAlterTable( + name = "downgrade reader version is a no-op", + tableProtocol = Protocol(2, 2), + props = Map(DeltaConfigs.MIN_READER_VERSION.key -> "1"), + expectedFinalProtocol = Some(Protocol(2, 2))) + + testAlterTable( + name = "downgrade writer version is a no-op", + tableProtocol = Protocol(2, 2), + props = Map(DeltaConfigs.MIN_WRITER_VERSION.key -> "1"), + expectedFinalProtocol = Some(Protocol(2, 2))) + + testAlterTable( + name = "downgrade both reader and writer versions is a no-op", + tableProtocol = Protocol(2, 2), + props = Map( + DeltaConfigs.MIN_READER_VERSION.key -> "1", + DeltaConfigs.MIN_WRITER_VERSION.key -> "1"), + expectedFinalProtocol = Some(Protocol(2, 2))) + + testAlterTable( + name = "downgrade reader but upgrade writer versions (legacy protocol)", + tableProtocol = Protocol(2, 2), + props = Map( + DeltaConfigs.MIN_READER_VERSION.key -> "1", + DeltaConfigs.MIN_WRITER_VERSION.key -> "5"), + expectedFinalProtocol = Some(Protocol(2, 5))) + + testAlterTable( + name = "downgrade reader but upgrade writer versions (table features protocol)", + tableProtocol = Protocol(2, 2), + props = Map( + DeltaConfigs.MIN_READER_VERSION.key -> "1", + DeltaConfigs.MIN_WRITER_VERSION.key -> "7"), + expectedFinalProtocol = Some( + Protocol(2, 7).withFeatures( + Seq(AppendOnlyTableFeature, InvariantsTableFeature)))) // Features from writer version 2 + + testAlterTable( + name = "downgrade while enabling a feature will become an upgrade", + tableProtocol = Protocol(2, 2), + props = Map( + DeltaConfigs.MIN_READER_VERSION.key -> "1", + DeltaConfigs.MIN_WRITER_VERSION.key -> "1", + DeltaConfigs.CHANGE_DATA_FEED.key -> "true"), + expectedFinalProtocol = Some(Protocol(2, 4))) + + testAlterTable( + "legacy protocol, legacy feature, metadata", + Map("delta.appendOnly" -> "true"), + expectedFinalProtocol = Some(Protocol(1, 2))) + + testAlterTable( + "legacy protocol, legacy feature, feature property", + Map(s"delta.feature.${TestLegacyReaderWriterFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyReaderWriterFeature))) + + testAlterTable( + "legacy protocol, legacy writer feature, feature property", + Map(s"delta.feature.${TestLegacyWriterFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(1, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyWriterFeature) + .merge(Protocol(1, 2))), + tableProtocol = Protocol(1, 2)) + + testAlterTable( + "legacy protocol, native auto-update feature, metadata", + Map(TestReaderWriterMetadataAutoUpdateFeature.TABLE_PROP_KEY -> 
"true"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataAutoUpdateFeature))) + + testAlterTable( + "legacy protocol, native non-auto-update feature, metadata", + Map(TestReaderWriterMetadataNoAutoUpdateFeature.TABLE_PROP_KEY -> "true"), + expectedExceptionClass = Some("DELTA_FEATURES_REQUIRE_MANUAL_ENABLEMENT")) + + testAlterTable( + "legacy protocol, native non-auto-update feature, metadata and feature property", + Map( + TestReaderWriterMetadataNoAutoUpdateFeature.TABLE_PROP_KEY -> "true", + s"delta.feature.${TestReaderWriterMetadataNoAutoUpdateFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataNoAutoUpdateFeature))) + + testAlterTable( + "legacy protocol, native auto-update feature, feature property", + Map(s"delta.feature.${TestReaderWriterMetadataAutoUpdateFeature.name}" -> "supported"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataAutoUpdateFeature))) + + testAlterTable( + "legacy protocol, native non-auto-update feature, feature property", + Map(s"delta.feature.${TestReaderWriterMetadataNoAutoUpdateFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataNoAutoUpdateFeature))) + + testAlterTable( + "legacy protocol with supported version props, legacy feature, feature property", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> + TestLegacyReaderWriterFeature.minReaderVersion.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> + TestLegacyReaderWriterFeature.minWriterVersion.toString, + s"delta.feature.${TestLegacyReaderWriterFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .merge(TestLegacyReaderWriterFeature.minProtocolVersion))) + + testAlterTable( + "legacy protocol with table feature version props, legacy feature, feature property", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + s"delta.feature.${TestLegacyReaderWriterFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyReaderWriterFeature))) + + testAlterTable( + "legacy protocol with supported version props, native feature, feature property", + Map( + DeltaConfigs.MIN_READER_VERSION.key -> TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> TABLE_FEATURES_MIN_WRITER_VERSION.toString, + s"delta.feature.${TestReaderWriterMetadataAutoUpdateFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataAutoUpdateFeature))) + + testAlterTable( + "table features protocol, legacy feature, metadata", + Map("delta.appendOnly" -> "true"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(AppendOnlyTableFeature)), + tableProtocol = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION)) + + 
testAlterTable( + "table features protocol, legacy feature, feature property", + Map(s"delta.feature.${TestLegacyReaderWriterFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyReaderWriterFeature)), + tableProtocol = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION)) + + testAlterTable( + "table features protocol, native auto-update feature, metadata", + Map(TestReaderWriterMetadataAutoUpdateFeature.TABLE_PROP_KEY -> "true"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataAutoUpdateFeature)), + tableProtocol = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION)) + + testAlterTable( + "table features protocol, native non-auto-update feature, metadata", + Map(TestReaderWriterMetadataNoAutoUpdateFeature.TABLE_PROP_KEY -> "true"), + tableProtocol = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION), + expectedExceptionClass = Some("DELTA_FEATURES_REQUIRE_MANUAL_ENABLEMENT")) + + testAlterTable( + "table features protocol, native non-auto-update feature, metadata and feature property", + Map( + TestReaderWriterMetadataNoAutoUpdateFeature.TABLE_PROP_KEY -> "true", + s"delta.feature.${TestReaderWriterMetadataNoAutoUpdateFeature.name}" -> "enabled"), + tableProtocol = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataNoAutoUpdateFeature))) + + testAlterTable( + "table features protocol, native auto-update feature, feature property", + Map(s"delta.feature.${TestReaderWriterMetadataAutoUpdateFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataAutoUpdateFeature)), + tableProtocol = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION)) + + testAlterTable( + "table features protocol, native non-auto-update feature, feature property", + Map(s"delta.feature.${TestReaderWriterMetadataNoAutoUpdateFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataNoAutoUpdateFeature)), + tableProtocol = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION)) + + testAlterTable( + "feature property merges the old protocol", + Map(s"delta.feature.${TestReaderWriterMetadataAutoUpdateFeature.name}" -> "enabled"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterMetadataAutoUpdateFeature).merge(Protocol(1, 2))), + tableProtocol = Protocol(1, 2)) + + testAlterTable( + name = "feature with a dependency", + tableProtocol = Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION), + props = Map(s"delta.feature.${TestFeatureWithDependency.name}" -> "supported"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures(Seq(TestFeatureWithDependency, TestReaderWriterFeature)))) + + testAlterTable( + name = "feature with a dependency, enabled using a 
feature property", + tableProtocol = Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION), + props = Map(TestFeatureWithDependency.TABLE_PROP_KEY -> "true"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures(Seq(TestFeatureWithDependency, TestReaderWriterFeature)))) + + testAlterTable( + name = "feature with a dependency that has a dependency", + tableProtocol = Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION), + props = Map(s"delta.feature.${TestFeatureWithTransitiveDependency.name}" -> "supported"), + expectedFinalProtocol = Some( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures(Seq( + TestFeatureWithTransitiveDependency, + TestFeatureWithDependency, + TestReaderWriterFeature)))) + + test("non-auto-update capable feature requires manual enablement (via feature prop)") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1") { + spark.range(10).writeTo(s"delta.`${dir.getCanonicalPath}`").using("delta").create() + } + val expectedProtocolOnCreation = Protocol(1, 1) + assert(deltaLog.update().protocol === expectedProtocolOnCreation) + + assert(intercept[DeltaTableFeatureException] { + withSQLConf(defaultPropertyKey(TestWriterMetadataNoAutoUpdateFeature) -> "supported") { + sql( + s"ALTER TABLE delta.`${dir.getCanonicalPath}` SET TBLPROPERTIES (" + + s" '${TestWriterMetadataNoAutoUpdateFeature.TABLE_PROP_KEY}' = 'true')") + } + }.getErrorClass === "DELTA_FEATURES_REQUIRE_MANUAL_ENABLEMENT", + "existing tables should ignore session defaults.") + + sql( + s"ALTER TABLE delta.`${dir.getCanonicalPath}` SET TBLPROPERTIES (" + + s" '${propertyKey(TestWriterMetadataNoAutoUpdateFeature)}' = 'supported'," + + s" '${TestWriterMetadataNoAutoUpdateFeature.TABLE_PROP_KEY}' = 'true')") + assert( + deltaLog.update().protocol === + expectedProtocolOnCreation + .merge(TestWriterMetadataNoAutoUpdateFeature.minProtocolVersion) + .withFeature(TestWriterMetadataNoAutoUpdateFeature)) + } + } + + test("non-auto-update capable error message is correct") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1") { + spark.range(10).writeTo(s"delta.`${dir.getCanonicalPath}`") + .tableProperty("delta.appendOnly", "true") + .using("delta") + .create() + val protocolOfNewTable = Protocol(1, 2) + assert(deltaLog.update().protocol === protocolOfNewTable) + + val e = intercept[DeltaTableFeatureException] { + // ALTER TABLE must not consider this SQL config + withSQLConf(defaultPropertyKey(TestWriterFeature) -> "supported") { + sql( + s"ALTER TABLE delta.`${dir.getCanonicalPath}` SET TBLPROPERTIES (" + + s" 'delta.appendOnly' = 'false'," + + s" 'delta.enableChangeDataFeed' = 'true'," + + s" '${TestReaderWriterMetadataAutoUpdateFeature.TABLE_PROP_KEY}' = 'true'," + + s" '${TestWriterMetadataNoAutoUpdateFeature.TABLE_PROP_KEY}' = 'true')") + } + } + + val unsupportedFeatures = TestWriterMetadataNoAutoUpdateFeature.name + val supportedFeatures = + (protocolOfNewTable.implicitlyAndExplicitlySupportedFeatures + + ChangeDataFeedTableFeature + + TestReaderWriterMetadataAutoUpdateFeature).map(_.name).toSeq.sorted.mkString(", ") + 
assert(e.getErrorClass === "DELTA_FEATURES_REQUIRE_MANUAL_ENABLEMENT") + + // `getMessageParameters` is available starting from Spark 3.4. + // For now we have to check for substrings. + assert(e.getMessage.contains(s" $unsupportedFeatures.")) + assert(e.getMessage.contains(s" $supportedFeatures.")) + + } + } + } + + test("table creation with protocol as table property - property wins over conf") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + withSQLConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "3") { + sql(s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (delta.MINwriterVERsion=2)") + + assert(deltaLog.snapshot.protocol.minWriterVersion === 2) + assertPropertiesAndShowTblProperties(deltaLog) + } + } + } + + test("table creation with protocol as table property - feature requirements win SQL") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + withSQLConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1") { + sql(s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (delta.minWriterVersion=1, delta.appendOnly=true)") + + assert(deltaLog.snapshot.protocol.minWriterVersion === 2) + assertPropertiesAndShowTblProperties(deltaLog) + } + } + } + + test("table creation with protocol as table property - feature requirements win DF") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + withSQLConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1") { + spark.range(10).writeTo(s"delta.`${dir.getCanonicalPath}`") + .tableProperty("delta.minWriterVersion", "1") + .tableProperty("delta.appendOnly", "true") + .using("delta") + .create() + + assert(deltaLog.snapshot.protocol.minWriterVersion === 2) + assertPropertiesAndShowTblProperties(deltaLog) + } + } + } + + test("table creation with protocol as table property - default table properties") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + withSQLConf((DeltaConfigs.sqlConfPrefix + "minWriterVersion") -> "3") { + spark.range(10).writeTo(s"delta.`${dir.getCanonicalPath}`") + .using("delta") + .create() + + assert(deltaLog.snapshot.protocol.minWriterVersion === 3) + assertPropertiesAndShowTblProperties(deltaLog) + } + } + } + + test("table creation with protocol as table property - explicit wins over conf") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + withSQLConf((DeltaConfigs.sqlConfPrefix + "minWriterVersion") -> "3") { + spark.range(10).writeTo(s"delta.`${dir.getCanonicalPath}`") + .tableProperty("delta.minWriterVersion", "2") + .using("delta") + .create() + + assert(deltaLog.snapshot.protocol.minWriterVersion === 2) + assertPropertiesAndShowTblProperties(deltaLog) + } + } + } + + test("table creation with protocol as table property - bad input") { + withTempDir { dir => + val e = intercept[IllegalArgumentException] { + sql(s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (delta.minWriterVersion='delta rulz')") + } + assert(e.getMessage.contains(" one of ")) + + val e2 = intercept[AnalysisException] { + sql(s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (delta.minWr1terVersion=2)") // Typo in minWriterVersion + } + assert(e2.getMessage.contains("Unknown configuration")) + + val e3 = intercept[IllegalArgumentException] { + sql(s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES 
(delta.minWriterVersion='-1')") + } + assert(e3.getMessage.contains(" one of ")) + } + } + + test("protocol as table property - desc table") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + withSQLConf(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "2") { + spark.range(10).writeTo(s"delta.`${dir.getCanonicalPath}`") + .using("delta") + .tableProperty("delta.minWriterVersion", "3") + .createOrReplace() + } + assert(deltaLog.snapshot.protocol.minWriterVersion === 3) + + val output = spark.sql(s"DESC EXTENDED delta.`${dir.getCanonicalPath}`").collect() + assert(output.exists(_.toString.contains("delta.minWriterVersion")), + s"minWriterVersion not found in: ${output.mkString("\n")}") + assert(output.exists(_.toString.contains("delta.minReaderVersion")), + s"minReaderVersion not found in: ${output.mkString("\n")}") + } + } + + test("auto upgrade protocol version - version 2") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(1, 1), path) + spark.sql(s""" + |ALTER TABLE delta.`${log.dataPath.toString}` + |SET TBLPROPERTIES ('delta.appendOnly' = 'true') + """.stripMargin) + assert(log.snapshot.protocol.minWriterVersion === 2) + } + } + + test("auto upgrade protocol version - version 3") { + withTempDir { path => + val log = DeltaLog.forTable(spark, path) + sql(s"CREATE TABLE delta.`${path.getCanonicalPath}` (id bigint) USING delta " + + "TBLPROPERTIES (delta.minWriterVersion=2)") + assert(log.update().protocol.minWriterVersion === 2) + spark.sql(s""" + |ALTER TABLE delta.`${path.getCanonicalPath}` + |ADD CONSTRAINT test CHECK (id < 5) + """.stripMargin) + assert(log.update().protocol.minWriterVersion === 3) + } + } + + test("auto upgrade protocol version even with explicit protocol version configs") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(1, 1), path) + spark.sql(s""" + |ALTER TABLE delta.`${log.dataPath.toString}` SET TBLPROPERTIES ( + | 'delta.minWriterVersion' = '2', + | 'delta.enableChangeDataFeed' = 'true' + |)""".stripMargin) + assert(log.snapshot.protocol.minWriterVersion === 4) + } + } + + test("legacy feature can be listed during alter table with silent protocol upgrade") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(1, 1), path) + spark.sql(s""" + |ALTER TABLE delta.`${log.dataPath.toString}` SET TBLPROPERTIES ( + | 'delta.feature.testLegacyReaderWriter' = 'enabled' + |)""".stripMargin) + assert( + log.snapshot.protocol === Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION).withFeature(TestLegacyReaderWriterFeature)) + } + } + + test("legacy feature can be explicitly listed during alter table") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(2, TABLE_FEATURES_MIN_WRITER_VERSION), path) + spark.sql(s""" + |ALTER TABLE delta.`${log.dataPath.toString}` SET TBLPROPERTIES ( + | 'delta.feature.testLegacyReaderWriter' = 'enabled' + |)""".stripMargin) + assert(log.snapshot.protocol === Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = Some(Set(TestLegacyReaderWriterFeature.name)), + writerFeatures = Some(Set(TestLegacyReaderWriterFeature.name)))) + } + } + + test("native feature can be explicitly listed during alter table with silent protocol upgrade") { + withTempDir { path => + val log = createTableWithProtocol(Protocol(1, 2), path) + spark.sql(s""" + |ALTER TABLE delta.`${log.dataPath.toString}` SET TBLPROPERTIES ( + | 'delta.feature.testReaderWriter' = 'enabled' + 
|)""".stripMargin) + assert( + log.snapshot.protocol === + TestReaderWriterFeature.minProtocolVersion + .withFeature(TestReaderWriterFeature) + .merge(Protocol(1, 2))) + } + } + + test("all active features are enabled in protocol") { + withTempDir { path => + spark.range(10).write.format("delta").save(path.getCanonicalPath) + val log = DeltaLog.forTable(spark, path) + val snapshot = log.unsafeVolatileSnapshot + val p = Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + val m = snapshot.metadata.copy(configuration = snapshot.metadata.configuration ++ Map( + DeltaConfigs.IS_APPEND_ONLY.key -> "false", + DeltaConfigs.CHANGE_DATA_FEED.key -> "true")) + log.store.write( + deltaFile(log.logPath, snapshot.version + 1), + Iterator(m.json, p.json), + overwrite = false, + log.newDeltaHadoopConf()) + val e = intercept[DeltaTableFeatureException] { + spark.read.format("delta").load(path.getCanonicalPath).collect() + } + assert(e.getMessage.contains("enabled in metadata but not listed in protocol")) + assert(e.getMessage.contains(": changeDataFeed.")) + } + } + + test("table feature status") { + withTempDir { path => + withSQLConf( + defaultPropertyKey(ChangeDataFeedTableFeature) -> FEATURE_PROP_SUPPORTED, + defaultPropertyKey(GeneratedColumnsTableFeature) -> FEATURE_PROP_ENABLED) { + spark.range(10).write.format("delta").save(path.getCanonicalPath) + val log = DeltaLog.forTable(spark, path) + val protocol = log.update().protocol + + assert(protocol.isFeatureSupported(ChangeDataFeedTableFeature)) + assert(protocol.isFeatureSupported(GeneratedColumnsTableFeature)) + } + } + } + + private def replaceTableAs(path: File): Unit = { + val p = path.getCanonicalPath + sql(s"REPLACE TABLE delta.`$p` USING delta AS (SELECT * FROM delta.`$p`)") + } + + test("REPLACE AS updates protocol when defaults are higher") { + withTempDir { path => + spark + .range(10) + .write + .format("delta") + .option(DeltaConfigs.MIN_READER_VERSION.key, 1) + .option(DeltaConfigs.MIN_WRITER_VERSION.key, 2) + .mode("append") + .save(path.getCanonicalPath) + val log = DeltaLog.forTable(spark, path) + assert(log.update().protocol === Protocol(1, 2)) + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "2", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "5") { + replaceTableAs(path) + } + assert(log.update().protocol === Protocol(2, 5)) + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "3", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "7", + TableFeatureProtocolUtils.defaultPropertyKey(TestReaderWriterFeature) -> "enabled") { + replaceTableAs(path) + } + assert( + log.update().protocol === + Protocol(2, 5).merge(Protocol(3, 7)).withFeature(TestReaderWriterFeature)) + } + } + + for (p <- Seq(Protocol(2, 5), Protocol(3, 7).withFeature(TestReaderWriterFeature))) + test(s"REPLACE AS keeps protocol when defaults are lower ($p)") { + withTempDir { path => + spark + .range(10) + .write + .format("delta") + .option(DeltaConfigs.MIN_READER_VERSION.key, p.minReaderVersion) + .option(DeltaConfigs.MIN_WRITER_VERSION.key, p.minWriterVersion) + .options( + p.readerAndWriterFeatureNames + .flatMap(TableFeature.featureNameToFeature) + .map(f => TableFeatureProtocolUtils.propertyKey(f) -> "enabled") + .toMap) + .mode("append") + .save(path.getCanonicalPath) + val log = DeltaLog.forTable(spark, path) + assert(log.update().protocol === p) + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + 
DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "2") { + replaceTableAs(path) + } + assert(log.update().protocol === p.merge(Protocol(1, 2))) + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "2", + TableFeatureProtocolUtils.defaultPropertyKey(TestReaderWriterFeature) -> "enabled") { + replaceTableAs(path) + } + assert( + log.update().protocol === + p + .merge(Protocol(1, 2)) + .merge( + TestReaderWriterFeature.minProtocolVersion.withFeature(TestReaderWriterFeature))) + } + } + + test("REPLACE AS can ignore protocol defaults") { + withTempDir { path => + withSQLConf( + DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.defaultTablePropertyKey -> "true") { + spark.range(10).write.format("delta").save(path.getCanonicalPath) + } + val log = DeltaLog.forTable(spark, path) + assert(log.update().protocol === Protocol(1, 1)) + + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "3", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "7", + defaultPropertyKey(ChangeDataFeedTableFeature) -> FEATURE_PROP_SUPPORTED, + DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.defaultTablePropertyKey -> "true") { + replaceTableAs(path) + } + assert(log.update().protocol === Protocol(1, 1)) + assert( + !log.update().metadata.configuration + .contains(DeltaConfigs.CREATE_TABLE_IGNORE_PROTOCOL_DEFAULTS.key)) + } + } + + test("protocol change logging") { + withTempDir { path => + val dir = path.getCanonicalPath + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "2") { + assert( + captureProtocolChangeEventBlob { + sql(s"CREATE TABLE delta.`$dir` (id INT) USING delta") + } === Map( + "toProtocol" -> Map( + "minReaderVersion" -> 1, + "minWriterVersion" -> 2, + "supportedFeatures" -> List("appendOnly", "invariants") + ))) + } + + // Upgrade protocol + assert(captureProtocolChangeEventBlob { + sql( + s"ALTER TABLE delta.`$dir` " + + s"SET TBLPROPERTIES (${DeltaConfigs.MIN_WRITER_VERSION.key} = '7')") + } === Map( + "fromProtocol" -> Map( + "minReaderVersion" -> 1, + "minWriterVersion" -> 2, + "supportedFeatures" -> List("appendOnly", "invariants") + ), + "toProtocol" -> Map( + "minReaderVersion" -> 1, + "minWriterVersion" -> 7, + "supportedFeatures" -> List("appendOnly", "invariants") + ))) + + // Add feature + assert(captureProtocolChangeEventBlob { + sql( + s"ALTER TABLE delta.`$dir` " + + s"SET TBLPROPERTIES (${DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key} = 'true')") + } === Map( + "fromProtocol" -> Map( + "minReaderVersion" -> 1, + "minWriterVersion" -> 7, + "supportedFeatures" -> List("appendOnly", "invariants") + ), + "toProtocol" -> Map( + "minReaderVersion" -> 3, + "minWriterVersion" -> 7, + "supportedFeatures" -> List("appendOnly", "deletionVectors", "invariants") + ))) + } + } + + test("protocol change logging using commitLarge") { + withTempDir { path => + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "2") { + assert( + captureProtocolChangeEventBlob { + sql(s"CREATE TABLE delta.`${path.getCanonicalPath}` (id INT) USING delta") + } === Map( + "toProtocol" -> Map( + "minReaderVersion" -> 1, + "minWriterVersion" -> 2, + "supportedFeatures" -> List("appendOnly", "invariants") + ))) + } + + // Clone table to invoke commitLarge + withTempDir { clonedPath => + assert( + 
captureProtocolChangeEventBlob { + sql(s"CREATE TABLE delta.`${clonedPath.getCanonicalPath}` " + + s"SHALLOW CLONE delta.`${path.getCanonicalPath}` " + + s"TBLPROPERTIES (${DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key} = 'true')") + } === Map( + "toProtocol" -> Map( + "minReaderVersion" -> 3, + "minWriterVersion" -> 7, + "supportedFeatures" -> List("appendOnly", "deletionVectors", "invariants") + ))) + } + } + } + + test("can't write to a table with identity columns (legacy protocol)") { + withTempDir { dir => + val writerVersion = 6 + createTableWithProtocol(Protocol(1, writerVersion), dir) + + checkAnswer( + sql(s"SELECT * FROM delta.`${dir.getCanonicalPath}`"), + spark.range(0).toDF) + assert(intercept[InvalidProtocolVersionException] { + sql(s"INSERT INTO delta.`${dir.getCanonicalPath}` VALUES (9)") + }.getMessage.contains(s" and writer version 6")) + } + } + + test("can't write to a table with identity columns (table features)") { + withTempDir { dir => + val featureName = "identityColumns" + createTableWithProtocol( + Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = Some(Set.empty), + writerFeatures = Some(Set(featureName))), + dir) + + checkAnswer( + sql(s"SELECT * FROM delta.`${dir.getCanonicalPath}`"), + spark.range(0).toDF) + assert(intercept[DeltaTableFeatureException] { + sql(s"INSERT INTO delta.`${dir.getCanonicalPath}` VALUES (9)") + }.getMessage.contains(s"""unsupported by Delta Lake "${io.delta.VERSION}": $featureName""")) + } + } + + def protocolWithFeatures( + readerFeatures: Seq[TableFeature] = Seq.empty, + writerFeatures: Seq[TableFeature] = Seq.empty): Protocol = { + val readerFeaturesEnabled = readerFeatures.nonEmpty + val writerFeaturesEnabled = readerFeatures.nonEmpty || writerFeatures.nonEmpty + val minReaderVersion = if (readerFeaturesEnabled) TABLE_FEATURES_MIN_READER_VERSION else 1 + val minWriterVersion = if (writerFeaturesEnabled) TABLE_FEATURES_MIN_WRITER_VERSION else 1 + val readerFeatureNames = + if (readerFeaturesEnabled) Some(readerFeatures.map(_.name).toSet) else None + val writerFeatureNames = if (writerFeaturesEnabled) { + Some((readerFeatures ++ writerFeatures).map(_.name).toSet) + } else { + None + } + + Protocol( + minReaderVersion = minReaderVersion, + minWriterVersion = minWriterVersion, + readerFeatures = readerFeatureNames, + writerFeatures = writerFeatureNames) + } + + def protocolWithReaderFeature(readerFeature: TableFeature): Protocol = { + protocolWithFeatures(readerFeatures = Seq(readerFeature)) + } + + def protocolWithWriterFeature(writerFeature: TableFeature): Protocol = { + protocolWithFeatures(writerFeatures = Seq(writerFeature)) + } + + def emptyProtocolWithWriterFeatures: Protocol = + Protocol( + minReaderVersion = 1, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = None, + writerFeatures = Some(Set.empty)) + + def emptyProtocolWithReaderFeatures: Protocol = + Protocol( + minReaderVersion = TABLE_FEATURES_MIN_READER_VERSION, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = Some(Set.empty), + writerFeatures = Some(Set.empty)) + + protected def createTableWithFeature( + deltaLog: DeltaLog, + feature: TableFeature, + featureProperty: String): Unit = { + sql(s"""CREATE TABLE delta.`${deltaLog.dataPath}` (id bigint) USING delta + |TBLPROPERTIES ( + |delta.minReaderVersion = $TABLE_FEATURES_MIN_READER_VERSION, + |delta.feature.${feature.name} = 'supported', + |$featureProperty = "true" + |)""".stripMargin) + + val 
expectedWriterFeatures = Some(Set(feature.name)) + val expectedReaderFeatures: Option[Set[String]] = + if (feature.isReaderWriterFeature) expectedWriterFeatures else Some(Set.empty) + + assert( + deltaLog.update().protocol === Protocol( + minReaderVersion = TABLE_FEATURES_MIN_READER_VERSION, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = expectedReaderFeatures, + writerFeatures = expectedWriterFeatures)) + } + + /** Assumes there is at least 1 commit. */ + def getEarliestCommitVersion(deltaLog: DeltaLog): Long = + deltaLog.listFrom(0L).collectFirst { case DeltaFile(_, v) => v }.get + + def testWriterFeatureRemoval( + feature: TableFeature, + featurePropertyKey: String): Unit = { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + createTableWithFeature(deltaLog, feature, featurePropertyKey) + + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + feature.name).run(spark) + + // Writer feature is removed from the writer features set. + val snapshot = deltaLog.update() + assert(snapshot.protocol === Protocol(1, 1)) + assert(!snapshot.metadata.configuration.contains(featurePropertyKey)) + assertPropertiesAndShowTblProperties(deltaLog) + } + } + + def truncateHistoryDefaultLogRetention: CalendarInterval = + DeltaConfigs.parseCalendarInterval( + DeltaConfigs.TABLE_FEATURE_DROP_TRUNCATE_HISTORY_LOG_RETENTION.defaultValue) + + def testReaderFeatureRemoval( + feature: TableFeature, + featurePropertyKey: String, + advanceClockPastRetentionPeriod: Boolean = true, + truncateHistory: Boolean = false, + truncateHistoryRetentionOpt: Option[String] = None): Unit = { + withTempDir { dir => + val truncateHistoryRetention = truncateHistoryRetentionOpt + .map(DeltaConfigs.parseCalendarInterval) + .getOrElse(truncateHistoryDefaultLogRetention) + val clock = new ManualClock(System.currentTimeMillis()) + val deltaLog = DeltaLog.forTable(spark, dir, clock) + + createTableWithFeature(deltaLog, feature, featurePropertyKey) + + if (truncateHistoryRetentionOpt.nonEmpty) { + val propertyKey = DeltaConfigs.TABLE_FEATURE_DROP_TRUNCATE_HISTORY_LOG_RETENTION.key + AlterTableSetPropertiesDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + Map(propertyKey -> truncateHistoryRetention.toString)).run(spark) + } + + // First attempt should cleanup feature traces but fail with a message due to historical + // log entries containing the feature. + val e1 = intercept[DeltaTableFeatureException] { + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + feature.name).run(spark) + } + checkError( + exception = e1, + errorClass = "DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD", + parameters = Map( + "feature" -> feature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> "30 days", + "truncateHistoryLogRetentionPeriod" -> truncateHistoryRetention.toString)) + + // Add some more commits. + spark.range(0, 100).write.format("delta").mode("append").save(dir.getCanonicalPath) + spark.range(100, 120).write.format("delta").mode("append").save(dir.getCanonicalPath) + + // Table still contains historical data with the feature. Attempt should fail. 
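+      // (The appends above only create additional commits; the earlier commits written while
+      // the feature was active are still within the log retention window.)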
+ val e2 = intercept[DeltaTableFeatureException] { + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + feature.name).run(spark) + } + checkError( + exception = e2, + errorClass = "DELTA_FEATURE_DROP_HISTORICAL_VERSIONS_EXIST", + parameters = Map( + "feature" -> feature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> "30 days", + "truncateHistoryLogRetentionPeriod" -> truncateHistoryRetention.toString)) + + // Generate commit. + spark.range(120, 140).write.format("delta").mode("append").save(dir.getCanonicalPath) + + // Pretend retention period has passed. + if (advanceClockPastRetentionPeriod) { + val clockAdvanceMillis = if (truncateHistory) { + DeltaConfigs.getMilliSeconds(truncateHistoryRetention) + } else { + deltaLog.deltaRetentionMillis(deltaLog.update().metadata) + } + clock.advance(clockAdvanceMillis + TimeUnit.MINUTES.toMillis(5)) + } + + val dropCommand = AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + feature.name, + truncateHistory = truncateHistory) + + if (advanceClockPastRetentionPeriod) { + // History is now clean. We should be able to remove the feature. + dropCommand.run(spark) + + // Reader+writer feature is removed from the features set. + val snapshot = deltaLog.update() + assert(snapshot.protocol === Protocol(1, 1)) + assert(!snapshot.metadata.configuration.contains(featurePropertyKey)) + assertPropertiesAndShowTblProperties(deltaLog) + } else { + // When the clock did not advance the logs are not cleaned. We should detect there + // are still versions that contain traces of the feature. + val e3 = intercept[DeltaTableFeatureException] { + dropCommand.run(spark) + } + checkError( + exception = e3, + errorClass = "DELTA_FEATURE_DROP_HISTORICAL_VERSIONS_EXIST", + parameters = Map( + "feature" -> feature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> "30 days", + "truncateHistoryLogRetentionPeriod" -> truncateHistoryRetention.toString)) + } + + // Verify commits before the checkpoint are cleaned. + val earliestExpectedCommitVersion = + if (advanceClockPastRetentionPeriod) { + deltaLog.findEarliestReliableCheckpoint.get + } else { + 0L + } + assert(getEarliestCommitVersion(deltaLog) === earliestExpectedCommitVersion) + + // Validate extra commits. 
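+      // (The three appends above wrote the ranges 0-100, 100-120 and 120-140, i.e. 140 rows
+      // in total, which is what the count below is expected to return.)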
+ val table = io.delta.tables.DeltaTable.forPath(deltaLog.dataPath.toString) + assert(table.toDF.count() == 140) + } + } + + test("Remove writer feature") { + testWriterFeatureRemoval( + TestRemovableWriterFeature, + TestRemovableWriterFeature.TABLE_PROP_KEY) + } + + test("Remove legacy writer feature") { + testWriterFeatureRemoval( + TestRemovableLegacyWriterFeature, + TestRemovableLegacyWriterFeature.TABLE_PROP_KEY) + } + + + for { + advanceClockPastRetentionPeriod <- BOOLEAN_DOMAIN + truncateHistory <- if (advanceClockPastRetentionPeriod) BOOLEAN_DOMAIN else Seq(false) + retentionOpt <- if (truncateHistory) Seq(Some("12 hours"), None) else Seq(None) + } test(s"Remove reader+writer feature " + + s"advanceClockPastRetentionPeriod: $advanceClockPastRetentionPeriod " + + s"truncateHistory: $truncateHistory " + + s"retentionOpt: ${retentionOpt.getOrElse("None")}") { + testReaderFeatureRemoval( + TestRemovableReaderWriterFeature, + TestRemovableReaderWriterFeature.TABLE_PROP_KEY, + advanceClockPastRetentionPeriod, + truncateHistory, + retentionOpt) + } + + test("Remove legacy reader+writer feature") { + testReaderFeatureRemoval( + TestRemovableLegacyReaderWriterFeature, + TestRemovableLegacyReaderWriterFeature.TABLE_PROP_KEY) + } + + test("Remove writer feature when table protocol does not support reader features") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql(s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ( + |delta.feature.${TestWriterFeature.name} = 'supported', + |delta.feature.${TestRemovableWriterFeature.name} = 'supported' + |)""".stripMargin) + + val protocol = deltaLog.update().protocol + assert(protocol === protocolWithFeatures( + writerFeatures = Seq(TestWriterFeature, TestRemovableWriterFeature))) + + val command = AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableWriterFeature.name) + command.run(spark) + + assert( + deltaLog.update().protocol === Protocol( + minReaderVersion = 1, + minWriterVersion = TABLE_FEATURES_MIN_WRITER_VERSION, + readerFeatures = None, + writerFeatures = Some(Set(TestWriterFeature.name)))) + } + } + + test("Remove a non-removable feature") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql(s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ( + |delta.feature.${TestWriterMetadataNoAutoUpdateFeature.name} = 'supported' + |)""".stripMargin) + + val expectedProtocol = protocolWithWriterFeature(TestWriterMetadataNoAutoUpdateFeature) + assert(deltaLog.update().protocol === expectedProtocol) + + val command = AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestWriterMetadataNoAutoUpdateFeature.name) + + val e = intercept[DeltaTableFeatureException] { + command.run(spark) + } + checkError( + exception = e, + errorClass = "DELTA_FEATURE_DROP_NONREMOVABLE_FEATURE", + parameters = Map("feature" -> TestWriterMetadataNoAutoUpdateFeature.name)) + } + } + + test("Remove an implicit writer feature") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql(s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ( + |delta.minWriterVersion = 2)""".stripMargin) + + assert(deltaLog.update().protocol === Protocol(minReaderVersion = 1, minWriterVersion = 2)) + + // Try removing AppendOnly which is an implicitly supported feature (writer version 2). 
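+      // The drop is expected to fail with DELTA_FEATURE_DROP_NONREMOVABLE_FEATURE, as
+      // asserted below.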
+ val command = AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + AppendOnlyTableFeature.name) + val e = intercept[DeltaTableFeatureException] { + command.run(spark) + } + checkError( + exception = e, + errorClass = "DELTA_FEATURE_DROP_NONREMOVABLE_FEATURE", + parameters = Map("feature" -> AppendOnlyTableFeature.name)) + } + } + + test("Remove a feature not supported by the client") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql(s"CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta") + + assert( + deltaLog.update().protocol === Protocol( + minReaderVersion = 1, + minWriterVersion = 2, + readerFeatures = None, + writerFeatures = None)) + + val command = AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + "NonSupportedFeature") + + val e = intercept[DeltaTableFeatureException] { + command.run(spark) + } + checkError( + exception = e, + errorClass = "DELTA_FEATURE_DROP_UNSUPPORTED_CLIENT_FEATURE", + parameters = Map("feature" -> "NonSupportedFeature")) + } + } + + for (withTableFeatures <- BOOLEAN_DOMAIN) + test(s"Remove a feature not present in the protocol - withTableFeatures: $withTableFeatures") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + val (minReaderVersion, minWriterVersion) = if (withTableFeatures) { + (TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + } else { + (1, 2) + } + sql( + s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ( + |delta.minReaderVersion = $minReaderVersion, + |delta.minWriterVersion = $minWriterVersion)""".stripMargin) + + assert( + deltaLog.update().protocol === Protocol( + minReaderVersion = minReaderVersion, + minWriterVersion = minWriterVersion, + readerFeatures = if (withTableFeatures) Some(Set.empty) else None, + writerFeatures = if (withTableFeatures) Some(Set.empty) else None)) + + val command = AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableWriterFeature.name) + + val e = intercept[DeltaTableFeatureException] { + command.run(spark) + } + checkError( + exception = e, + errorClass = "DELTA_FEATURE_DROP_FEATURE_NOT_PRESENT", + parameters = Map("feature" -> TestRemovableWriterFeature.name)) + } + } + + test("Reintroduce a feature after removing it") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + sql(s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ( + |delta.feature.${TestRemovableWriterFeature.name} = 'supported' + |)""".stripMargin) + + val protocol = deltaLog.update().protocol + assert(protocol === protocolWithWriterFeature(TestRemovableWriterFeature)) + + val command = AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableWriterFeature.name) + command.run(spark) + assert(deltaLog.update().protocol === Protocol(1, 1)) + + sql(s"""ALTER TABLE delta.`${dir.getCanonicalPath}` SET TBLPROPERTIES ( + |delta.feature.${TestRemovableWriterFeature.name} = 'supported' + |)""".stripMargin) + + val expectedProtocolAfterReintroduction = + protocolWithFeatures(writerFeatures = Seq(TestRemovableWriterFeature)) + assert(deltaLog.update().protocol === expectedProtocolAfterReintroduction) + } + } + + test(s"Truncate history while dropping a writer feature") { + withTempDir { dir => + val table = s"delta.`${dir.getCanonicalPath}`" + val deltaLog = DeltaLog.forTable(spark, dir) + + createTableWithFeature( + deltaLog, + feature = 
TestRemovableWriterFeature, + featureProperty = TestRemovableWriterFeature.TABLE_PROP_KEY) + + val e = intercept[DeltaTableFeatureException] { + sql(s"""ALTER TABLE $table + |DROP FEATURE ${TestRemovableWriterFeature.name} + |TRUNCATE HISTORY""".stripMargin) + } + checkError( + exception = e, + errorClass = "DELTA_FEATURE_DROP_HISTORY_TRUNCATION_NOT_ALLOWED", + parameters = Map.empty) + } + } + + for { + reEnablePropertyValue <- BOOLEAN_DOMAIN + reDisable <- BOOLEAN_DOMAIN + } test("Try removing reader+writer feature but re-enable feature after disablement " + + s"reEnablePropertyValue: $reEnablePropertyValue " + + s"reDisable: $reDisable") { + withTempDir { dir => + val clock = new ManualClock(System.currentTimeMillis()) + val deltaLog = DeltaLog.forTable(spark, dir, clock) + + createTableWithFeature( + deltaLog, + feature = TestRemovableReaderWriterFeature, + featureProperty = TestRemovableReaderWriterFeature.TABLE_PROP_KEY) + + // Add some more commits. + spark.range(0, 100).write.format("delta").mode("append").save(dir.getCanonicalPath) + spark.range(100, 120).write.format("delta").mode("append").save(dir.getCanonicalPath) + + // First attempt should clean up feature traces but fail with a message due to historical + // log entries containing the feature. + val e1 = intercept[DeltaTableFeatureException] { + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableReaderWriterFeature.name).run(spark) + } + checkError( + exception = e1, + errorClass = "DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD", + parameters = Map( + "feature" -> TestRemovableReaderWriterFeature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> "30 days", + "truncateHistoryLogRetentionPeriod" -> truncateHistoryDefaultLogRetention.toString)) + + val deltaRetentionMillis = deltaLog.deltaRetentionMillis(deltaLog.update().metadata) + require(deltaRetentionMillis === TimeUnit.DAYS.toMillis(30)) + + // Ten days have passed. + clock.advance(TimeUnit.DAYS.toMillis(10)) + + // Generate commit. + spark.range(120, 140).write.format("delta").mode("append").save(dir.getCanonicalPath) + + // Add feature property again. + val v2Table = DeltaTableV2(spark, deltaLog.dataPath) + AlterTableSetPropertiesDeltaCommand( + v2Table, + Map(TestRemovableReaderWriterFeature.TABLE_PROP_KEY -> reEnablePropertyValue.toString)) + .run(spark) + + // Disable by removing property. + if (reDisable) { + val properties = Seq(TestRemovableReaderWriterFeature.TABLE_PROP_KEY) + AlterTableUnsetPropertiesDeltaCommand(v2Table, properties, ifExists = true).run(spark) + } + + // The retention period has passed since the disablement. + clock.advance( + deltaRetentionMillis - TimeUnit.DAYS.toMillis(10) + TimeUnit.MINUTES.toMillis(5)) + + // Cleanup logs. + deltaLog.cleanUpExpiredLogs(deltaLog.update()) + + // Feature was enabled again in the middle of the timeframe. The feature traces + // are cleaned up again and we get a new "Wait for retention period" message. + val e2 = intercept[DeltaTableFeatureException] { + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableReaderWriterFeature.name).run(spark) + } + + // If the property is re-disabled we pick up the issue during the history check. 
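+      // Otherwise the re-enablement restarts the drop cycle, so the command asks us to wait
+      // for the retention period once more (second branch below).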
+ if (reDisable) { + checkError( + exception = e2, + errorClass = "DELTA_FEATURE_DROP_HISTORICAL_VERSIONS_EXIST", + parameters = Map( + "feature" -> TestRemovableReaderWriterFeature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> "30 days", + "truncateHistoryLogRetentionPeriod" -> truncateHistoryDefaultLogRetention.toString)) + } else { + checkError( + exception = e2, + errorClass = "DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD", + parameters = Map( + "feature" -> TestRemovableReaderWriterFeature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> "30 days", + "truncateHistoryLogRetentionPeriod" -> truncateHistoryDefaultLogRetention.toString)) + } + } + } + + test("Remove reader+writer feature with shortened retention period") { + withTempDir { dir => + val clock = new ManualClock(System.currentTimeMillis()) + val deltaLog = DeltaLog.forTable(spark, dir, clock) + + createTableWithFeature( + deltaLog, + feature = TestRemovableReaderWriterFeature, + featureProperty = TestRemovableReaderWriterFeature.TABLE_PROP_KEY) + + // First attempt should cleanup feature traces but fail with a message due to historical + // log entries containing the feature. + val e1 = intercept[DeltaTableFeatureException] { + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableReaderWriterFeature.name).run(spark) + } + checkError( + exception = e1, + errorClass = "DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD", + parameters = Map( + "feature" -> TestRemovableReaderWriterFeature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> "30 days", + "truncateHistoryLogRetentionPeriod" -> truncateHistoryDefaultLogRetention.toString)) + + // Set retention period to a day. + AlterTableSetPropertiesDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + Map(DeltaConfigs.LOG_RETENTION.key -> "1 DAY")).run(spark) + + // Metadata is not cleaned yet. Attempt should fail. + val e2 = intercept[DeltaTableFeatureException] { + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableReaderWriterFeature.name).run(spark) + } + checkError( + exception = e2, + errorClass = "DELTA_FEATURE_DROP_HISTORICAL_VERSIONS_EXIST", + parameters = Map( + "feature" -> TestRemovableReaderWriterFeature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> "1 days", + "truncateHistoryLogRetentionPeriod" -> truncateHistoryDefaultLogRetention.toString)) + + spark.range(1, 100).write.format("delta").mode("append").save(dir.getCanonicalPath) + + // Pretend retention period has passed. + clock.advance( + deltaLog.deltaRetentionMillis(deltaLog.update().metadata) + + TimeUnit.MINUTES.toMillis(5)) + + // History is now clean. We should be able to remove the feature. + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableReaderWriterFeature.name).run(spark) + + // Verify commits before the checkpoint are cleaned. + val earliestExpectedCommitVersion = deltaLog.findEarliestReliableCheckpoint.get + assert(getEarliestCommitVersion(deltaLog) === earliestExpectedCommitVersion) + + // Reader+writer feature is removed from the features set. 
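+ // With no table features left, the protocol is also downgraded back to the legacy (1, 1) versions.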
+ val snapshot = deltaLog.update() + assert(snapshot.protocol === Protocol(1, 1)) + assert(!snapshot.metadata.configuration + .contains(TestRemovableReaderWriterFeature.TABLE_PROP_KEY)) + assertPropertiesAndShowTblProperties(deltaLog) + } + } + + test("Try removing reader+writer feature after restore") { + withTempDir { dir => + val clock = new ManualClock(System.currentTimeMillis()) + val deltaLog = DeltaLog.forTable(spark, dir, clock) + + createTableWithFeature( + deltaLog, + feature = TestRemovableReaderWriterFeature, + featureProperty = TestRemovableReaderWriterFeature.TABLE_PROP_KEY) + + val preRemovalVersion = deltaLog.update().version + + // Cleanup feature traces and throw message to wait retention period to expire. + val e1 = intercept[DeltaTableFeatureException] { + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableReaderWriterFeature.name).run(spark) + } + checkError( + exception = e1, + errorClass = "DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD", + parameters = Map( + "feature" -> TestRemovableReaderWriterFeature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> "30 days", + "truncateHistoryLogRetentionPeriod" -> truncateHistoryDefaultLogRetention.toString)) + + // Add some more commits. + spark.range(0, 100).write.format("delta").mode("append").save(dir.getCanonicalPath) + spark.range(100, 120).write.format("delta").mode("append").save(dir.getCanonicalPath) + + // Restore table to an older version with feature traces. + sql(s"RESTORE delta.`${deltaLog.dataPath}` TO VERSION AS OF $preRemovalVersion") + + // Drop command should detect that latest version has feature traces and run + // preDowngrade again. + val e2 = intercept[DeltaTableFeatureException] { + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableReaderWriterFeature.name).run(spark) + } + checkError( + exception = e2, + errorClass = "DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD", + parameters = Map( + "feature" -> TestRemovableReaderWriterFeature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> "30 days", + "truncateHistoryLogRetentionPeriod" -> truncateHistoryDefaultLogRetention.toString)) + } + } + + test("Remove reader+writer feature after unrelated metadata change") { + withTempDir { dir => + val clock = new ManualClock(System.currentTimeMillis()) + val deltaLog = DeltaLog.forTable(spark, dir, clock) + + createTableWithFeature( + deltaLog, + feature = TestRemovableReaderWriterFeature, + featureProperty = TestRemovableReaderWriterFeature.TABLE_PROP_KEY) + + // First attempt should cleanup feature traces but fail with a message due to historical + // log entries containing the feature. + val e1 = intercept[DeltaTableFeatureException] { + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableReaderWriterFeature.name).run(spark) + } + checkError( + exception = e1, + errorClass = "DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD", + parameters = Map( + "feature" -> TestRemovableReaderWriterFeature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> "30 days", + "truncateHistoryLogRetentionPeriod" -> truncateHistoryDefaultLogRetention.toString)) + + // Add some more commits. 
+ spark.range(0, 100).write.format("delta").mode("append").save(dir.getCanonicalPath) + spark.range(100, 120).write.format("delta").mode("append").save(dir.getCanonicalPath) + + // Pretend retention period has passed. + clock.advance(deltaLog.deltaRetentionMillis(deltaLog.update().metadata) + + TimeUnit.MINUTES.toMillis(5)) + + // Perform an unrelated metadata change. + sql(s"ALTER TABLE delta.`${deltaLog.dataPath}` ADD COLUMN (value INT)") + + // The unrelated metadata change should not interfere with validation and we should + // be able to downgrade the protocol. + AlterTableDropFeatureDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + TestRemovableReaderWriterFeature.name).run(spark) + + // Verify commits before the checkpoint are cleaned. + val earliestExpectedCommitVersion = deltaLog.findEarliestReliableCheckpoint.get + assert(getEarliestCommitVersion(deltaLog) === earliestExpectedCommitVersion) + } + } + + for { + withCatalog <- BOOLEAN_DOMAIN + quoteWith <- if (withCatalog) Seq ("none", "single", "backtick") else Seq("none") + } test(s"Drop feature DDL - withCatalog=$withCatalog, quoteWith=$quoteWith") { + withTempDir { dir => + val table = if (withCatalog) "table" else s"delta.`${dir.getCanonicalPath}`" + if (withCatalog) sql(s"DROP TABLE IF EXISTS $table") + sql( + s"""CREATE TABLE $table (id bigint) USING delta + |TBLPROPERTIES ( + |delta.feature.${TestRemovableWriterFeature.name} = 'supported' + |)""".stripMargin) + + val deltaLog = if (withCatalog) { + DeltaLog.forTable(spark, TableIdentifier(table)) + } else { + DeltaLog.forTable(spark, dir) + } + + AlterTableSetPropertiesDeltaCommand( + DeltaTableV2(spark, deltaLog.dataPath), + Map(TestRemovableWriterFeature.TABLE_PROP_KEY -> "true")).run(spark) + + val protocol = deltaLog.update().protocol + assert(protocol === protocolWithWriterFeature(TestRemovableWriterFeature)) + + val logs = Log4jUsageLogger.track { + val featureName = quoteWith match { + case "none" => s"${TestRemovableWriterFeature.name}" + case "single" => s"'${TestRemovableWriterFeature.name}'" + case "backtick" => s"`${TestRemovableWriterFeature.name}`" + } + sql(s"ALTER TABLE $table DROP FEATURE $featureName") + assert(deltaLog.update().protocol === Protocol(1, 1)) + } + // Test that the write downgrade command was invoked. + val expectedOpType = "delta.test.TestWriterFeaturePreDowngradeCommand" + val blob = logs.collectFirst { + case r if r.metric == MetricDefinitions.EVENT_TAHOE.name && + r.tags.get("opType").contains(expectedOpType) => r.blob + } + assert(blob.nonEmpty, s"Expecting an '$expectedOpType' event but didn't see any.") + } + } + + protected def testProtocolVersionDowngrade( + initialMinReaderVersion: Int, + initialMinWriterVersion: Int, + featuresToAdd: Seq[TableFeature], + featuresToRemove: Seq[TableFeature], + expectedDowngradedProtocol: Protocol): Unit = { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + + spark.sql(s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ( + |delta.minReaderVersion = $initialMinReaderVersion, + |delta.minWriterVersion = $initialMinWriterVersion + |)""".stripMargin) + + // Upgrade protocol to table features. 
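+ // Each feature is enabled via its delta.feature.<name>='supported' table property; applying them in + // a single ALTER TABLE upgrades the protocol so that all of the added features become supported.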
+ val newTBLProperties = featuresToAdd + .map(f => s"delta.feature.${f.name}='supported'") + .reduce(_ + ", " + _) + spark.sql( + s"""ALTER TABLE delta.`${dir.getPath}` + |SET TBLPROPERTIES ( + |$newTBLProperties + |)""".stripMargin) + + for (feature <- featuresToRemove) { + AlterTableDropFeatureDeltaCommand(DeltaTableV2(spark, deltaLog.dataPath), feature.name) + .run(spark) + } + assert(deltaLog.update().protocol === expectedDowngradedProtocol) + } + } + + test("Downgrade protocol version (1, 4)") { + testProtocolVersionDowngrade( + initialMinReaderVersion = 1, + initialMinWriterVersion = 4, + featuresToAdd = Seq(TestRemovableWriterFeature), + featuresToRemove = Seq(TestRemovableWriterFeature), + expectedDowngradedProtocol = Protocol(1, 4)) + } + + // Initial minReader version is (2, 4), however, there are no legacy features that require + // reader version 2. Therefore, the protocol version is downgraded to (1, 4). + test("Downgrade protocol version (2, 4)") { + testProtocolVersionDowngrade( + initialMinReaderVersion = 2, + initialMinWriterVersion = 4, + featuresToAdd = Seq(TestRemovableWriterFeature), + featuresToRemove = Seq(TestRemovableWriterFeature), + expectedDowngradedProtocol = Protocol(1, 4)) + } + + // Version (2, 5) enables column mapping which is a reader+writer feature and requires (2, 5). + // Therefore, to downgrade from table features we need at least (2, 5). + test("Downgrade protocol version (2, 5)") { + testProtocolVersionDowngrade( + initialMinReaderVersion = 2, + initialMinWriterVersion = 5, + featuresToAdd = Seq(TestRemovableWriterFeature), + featuresToRemove = Seq(TestRemovableWriterFeature), + expectedDowngradedProtocol = Protocol(2, 5)) + } + + + test("Downgrade protocol version (1, 1)") { + testProtocolVersionDowngrade( + initialMinReaderVersion = 1, + initialMinWriterVersion = 1, + featuresToAdd = Seq(TestRemovableWriterFeature), + featuresToRemove = Seq(TestRemovableWriterFeature), + expectedDowngradedProtocol = Protocol(1, 1)) + } + + test("Downgrade protocol version on table created with table features") { + // When the table is initialized with table features there are no active (implicit) legacy + // features. After removing the last table feature we downgrade back to (1, 1). + testProtocolVersionDowngrade( + initialMinReaderVersion = 3, + initialMinWriterVersion = 7, + featuresToAdd = Seq(TestRemovableWriterFeature), + featuresToRemove = Seq(TestRemovableWriterFeature), + expectedDowngradedProtocol = Protocol(1, 1)) + } + + test("Downgrade protocol version on table created with writer features") { + testProtocolVersionDowngrade( + initialMinReaderVersion = 1, + initialMinWriterVersion = 7, + featuresToAdd = Seq(TestRemovableWriterFeature), + featuresToRemove = Seq(TestRemovableWriterFeature), + expectedDowngradedProtocol = Protocol(1, 1)) + } + + test("Protocol version downgrade on a table with table features and added legacy feature") { + // Added legacy feature should be removed and the protocol should be downgraded to (2, 5). + testProtocolVersionDowngrade( + initialMinReaderVersion = 3, + initialMinWriterVersion = 7, + featuresToAdd = + Seq(TestRemovableWriterFeature) ++ Protocol(2, 5).implicitlySupportedFeatures, + featuresToRemove = Seq(TestRemovableWriterFeature), + expectedDowngradedProtocol = Protocol(2, 5)) + + // Added legacy feature should not be removed and the protocol should stay on (1, 7). 
+ testProtocolVersionDowngrade( + initialMinReaderVersion = 1, + initialMinWriterVersion = 7, + featuresToAdd = Seq(TestRemovableWriterFeature, TestRemovableLegacyWriterFeature), + featuresToRemove = Seq(TestRemovableWriterFeature), + expectedDowngradedProtocol = Protocol(1, 7) + .withFeature(TestRemovableLegacyWriterFeature)) + + // Legacy feature was manually removed. Protocol should be downgraded to (1, 1). + testProtocolVersionDowngrade( + initialMinReaderVersion = 1, + initialMinWriterVersion = 7, + featuresToAdd = Seq(TestRemovableWriterFeature, TestRemovableLegacyWriterFeature), + featuresToRemove = Seq(TestRemovableLegacyWriterFeature, TestRemovableWriterFeature), + expectedDowngradedProtocol = Protocol(1, 1)) + + // Start with writer table features and add a legacy reader+writer feature. + testProtocolVersionDowngrade( + initialMinReaderVersion = 1, + initialMinWriterVersion = 7, + featuresToAdd = Seq(TestRemovableWriterFeature, ColumnMappingTableFeature), + featuresToRemove = Seq(TestRemovableWriterFeature), + expectedDowngradedProtocol = Protocol(3, 7).withFeature(ColumnMappingTableFeature)) + + // Remove reader+writer legacy feature as well. + testProtocolVersionDowngrade( + initialMinReaderVersion = 1, + initialMinWriterVersion = 7, + featuresToAdd = Seq(TestRemovableLegacyReaderWriterFeature, TestRemovableWriterFeature), + featuresToRemove = Seq(TestRemovableLegacyReaderWriterFeature, TestRemovableWriterFeature), + expectedDowngradedProtocol = Protocol(1, 1)) + } + + test("Protocol version is not downgraded when writer features exist") { + testProtocolVersionDowngrade( + initialMinReaderVersion = 1, + initialMinWriterVersion = 7, + featuresToAdd = Seq(TestRemovableWriterFeature, DomainMetadataTableFeature), + featuresToRemove = Seq(TestRemovableWriterFeature), + expectedDowngradedProtocol = protocolWithWriterFeature(DomainMetadataTableFeature)) + } + + test("Protocol version is not downgraded when reader+writer features exist") { + testProtocolVersionDowngrade( + initialMinReaderVersion = 3, + initialMinWriterVersion = 7, + featuresToAdd = Seq(TestRemovableReaderWriterFeature, DeletionVectorsTableFeature), + featuresToRemove = Seq(TestRemovableReaderWriterFeature), + expectedDowngradedProtocol = protocolWithReaderFeature(DeletionVectorsTableFeature)) + } + + test("Protocol version is not downgraded when both reader+writer and writer features exist") { + testProtocolVersionDowngrade( + initialMinReaderVersion = 3, + initialMinWriterVersion = 7, + featuresToAdd = Seq(TestRemovableReaderWriterFeature, TestRemovableWriterFeature), + featuresToRemove = Seq(TestRemovableReaderWriterFeature), + expectedDowngradedProtocol = + Protocol(3, 7, Some(Set.empty), Some(Set(TestRemovableWriterFeature.name)))) + + testProtocolVersionDowngrade( + initialMinReaderVersion = 3, + initialMinWriterVersion = 7, + featuresToAdd = Seq(TestRemovableReaderWriterFeature, TestRemovableWriterFeature), + featuresToRemove = Seq(TestRemovableWriterFeature), + expectedDowngradedProtocol = protocolWithReaderFeature(TestRemovableReaderWriterFeature)) + } + + private def dropV2CheckpointsTableFeature(spark: SparkSession, log: DeltaLog): Unit = { + spark.sql(s"ALTER TABLE delta.`${log.dataPath}` DROP FEATURE " + + s"`${V2CheckpointTableFeature.name}`") + } + + private def testV2CheckpointTableFeatureDrop( + v2CheckpointFormat: V2Checkpoint.Format, + withInitialV2Checkpoint: Boolean, + forceMultiPartCheckpoint: Boolean = false): Unit = { + var confs = Seq( + DeltaConfigs.CHECKPOINT_POLICY.defaultTablePropertyKey -> 
CheckpointPolicy.V2.name, + DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key -> v2CheckpointFormat.name + ) + val expectedClassicCheckpointType = if (forceMultiPartCheckpoint) { + confs :+= DeltaSQLConf.DELTA_CHECKPOINT_PART_SIZE.key -> "1" + CheckpointInstance.Format.WITH_PARTS + } else { + CheckpointInstance.Format.SINGLE + } + withSQLConf(confs: _*) { + withTempPath { dir => + val clock = new ManualClock(System.currentTimeMillis()) + val targetLog = DeltaLog.forTable(spark, dir, clock) + val defaultRetentionPeriod = + DeltaConfigs.LOG_RETENTION.fromMetaData(targetLog.update().metadata).toString + + val targetDF = spark.range(start = 0, end = 100, step = 1, numPartitions = 2) + targetDF.write.format("delta").save(dir.toString) + + val initialCheckpointCount = if (withInitialV2Checkpoint) 1 else 0 + + if (withInitialV2Checkpoint) { + // Create a v2 checkpoint. + targetLog.checkpoint() + } + + // Assert that the current checkpointing policy requires v2 checkpoint support. + val preDowngradeSnapshot = targetLog.update() + assert( + DeltaConfigs.CHECKPOINT_POLICY + .fromMetaData(preDowngradeSnapshot.metadata) + .needsV2CheckpointSupport) + val checkpointFiles = targetLog.listFrom(0).filter(FileNames.isCheckpointFile) + assert(checkpointFiles.length == initialCheckpointCount) + checkpointFiles.foreach { f => + assert(CheckpointInstance(f.getPath).format == CheckpointInstance.Format.V2) + } + + // Dropping the feature should fail because + // 1. The checkpointing policy in metadata requires v2 checkpoint support. + // 2. Also, when withInitialV2Checkpoint is true, a v2 checkpoint already exists. + val e1 = intercept[DeltaTableFeatureException] { + dropV2CheckpointsTableFeature(spark, targetLog) + } + checkError( + exception = e1, + errorClass = "DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD", + parameters = Map( + "feature" -> V2CheckpointTableFeature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> defaultRetentionPeriod, + "truncateHistoryLogRetentionPeriod" -> truncateHistoryDefaultLogRetention.toString)) + + val postCleanupCheckpointFiles = + targetLog.listFrom(0).filter(FileNames.isCheckpointFile).toList + + // Assert that a new classic checkpoint has been created. + val uniqueCheckpointCount = postCleanupCheckpointFiles + .drop(initialCheckpointCount) + .map { checkpointFile => + val checkpointInstance = CheckpointInstance(checkpointFile.getPath) + + assert(checkpointInstance.format == expectedClassicCheckpointType) + + checkpointInstance.version + } + // Count a multi-part checkpoint as a single checkpoint. + .toSet.size + // The drop feature command generates one classic checkpoint after the v2 checkpoint cleanup. + val expectedClassicCheckpointCount = 1 + assert(uniqueCheckpointCount == expectedClassicCheckpointCount) + + spark.range(100, 120).write.format("delta").mode("append").save(dir.getCanonicalPath) + + // V2 Checkpoint related traces have not been cleaned up yet. Attempt should fail. + val e2 = intercept[DeltaTableFeatureException] { + dropV2CheckpointsTableFeature(spark, targetLog) + } + checkError( + exception = e2, + errorClass = "DELTA_FEATURE_DROP_HISTORICAL_VERSIONS_EXIST", + parameters = Map( + "feature" -> V2CheckpointTableFeature.name, + "logRetentionPeriodKey" -> "delta.logRetentionDuration", + "logRetentionPeriod" -> defaultRetentionPeriod, + "truncateHistoryLogRetentionPeriod" -> truncateHistoryDefaultLogRetention.toString)) + + // Pretend retention period has passed. 
+ clock.advance( + targetLog.deltaRetentionMillis(targetLog.update().metadata) + + TimeUnit.MINUTES.toMillis(5)) + + // History is now clean. We should be able to remove the feature. + dropV2CheckpointsTableFeature(spark, targetLog) + + val postDowngradeSnapshot = targetLog.update() + val protocol = postDowngradeSnapshot.protocol + assert(!protocol.readerFeatureNames.contains(V2CheckpointTableFeature.name)) + assert( + !DeltaConfigs.CHECKPOINT_POLICY + .fromMetaData(postDowngradeSnapshot.metadata) + .needsV2CheckpointSupport) + assert(targetLog.listFrom(0).filter(FileNames.isCheckpointFile).forall { f => + CheckpointInstance(f.getPath).format == expectedClassicCheckpointType + }) + } + } + } + + for ( + v2CheckpointFormat <- V2Checkpoint.Format.ALL; + withInitialV2Checkpoint <- BOOLEAN_DOMAIN) + test(s"Remove v2 Checkpoints Feature [v2CheckpointFormat: ${v2CheckpointFormat.name}; " + + s"withInitialV2Checkpoint: $withInitialV2Checkpoint; forceMultiPartCheckpoint: false]") { + testV2CheckpointTableFeatureDrop(v2CheckpointFormat, withInitialV2Checkpoint) + } + + test( + s"Remove v2 Checkpoints Feature [v2CheckpointFormat: ${V2Checkpoint.Format.PARQUET.name}; " + + s"withInitialV2Checkpoint: true; forceMultiPartCheckpoint: true]") { + testV2CheckpointTableFeatureDrop(V2Checkpoint.Format.PARQUET, true, true) + } + + // Create a table for testing that has an unsupported feature. + private def withTestTableWithUnsupportedWriterFeature( + emptyTable: Boolean)(testCode: String => Unit): Unit = { + val tableName = "test_table" + withTable(tableName) { + if (emptyTable) { + sql(s"CREATE TABLE $tableName(id INT) USING DELTA") + } else { + sql(s"CREATE TABLE $tableName USING DELTA AS SELECT 1 AS id") + } + + sql(s"""ALTER TABLE $tableName + SET TBLPROPERTIES ('delta.minReaderVersion' = '3', 'delta.minWriterVersion' = '7')""") + + val deltaLogPath = DeltaLog.forTable(spark, TableIdentifier(tableName)).logPath + .toString.stripPrefix("file:") + + // scalastyle:off + val commitJson = + """{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1702304249309}} + |{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":[],"writerFeatures":["unsupportedWriter"]}}""".stripMargin + // scalastyle:on + + Files.write(Paths.get(deltaLogPath, "00000000000000000002.json"), commitJson.getBytes) + + testCode(tableName) + } + } + + // Test that write commands error out when unsupported features are present in the table protocol. 
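+ // The helper below registers one test per command; each command body receives the name of a table + // whose latest commit adds the unknown 'unsupportedWriter' writer feature to the protocol.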
+ private def testUnsupportedFeature( + commandName: String, emptyTable: Boolean)(command: String => Unit): Unit = { + test(s"Writes using $commandName error out when unsupported writer features are present") { + withTestTableWithUnsupportedWriterFeature(emptyTable) { tableName => + intercept[DeltaUnsupportedTableFeatureException] { + command(tableName) + } + } + } + } + + testUnsupportedFeature("INSERT", emptyTable = true) { testTableName => + sql(s"INSERT INTO $testTableName VALUES (2)") + } + + testUnsupportedFeature("UPDATE", emptyTable = false) { testTableName => + sql(s"UPDATE $testTableName SET id = 2") + } + + testUnsupportedFeature("DELETE", emptyTable = false) { testTableName => + sql(s"DELETE FROM $testTableName WHERE id > 0") + } + + testUnsupportedFeature("MERGE", emptyTable = false) { testTableName => + sql(s"""MERGE INTO $testTableName t + |USING $testTableName s + |ON s.id = t.id + 100 + |WHEN NOT MATCHED THEN INSERT *""".stripMargin) + } + + testUnsupportedFeature("CREATE OR REPLACE TABLE", emptyTable = false) { testTableName => + sql(s"CREATE OR REPLACE TABLE $testTableName (other_column INT) USING DELTA") + } + + testUnsupportedFeature("ManualUpdate commit", emptyTable = true) { testTableName => + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(testTableName)) + deltaLog.startTransaction(None) + .commit(Seq(DeltaTestUtils.createTestAddFile()), DeltaOperations.ManualUpdate) + } + + testUnsupportedFeature("SHALLOW CLONE", emptyTable = true) { testTableName => + val cloneSourceTableName = "clone_source_table" + withTable(cloneSourceTableName) { + sql(s"DELETE FROM $testTableName") + sql(s"CREATE TABLE $cloneSourceTableName USING delta AS SELECT 1337 as id") + sql(s"CREATE OR REPLACE TABLE $testTableName SHALLOW CLONE $cloneSourceTableName") + } + } + + private def assertPropertiesAndShowTblProperties( + deltaLog: DeltaLog, + tableHasFeatures: Boolean = false): Unit = { + val configs = deltaLog.snapshot.metadata.configuration.map { case (k, v) => + k.toLowerCase(Locale.ROOT) -> v + } + assert(!configs.contains(Protocol.MIN_READER_VERSION_PROP)) + assert(!configs.contains(Protocol.MIN_WRITER_VERSION_PROP)) + assert(!configs.exists(_._1.startsWith(FEATURE_PROP_PREFIX))) + + val tblProperties = + sql(s"SHOW TBLPROPERTIES delta.`${deltaLog.dataPath.toString}`").collect() + + assert( + tblProperties.exists(row => row.getAs[String]("key") == Protocol.MIN_READER_VERSION_PROP)) + assert( + tblProperties.exists(row => row.getAs[String]("key") == Protocol.MIN_WRITER_VERSION_PROP)) + + assert(tableHasFeatures === tblProperties.exists(row => + row.getAs[String]("key").startsWith(FEATURE_PROP_PREFIX))) + val rows = + tblProperties.filter(row => + row.getAs[String]("key").startsWith(FEATURE_PROP_PREFIX)) + for (row <- rows) { + val name = row.getAs[String]("key").substring(FEATURE_PROP_PREFIX.length) + val status = row.getAs[String]("value") + assert(TableFeature.featureNameToFeature(name).isDefined) + assert(status == FEATURE_PROP_SUPPORTED) + } + } + + private def captureProtocolChangeEventBlob(f: => Unit): Map[String, Any] = { + val logs = Log4jUsageLogger.track(f) + val blob = logs.collectFirst { + case r if r.metric == MetricDefinitions.EVENT_TAHOE.name && + r.tags.get("opType").contains("delta.protocol.change") => r.blob + } + require(blob.nonEmpty, "Expecting a delta.protocol.change event but didn't see any.") + blob.map(JsonUtils.fromJson[Map[String, Any]]).head + } +} + +class DeltaProtocolVersionSuite extends DeltaProtocolVersionSuiteBase diff --git 
a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaRestartSessionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaRestartSessionSuite.scala new file mode 100644 index 00000000000..3f710a59b10 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaRestartSessionSuite.scala @@ -0,0 +1,48 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.delta.catalog.DeltaCatalog +import org.apache.spark.sql.internal.SQLConf + +class DeltaRestartSessionSuite extends SparkFunSuite { + + test("restart Spark session should work") { + withTempDir { dir => + var spark = SparkSession.builder().master("local[2]") + .config(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[DeltaCatalog].getName) + .getOrCreate() + try { + val path = dir.getCanonicalPath + spark.range(10).write.format("delta").mode("overwrite").save(path) + spark.read.format("delta").load(path).count() + + spark.stop() + spark = SparkSession.builder().master("local[2]") + .config(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[DeltaCatalog].getName) + .getOrCreate() + spark.range(10).write.format("delta").mode("overwrite").save(path) + spark.read.format("delta").load(path).count() + } + finally { + spark.stop() + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaRetentionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaRetentionSuite.scala new file mode 100644 index 00000000000..2389c6ece59 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaRetentionSuite.scala @@ -0,0 +1,507 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File + +import scala.language.postfixOps + +import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile +import org.apache.spark.sql.delta.actions.{Action, AddFile, RemoveFile, SetTransaction} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkConf +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.util.ManualClock + +// scalastyle:off: removeFile +class DeltaRetentionSuite extends QueryTest + with DeltaRetentionSuiteBase + with SQLTestUtils + with DeltaSQLCommandTest { + + protected override def sparkConf: SparkConf = super.sparkConf + + override protected def getLogFiles(dir: File): Seq[File] = + getDeltaFiles(dir) ++ getCheckpointFiles(dir) + + test("delete expired logs") { + withTempDir { tempDir => + val startTime = getStartTimeForRetentionTest + val clock = new ManualClock(startTime) + val actualTestStartTime = System.currentTimeMillis() + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath), clock) + val logPath = new File(log.logPath.toUri) + (1 to 5).foreach { i => + val txn = if (i == 1) startTxnWithManualLogCleanup(log) else log.startTransaction() + val file = AddFile(i.toString, Map.empty, 1, 1, true) :: Nil + val delete: Seq[Action] = if (i > 1) { + val timestamp = startTime + (System.currentTimeMillis()-actualTestStartTime) + RemoveFile(i - 1 toString, Some(timestamp), true) :: Nil + } else { + Nil + } + txn.commit(delete ++ file, testOp) + } + + val initialFiles = getLogFiles(logPath) + // Shouldn't clean up, no checkpoint, no expired files + log.cleanUpExpiredLogs(log.snapshot) + + assert(initialFiles === getLogFiles(logPath)) + + clock.advance(intervalStringToMillis(DeltaConfigs.LOG_RETENTION.defaultValue) + + intervalStringToMillis("interval 1 day")) + + // Shouldn't clean up, no checkpoint, although all files have expired + log.cleanUpExpiredLogs(log.snapshot) + assert(initialFiles === getLogFiles(logPath)) + + log.checkpoint() + + val expectedFiles = Seq("04.json", "04.checkpoint.parquet") + // after checkpointing, the files should be cleared + log.cleanUpExpiredLogs(log.snapshot) + val afterCleanup = getLogFiles(logPath) + assert(initialFiles !== afterCleanup) + assert(expectedFiles.forall(suffix => afterCleanup.exists(_.getName.endsWith(suffix))), + s"${afterCleanup.mkString("\n")}\n didn't contain files with suffixes: $expectedFiles") + } + } + + test("log files being already deleted shouldn't fail log deletion job") { + withTempDir { tempDir => + val startTime = getStartTimeForRetentionTest + val clock = new ManualClock(startTime) + val actualTestStartTime = System.currentTimeMillis() + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath), clock) + val logPath = new File(log.logPath.toUri) + val iterationCount = (log.checkpointInterval() * 2) + 1 + + (1 to iterationCount).foreach { i => + val txn = if (i == 1) startTxnWithManualLogCleanup(log) else log.startTransaction() + val file = AddFile(i.toString, Map.empty, 1, 1, true) :: Nil + val delete: Seq[Action] = if (i > 1) { + val timestamp = startTime + (System.currentTimeMillis()-actualTestStartTime) + RemoveFile(i - 1 toString, Some(timestamp), true) :: Nil + } else { + Nil + } + val version = txn.commit(delete ++ 
file, testOp) + val deltaFile = new File(FileNames.deltaFile(log.logPath, version).toUri) + deltaFile.setLastModified(clock.getTimeMillis() + i * 10000) + val crcFile = new File(FileNames.checksumFile(log.logPath, version).toUri) + crcFile.setLastModified(clock.getTimeMillis() + i * 10000) + val chk = new File(FileNames.checkpointFileSingular(log.logPath, version).toUri) + if (chk.exists()) { + chk.setLastModified(clock.getTimeMillis() + i * 10000) + } + } + + // delete some files in the middle + val middleStartIndex = log.checkpointInterval() / 2 + getDeltaFiles(logPath).sortBy(_.getName).slice( + middleStartIndex, middleStartIndex + log.checkpointInterval()).foreach(_.delete()) + clock.advance(intervalStringToMillis(DeltaConfigs.LOG_RETENTION.defaultValue) + + intervalStringToMillis("interval 2 day")) + log.cleanUpExpiredLogs(log.snapshot) + + val minDeltaFile = + getDeltaFiles(logPath).map(f => FileNames.deltaVersion(new Path(f.toString))).min + val maxChkFile = getCheckpointFiles(logPath).map(f => + FileNames.checkpointVersion(new Path(f.toString))).max + + assert(maxChkFile === minDeltaFile, + "Delta files before the last checkpoint version should have been deleted") + assert(getCheckpointFiles(logPath).length === 1, + "There should only be the last checkpoint version") + } + } + + testQuietly( + "RemoveFiles persist across checkpoints as tombstones if retention time hasn't expired") { + withTempDir { tempDir => + val clock = new ManualClock(getStartTimeForRetentionTest) + val log1 = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath), clock) + + val txn = startTxnWithManualLogCleanup(log1) + val files1 = (1 to 10).map(f => AddFile(f.toString, Map.empty, 1, 1, true)) + txn.commit(files1, testOp) + val txn2 = log1.startTransaction() + val files2 = (1 to 4).map(f => RemoveFile(f.toString, Some(clock.getTimeMillis()))) + txn2.commit(files2, testOp) + log1.checkpoint() + + DeltaLog.clearCache() + val log2 = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath), clock) + assert(log2.snapshot.tombstones.count() === 4) + assert(log2.snapshot.allFiles.count() === 6) + } + } + + def removeFileCountFromUnderlyingCheckpoint(snapshot: Snapshot): Long = { + val df = snapshot.checkpointProvider + .allActionsFileIndexes() + .map(snapshot.deltaLog.loadIndex(_)) + .reduce(_.union(_)) + df.where("remove is not null").count() + } + + testQuietly("retention timestamp is picked properly by the cold snapshot initialization") { + withTempDir { dir => + val clock = new ManualClock(getStartTimeForRetentionTest) + def deltaLog: DeltaLog = DeltaLog.forTable(spark, new Path(dir.getCanonicalPath), clock) + + // Create table with 30 day tombstone retention. + sql( + s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ('delta.deletedFileRetentionDuration' = 'interval 30 days') + """.stripMargin) + + + // 1st day - commit 10 new files and remove them also same day. + clock.advance(intervalStringToMillis("interval 1 days")) + val files1 = (1 to 4).map(f => AddFile(f.toString, Map.empty, 1, 1, true)) + deltaLog.startTransaction().commit(files1, testOp) + val files2 = (1 to 4).map(f => RemoveFile(f.toString, Some(clock.getTimeMillis()))) + deltaLog.startTransaction().commit(files2, testOp) + + // Advance clock by 10 days. + clock.advance(intervalStringToMillis("interval 10 days")) + DeltaLog.clearCache() + deltaLog.checkpoint() + DeltaLog.clearCache() // Clear cache and reinitialize snapshot with latest checkpoint. 
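+ // The tombstones are only ~10 days old, well within the 30-day deletedFileRetentionDuration, so the + // checkpoint written above still carries all 4 RemoveFile actions.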
+ assert(removeFileCountFromUnderlyingCheckpoint(deltaLog.unsafeVolatileSnapshot) === 4) + + // Advance clock by 21 more days. Now checkpoint should stop tracking remove tombstones. + clock.advance(intervalStringToMillis("interval 21 days")) + deltaLog.startTransaction().commit(Seq.empty, testOp) + DeltaLog.clearCache() + deltaLog.checkpoint(deltaLog.unsafeVolatileSnapshot) + DeltaLog.clearCache() // Clear cache and reinitialize snapshot with latest checkpoint. + assert(removeFileCountFromUnderlyingCheckpoint(deltaLog.unsafeVolatileSnapshot) === 0) + } + } + + + testQuietly("retention timestamp is lesser than the default value") { + withTempDir { dir => + val clock = new ManualClock(getStartTimeForRetentionTest) + def deltaLog: DeltaLog = DeltaLog.forTable(spark, new Path(dir.getCanonicalPath), clock) + + // Create table with 2 day tombstone retention. + sql( + s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ('delta.deletedFileRetentionDuration' = 'interval 2 days') + """.stripMargin) + + + // 1st day - commit 10 new files and remove them also same day. + { + clock.advance(intervalStringToMillis("interval 1 days")) + val txn = deltaLog.startTransaction() + val files1 = (1 to 4).map(f => AddFile(f.toString, Map.empty, 1, 1, true)) + txn.commit(files1, testOp) + val txn2 = deltaLog.startTransaction() + val files2 = (1 to 4).map(f => RemoveFile(f.toString, Some(clock.getTimeMillis()))) + txn2.commit(files2, testOp) + } + + + // Advance clock by 4 days. + clock.advance(intervalStringToMillis("interval 4 days")) + DeltaLog.clearCache() + deltaLog.checkpoint(deltaLog.unsafeVolatileSnapshot) + DeltaLog.clearCache() // Clear cache and reinitialize snapshot with latest checkpoint. + assert(removeFileCountFromUnderlyingCheckpoint(deltaLog.unsafeVolatileSnapshot) === 0) + } + } + + testQuietly("RemoveFiles get deleted during checkpoint if retention time has passed") { + withTempDir { tempDir => + val clock = new ManualClock(getStartTimeForRetentionTest) + val log1 = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath), clock) + + val txn = startTxnWithManualLogCleanup(log1) + val files1 = (1 to 10).map(f => AddFile(f.toString, Map.empty, 1, 1, true)) + txn.commit(files1, testOp) + val txn2 = log1.startTransaction() + val files2 = (1 to 4).map(f => RemoveFile(f.toString, Some(clock.getTimeMillis()))) + txn2.commit(files2, testOp) + + clock.advance( + intervalStringToMillis(DeltaConfigs.TOMBSTONE_RETENTION.defaultValue) + 1000000L) + + log1.checkpoint() + + DeltaLog.clearCache() + val log2 = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath), clock) + assert(log2.snapshot.tombstones.count() === 0) + assert(log2.snapshot.allFiles.count() === 6) + } + } + + test("the checkpoint file for version 0 should be cleaned") { + withTempDir { tempDir => + val clock = new ManualClock(getStartTimeForRetentionTest) + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath), clock) + val logPath = new File(log.logPath.toUri) + startTxnWithManualLogCleanup(log).commit(AddFile("0", Map.empty, 1, 1, true) :: Nil, testOp) + log.checkpoint() + + val initialFiles = getLogFiles(logPath) + clock.advance(intervalStringToMillis(DeltaConfigs.LOG_RETENTION.defaultValue) + + intervalStringToMillis("interval 1 day")) + + // Create a new checkpoint so that the previous version can be deleted + log.startTransaction().commit(AddFile("1", Map.empty, 1, 1, true) :: Nil, testOp) + log.checkpoint() + + // despite our clock time being set in the future, this doesn't 
change the FileStatus + // lastModified time. this can cause some flakiness during log cleanup. setting it fixes that. + getLogFiles(logPath) + .filterNot(f => initialFiles.contains(f)) + .foreach(f => f.setLastModified(clock.getTimeMillis())) + + log.cleanUpExpiredLogs(log.snapshot) + val afterCleanup = getLogFiles(logPath) + initialFiles.foreach { file => + assert(!afterCleanup.contains(file)) + } + } + } + + test("allow users to expire transaction identifiers from checkpoints") { + withTempDir { dir => + val clock = new ManualClock(getStartTimeForRetentionTest) + val log = DeltaLog.forTable(spark, new Path(dir.getCanonicalPath), clock) + sql( + s"""CREATE TABLE delta.`${dir.getCanonicalPath}` (id bigint) USING delta + |TBLPROPERTIES ('delta.setTransactionRetentionDuration' = 'interval 1 days') + """.stripMargin) + + // commit at time < TRANSACTION_ID_RETENTION_DURATION + log.startTransaction().commitManually(SetTransaction("app", 1, Some(clock.getTimeMillis()))) + assert(log.update().transactions == Map("app" -> 1)) + assert(log.update().numOfSetTransactions == 1) + + clock.advance(intervalStringToMillis("interval 1 days")) + + // query at time == TRANSACTION_ID_RETENTION_DURATION & NO new commit + // No new commit has been made, so we will see expired transactions (this is not ideal, but + // it's a tradeoff we've accepted) + assert(log.update().transactions == Map("app" -> 1)) + assert(log.snapshot.numOfSetTransactions == 1) + + clock.advance(1) + + // query at time > TRANSACTION_ID_RETENTION_DURATION & NO new commit + // we continue to see expired transactions + assert(log.update().transactions == Map("app" -> 1)) + assert(log.snapshot.numOfSetTransactions == 1) + + // query at time > TRANSACTION_ID_RETENTION_DURATION & there IS a new commit + // We will only filter expired transactions when time is >= TRANSACTION_ID_RETENTION_DURATION + // and a new commit has been made + val addFile = AddFile( + path = "fake/path/1", partitionValues = Map.empty, size = 1, + modificationTime = 1, dataChange = true) + log.startTransaction().commitManually(addFile) + assert(log.update().transactions.isEmpty) + assert(log.snapshot.numOfSetTransactions == 0) + } + } + + protected def cleanUpExpiredLogs(log: DeltaLog): Unit = { + val snapshot = log.update() + + val checkpointVersion = snapshot.logSegment.checkpointProvider.version + logInfo(s"snapshot version: ${snapshot.version} checkpoint: $checkpointVersion") + + log.cleanUpExpiredLogs(snapshot) + } + + for (v2CheckpointFormat <- V2Checkpoint.Format.ALL_AS_STRINGS) + test(s"sidecar file cleanup [v2CheckpointFormat: $v2CheckpointFormat]") { + val checkpointPolicy = CheckpointPolicy.V2.name + withSQLConf((DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key -> v2CheckpointFormat)) { + withTempDir { tempDir => + val startTime = getStartTimeForRetentionTest + val clock = new ManualClock(startTime) + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath), clock) + val logPath = new File(log.logPath.toUri) + val visitedFiles = scala.collection.mutable.Set.empty[String] + + spark.sql(s"""CREATE TABLE delta.`${tempDir.toString()}` (id Int) USING delta + | TBLPROPERTIES( + |-- Disable the async log cleanup as this test needs to manually trigger log + |-- clean up. 
+ |'delta.enableExpiredLogCleanup' = 'false', + |'${DeltaConfigs.CHECKPOINT_POLICY.key}' = '$checkpointPolicy', + |'${DeltaConfigs.CHECKPOINT_WRITE_STATS_AS_STRUCT.key}' = 'false', + |'delta.checkpointInterval' = '100000', + |'delta.logRetentionDuration' = 'interval 6 days') + """.stripMargin) + + // day-1. Create a commit with 4 AddFiles. + clock.setTime(day(startTime, day = 1)) + val file = (1 to 4).map(i => createTestAddFile(i.toString)) + log.startTransaction().commit(file, testOp) + setModificationTimeOfNewFiles(log, clock, visitedFiles) + + // Trigger 1 commit and 1 checkpoint daily for the next 8 days. + val sidecarFiles = scala.collection.mutable.Map.empty[Long, String] + val oddCommitSidecarFile_1 = createSidecarFile(log, Seq(1)) + val evenCommitSidecarFile_1 = createSidecarFile(log, Seq(1)) + def commitAndCheckpoint(dayNumber: Int): Unit = { + clock.setTime(day(startTime, dayNumber)) + + // Write a new commit on each day. + log.startTransaction().commit(Seq(log.unsafeVolatileSnapshot.metadata), testOp) + setModificationTimeOfNewFiles(log, clock, visitedFiles) + + // Write a new checkpoint on each day. Each checkpoint has two sidecars: + // 1. Common sidecar - one of oddCommitSidecarFile_1/evenCommitSidecarFile_1 + // 2. A new sidecar just created for this checkpoint. + val sidecarFile1 = + if (dayNumber % 2 == 0) evenCommitSidecarFile_1 else oddCommitSidecarFile_1 + val sidecarFile2 = createSidecarFile(log, Seq(2, 3, 4)) + val checkpointVersion = log.update().version + createV2CheckpointWithSidecarFile( + log, + checkpointVersion, + sidecarFileNames = Seq(sidecarFile1, sidecarFile2)) + setModificationTimeOfNewFiles(log, clock, visitedFiles) + sidecarFiles.put(checkpointVersion, sidecarFile2) + } + + (2 to 9).foreach { dayNumber => commitAndCheckpoint(dayNumber) } + clock.setTime(day(startTime, day = 10)) + log.update() + + // Assert all log files are present. + compareVersions(getCheckpointVersions(logPath), "checkpoint", 2 to 9) + compareVersions(getDeltaVersions(logPath), "delta", 0 to 9) + assert( + getSidecarFiles(log) === + Set( + evenCommitSidecarFile_1, + oddCommitSidecarFile_1) ++ sidecarFiles.values.toIndexedSeq) + + // Trigger metadata cleanup and validate that only the last 6 days of deltas and checkpoints + // have been retained. + cleanUpExpiredLogs(log) + compareVersions(getCheckpointVersions(logPath), "checkpoint", 4 to 9) + compareVersions(getDeltaVersions(logPath), "delta", 4 to 9) + // Check that all active sidecars are retained and expired ones are deleted. + assert( + getSidecarFiles(log) === + Set(evenCommitSidecarFile_1, oddCommitSidecarFile_1) ++ + (4 to 9).map(sidecarFiles(_))) + + // Advance 1 day and again run metadata cleanup. + clock.setTime(day(startTime, day = 11)) + cleanUpExpiredLogs(log) + setModificationTimeOfNewFiles(log, clock, visitedFiles) + // Commit 4 and checkpoint 4 have expired and were deleted. + compareVersions(getCheckpointVersions(logPath), "checkpoint", 5 to 9) + compareVersions(getDeltaVersions(logPath), "delta", 5 to 9) + assert( + getSidecarFiles(log) === + Set(evenCommitSidecarFile_1, oddCommitSidecarFile_1) ++ + (5 to 9).map(sidecarFiles(_))) + + // Do 1 more commit and checkpoint on day 13 and run metadata cleanup. 
+ commitAndCheckpoint(dayNumber = 13) // commit and checkpoint 10 + compareVersions(getCheckpointVersions(logPath), "checkpoint", 5 to 10) + compareVersions(getDeltaVersions(logPath), "delta", 5 to 10) + cleanUpExpiredLogs(log) + setModificationTimeOfNewFiles(log, clock, visitedFiles) + // Version 5 and 6 checkpoints and deltas have expired and were deleted. + compareVersions(getCheckpointVersions(logPath), "checkpoint", 7 to 10) + compareVersions(getDeltaVersions(logPath), "delta", 7 to 10) + + assert( + getSidecarFiles(log) === + Set(evenCommitSidecarFile_1, oddCommitSidecarFile_1) ++ + (7 to 10).map(sidecarFiles(_))) + } + } + } + + for (v2CheckpointFormat <- V2Checkpoint.Format.ALL_AS_STRINGS) + test( + s"compat file created with metadata cleanup when checkpoints are deleted" + + s" [v2CheckpointFormat: $v2CheckpointFormat]") { + val checkpointPolicy = CheckpointPolicy.V2.name + withSQLConf((DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key -> v2CheckpointFormat)) { + withTempDir { tempDir => + val startTime = getStartTimeForRetentionTest + val clock = new ManualClock(startTime) + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath), clock) + val logPath = new File(log.logPath.toUri) + val visitedFiles = scala.collection.mutable.Set.empty[String] + + spark.sql(s"""CREATE TABLE delta.`${tempDir.toString()}` (id Int) USING delta + | TBLPROPERTIES( + |-- Disable the async log cleanup as this test needs to manually trigger log + |-- clean up. + |'delta.enableExpiredLogCleanup' = 'false', + |'${DeltaConfigs.CHECKPOINT_POLICY.key}' = '$checkpointPolicy', + |'${DeltaConfigs.CHECKPOINT_WRITE_STATS_AS_STRUCT.key}' = 'false', + |'delta.checkpointInterval' = '100000', + |'delta.logRetentionDuration' = 'interval 6 days') + """.stripMargin) + + (1 to 10).foreach { dayNum => + clock.setTime(day(startTime, dayNum)) + log.startTransaction().commit(Seq(), testOp) + setModificationTimeOfNewFiles(log, clock, visitedFiles) + clock.setTime(day(startTime, dayNum) + 10) + log.checkpoint(log.update()) + setModificationTimeOfNewFiles(log, clock, visitedFiles) + } + clock.setTime(day(startTime, 11)) + log.update() + compareVersions(getCheckpointVersions(logPath), "checkpoint", 1 to 10) + compareVersions(getDeltaVersions(logPath), "delta", 0 to 10) + + // 11th day Run metadata cleanup. + clock.setTime(day(startTime, 11)) + cleanUpExpiredLogs(log) + compareVersions(getCheckpointVersions(logPath), "checkpoint", 5 to 10) + compareVersions(getDeltaVersions(logPath), "delta", 5 to 10) + val checkpointInstancesForV10 = + getCheckpointFiles(logPath) + .filter(f => getFileVersions(Seq(f)).head == 10) + .map(f => new Path(f.getAbsolutePath)) + .sortBy(_.getName) + .map(CheckpointInstance.apply) + + assert(checkpointInstancesForV10.size == 2) + assert( + checkpointInstancesForV10.map(_.format) === + Seq(CheckpointInstance.Format.V2, CheckpointInstance.Format.SINGLE)) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaRetentionSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaRetentionSuiteBase.scala new file mode 100644 index 00000000000..ea5c4b48509 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaRetentionSuiteBase.scala @@ -0,0 +1,227 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.util.{Calendar, TimeZone} + +import scala.collection.mutable + +import org.apache.spark.sql.delta.DeltaOperations.Truncate +import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile +import org.apache.spark.sql.delta.actions.{CheckpointMetadata, Metadata, SidecarFile} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames +import org.apache.spark.sql.delta.util.FileNames.{newV2CheckpointJsonFile, newV2CheckpointParquetFile} +import org.apache.commons.lang3.time.DateUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkConf +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.util.IntervalUtils +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.ManualClock + +trait DeltaRetentionSuiteBase extends QueryTest + with SharedSparkSession { + protected val testOp = Truncate() + + protected override def sparkConf: SparkConf = super.sparkConf + // Disable the log cleanup because it runs asynchronously and causes test flakiness + .set("spark.databricks.delta.properties.defaults.enableExpiredLogCleanup", "false") + + protected def intervalStringToMillis(str: String): Long = { + DeltaConfigs.getMilliSeconds( + IntervalUtils.safeStringToInterval(UTF8String.fromString(str))) + } + + /** + * Returns milliseconds since epoch at 1:00am UTC of current day. + * + * Context: + * Most DeltaRetentionSuite tests rely on ManualClock to time travel and + * trigger metadata cleanup. Cleanup boundaries are determined by + * finding files that were modified before 00:00 of the day on which + * currentTime-LOG_RETENTION_PERIOD falls. This means that for a long running + * test started at 23:59, the number of expired files would jump suddenly + * in 1 minute (the expiration boundary would move by a day as soon as + * system clock hits 00:00 of the next day). By fixing the start time of the + * test to 01:00, we avoid these scenarios. + * + * This would still break if the test runs for more than 23 hours. 
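+ * + * For example, with the default 30-day LOG_RETENTION and a test start time of 01:00 UTC on day D, + * files modified before 00:00 UTC on day D-30 are considered expired; starting at 01:00 keeps that + * boundary stable for the whole test run.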
+ */ + protected def getStartTimeForRetentionTest: Long = { + val currentTime = System.currentTimeMillis() + val date = Calendar.getInstance(TimeZone.getTimeZone("UTC")) + date.setTimeInMillis(currentTime) + val dayStartTimeStamp = DateUtils.truncate(date, Calendar.DAY_OF_MONTH) + dayStartTimeStamp.add(Calendar.HOUR_OF_DAY, 1); + dayStartTimeStamp.getTimeInMillis + } + + protected def getDeltaFiles(dir: File): Seq[File] = + dir.listFiles().filter(f => FileNames.isDeltaFile(new Path(f.getCanonicalPath))) + + protected def getCheckpointFiles(dir: File): Seq[File] = + dir.listFiles().filter(f => FileNames.isCheckpointFile(new Path(f.getCanonicalPath))) + + protected def getLogFiles(dir: File): Seq[File] + + protected def getFileVersions(files: Seq[File]): Set[Long] = { + files.map(f => f.getName()).map(s => s.substring(0, s.indexOf(".")).toLong).toSet + } + + protected def getDeltaVersions(dir: File): Set[Long] = { + getFileVersions(getDeltaFiles(dir)) + } + + protected def getSidecarFiles(log: DeltaLog): Set[String] = { + new java.io.File(log.sidecarDirPath.toUri) + .listFiles() + .filter(_.getName.endsWith(".parquet")) + .map(_.getName) + .toSet + } + + protected def getCheckpointVersions(dir: File): Set[Long] = { + getFileVersions(getCheckpointFiles(dir)) + } + + /** Compares the given versions with expected and generates a nice error message. */ + protected def compareVersions( + versions: Set[Long], + logType: String, + expected: Iterable[Int]): Unit = { + val expectedSet = expected.map(_.toLong).toSet + val deleted = expectedSet -- versions + val notDeleted = versions -- expectedSet + if (!(deleted.isEmpty && notDeleted.isEmpty)) { + fail(s"""Mismatch in log clean up for ${logType}s: + |Shouldn't be deleted but deleted: ${deleted.toArray.sorted.mkString("[", ", ", "]")} + |Should be deleted but not: ${notDeleted.toArray.sorted.mkString("[", ", ", "]")} + """.stripMargin) + } + } + + // Set modification time of the new files in _delta_log directory and mark them as visited. + def setModificationTimeOfNewFiles( + log: DeltaLog, + clock: ManualClock, + visitedFiled: mutable.Set[String]): Unit = { + val fs = log.logPath.getFileSystem(log.newDeltaHadoopConf()) + val allFiles = fs.listFiles(log.logPath, true) + while (allFiles.hasNext) { + val file = allFiles.next() + if (!visitedFiled.contains(file.getPath.toString)) { + visitedFiled += file.getPath.toString + fs.setTimes(file.getPath, clock.getTimeMillis(), 0) + } + } + } + + protected def day(startTime: Long, day: Int): Long = + startTime + intervalStringToMillis(s"interval $day days") + + // Create a sidecar file with given AddFiles inside it. + protected def createSidecarFile(log: DeltaLog, files: Seq[Int]): String = { + val sparkSession = spark + // scalastyle:off sparkimplicits + import sparkSession.implicits._ + // scalastyle:on sparkimplicits + var sidecarFileName: String = "" + withTempDir { dir => + val adds = files.map(i => createTestAddFile(i.toString)) + adds.map(_.wrap).toDF.repartition(1).write.mode("overwrite").parquet(dir.getAbsolutePath) + val srcPath = + new Path(dir.listFiles().filter(_.getName.endsWith("parquet")).head.getAbsolutePath) + val dstPath = new Path(log.sidecarDirPath, srcPath.getName) + val fs = srcPath.getFileSystem(log.newDeltaHadoopConf()) + fs.mkdirs(log.sidecarDirPath) + fs.rename(srcPath, dstPath) + sidecarFileName = fs.getFileStatus(dstPath).getPath.getName + } + sidecarFileName + } + + // Create a V2 Checkpoint at given version with given sidecar files. 
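+ // The checkpoint body is the snapshot's non-file actions, one SidecarFile action per given sidecar, + // and a trailing CheckpointMetadata action; it is written as a JSON or parquet v2 checkpoint depending + // on CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT, and a LastCheckpointInfo pointer is written afterwards.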
+ protected def createV2CheckpointWithSidecarFile( + log: DeltaLog, + version: Long, + sidecarFileNames: Seq[String]): Unit = { + val hadoopConf = log.newDeltaHadoopConf() + val fs = log.logPath.getFileSystem(hadoopConf) + val sidecarFiles = sidecarFileNames.map { fileName => + val sidecarPath = new Path(log.sidecarDirPath, fileName) + val fileStatus = SerializableFileStatus.fromStatus(fs.getFileStatus(sidecarPath)) + SidecarFile(fileStatus) + } + val snapshot = log.getSnapshotAt(version) + val actionsForCheckpoint = + snapshot.nonFileActions ++ sidecarFiles :+ CheckpointMetadata(version) + val v2CheckpointFormat = + spark.conf.getOption(DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key) + v2CheckpointFormat match { + case Some(V2Checkpoint.Format.JSON.name) | None => + log.store.write( + newV2CheckpointJsonFile(log.logPath, version), + actionsForCheckpoint.map(_.json).toIterator, + overwrite = true, + hadoopConf = hadoopConf) + case Some(V2Checkpoint.Format.PARQUET.name) => + val parquetFile = newV2CheckpointParquetFile(log.logPath, version) + val sparkSession = spark + // scalastyle:off sparkimplicits + import sparkSession.implicits._ + // scalastyle:on sparkimplicits + val dfToWrite = actionsForCheckpoint.map(_.wrap).toDF + Checkpoints.createCheckpointV2ParquetFile( + spark, + dfToWrite, + parquetFile, + hadoopConf, + useRename = false) + case _ => + assert(false, "Invalid v2 checkpoint format") + } + log.writeLastCheckpointFile( + log, + LastCheckpointInfo(version, -1, None, None, None, None), + false) + } + + /** + * Start a txn that disables automatic log cleanup. Some tests may need to manually clean up logs + * to get deterministic behaviors. + */ + protected def startTxnWithManualLogCleanup(log: DeltaLog): OptimisticTransaction = { + val txn = log.startTransaction() + // This will pick up `spark.databricks.delta.properties.defaults.enableExpiredLogCleanup` to + // disable log cleanup. + txn.updateMetadata(Metadata()) + txn + } + + test("startTxnWithManualLogCleanup") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + startTxnWithManualLogCleanup(log).commit(Nil, testOp) + assert(!log.enableExpiredLogCleanup()) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSinkSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSinkSuite.scala new file mode 100644 index 00000000000..22d8b563207 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSinkSuite.scala @@ -0,0 +1,801 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.util.Locale + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.actions.CommitInfo +import org.apache.spark.sql.delta.sources.{DeltaSink, DeltaSQLConf} +import org.apache.spark.sql.delta.test.{DeltaColumnMappingSelectedTestMixin, DeltaSQLCommandTest} +import org.apache.commons.io.FileUtils +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql._ +import org.apache.spark.sql.execution.DataSourceScanExec +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.execution.streaming.{MemoryStream, MicroBatchExecution, StreamingQueryWrapper} +import org.apache.spark.sql.execution.streaming.sources.WriteToMicroBatchDataSourceV1 +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.streaming._ +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + +class DeltaSinkSuite + extends StreamTest + with DeltaColumnMappingTestUtils + with DeltaSQLCommandTest { + + override val streamingTimeout = 60.seconds + import testImplicits._ + + // Before we start running the tests in this suite, we should let Spark perform all necessary set + // up that needs to be done for streaming. Without this, the first test in the suite may be flaky + // as its running time can exceed the timeout for the test due to Spark setup. See: ES-235735 + override def beforeAll(): Unit = { + super.beforeAll() + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[Int].toDF() + val query = inputData.writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + + query.stop() + } + } + + protected def withTempDirs(f: (File, File) => Unit): Unit = { + withTempDir { file1 => + withTempDir { file2 => + f(file1, file2) + } + } + } + + test("append mode") { + failAfter(streamingTimeout) { + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[Int] + val df = inputData.toDF() + val query = df.writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + val log = DeltaLog.forTable(spark, outputDir.getCanonicalPath) + try { + inputData.addData(1) + query.processAllAvailable() + + val outputDf = spark.read.format("delta").load(outputDir.getCanonicalPath) + checkDatasetUnorderly(outputDf.as[Int], 1) + assert(log.update().transactions.head == (query.id.toString -> 0L)) + + inputData.addData(2) + query.processAllAvailable() + + checkDatasetUnorderly(outputDf.as[Int], 1, 2) + assert(log.update().transactions.head == (query.id.toString -> 1L)) + + inputData.addData(3) + query.processAllAvailable() + + checkDatasetUnorderly(outputDf.as[Int], 1, 2, 3) + assert(log.update().transactions.head == (query.id.toString -> 2L)) + } finally { + query.stop() + } + } + } + } + + test("complete mode") { + failAfter(streamingTimeout) { + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[Int] + val df = inputData.toDF() + val query = + df.groupBy().count() + .writeStream + .outputMode("complete") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + val log = DeltaLog.forTable(spark, outputDir.getCanonicalPath) + try { + inputData.addData(1) + query.processAllAvailable() + + val outputDf = spark.read.format("delta").load(outputDir.getCanonicalPath) + 
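+          // Complete mode rewrites the whole result table on every trigger, so the checks
+          // below expect a single count row that grows (1, then 2, then 3) rather than appends.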
checkDatasetUnorderly(outputDf.as[Long], 1L) + assert(log.update().transactions.head == (query.id.toString -> 0L)) + + inputData.addData(2) + query.processAllAvailable() + + checkDatasetUnorderly(outputDf.as[Long], 2L) + assert(log.update().transactions.head == (query.id.toString -> 1L)) + + inputData.addData(3) + query.processAllAvailable() + + checkDatasetUnorderly(outputDf.as[Long], 3L) + assert(log.update().transactions.head == (query.id.toString -> 2L)) + } finally { + query.stop() + } + } + } + } + + test("update mode: not supported") { + failAfter(streamingTimeout) { + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[Int] + val df = inputData.toDF() + val e = intercept[AnalysisException] { + df.writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .outputMode("update") + .format("delta") + .start(outputDir.getCanonicalPath) + } + Seq("update", "not support").foreach { msg => + assert(e.getMessage.toLowerCase(Locale.ROOT).contains(msg)) + } + } + } + } + + test("path not specified") { + failAfter(streamingTimeout) { + withTempDir { checkpointDir => + val inputData = MemoryStream[Int] + val df = inputData.toDF() + val e = intercept[IllegalArgumentException] { + df.writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start() + } + Seq("path", " not specified").foreach { msg => + assert(e.getMessage.toLowerCase(Locale.ROOT).contains(msg)) + } + } + } + } + + test("SPARK-21167: encode and decode path correctly") { + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[String] + val query = inputData.toDS() + .map(s => (s, s.length)) + .toDF("value", "len") + .writeStream + .partitionBy("value") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + + try { + // The output is partitioned by "value", so the value will appear in the file path. + // This is to test if we handle spaces in the path correctly. 
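A batch analogue of the round trip this streaming test performs, as a minimal sketch; the local path, the session configuration and the presence of `delta-spark` on the classpath are assumptions of the sketch rather than part of this suite:

```scala
import org.apache.spark.sql.SparkSession

object PartitionPathRoundTripSketch extends App {
  val spark = SparkSession.builder()
    .master("local[*]")
    .appName("partition-path-round-trip")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
  import spark.implicits._

  val target = "/tmp/partition-path-round-trip" // placeholder path
  Seq(("hello world", "hello world".length))
    .toDF("value", "len")
    .write.format("delta").partitionBy("value").mode("overwrite").save(target)

  // The partition value is embedded in the directory name; reading it back must yield the
  // original string, spaces included.
  val values = spark.read.format("delta").load(target).select("value").as[String].collect()
  assert(values.sameElements(Array("hello world")))
  spark.stop()
}
```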
+ inputData.addData("hello world") + failAfter(streamingTimeout) { + query.processAllAvailable() + } + val outputDf = spark.read.format("delta").load(outputDir.getCanonicalPath) + checkDatasetUnorderly(outputDf.as[(String, Int)], ("hello world", "hello world".length)) + } finally { + query.stop() + } + } + } + + test("partitioned writing and batch reading") { + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[Int] + val ds = inputData.toDS() + val query = + ds.map(i => (i, i * 1000)) + .toDF("id", "value") + .writeStream + .partitionBy("id") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + try { + + inputData.addData(1, 2, 3) + failAfter(streamingTimeout) { + query.processAllAvailable() + } + + val outputDf = spark.read.format("delta").load(outputDir.getCanonicalPath) + val expectedSchema = new StructType() + .add(StructField("id", IntegerType)) + .add(StructField("value", IntegerType)) + assert(outputDf.schema === expectedSchema) + + // Verify the correct partitioning schema has been inferred + val hadoopFsRelations = outputDf.queryExecution.analyzed.collect { + case LogicalRelation(baseRelation, _, _, _) if + baseRelation.isInstanceOf[HadoopFsRelation] => + baseRelation.asInstanceOf[HadoopFsRelation] + } + assert(hadoopFsRelations.size === 1) + assert(hadoopFsRelations.head.partitionSchema.exists(_.name == "id")) + assert(hadoopFsRelations.head.dataSchema.exists(_.name == "value")) + + // Verify the data is correctly read + checkDatasetUnorderly( + outputDf.as[(Int, Int)], + (1, 1000), (2, 2000), (3, 3000)) + + /** Check some condition on the partitions of the FileScanRDD generated by a DF */ + def checkFileScanPartitions(df: DataFrame)(func: Seq[FilePartition] => Unit): Unit = { + val filePartitions = df.queryExecution.executedPlan.collect { + case scan: DataSourceScanExec if scan.inputRDDs().head.isInstanceOf[FileScanRDD] => + scan.inputRDDs().head.asInstanceOf[FileScanRDD].filePartitions + }.flatten + if (filePartitions.isEmpty) { + fail(s"No FileScan in query\n${df.queryExecution}") + } + func(filePartitions) + } + + // Read without pruning + checkFileScanPartitions(outputDf) { partitions => + // There should be as many distinct partition values as there are distinct ids + assert(partitions.flatMap(_.files.map(_.partitionValues)).distinct.size === 3) + } + + // Read with pruning, should read only files in partition dir id=1 + checkFileScanPartitions(outputDf.filter("id = 1")) { partitions => + // use physical name + val filesToBeRead = partitions.flatMap(_.files) + assert(filesToBeRead.forall(_.partitionValues.getInt(0) == 1)) + assert(filesToBeRead.map(_.partitionValues).distinct.size === 1) + } + + // Read with pruning, should read only files in partition dir id=1 and id=2 + checkFileScanPartitions(outputDf.filter("id in (1,2)")) { partitions => + val filesToBeRead = partitions.flatMap(_.files) + assert(filesToBeRead.forall(_.partitionValues.getInt(0) != 3)) + assert(filesToBeRead.map(_.partitionValues).distinct.size === 2) + } + } finally { + if (query != null) { + query.stop() + } + } + } + } + + test("work with aggregation + watermark") { + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[Long] + val inputDF = inputData.toDF.toDF("time") + val outputDf = inputDF + .selectExpr("CAST(time AS timestamp) AS timestamp") + .withWatermark("timestamp", "10 seconds") + .groupBy(window($"timestamp", "5 seconds")) + .count() + .select("window.start", "window.end", 
"count") + + val query = + outputDf.writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + try { + def addTimestamp(timestampInSecs: Int*): Unit = { + inputData.addData(timestampInSecs.map(_ * 1L): _*) + failAfter(streamingTimeout) { + query.processAllAvailable() + } + } + + def check(expectedResult: ((Long, Long), Long)*): Unit = { + val outputDf = spark.read.format("delta").load(outputDir.getCanonicalPath) + .selectExpr( + "CAST(start as BIGINT) AS start", + "CAST(end as BIGINT) AS end", + "count") + checkDatasetUnorderly( + outputDf.as[(Long, Long, Long)], + expectedResult.map(x => (x._1._1, x._1._2, x._2)): _*) + } + + addTimestamp(100) // watermark = None before this, watermark = 100 - 10 = 90 after this + addTimestamp(104, 123) // watermark = 90 before this, watermark = 123 - 10 = 113 after this + + addTimestamp(140) // wm = 113 before this, emit results on 100-105, wm = 130 after this + check((100L, 105L) -> 2L, (120L, 125L) -> 1L) // no-data-batch emits results on 120-125 + + } finally { + if (query != null) { + query.stop() + } + } + } + } + + test("throw exception when users are trying to write in batch with different partitioning") { + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[Int] + val ds = inputData.toDS() + val query = + ds.map(i => (i, i * 1000)) + .toDF("id", "value") + .writeStream + .partitionBy("id") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + try { + + inputData.addData(1, 2, 3) + failAfter(streamingTimeout) { + query.processAllAvailable() + } + + val e = intercept[AnalysisException] { + spark.range(100) + .select('id.cast("integer"), 'id % 4 as "by4", 'id.cast("integer") * 1000 as "value") + .write + .format("delta") + .partitionBy("id", "by4") + .mode("append") + .save(outputDir.getCanonicalPath) + } + assert(e.getMessage.contains("Partition columns do not match")) + + } finally { + query.stop() + } + } + } + + testQuietly("incompatible schema merging throws errors - first streaming then batch") { + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[Int] + val ds = inputData.toDS() + val query = + ds.map(i => (i, i * 1000)) + .toDF("id", "value") + .writeStream + .partitionBy("id") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + try { + + inputData.addData(1, 2, 3) + failAfter(streamingTimeout) { + query.processAllAvailable() + } + + val e = intercept[AnalysisException] { + spark.range(100).select('id, ('id * 3).cast("string") as "value") + .write + .partitionBy("id") + .format("delta") + .mode("append") + .save(outputDir.getCanonicalPath) + } + assert(e.getMessage.contains("incompatible")) + } finally { + query.stop() + } + } + } + + test("incompatible schema merging throws errors - first batch then streaming") { + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[Int] + val ds = inputData.toDS() + val dsWriter = + ds.map(i => (i, i * 1000)) + .toDF("id", "value") + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + spark.range(100).select('id, ('id * 3).cast("string") as "value") + .write + .format("delta") + .mode("append") + .save(outputDir.getCanonicalPath) + + val wrapperException = intercept[StreamingQueryException] { + val q = dsWriter.start(outputDir.getCanonicalPath) + inputData.addData(1, 2, 3) + 
q.processAllAvailable() + } + assert(wrapperException.cause.isInstanceOf[AnalysisException]) + assert(wrapperException.cause.getMessage.contains("incompatible")) + } + } + + private def verifyDeltaSinkCatalog(f: DataStreamWriter[_] => StreamingQuery): Unit = { + // Create a Delta sink whose target table is defined by our caller. + val input = MemoryStream[Int] + val streamWriter = input.toDF + .writeStream + .format("delta") + .option( + "checkpointLocation", + Utils.createTempDir(namePrefix = "tahoe-test").getCanonicalPath) + val q = f(streamWriter).asInstanceOf[StreamingQueryWrapper] + + // WARNING: Only the query execution thread is allowed to initialize the logical plan (enforced + // by an assertion in MicroBatchExecution.scala). To avoid flaky failures, run the stream to + // completion, to guarantee the query execution thread ran before we try to access the plan. + try { + input.addData(1, 2, 3) + q.processAllAvailable() + } finally { + q.stop() + } + + val plan = q.streamingQuery.logicalPlan + val WriteToMicroBatchDataSourceV1(catalogTable, sink: DeltaSink, _, _, _, _, _) = plan + assert(catalogTable === sink.catalogTable) + } + + test("DeltaSink.catalogTable is correctly populated - catalog-based table") { + withTable("tab") { + verifyDeltaSinkCatalog(_.toTable("tab")) + } + } + + test("DeltaSink.catalogTable is correctly populated - path-based table") { + withTempDir { tempDir => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + verifyDeltaSinkCatalog(_.start(tempDir.getCanonicalPath)) + } + } + + test("can't write out with all columns being partition columns") { + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[Int] + val ds = inputData.toDS() + val query = + ds.map(i => (i, i * 1000)) + .toDF("id", "value") + .writeStream + .partitionBy("id", "value") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + val e = intercept[StreamingQueryException] { + inputData.addData(1) + query.awaitTermination(30000) + } + assert(e.cause.isInstanceOf[AnalysisException]) + } + } + + test("streaming write correctly sets isBlindAppend in CommitInfo") { + withTempDirs { (outputDir, checkpointDir) => + + val input = MemoryStream[Int] + val inputDataStream = input.toDF().toDF("value") + + def tableData: DataFrame = spark.read.format("delta").load(outputDir.toString) + + def appendToTable(df: DataFrame): Unit = failAfter(streamingTimeout) { + var q: StreamingQuery = null + try { + input.addData(0) + q = df.writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.toString) + .start(outputDir.toString) + q.processAllAvailable() + } finally { + if (q != null) q.stop() + } + } + + var lastCheckedVersion = -1L + def isLastCommitBlindAppend: Boolean = { + val log = DeltaLog.forTable(spark, outputDir.toString) + val lastVersion = log.update().version + assert(lastVersion > lastCheckedVersion, "no new commit was made") + lastCheckedVersion = lastVersion + val lastCommitChanges = log.getChanges(lastVersion).toSeq.head._2 + lastCommitChanges.collectFirst { case c: CommitInfo => c }.flatMap(_.isBlindAppend).get + } + + // Simple streaming write should have isBlindAppend = true + appendToTable(inputDataStream) + assert( + isLastCommitBlindAppend, + "simple write to target table should have isBlindAppend = true") + + // Join with the table should have isBlindAppend = false + appendToTable(inputDataStream.join(tableData, "value")) + assert( + !isLastCommitBlindAppend, + "joining with target 
table in the query should have isBlindAppend = false") + } + } + + test("do not trust user nullability, so that parquet files aren't corrupted") { + val jsonRec = """{"s": "ss", "b": {"s": "ss"}}""" + val schema = new StructType() + .add("s", StringType) + .add("b", new StructType() + .add("s", StringType) + .add("i", IntegerType, nullable = false)) + .add("c", IntegerType, nullable = false) + + withTempDir { base => + val sourceDir = new File(base, "source").getCanonicalPath + val tableDir = new File(base, "output").getCanonicalPath + val chkDir = new File(base, "checkpoint").getCanonicalPath + + FileUtils.write(new File(sourceDir, "a.json"), jsonRec) + + val q = spark.readStream + .format("json") + .schema(schema) + .load(sourceDir) + .withColumn("file", input_file_name()) // Not sure why needs this to reproduce + .writeStream + .format("delta") + .trigger(org.apache.spark.sql.streaming.Trigger.Once) + .option("checkpointLocation", chkDir) + .start(tableDir) + + q.awaitTermination() + + checkAnswer( + spark.read.format("delta").load(tableDir).drop("file"), + Seq(Row("ss", Row("ss", null), null))) + } + } + + test("history includes user-defined metadata for DataFrame.writeStream API") { + failAfter(streamingTimeout) { + withTempDirs { (outputDir, checkpointDir) => + val inputData = MemoryStream[Int] + val df = inputData.toDF() + val query = df.writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .option("userMetadata", "testMeta!") + .format("delta") + .start(outputDir.getCanonicalPath) + val log = DeltaLog.forTable(spark, outputDir.getCanonicalPath) + + inputData.addData(1) + query.processAllAvailable() + + val lastCommitInfo = io.delta.tables.DeltaTable.forPath(spark, outputDir.getCanonicalPath) + .history(1).as[DeltaHistory].head + + assert(lastCommitInfo.userMetadata === Some("testMeta!")) + query.stop() + } + } + } + +} + +abstract class DeltaSinkColumnMappingSuiteBase extends DeltaSinkSuite + with DeltaColumnMappingSelectedTestMixin { + import testImplicits._ + + override protected def runOnlyTests = Seq( + "append mode", + "complete mode", + "partitioned writing and batch reading", + "work with aggregation + watermark" + ) + + + test("allow schema evolution after renaming column") { + Seq(true, false).foreach { schemaMergeEnabled => + withClue(s"Schema merge enabled: $schemaMergeEnabled") { + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> schemaMergeEnabled.toString) { + failAfter(streamingTimeout) { + withTempDirs { (outputDir, checkpointDir) => + val sourceDir = Utils.createTempDir() + def addData(df: DataFrame): Unit = + df.coalesce(1).write.mode("append").save(sourceDir.getCanonicalPath) + + // save data to target dir + Seq(100).toDF("value").write.format("delta").save(outputDir.getCanonicalPath) + // use parquet stream as MemoryStream doesn't support recovering failed batches + val df = spark.readStream + .schema(new StructType().add("value", IntegerType, true)) + .parquet(sourceDir.getCanonicalPath) + // start writing into Delta sink + def queryGen(df: DataFrame): StreamingQuery = df.writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + + val query = queryGen(df) + val log = DeltaLog.forTable(spark, outputDir.getCanonicalPath) + + // delta sink contains [100, 1] + addData(Seq(1).toDF("value")) + query.processAllAvailable() + + def outputDf: DataFrame = + spark.read.format("delta").load(outputDir.getCanonicalPath) + checkDatasetUnorderly(outputDf.as[Int], 100, 1) 
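The `transactions` checks in these tests rest on the sink recording, per streaming query id, the highest batch id it has committed, and skipping any batch it has already seen. A plain-Scala sketch of that rule (class and method names here are illustrative, not Delta internals):

```scala
final class IdempotentSinkSketch {
  // queryId -> highest batch id already committed to the table.
  private var lastCommitted: Map[String, Long] = Map.empty

  /** Runs `write` only if this (queryId, batchId) pair has not been committed before. */
  def addBatch(queryId: String, batchId: Long)(write: => Unit): Unit = {
    if (lastCommitted.get(queryId).forall(_ < batchId)) {
      write
      lastCommitted += (queryId -> batchId)
    }
  }

  def lastCommittedBatch(queryId: String): Option[Long] = lastCommitted.get(queryId)
}
```

After batch 0 commits, `lastCommittedBatch(queryId)` is `Some(0L)`, the same shape the `require` just below asserts against the Delta log.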
+ require(log.update().transactions.head == (query.id.toString -> 0L)) + + sql(s"ALTER TABLE delta.`${outputDir.getAbsolutePath}` " + + s"RENAME COLUMN value TO new_value") + + if (!schemaMergeEnabled) { + // schema has changed, we can't automatically migrate the schema + val e = intercept[StreamingQueryException] { + addData(Seq(2).toDF("value")) + query.processAllAvailable() + } + assert(e.cause.isInstanceOf[AnalysisException]) + assert(e.cause.getMessage.contains("A schema mismatch detected when writing")) + + // restart using the same query would still fail + val query2 = queryGen(df) + val e2 = intercept[StreamingQueryException] { + addData(Seq(2).toDF("value")) + query2.processAllAvailable() + } + assert(e2.cause.isInstanceOf[AnalysisException]) + assert(e2.cause.getMessage.contains("A schema mismatch detected when writing")) + + // but reingest using new schema should work + val df2 = spark.readStream + .schema(new StructType().add("value", IntegerType, true)) + .parquet(sourceDir.getCanonicalPath) + .withColumnRenamed("value", "new_value") + val query3 = queryGen(df2) + // delta sink contains [100, 1, 2] + [2, 2] due to recovering the failed batched + addData(Seq(2).toDF("value")) + query3.processAllAvailable() + checkAnswer(outputDf, + Row(100) :: Row(1) :: Row(2) :: Row(2) :: Row(2) :: Nil) + assert(outputDf.schema == new StructType().add("new_value", IntegerType, true)) + query3.stop() + } else { + // we allow auto schema migration, delta sink contains [100, 1, 2] + addData(Seq(2).toDF("value")) + query.processAllAvailable() + // Since the incoming `value` column is now merged as a new column (even though it + // has the same value as the original name) in which only the 3rd record has data. + checkAnswer(outputDf, Row(100, null) :: Row(1, null) :: Row(null, 2) :: Nil) + assert(outputDf.schema == + new StructType().add("new_value", IntegerType, true) + .add("value", IntegerType, true)) + query.stop() + } + } + } + } + } + } + } + + test("allow schema evolution after dropping column") { + Seq(true, false).foreach { schemaMergeEnabled => + withClue(s"Schema merge enabled: $schemaMergeEnabled") { + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> schemaMergeEnabled.toString) { + failAfter(streamingTimeout) { + withTempDirs { (outputDir, checkpointDir) => + val sourceDir = Utils.createTempDir() + def addData(df: DataFrame): Unit = + df.coalesce(1).write.mode("append").save(sourceDir.getCanonicalPath) + + // save data to target dir + Seq((1, 100)).toDF("id", "value").write.format("delta") + .save(outputDir.getCanonicalPath) + + // use parquet stream as MemoryStream doesn't support recovering failed batches + val df = spark.readStream + .schema(new StructType().add("id", IntegerType, true) + .add("value", IntegerType, true)) + .parquet(sourceDir.getCanonicalPath) + + // start writing into Delta sink + def queryGen(df: DataFrame): StreamingQuery = df.writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + + val query = queryGen(df) + val log = DeltaLog.forTable(spark, outputDir.getCanonicalPath) + // delta sink contains [(1, 100), (2, 200)] + addData(Seq((2, 200)).toDF("id", "value")) + query.processAllAvailable() + + def outputDf: DataFrame = + spark.read.format("delta").load(outputDir.getCanonicalPath) + + checkDatasetUnorderly(outputDf.as[(Int, Int)], (1, 100), (2, 200)) + assert(log.update().transactions.head == (query.id.toString -> 0L)) + + 
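The null results asserted after the DROP COLUMN below come from column mapping's indirection between logical and physical column names. A toy model of that indirection (these are not Delta's metadata classes, and the `col-<uuid>` shape is only indicative of how fresh physical names look):

```scala
import java.util.UUID

object ColumnMappingSketch {
  // Logical name = what SQL sees; physical name = what is written to / read from parquet.
  final case class MappedColumn(logicalName: String, physicalName: String)

  // Dropping a column and re-adding one with the same logical name assigns a brand new
  // physical name, so data files written for the old column are no longer addressable and
  // the column reads back as null.
  def dropThenAddBack(schema: Seq[MappedColumn], logicalName: String): Seq[MappedColumn] =
    schema.filterNot(_.logicalName == logicalName) :+
      MappedColumn(logicalName, s"col-${UUID.randomUUID()}")
}
```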
withSQLConf(DeltaSQLConf.DELTA_ALTER_TABLE_DROP_COLUMN_ENABLED.key -> "true") { + sql(s"ALTER TABLE delta.`${outputDir.getAbsolutePath}` DROP COLUMN value") + } + + if (!schemaMergeEnabled) { + // schema changed, we can't automatically migrate the schema + val e = intercept[StreamingQueryException] { + addData(Seq((3, 300)).toDF("id", "value")) + query.processAllAvailable() + } + assert(e.cause.isInstanceOf[AnalysisException]) + assert(e.cause.getMessage.contains("A schema mismatch detected when writing")) + + // restart using the same query would still fail + val query2 = queryGen(df) + val e2 = intercept[StreamingQueryException] { + addData(Seq((3, 300)).toDF("id", "value")) + query2.processAllAvailable() + } + assert(e2.cause.isInstanceOf[AnalysisException]) + assert(e2.cause.getMessage.contains("A schema mismatch detected when writing")) + + // but reingest using new schema should work + val df2 = spark.readStream + .schema(new StructType().add("id", IntegerType, true)) + .parquet(sourceDir.getCanonicalPath) + val query3 = queryGen(df2) + // delta sink contains [1, 2, 3] + [3, 3] due to + // recovering failed batches + addData(Seq((3, 300)).toDF("id", "value")) + query3.processAllAvailable() + checkAnswer(outputDf, + Row(1) :: Row(2) :: Row(3) :: Row(3) :: Row(3) :: Nil) + assert(outputDf.schema == new StructType().add("id", IntegerType, true)) + query3.stop() + } else { + addData(Seq((3, 300)).toDF("id", "value")) + query.processAllAvailable() + // None/null value appears because even though the added column has the same + // logical name (`value`) as the dropped column, the physical name has been + // changed so the old data could not be loaded. + checkAnswer(outputDf, Row(1, null) :: Row(2, null) :: Row(3, 300) :: Nil) + assert(outputDf.schema == + new StructType().add("id", IntegerType, true).add("value", IntegerType, true)) + query.stop() + } + } + } + } + } + } + } + +} + +class DeltaSinkIdColumnMappingSuite extends DeltaSinkColumnMappingSuiteBase + with DeltaColumnMappingEnableIdMode + with DeltaColumnMappingTestUtils + +class DeltaSinkNameColumnMappingSuite extends DeltaSinkColumnMappingSuiteBase + with DeltaColumnMappingEnableNameMode diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceColumnMappingSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceColumnMappingSuite.scala new file mode 100644 index 00000000000..531af7ace73 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceColumnMappingSuite.scala @@ -0,0 +1,747 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.util.UUID + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.sources.{DeltaSource, DeltaSQLConf} +import org.apache.spark.sql.delta.test.DeltaColumnMappingSelectedTestMixin +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.JsonUtils +import org.apache.commons.io.FileUtils +import org.apache.commons.lang3.exception.ExceptionUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.execution.streaming.{StreamExecution, StreamingExecutionRelation} +import org.apache.spark.sql.streaming.{DataStreamReader, StreamTest} +import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.util.Utils + +trait ColumnMappingStreamingTestUtils extends StreamTest with DeltaColumnMappingTestUtils { + + // Whether we are requesting CDC streaming changes + protected def isCdcTest: Boolean + + protected val ProcessAllAvailableIgnoreError = Execute { q => + try { + q.processAllAvailable() + } catch { + case _: Throwable => + // swallow the errors so we could check answer and failure on the query later + } + } + + protected def isColumnMappingSchemaIncompatibleFailure( + t: Throwable, + detectedDuringStreaming: Boolean): Boolean = t match { + case e: DeltaStreamingColumnMappingSchemaIncompatibleException => + e.additionalProperties.get("detectedDuringStreaming") + .exists(_.toBoolean == detectedDuringStreaming) + case _ => false + } + + protected val ExpectStreamStartInCompatibleSchemaFailure = + ExpectFailure[DeltaStreamingColumnMappingSchemaIncompatibleException] { t => + assert(isColumnMappingSchemaIncompatibleFailure(t, detectedDuringStreaming = false)) + } + + protected val ExpectInStreamSchemaChangeFailure = + ExpectFailure[DeltaStreamingColumnMappingSchemaIncompatibleException] { t => + assert(isColumnMappingSchemaIncompatibleFailure(t, detectedDuringStreaming = true)) + } + + protected val ExpectGenericSchemaIncompatibleFailure = + ExpectFailure[DeltaStreamingColumnMappingSchemaIncompatibleException]() + + // Failure thrown by the current DeltaSource schema change incompatible check + protected val ExistingRetryableInStreamSchemaChangeFailure = Execute { q => + // Similar to ExpectFailure but allows more fine-grained checking of exceptions + failAfter(streamingTimeout) { + try { + q.awaitTermination() + } catch { + case _: Throwable => + // swallow the exception + } + val cause = ExceptionUtils.getRootCause(q.exception.get) + assert(cause.getMessage.contains("Detected schema change")) + } + } + + protected def getLatestCommittedDeltaVersion(q: StreamExecution): Long = + JsonUtils.fromJson[Map[String, Any]]( + q.committedOffsets.values.head.json() + ).apply("reservoirVersion").asInstanceOf[Number].longValue() + + // Drop CDC fields because they are not useful for testing the blocking behavior + protected def dropCDCFields(df: DataFrame): DataFrame = + df.drop(CDCReader.CDC_COMMIT_TIMESTAMP) + .drop(CDCReader.CDC_TYPE_COLUMN_NAME) + .drop(CDCReader.CDC_COMMIT_VERSION) +} + +trait ColumnMappingStreamingBlockedWorkflowSuiteBase extends ColumnMappingStreamingTestUtils { + + import testImplicits._ + + // DataStreamReader to use + // Set a small max file per trigger to ensure we could catch failures ASAP + private def dsr: DataStreamReader = if 
(isCdcTest) { + spark.readStream.format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "1") + .option(DeltaOptions.CDC_READ_OPTION, "true") + } else { + spark.readStream.format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "1") + } + + private def checkStreamStartBlocked( + df: DataFrame, + ckpt: File, + expectedFailure: StreamAction): Unit = { + // Restart the stream from the same checkpoint will pick up the dropped schema and our + // column mapping check will kick in and error out. + testStream(df)( + StartStream(checkpointLocation = ckpt.getCanonicalPath), + ProcessAllAvailableIgnoreError, + // No batches have been served + CheckLastBatch(Nil: _*), + expectedFailure + ) + } + + protected def writeDeltaData( + data: Seq[Int], + deltaLog: DeltaLog, + userSpecifiedSchema: Option[StructType] = None): Unit = { + val schema = userSpecifiedSchema.getOrElse(deltaLog.update().schema) + data.foreach { i => + val data = Seq(Row(schema.map(_ => i.toString): _*)) + spark.createDataFrame(data.asJava, schema) + .write.format("delta").mode("append").save(deltaLog.dataPath.toString) + } + } + + test("deltaLog snapshot should not be updated outside of the stream") { + withTempDir { dir => + val tablePath = dir.getCanonicalPath + // write initial data + Seq(1).toDF("id").write.format("delta").mode("overwrite").save(tablePath) + // record initial snapshot version and warm DeltaLog cache + val initialDeltaLog = DeltaLog.forTable(spark, tablePath) + // start streaming + val df = spark.readStream.format("delta").load(tablePath) + testStream(df)( + StartStream(), + ProcessAllAvailable(), + AssertOnQuery { q => + // write more data + Seq(2).toDF("id").write.format("delta").mode("append").save(tablePath) + // update deltaLog externally + initialDeltaLog.update() + assert(initialDeltaLog.snapshot.version == 1) + // query start snapshot should not change + val source = q.logicalPlan.collectFirst { + case r: StreamingExecutionRelation => + r.source.asInstanceOf[DeltaSource] + }.get + // same delta log but stream start version not affected + source.snapshotAtSourceInit.deltaLog == initialDeltaLog && + source.snapshotAtSourceInit.version == 0 + } + ) + } + } + + test("column mapping + streaming - allowed workflows - column addition") { + // column addition schema evolution should not be blocked upon restart + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + writeDeltaData(0 until 5, deltaLog, Some(StructType.fromDDL("id string, value string"))) + + val checkpointDir = new File(inputDir, "_checkpoint") + + def loadDf(): DataFrame = dropCDCFields(dsr.load(inputDir.getCanonicalPath)) + + testStream(loadDf())( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + ProcessAllAvailable(), + CheckAnswer((0 until 5).map(i => (i.toString, i.toString)): _*), + Execute { _ => + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` ADD COLUMN (value2 string)") + }, + Execute { _ => + writeDeltaData(5 until 10, deltaLog) + }, + ExistingRetryableInStreamSchemaChangeFailure + ) + + testStream(loadDf())( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + ProcessAllAvailable(), + // Sink is reinitialized, only 5-10 are ingested + CheckAnswer( + (5 until 10).map(i => (i.toString, i.toString, i.toString)): _*) + ) + } + + } + + test("column mapping + streaming - allowed workflows - upgrade to name mode") { + // upgrade should not blocked both during the stream AND during stream restart + withTempDir { inputDir => + val 
deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + withColumnMappingConf("none") { + writeDeltaData(0 until 5, deltaLog, Some(StructType.fromDDL("id string, name string"))) + } + + def createNewDf(): DataFrame = dropCDCFields(dsr.load(inputDir.getCanonicalPath)) + + val checkpointDir = new File(inputDir, "_checkpoint") + + testStream(createNewDf())( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + ProcessAllAvailable(), + CheckAnswer((0 until 5).map(i => (i.toString, i.toString)): _*), + Execute { _ => + sql( + s""" + |ALTER TABLE delta.`${inputDir.getCanonicalPath}` + |SET TBLPROPERTIES ( + | ${DeltaConfigs.COLUMN_MAPPING_MODE.key} = "name", + | ${DeltaConfigs.MIN_READER_VERSION.key} = "2", + | ${DeltaConfigs.MIN_WRITER_VERSION.key} = "5")""".stripMargin) + }, + Execute { _ => + writeDeltaData(5 until 10, deltaLog) + }, + ProcessAllAvailable(), + CheckAnswer((0 until 10).map(i => (i.toString, i.toString)): _*), + // add column schema evolution should fail the stream + Execute { _ => + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` ADD COLUMN (value2 string)") + }, + Execute { _ => + writeDeltaData(10 until 15, deltaLog) + }, + ExistingRetryableInStreamSchemaChangeFailure + ) + + // but should not block after restarting, now in column mapping mode + testStream(createNewDf())( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + ProcessAllAvailable(), + // Sink is reinitialized, only 10-15 are ingested + CheckAnswer( + (10 until 15).map(i => (i.toString, i.toString, i.toString)): _*) + ) + + // use a different checkpoint to simulate a clean stream restart + val checkpointDir2 = new File(inputDir, "_checkpoint2") + + testStream(createNewDf())( + StartStream(checkpointLocation = checkpointDir2.getCanonicalPath), + ProcessAllAvailable(), + // Since the latest schema contain the additional column, it is null for previous batches. + // This is fine as it is consistent with the current semantics. + CheckAnswer((0 until 10).map(i => (i.toString, i.toString, null)) ++ + (10 until 15).map(i => (i.toString, i.toString, i.toString)): _*), + StopStream + ) + + // Refresh delta log so we could catch the latest schema with column mapping mode + deltaLog.update() + // test read prior to upgrade batches with latest metadata should also work + val checkpointDir3 = new File(inputDir, "_checkpoint3") + testStream(dropCDCFields(dsr.option("startingVersion", 0).load(inputDir.getCanonicalPath)))( + StartStream(checkpointLocation = checkpointDir3.getCanonicalPath), + ProcessAllAvailable(), + // Since the latest schema contain the additional column, it is null for previous batches. + // This is fine as it is consistent with the current semantics. + CheckAnswer((0 until 10).map(i => (i.toString, i.toString, null)) ++ + (10 until 15).map(i => (i.toString, i.toString, i.toString)): _*), + StopStream + ) + + } + } + + /** + * Setup the test table for testing blocked workflow, this will create a id or name mode table + * based on which tests it is run. 
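+   * For name mode the table starts without column mapping and is then upgraded in place;
+   * for id mode it is created with id mapping enabled from the start, since an existing
+   * table cannot be upgraded to id mapping.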
+ */ + protected def setupTestTable(deltaLog: DeltaLog): Unit = { + require(columnMappingModeString != NoMapping.name) + val tablePath = deltaLog.dataPath.toString + + // For name mapping, we use upgrade to stir things up a little + if (columnMappingModeString == NameMapping.name) { + // initialize with no column mapping + withColumnMappingConf("none") { + writeDeltaData(0 until 5, deltaLog, Some(StructType.fromDDL("id string, value string"))) + } + + // upgrade to name mode + val protocol = deltaLog.snapshot.protocol + val (r, w) = if (protocol.supportsReaderFeatures || protocol.supportsWriterFeatures) { + (TableFeatureProtocolUtils.TABLE_FEATURES_MIN_READER_VERSION, + TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION) + } else { + (spark.conf + .get(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION) + .max(ColumnMappingTableFeature.minReaderVersion), + spark.conf + .get(DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION) + .max(ColumnMappingTableFeature.minWriterVersion)) + } + + sql( + s""" + |ALTER TABLE delta.`${tablePath}` + |SET TBLPROPERTIES ( + | ${DeltaConfigs.COLUMN_MAPPING_MODE.key} = "name", + | ${DeltaConfigs.MIN_READER_VERSION.key} = "$r", + | ${DeltaConfigs.MIN_WRITER_VERSION.key} = "$w")""".stripMargin) + + // write more data post upgrade + writeDeltaData(5 until 10, deltaLog) + } + // For id mapping, we could only create the table from scratch + else if (columnMappingModeString == IdMapping.name) { + withColumnMappingConf("id") { + writeDeltaData(0 until 10, deltaLog, Some(StructType.fromDDL("id string, value string"))) + } + } + } + + test("column mapping + streaming: blocking workflow - drop column") { + val schemaAlterQuery = "DROP COLUMN value" + val schemaRestoreQuery = "ADD COLUMN (value string)" + + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + setupTestTable(deltaLog) + + // change schema + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` $schemaAlterQuery") + + // write more data post change schema + writeDeltaData(10 until 15, deltaLog) + + // Test the two code paths below + // Case 1 - Restart did not specify a start version, this will successfully serve the initial + // entire existing data based on the initial snapshot's schema, which is basically + // the stream schema, all schema changes in between are ignored. + // But once the initial snapshot is served, all subsequent batches will fail if + // encountering a schema change during streaming, and all restart effort should fail. + val checkpointDir = new File(inputDir, "_checkpoint") + val df = dropCDCFields(dsr.load(inputDir.getCanonicalPath)) + + testStream(df)( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + ProcessAllAvailable(), + // Initial data (pre + post upgrade + post change schema) all served + CheckAnswer((0 until 15).map(i => i.toString): _*), + Execute { _ => + // write more data in new schema during streaming + writeDeltaData(15 until 20, deltaLog) + }, + ProcessAllAvailable(), + // can still work because the schema is still compatible + CheckAnswer((0 until 20).map(i => i.toString): _*), + // But a new schema change would cause stream to fail + // Note here we are restoring back the original schema, see next case for how we test + // some extra special cases when schemas are reverted. 
+ Execute { _ => + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` $schemaRestoreQuery") + }, + // write more data in updated schema again + Execute { _ => + writeDeltaData(20 until 25, deltaLog) + }, + // The last batch should not be processed and stream should fail + ProcessAllAvailableIgnoreError, + // sink data did not change + CheckAnswer((0 until 20).map(i => i.toString): _*), + // The schemaRestoreQuery for DROP column is ADD column so it fails a more benign error + ExistingRetryableInStreamSchemaChangeFailure + ) + + val df2 = dropCDCFields(dsr.load(inputDir.getCanonicalPath)) + // Since the initial snapshot ignores all schema changes, the most recent schema change + // is just ADD COLUMN, which can be retried. + testStream(df2)( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + // but an additional drop should fail the stream as we are capturing data changes now + Execute { _ => + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` $schemaAlterQuery") + }, + ProcessAllAvailableIgnoreError, + ExpectInStreamSchemaChangeFailure + ) + // The latest DROP columns blocks the stream. + if (isCdcTest) { + checkStreamStartBlocked(df2, checkpointDir, ExpectGenericSchemaIncompatibleFailure) + } else { + checkStreamStartBlocked(df2, checkpointDir, ExpectStreamStartInCompatibleSchemaFailure) + } + + // Case 2 - Specifically we use startingVersion=0 to simulate serving the entire table's data + // in a streaming fashion, ignoring the initialSnapshot. + // Here we test the special case when the latest schema is "restored". + val checkpointDir2 = new File(inputDir, "_checkpoint2") + val dfStartAtZero = dropCDCFields(dsr + .option(DeltaOptions.STARTING_VERSION_OPTION, "0") + .load(inputDir.getCanonicalPath)) + + if (isCdcTest) { + checkStreamStartBlocked( + dfStartAtZero, checkpointDir2, ExpectGenericSchemaIncompatibleFailure) + } else { + // In the case when we drop and add a column back + // the restart should still fail directly because all the historical batches with the same + // old logical name now will have a different physical name we would have data loss + + // lets add back the column we just dropped before + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` $schemaRestoreQuery") + assert(DeltaLog.forTable(spark, inputDir.getCanonicalPath).snapshot.schema.size == 2) + + // restart should block right away + checkStreamStartBlocked( + dfStartAtZero, checkpointDir, ExpectStreamStartInCompatibleSchemaFailure) + } + } + } + + test("column mapping + streaming: blocking workflow - rename column") { + val schemaAlterQuery = "RENAME COLUMN value TO value2" + val schemaRestoreQuery = "RENAME COLUMN value2 TO value" + + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + setupTestTable(deltaLog) + + // change schema + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` $schemaAlterQuery") + + // write more data post change schema + writeDeltaData(10 until 15, deltaLog) + + // Test the two code paths below + // Case 1 - Restart did not specify a start version, this will successfully serve the initial + // entire existing data based on the initial snapshot's schema, which is basically + // the stream schema, all schema changes in between are ignored. + // But once the initial snapshot is served, all subsequent batches will fail if + // encountering a schema change during streaming, and all restart effort should fail. 
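The two cases exercised below differ only in how the source is asked to start. A compact sketch of the two reader configurations being contrasted (the session setup, the table path and the `delta-spark` dependency are assumptions of the sketch, not part of this suite):

```scala
import org.apache.spark.sql.SparkSession

object StreamStartModesSketch extends App {
  val spark = SparkSession.builder()
    .master("local[*]")
    .appName("delta-stream-start-modes")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()

  val tablePath = "/tmp/some-delta-table" // placeholder, must already be a Delta table

  // Case 1: no start option. Batch 0 is the whole table as of stream start (the "initial
  // snapshot"), read with that snapshot's schema; only later commits are tailed as changes.
  val fromInitialSnapshot = spark.readStream.format("delta").load(tablePath)

  // Case 2: startingVersion=0. No initial snapshot is served; every commit from version 0 on
  // is replayed as change data, so historical commits must still be readable with the table's
  // latest schema.
  val fromVersionZero = spark.readStream.format("delta")
    .option("startingVersion", "0")
    .load(tablePath)
}
```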
+ val checkpointDir = new File(inputDir, "_checkpoint") + def df: DataFrame = dropCDCFields(dsr.load(inputDir.getCanonicalPath)) + + testStream(df)( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + ProcessAllAvailable(), + // Initial data (pre + post upgrade + post change schema) all served + CheckAnswer((0 until 15).map(i => (i.toString, i.toString)): _*), + Execute { _ => + // write more data in new schema during streaming + writeDeltaData(15 until 20, deltaLog) + }, + ProcessAllAvailable(), + // can still work because the schema is still compatible + CheckAnswer((0 until 20).map(i => (i.toString, i.toString)): _*), + // stop stream to allow schema change + data update to start in a batch + StopStream, + // But a new schema change would cause stream to fail + // Note here we are restoring back the original schema, see next case for how we test + // some extra special cases when schemas are reverted. + Execute { _ => + writeDeltaData(20 until 25, deltaLog) + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` $schemaRestoreQuery") + } + ) + + val df2 = dropCDCFields(dsr.load(inputDir.getCanonicalPath)) + testStream(df2)( + // Restart stream + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + // the last batch should not be processed because the batch cross an incompatible + // schema change. + ProcessAllAvailableIgnoreError, + // no data processed + CheckAnswer(Nil: _*), + // detected schema change while trying to generate the next offset + ExpectStreamStartInCompatibleSchemaFailure + ) + + // Case 2 - Specifically we use startingVersion=0 to simulate serving the entire table's data + // in a streaming fashion, ignoring the initialSnapshot. + // Here we test the special case when the latest schema is "restored". + if (isCdcTest) { + val checkpointDir2 = new File(inputDir, "_checkpoint2") + val dfStartAtZero = dropCDCFields(dsr + .option(DeltaOptions.STARTING_VERSION_OPTION, "0") + .load(inputDir.getCanonicalPath)) + checkStreamStartBlocked( + dfStartAtZero, checkpointDir2, ExpectGenericSchemaIncompatibleFailure) + } else { + // In the trickier case when we rename a column and rename back, we could not + // immediately detect the schema incompatibility at stream start, so we will move on. + // This is fine because the batches served will be compatible until the in-stream check + // finds another schema change action and fail. + val checkpointDir2 = new File(inputDir, s"_checkpoint_${UUID.randomUUID.toString}") + val dfStartAtZero = dropCDCFields(dsr + .option(DeltaOptions.STARTING_VERSION_OPTION, "0") + .load(inputDir.getCanonicalPath)) + testStream(dfStartAtZero)( + // The stream could not move past version 10, because batches after which + // will be incompatible with the latest schema. 
+ StartStream(checkpointLocation = checkpointDir2.getCanonicalPath), + ProcessAllAvailableIgnoreError, + AssertOnQuery { q => + val latestCommittedVersion = getLatestCommittedDeltaVersion(q) + latestCommittedVersion <= 10 + }, + ExpectInStreamSchemaChangeFailure + ) + // restart won't move forward either + val df2 = dropCDCFields(dsr.load(inputDir.getCanonicalPath)) + checkStreamStartBlocked(df2, checkpointDir2, ExpectInStreamSchemaChangeFailure) + } + } + } + + test("column mapping + streaming: blocking workflow - " + + "should not generate latestOffset past schema change") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + writeDeltaData(0 until 5, deltaLog, + userSpecifiedSchema = Some( + new StructType() + .add("id", StringType, true) + .add("value", StringType, true))) + // rename column + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` RENAME COLUMN value TO value2") + val renameVersion = deltaLog.update().version + // write more data + writeDeltaData(5 until 10, deltaLog) + + // Case 1 - Stream start failure should not progress new latestOffset + // Since we had a rename, the data files prior to that should not be served with the renamed + // schema , but the original schema . latestOffset() should not create + // a new offset moves past the schema change. + val df1 = dropCDCFields( + dsr.option("startingVersion", "1") // start from 1 to ignore the initial schema change + .load(inputDir.getCanonicalPath)) + testStream(df1)( + StartStream(), // fresh checkpoint + ProcessAllAvailableIgnoreError, + AssertOnQuery { q => + // This should come from the latestOffset checker + q.availableOffsets.isEmpty && q.latestOffsets.isEmpty && + q.exception.get.cause.getStackTrace.exists(_.toString.contains("latestOffset")) + }, + ExpectStreamStartInCompatibleSchemaFailure + ) + + // try drop column now + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` DROP COLUMN value2") + val dropVersion = deltaLog.update().version + // write more data + writeDeltaData(10 until 15, deltaLog) + + val df2 = dropCDCFields( + dsr.option("startingVersion", renameVersion + 1) // so we could detect drop column + .load(inputDir.getCanonicalPath)) + testStream(df2)( + StartStream(), // fresh checkpoint + ProcessAllAvailableIgnoreError, + AssertOnQuery { q => + // This should come from the latestOffset stream start checker + q.availableOffsets.isEmpty && q.latestOffsets.isEmpty && + q.exception.get.cause.getStackTrace.exists(_.toString.contains("latestOffset")) + }, + ExpectStreamStartInCompatibleSchemaFailure + ) + + // Case 2 - in stream failure should not progress latest offset too + // This is the handle prior to SC-111607, which should cover the major cases. 
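The offset assertions in these cases compare only the JSON strings the source reports. A toy sketch of pulling the committed table version out of such a string (a regex stand-in for real JSON parsing; `getLatestCommittedDeltaVersion` earlier in this file does it properly via `JsonUtils`):

```scala
object DeltaOffsetVersionSketch {
  /** Toy extraction (a regex, not a real JSON parser) of the "reservoirVersion" field. */
  def reservoirVersion(offsetJson: String): Option[Long] =
    """"reservoirVersion"\s*:\s*(\d+)""".r
      .findFirstMatchIn(offsetJson)
      .map(_.group(1).toLong)

  // e.g. reservoirVersion("""{"reservoirVersion":10}""") == Some(10L)
}
```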
+ def loadDf(): DataFrame = dropCDCFields( + dsr.option("startingVersion", dropVersion + 1) // so we could move on to in stream failure + .load(inputDir.getCanonicalPath)) + + val ckpt = Utils.createTempDir().getCanonicalPath + var latestAvailableOffsets: Seq[String] = null + testStream(loadDf())( + StartStream(checkpointLocation = ckpt), // fresh checkpoint + ProcessAllAvailable(), + CheckAnswer((10 until 15).map(i => (i.toString)): _*), + Execute { q => + latestAvailableOffsets = q.availableOffsets.values.map(_.json()).toSeq + }, + // add more data and rename column + Execute { _ => + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` RENAME COLUMN id TO id2") + writeDeltaData(15 until 16, deltaLog) + }, + ProcessAllAvailableIgnoreError, + CheckAnswer((10 until 15).map(i => (i.toString)): _*), // no data processed + AssertOnQuery { q => + // Available offsets should not change + // This should come from the latestOffset in-stream checker + q.availableOffsets.values.map(_.json()) == latestAvailableOffsets && + q.latestOffsets.isEmpty && + q.exception.get.cause.getStackTrace.exists(_.toString.contains("latestOffset")) + }, + ExpectInStreamSchemaChangeFailure + ) + + // Case 3 - resuming from existing checkpoint, note that getBatch's stream start check + // should be called instead of latestOffset for recovery. + // This is also the handle prior to SC-111607, which should cover the major cases. + testStream(loadDf())( + StartStream(checkpointLocation = ckpt), // existing checkpoint + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + AssertOnQuery { q => + // This should come from the latestOffset in-stream checker + q.availableOffsets.values.map(_.json()) == latestAvailableOffsets && + q.latestOffsets.isEmpty && + q.exception.get.cause.getStackTrace.exists(_.toString.contains("getBatch")) + }, + ExpectStreamStartInCompatibleSchemaFailure + ) + } + } + + test("unsafe flag can unblock drop or rename column") { + // upgrade should not blocked both during the stream AND during stream restart + withTempDir { inputDir => + Seq( + s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` DROP COLUMN value", + s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` RENAME COLUMN value TO value2" + ).foreach { schemaChangeQuery => + FileUtils.deleteDirectory(inputDir) + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + withColumnMappingConf("none") { + writeDeltaData(0 until 5, deltaLog, + Some(StructType.fromDDL("id string, value string"))) + } + + def createNewDf(): DataFrame = dropCDCFields(dsr.load(inputDir.getCanonicalPath)) + + val checkpointDir = new File(inputDir, s"_checkpoint_${schemaChangeQuery.hashCode}") + val isRename = schemaChangeQuery.contains("RENAME") + testStream(createNewDf())( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + ProcessAllAvailable(), + CheckAnswer((0 until 5).map(i => (i.toString, i.toString)): _*), + Execute { _ => + sql( + s""" + |ALTER TABLE delta.`${inputDir.getCanonicalPath}` + |SET TBLPROPERTIES ( + | ${DeltaConfigs.COLUMN_MAPPING_MODE.key} = "name", + | ${DeltaConfigs.MIN_READER_VERSION.key} = "2", + | ${DeltaConfigs.MIN_WRITER_VERSION.key} = "5")""".stripMargin) + // Add another schema change to ensure even after enable the flag, we would still hit + // a schema change with more columns than read schema so `verifySchemaChange` would see + // that can complain. 
+ sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` ADD COLUMN (random STRING)") + sql(schemaChangeQuery) + writeDeltaData(5 until 10, deltaLog) + }, + ProcessAllAvailableIgnoreError, + ExistingRetryableInStreamSchemaChangeFailure + ) + + // Without the flag it would still fail + testStream(createNewDf())( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + ExpectStreamStartInCompatibleSchemaFailure + ) + + val checkExpectedResult = if (isRename) { + CheckAnswer((5 until 10).map(i => (i.toString, i.toString, i.toString)): _*) + } else { + CheckAnswer((5 until 10).map(i => (i.toString, i.toString)): _*) + } + + withSQLConf(DeltaSQLConf + .DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_COLUMN_MAPPING_SCHEMA_CHANGES + .key -> "true") { + testStream(createNewDf())( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + // The processing will pass, ignoring any schema column missing in the backfill. + ProcessAllAvailable(), + // Show up as dropped column + checkExpectedResult, + Execute { _ => + // But any schema change post the stream analysis would still cause exceptions + // as usual, which is critical to avoid data loss. + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` ADD COLUMN (random2 STRING)") + }, + ProcessAllAvailableIgnoreError, + ExistingRetryableInStreamSchemaChangeFailure + ) + } + } + } + } +} + +trait DeltaSourceColumnMappingSuiteBase extends DeltaColumnMappingSelectedTestMixin { + override protected def runOnlyTests = Seq( + "basic", + "maxBytesPerTrigger: metadata checkpoint", + "maxFilesPerTrigger: metadata checkpoint", + "allow to change schema before starting a streaming query", + + // streaming blocking semantics test + "deltaLog snapshot should not be updated outside of the stream", + "column mapping + streaming - allowed workflows - column addition", + "column mapping + streaming - allowed workflows - upgrade to name mode", + "column mapping + streaming: blocking workflow - drop column", + "column mapping + streaming: blocking workflow - rename column", + "column mapping + streaming: blocking workflow - " + + "should not generate latestOffset past schema change" + ) +} + +class DeltaSourceIdColumnMappingSuite extends DeltaSourceSuite + with ColumnMappingStreamingBlockedWorkflowSuiteBase + with DeltaColumnMappingEnableIdMode + with DeltaSourceColumnMappingSuiteBase { + + override protected def isCdcTest: Boolean = false + +} + +class DeltaSourceNameColumnMappingSuite extends DeltaSourceSuite + with ColumnMappingStreamingBlockedWorkflowSuiteBase + with DeltaColumnMappingEnableNameMode + with DeltaSourceColumnMappingSuiteBase { + + override protected def isCdcTest: Boolean = false + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceDeletionVectorsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceDeletionVectorsSuite.scala new file mode 100644 index 00000000000..196476d9e23 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceDeletionVectorsSuite.scala @@ -0,0 +1,418 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File + +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.hadoop.fs.Path +import org.scalatest.concurrent.Eventually +import org.scalatest.concurrent.PatienceConfiguration.Timeout + +import org.apache.spark.sql.execution.streaming.StreamExecution +import org.apache.spark.sql.streaming.{StreamTest, Trigger} +import org.apache.spark.sql.streaming.util.StreamManualClock + +trait DeltaSourceDeletionVectorTests extends StreamTest + with DeletionVectorsTestUtils { + + import testImplicits._ + + test("allow to delete files before starting a streaming query") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 5).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + sql(s"DELETE FROM delta.`$inputDir`") + (5 until 10).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + deltaLog.checkpoint() + assert(deltaLog.readLastCheckpointFile().nonEmpty, "this test requires a checkpoint") + + val df = spark.readStream + .format("delta") + .load(inputDir.getCanonicalPath) + + testStream(df)( + AssertOnQuery { q => + q.processAllAvailable() + true + }, + CheckAnswer((5 until 10).map(_.toString): _*)) + } + } + + test("allow to delete files before staring a streaming query without checkpoint") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 5).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + sql(s"DELETE FROM delta.`$inputDir`") + (5 until 7).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + assert(deltaLog.readLastCheckpointFile().isEmpty, "this test requires no checkpoint") + + val df = spark.readStream + .format("delta") + .load(inputDir.getCanonicalPath) + + testStream(df)( + AssertOnQuery { q => + q.processAllAvailable() + true + }, + CheckAnswer((5 until 7).map(_.toString): _*)) + } + } + + /** + * If deletion vectors are expected here, return true if they are present. If none are expected, + * return true if none are present. 
+ */ + protected def deletionVectorsPresentIfExpected( + inputDir: String, + expectDVs: Boolean): Boolean = { + val deltaLog = DeltaLog.forTable(spark, inputDir) + val filesWithDVs = getFilesWithDeletionVectors(deltaLog) + logWarning(s"Expecting DVs=$expectDVs - found ${filesWithDVs.size}") + if (expectDVs) { + filesWithDVs.nonEmpty + } else { + filesWithDVs.isEmpty + } + } + + private def ignoreOperationsTest( + inputDir: String, + sourceOptions: Seq[(String, String)], + sqlCommand: String, + commandShouldProduceDVs: Option[Boolean] = None)(expectations: StreamAction*): Unit = { + (0 until 10 by 2).foreach { i => + Seq(i, i + 1).toDF().coalesce(1).write.format("delta").mode("append").save(inputDir) + } + + val df = spark.readStream.format("delta").options(sourceOptions.toMap).load(inputDir) + val expectDVs = commandShouldProduceDVs.getOrElse( + sqlCommand.toUpperCase().startsWith("DELETE")) + + val base = Seq( + AssertOnQuery { q => + q.processAllAvailable() + true + }, + CheckAnswer((0 until 10): _*), + AssertOnQuery { q => + sql(sqlCommand) + deletionVectorsPresentIfExpected(inputDir, expectDVs) + }) + + testStream(df)((base ++ expectations): _*) + } + + private def ignoreOperationsTestWithManualClock( + inputDir: String, + sourceOptions: Seq[(String, String)], + sqlCommand1: String, + sqlCommand2: String, + command1ShouldProduceDVs: Option[Boolean] = None, + command2ShouldProduceDVs: Option[Boolean] = None, + expectations: List[StreamAction]): Unit = { + val clock = new StreamManualClock + + (0 until 15 by 3).foreach { i => + Seq(i, i + 1, i + 2).toDF().coalesce(1).write.format("delta").mode("append").save(inputDir) + } + val log = DeltaLog.forTable(spark, inputDir) + val commitVersionBeforeDML = log.update().version + val df = spark.readStream.format("delta").options(sourceOptions.toMap).load(inputDir) + def expectDVsInCommand(shouldProduceDVs: Option[Boolean], command: String): Boolean = { + shouldProduceDVs.getOrElse(command.toUpperCase().startsWith("DELETE")) + } + val expectDVsInCommand1 = expectDVsInCommand(command1ShouldProduceDVs, sqlCommand1) + val expectDVsInCommand2 = expectDVsInCommand(command2ShouldProduceDVs, sqlCommand2) + + // If it's expected to fail we must be sure not to actually process it in here, + // or it'll fail too early instead of being caught by ExpectFailure. + val shouldFailAfterCommands = expectations.exists(_.isInstanceOf[ExpectFailure[_]]) + + val baseActions: Seq[StreamAction] = Seq( + StartStream(Trigger.ProcessingTime(1000), clock), + AdvanceManualClock(1000L), + CheckAnswer((0 until 15): _*), + AssertOnQuery { q => + // Ensure we only processed a single batch since the initial data load. + q.commitLog.getLatestBatchId().get == 0 + }, + AssertOnQuery { q => + eventually("Stream never stopped processing") { + // Wait until the stream stops processing, so we aren't racing with the next two + // commands on whether or not they end up in the same batch. + assert(!q.status.isTriggerActive) + assert(!q.status.isDataAvailable) + } + true + }, + AssertOnQuery { q => + sql(sqlCommand1) + deletionVectorsPresentIfExpected(inputDir, expectDVsInCommand1) + }, + AssertOnQuery { q => + sql(sqlCommand2) + deletionVectorsPresentIfExpected(inputDir, expectDVsInCommand2) + }, + AssertOnQuery { q => + // Ensure we still didn't process the DML commands. + q.commitLog.getLatestBatchId().get == 0 + }, + // Advance the clock, so that we process the two DML commands. 
+ AdvanceManualClock(2000L)) ++ + (if (shouldFailAfterCommands) { + Seq.empty[StreamAction] + } else { + Seq( + // This makes it move to the next batch. + AssertOnQuery { q => + eventually("Next batch was never processed") { + // Ensure we only processed a single batch with the DML commands. + assert(q.commitLog.getLatestBatchId().get === 1) + } + true + }) + }) + + testStream(df)((baseActions ++ expectations): _*) + } + + protected def eventually[T](message: String)(func: => T): T = { + try { + Eventually.eventually(Timeout(streamingTimeout)) { + func + } + } catch { + case NonFatal(e) => + fail(message, e) + } + } + + testQuietly(s"deleting files fails query if ignoreDeletes = false") { + withTempDir { inputDir => + ignoreOperationsTest( + inputDir.getAbsolutePath, + sourceOptions = Nil, + sqlCommand = s"DELETE FROM delta.`$inputDir`", + // Whole table deletes do not produce DVs. + commandShouldProduceDVs = Some(false))(ExpectFailure[DeltaUnsupportedOperationException] { + e => + for (msg <- Seq("Detected deleted data", "not supported", "ignoreDeletes", "true")) { + assert(e.getMessage.contains(msg)) + } + }) + } + } + + Seq("ignoreFileDeletion", DeltaOptions.IGNORE_DELETES_OPTION).foreach { ignoreDeletes => + testQuietly( + s"allow to delete files after staring a streaming query when $ignoreDeletes is true") { + withTempDir { inputDir => + ignoreOperationsTest( + inputDir.getAbsolutePath, + sourceOptions = Seq(ignoreDeletes -> "true"), + sqlCommand = s"DELETE FROM delta.`$inputDir`", + // Whole table deletes do not produce DVs. + commandShouldProduceDVs = Some(false))( + AssertOnQuery { q => + Seq(10).toDF().write.format("delta").mode("append").save(inputDir.getAbsolutePath) + q.processAllAvailable() + true + }, + CheckAnswer((0 to 10): _*)) + } + } + } + + case class SourceChangeVariant( + label: String, + query: File => String, + answerWithIgnoreChanges: Seq[Int]) + + val sourceChangeVariants: Seq[SourceChangeVariant] = Seq( + // A partial file delete is treated like an update by the Source. + SourceChangeVariant( + label = "DELETE", + query = inputDir => s"DELETE FROM delta.`$inputDir` WHERE value = 3", + // 2 occurs in the same file as 3, so it gets duplicated during processing. 
+ answerWithIgnoreChanges = (0 to 10) :+ 2)) + + for (variant <- sourceChangeVariants) + testQuietly( + "updating the source table causes failure when ignoreChanges = false" + + s" - using ${variant.label}") { + withTempDir { inputDir => + ignoreOperationsTest( + inputDir.getAbsolutePath, + sourceOptions = Nil, + sqlCommand = variant.query(inputDir))( + ExpectFailure[DeltaUnsupportedOperationException] { e => + for (msg <- Seq("data update", "not supported", "skipChangeCommits", "true")) { + assert(e.getMessage.contains(msg)) + } + }) + } + } + + for (variant <- sourceChangeVariants) + testQuietly( + "allow to update the source table when ignoreChanges = true" + + s" - using ${variant.label}") { + withTempDir { inputDir => + ignoreOperationsTest( + inputDir.getAbsolutePath, + sourceOptions = Seq(DeltaOptions.IGNORE_CHANGES_OPTION -> "true"), + sqlCommand = variant.query(inputDir))( + AssertOnQuery { q => + Seq(10).toDF().write.format("delta").mode("append").save(inputDir.getAbsolutePath) + q.processAllAvailable() + true + }, + CheckAnswer(variant.answerWithIgnoreChanges: _*)) + } + } + + testQuietly("deleting files when ignoreChanges = true doesn't fail the query") { + withTempDir { inputDir => + ignoreOperationsTest( + inputDir.getAbsolutePath, + sourceOptions = Seq(DeltaOptions.IGNORE_CHANGES_OPTION -> "true"), + sqlCommand = s"DELETE FROM delta.`$inputDir`", + // Whole table deletes do not produce DVs. + commandShouldProduceDVs = Some(false))( + AssertOnQuery { q => + Seq(10).toDF().write.format("delta").mode("append").save(inputDir.getAbsolutePath) + q.processAllAvailable() + true + }, + CheckAnswer((0 to 10): _*)) + } + } + + for (variant <- sourceChangeVariants) + testQuietly("updating source table when ignoreDeletes = true fails the query" + + s" - using ${variant.label}") { + withTempDir { inputDir => + ignoreOperationsTest( + inputDir.getAbsolutePath, + sourceOptions = Seq(DeltaOptions.IGNORE_DELETES_OPTION -> "true"), + sqlCommand = variant.query(inputDir))( + ExpectFailure[DeltaUnsupportedOperationException] { e => + for (msg <- Seq("data update", "not supported", "skipChangeCommits", "true")) { + assert(e.getMessage.contains(msg)) + } + }) + } + } + + private val allSourceOptions = Seq( + Nil, + List(DeltaOptions.IGNORE_DELETES_OPTION), + List(DeltaOptions.IGNORE_CHANGES_OPTION), + List(DeltaOptions.SKIP_CHANGE_COMMITS_OPTION)) + .map { options => + options.map(key => key -> "true") + } + + for (sourceOption <- allSourceOptions) + testQuietly( + "subsequent DML commands are processed correctly in a batch - DELETE->DELETE" + + s" - $sourceOption") { + val expectations: List[StreamAction] = + sourceOption.map(_._1) match { + case List(DeltaOptions.IGNORE_DELETES_OPTION) | Nil => + // These two do not allow updates. + ExpectFailure[DeltaUnsupportedOperationException] { e => + for (msg <- Seq("data update", "not supported", "skipChangeCommits", "true")) { + assert(e.getMessage.contains(msg)) + } + } :: Nil + case List(DeltaOptions.IGNORE_CHANGES_OPTION) => + // The 4 and 5 are in the same file as 3, so the first DELETE is going to duplicate them. + // 5 is still in the same file as 4 after the first DELETE, so the second DELETE is going + // to duplicate it again. + CheckAnswer((0 until 15) ++ Seq(4, 5, 5): _*) :: Nil + case List(DeltaOptions.SKIP_CHANGE_COMMITS_OPTION) => + // This will completely ignore the DELETEs. 
+ CheckAnswer((0 until 15): _*) :: Nil + } + + withTempDir { inputDir => + ignoreOperationsTestWithManualClock( + inputDir.getAbsolutePath, + sourceOptions = sourceOption, + sqlCommand1 = s"DELETE FROM delta.`$inputDir` WHERE value == 3", + sqlCommand2 = s"DELETE FROM delta.`$inputDir` WHERE value == 4", + expectations = expectations) + } + } + + for (sourceOption <- allSourceOptions) + testQuietly("subsequent DML commands are processed correctly in a batch - INSERT->DELETE" + + s" - $sourceOption") { + val expectations: List[StreamAction] = sourceOption.map(_._1) match { + case List(DeltaOptions.IGNORE_DELETES_OPTION) | Nil => + // These two do not allow updates. + ExpectFailure[DeltaUnsupportedOperationException] { e => + for (msg <- Seq("data update", "not supported", "skipChangeCommits", "true")) { + assert(e.getMessage.contains(msg)) + } + } :: Nil + case List(DeltaOptions.IGNORE_CHANGES_OPTION) => + // 15 and 16 are in the same file, so 16 will get duplicated by the DELETE. + CheckAnswer((0 to 16) ++ Seq(16): _*) :: Nil + case List(DeltaOptions.SKIP_CHANGE_COMMITS_OPTION) => + // This will completely ignore the DELETE. + CheckAnswer((0 to 16): _*) :: Nil + } + + withTempDir { inputDir => + ignoreOperationsTestWithManualClock( + inputDir.getAbsolutePath, + sourceOptions = sourceOption, + sqlCommand1 = + s"INSERT INTO delta.`$inputDir` SELECT /*+ COALESCE(1) */ * FROM VALUES 15, 16", + sqlCommand2 = s"DELETE FROM delta.`$inputDir` WHERE value == 15", + expectations = expectations) + } + } +} + +class DeltaSourceDeletionVectorsSuite extends DeltaSourceSuiteBase + with DeltaSQLCommandTest + with DeltaSourceDeletionVectorTests { + override def beforeAll(): Unit = { + super.beforeAll() + enableDeletionVectorsInNewTables(spark.conf) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceLargeLogSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceLargeLogSuite.scala new file mode 100644 index 00000000000..f251165bdc4 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceLargeLogSuite.scala @@ -0,0 +1,25 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +class DeltaSourceLargeLogSuite extends DeltaSourceSuite { + protected override def sparkConf = { + super.sparkConf.set(DeltaSQLConf.LOG_SIZE_IN_MEMORY_THRESHOLD.key, "0") + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSchemaEvolutionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSchemaEvolutionSuite.scala new file mode 100644 index 00000000000..42d5766fd8f --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSchemaEvolutionSuite.scala @@ -0,0 +1,2214 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.nio.charset.Charset + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} +import org.apache.spark.sql.delta.sources._ +import org.apache.spark.sql.delta.test.{DeltaColumnMappingSelectedTestMixin, DeltaSQLCommandTest} +import org.apache.spark.sql.delta.util.JsonUtils +import org.apache.commons.io.FileUtils +import org.apache.commons.lang3.exception.ExceptionUtils +import org.apache.hadoop.fs.Path +import org.apache.logging.log4j.Level + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} +import org.apache.spark.sql.execution.streaming.Offset +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.streaming.{StreamingQueryException, Trigger} +import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.util.Utils + +trait StreamingSchemaEvolutionSuiteBase extends ColumnMappingStreamingTestUtils + with DeltaSourceSuiteBase with DeltaColumnMappingSelectedTestMixin with DeltaSQLCommandTest { + + override protected def runOnlyTests: Seq[String] = Seq( + "schema log initialization with additive schema changes", + "detect incompatible schema change while streaming", + "trigger.Once with deferred commit should work", + "trigger.AvailableNow should work", + "consecutive schema evolutions", + "latestOffset should not progress before schema evolved" + ) + + override protected def sparkConf: SparkConf = { + val conf = super.sparkConf + // Enable for testing + conf.set(DeltaSQLConf.DELTA_STREAMING_ENABLE_SCHEMA_TRACKING.key, "true") + conf.set( + DeltaSQLConf.DELTA_STREAMING_ENABLE_SCHEMA_TRACKING_MERGE_CONSECUTIVE_CHANGES.key, "true") + conf.set( + s"${DeltaSQLConf.SQL_CONF_PREFIX}.streaming.allowSourceColumnRenameAndDrop", "always") + if (isCdcTest) { + conf.set(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true") + } else { + conf + } + } + + protected def withoutAllowStreamRestart(f: => Unit): Unit = { + withSQLConf(s"${DeltaSQLConf.SQL_CONF_PREFIX}.streaming" + + s".allowSourceColumnRenameAndDrop" -> "false") { + f + } + } + + protected def testWithoutAllowStreamRestart(testName: String)(f: => Unit): Unit = { + test(testName) { + withoutAllowStreamRestart(f) + } + } + + import testImplicits._ + + protected val ExpectSchemaLogInitializationFailedException = + ExpectFailure[DeltaRuntimeException](e => + assert( + e.asInstanceOf[DeltaRuntimeException].getErrorClass == + "DELTA_STREAMING_SCHEMA_LOG_INIT_FAILED_INCOMPATIBLE_METADATA" && + // Does NOT come from the stream start check which is for lazy initialization ... 
+ !e.getStackTrace.exists( + _.toString.contains("checkReadIncompatibleSchemaChangeOnStreamStartOnce")) && + // Coming from the check against constructed batches + e.getStackTrace.exists( + _.toString.contains("validateAndInitMetadataLogForPlannedBatchesDuringStreamStart")) + ) + ) + + protected val ExpectMetadataEvolutionException = + ExpectFailure[DeltaRuntimeException](e => + assert( + e.asInstanceOf[DeltaRuntimeException].getErrorClass == + "DELTA_STREAMING_METADATA_EVOLUTION" && + e.getStackTrace.exists( + _.toString.contains("updateMetadataTrackingLogAndFailTheStreamIfNeeded")) + ) + ) + + protected val ExpectMetadataEvolutionExceptionFromInitialization = + ExpectFailure[DeltaRuntimeException](e => + assert( + e.asInstanceOf[DeltaRuntimeException].getErrorClass == + "DELTA_STREAMING_METADATA_EVOLUTION" && + !e.getStackTrace.exists(_.toString.contains("checkReadIncompatibleSchemaChanges")) && + e.getStackTrace.exists(_.toString.contains("initializeMetadataTrackingAndExitStream")) + ) + ) + + protected val indexWhenSchemaLogIsUpdated = DeltaSourceOffset.POST_METADATA_CHANGE_INDEX + + protected val AwaitTermination = AssertOnQuery { q => + q.awaitTermination(600 * 1000) // 600 seconds + true + } + + protected val AwaitTerminationIgnoreError = AssertOnQuery { q => + try { + q.awaitTermination(600 * 1000) // 600 seconds + } catch { + case _: Throwable => + // ignore + } + true + } + + protected def allowSchemaLocationOutsideCheckpoint(f: => Unit): Unit = { + val allowSchemaLocationOutSideCheckpointConf = + DeltaSQLConf.DELTA_STREAMING_ALLOW_SCHEMA_LOCATION_OUTSIDE_CHECKPOINT_LOCATION.key + withSQLConf(allowSchemaLocationOutSideCheckpointConf -> "true") { + f + } + } + + protected def testSchemaEvolution( + testName: String, + columnMapping: Boolean = true, + tags: Seq[org.scalatest.Tag] = Seq.empty)(f: DeltaLog => Unit): Unit = { + super.test(testName, tags: _*) { + if (columnMapping) { + withStarterTable { log => + f(log) + } + } else { + withColumnMappingConf("none") { + withStarterTable { log => + f(log) + } + } + } + } + } + + /** + * Initialize a starter table with 6 rows and schema STRUCT + */ + protected def withStarterTable(f: DeltaLog => Unit): Unit = { + withTempDir { dir => + val tablePath = dir.getCanonicalPath + // Write 6 versions, the first version 0 will contain data -1 and will come with the default + // schema initialization actions. 
+ (-1 until 5).foreach { i => + Seq((i.toString, i.toString)).toDF("a", "b") + .write.mode("append").format("delta") + .save(tablePath) + } + val deltaLog = DeltaLog.forTable(spark, dir.getCanonicalPath) + deltaLog.update() + f(deltaLog) + } + } + + protected def addData( + data: Seq[Int], + userSpecifiedSchema: Option[StructType] = None)(implicit log: DeltaLog): Unit = { + val schema = userSpecifiedSchema.getOrElse(log.update().schema) + data.foreach { i => + val data = Seq(Row(schema.map(_ => i.toString): _*)) + spark.createDataFrame(data.asJava, schema) + .write.format("delta").mode("append").save(log.dataPath.toString) + } + } + + protected def readStream( + schemaLocation: Option[String] = None, + sourceTrackingId: Option[String] = None, + startingVersion: Option[Long] = None, + maxFilesPerTrigger: Option[Int] = None, + ignoreDeletes: Option[Boolean] = None)(implicit log: DeltaLog): DataFrame = { + var dsr = spark.readStream.format("delta") + if (isCdcTest) { + dsr = dsr.option(DeltaOptions.CDC_READ_OPTION, "true") + } + schemaLocation.foreach { loc => dsr = dsr.option(DeltaOptions.SCHEMA_TRACKING_LOCATION, loc) } + sourceTrackingId.foreach { name => + dsr = dsr.option(DeltaOptions.STREAMING_SOURCE_TRACKING_ID, name) + } + startingVersion.foreach { v => dsr = dsr.option("startingVersion", v) } + maxFilesPerTrigger.foreach { f => dsr = dsr.option("maxFilesPerTrigger", f) } + ignoreDeletes.foreach{ i => dsr.option("ignoreDeletes", i) } + val df = { + dsr.load(log.dataPath.toString) + } + if (isCdcTest) { + dropCDCFields(df) + } else { + df + } + } + + protected def getDefaultSchemaLog( + sourceTrackingId: Option[String] = None, + initializeEagerly: Boolean = true + )(implicit log: DeltaLog): DeltaSourceMetadataTrackingLog = + DeltaSourceMetadataTrackingLog.create( + spark, getDefaultSchemaLocation.toString, log.update(), sourceTrackingId, + initMetadataLogEagerly = initializeEagerly) + + protected def getDefaultCheckpoint(implicit log: DeltaLog): Path = + new Path(log.dataPath, "_checkpoint") + + protected def getDefaultSchemaLocation(implicit log: DeltaLog): Path = + new Path(getDefaultCheckpoint, "_schema_location") + + protected def addColumn(column: String, dt: String = "STRING")(implicit log: DeltaLog): Unit = { + sql(s"ALTER TABLE delta.`${log.dataPath}` ADD COLUMN ($column $dt)") + } + + protected def renameColumn(oldColumn: String, newColumn: String)(implicit log: DeltaLog): Unit = { + sql(s"ALTER TABLE delta.`${log.dataPath}` RENAME COLUMN $oldColumn TO $newColumn") + } + + protected def dropColumn(column: String)(implicit log: DeltaLog): Unit = { + sql(s"ALTER TABLE delta.`${log.dataPath}` DROP COLUMN $column") + } + + protected def overwriteSchema( + schema: StructType, + partitionColumns: Seq[String] = Nil)(implicit log: DeltaLog): Unit = { + spark.sqlContext.internalCreateDataFrame(spark.sparkContext.emptyRDD[InternalRow], schema) + .write.format("delta") + .mode("overwrite") + .partitionBy(partitionColumns: _*) + .option("overwriteSchema", "true") + .save(log.dataPath.toString) + } + + protected def upgradeToNameMode(implicit log: DeltaLog): Unit = { + sql( + s"""ALTER TABLE delta.`${log.dataPath}` SET TBLPROPERTIES ( + |'delta.columnMapping.mode' = "name", + |'delta.minReaderVersion' = '2', + |'delta.minWriterVersion' = '5' + |) + |""".stripMargin) + } + + protected def makeMetadata( + schema: StructType, + partitionSchema: StructType)(implicit log: DeltaLog): Metadata = { + log.update().metadata.copy( + schemaString = schema.json, + partitionColumns = 
partitionSchema.fieldNames + ) + } + + protected def testSchemasLocationMustBeUnderCheckpoint(implicit log: DeltaLog): Unit = { + val dest = Utils.createTempDir().getCanonicalPath + val ckpt = getDefaultCheckpoint.toString + val invalidSchemaLocation = Utils.createTempDir().getCanonicalPath + + // By default it should fail + val e = intercept[DeltaAnalysisException] { + readStream(schemaLocation = Some(invalidSchemaLocation)) + .writeStream.option("checkpointLocation", ckpt).start(dest) + } + assert(e.getErrorClass == "DELTA_STREAMING_SCHEMA_LOCATION_NOT_UNDER_CHECKPOINT") + + // But can be lifted with the flag + allowSchemaLocationOutsideCheckpoint { + testStream(readStream(schemaLocation = Some(invalidSchemaLocation)))( + StartStream(checkpointLocation = ckpt), + ProcessAllAvailable(), + CheckAnswer((-1 until 5).map(i => (i.toString, i.toString)): _*) + ) + } + } + + testSchemaEvolution(s"schema location must be placed under checkpoint location") { implicit log => + testSchemasLocationMustBeUnderCheckpoint + } + + testSchemaEvolution("multiple delta source sharing same schema log is blocked") { implicit log => + allowSchemaLocationOutsideCheckpoint { + val dest = Utils.createTempDir().getCanonicalPath + val ckpt = getDefaultCheckpoint.toString + val schemaLocation = getDefaultSchemaLocation.toString + + // Two INSTANCES of Delta sources sharing same schema location should be blocked + val df1 = readStream(schemaLocation = Some(schemaLocation)) + val df2 = readStream(schemaLocation = Some(schemaLocation)) + val sdf = df1 union df2 + + val e = intercept[DeltaAnalysisException] { + sdf.writeStream.option("checkpointLocation", ckpt).start(dest) + } + assert(e.getErrorClass == "DELTA_STREAMING_SCHEMA_LOCATION_CONFLICT") + + + // But providing an additional source name can differentiate + val df3 = readStream(schemaLocation = Some(schemaLocation), sourceTrackingId = Some("a")) + val df4 = readStream(schemaLocation = Some(schemaLocation), sourceTrackingId = Some("b")) + val sdf2 = df3 union df4 + testStream(sdf2)( + StartStream(checkpointLocation = ckpt), + ProcessAllAvailable(), + CheckAnswer(((-1 until 5) union (-1 until 5)).map(i => (i.toString, i.toString)): _*) + ) + + // But if they are the same instance it should not be blocked, because they will be + // unified to the same source during execution. 
+      val sdf3 = df1 union df1
+      testStream(sdf3)(
+        StartStream(checkpointLocation = ckpt),
+        ProcessAllAvailable(),
+        AssertOnQuery { q =>
+          // Just one source being executed
+          q.committedOffsets.size == 1
+        }
+      )
+    }
+  }
+
+  // Disable column mapping for this test so we could save some schema metadata manipulation hassle
+  testSchemaEvolution("schema log is applied", columnMapping = false) { implicit log =>
+    withSQLConf(
+      DeltaSQLConf.DELTA_STREAMING_SCHEMA_TRACKING_METADATA_PATH_CHECK_ENABLED.key -> "false") {
+      // Schema log's schema is respected
+      val schemaLog = getDefaultSchemaLog()
+      val newSchema = PersistedMetadata(log.tableId, 0,
+        makeMetadata(
+          new StructType().add("a", StringType, true)
+            .add("b", StringType, true)
+            .add("c", StringType, true),
+          partitionSchema = new StructType()
+        ),
+        log.update().protocol,
+        sourceMetadataPath = ""
+      )
+      schemaLog.writeNewMetadata(newSchema)
+
+      testStream(
+        readStream(schemaLocation = Some(getDefaultSchemaLocation.toString),
+          // Ignore initial snapshot
+          startingVersion = Some(1L)))(
+        StartStream(checkpointLocation = getDefaultCheckpoint.toString),
+        ProcessAllAvailable(),
+        // See how the schema returns one more dimension for `c`
+        CheckAnswer((0 until 5).map(_.toString).map(i => (i, i, null)): _*)
+      )
+
+      // Cannot use schema from another table
+      val newSchemaWithTableId = PersistedMetadata(
+        "some_random_id", 0,
+        makeMetadata(
+          new StructType().add("a", StringType, true)
+            .add("b", StringType, true),
+          partitionSchema = new StructType()
+        ),
+        log.update().protocol,
+        sourceMetadataPath = ""
+      )
+      schemaLog.writeNewMetadata(newSchemaWithTableId)
+      assert {
+        val e = intercept[DeltaAnalysisException] {
+          val q = readStream(
+            schemaLocation = Some(getDefaultSchemaLocation.toString),
+            // Ignore initial snapshot
+            startingVersion = Some(1L))
+            .writeStream
+            .option("checkpointLocation", getDefaultCheckpoint.toString)
+            .outputMode("append")
+            .format("console")
+            .start()
+          q.processAllAvailable()
+          q.stop()
+        }
+        ExceptionUtils.getRootCause(e).asInstanceOf[DeltaAnalysisException]
+          .getErrorClass == "DELTA_STREAMING_SCHEMA_LOG_INCOMPATIBLE_DELTA_TABLE_ID"
+      }
+    }
+  }
+
+  test("concurrent schema log modification should be detected") {
+    withStarterTable { implicit log =>
+      // Note: this test assumes schema log files are written one after another, which is the
+      // majority of cases; true concurrent execution would require a commit service to protect
+      // against it.
+      val schemaLocation = getDefaultSchemaLocation.toString
+      val snapshot = log.update()
+      val schemaLog1 = DeltaSourceMetadataTrackingLog.create(spark, schemaLocation, snapshot)
+      val schemaLog2 = DeltaSourceMetadataTrackingLog.create(spark, schemaLocation, snapshot)
+      val newSchema =
+        PersistedMetadata("1", 1,
+          makeMetadata(new StructType(), partitionSchema = new StructType()),
+          Protocol(),
+          sourceMetadataPath = "")
+
+      schemaLog1.writeNewMetadata(newSchema)
+      val e = intercept[DeltaAnalysisException] {
+        schemaLog2.writeNewMetadata(newSchema)
+      }
+      assert(e.getErrorClass == "DELTA_STREAMING_SCHEMA_LOCATION_CONFLICT")
+    }
+  }
+
+  /**
+   * Manually create a new offset with targeted reservoirVersion by copying it from the previous
+   * offset.
+   * @param checkpoint Checkpoint location
+   * @param version Target version
+   * @param index Target index file.
+ * @return The raw content for the updated offset file + */ + protected def manuallyCreateLatestStreamingOffsetUntilReservoirVersion( + checkpoint: String, + version: Long, + index: Long = DeltaSourceOffset.BASE_INDEX): String = { + // manually create another offset to latest version + val offsetDir = new File(checkpoint.stripPrefix("file:") + "/offsets") + val previousOffset = offsetDir.listFiles().filter(!_.getName.endsWith(".crc")) + .maxBy(_.getName.toInt) + val previousOffsetContent = FileUtils + .readFileToString(previousOffset, Charset.defaultCharset()) + + val reservoirVersionRegex = """"reservoirVersion":[0-9]+""".r + val indexRegex = """"index":-?\d+""".r + var updated = reservoirVersionRegex + .replaceAllIn(previousOffsetContent, s""""reservoirVersion":$version""") + updated = indexRegex.replaceAllIn(updated, s""""index":$index""") + + val newOffsetFile = new File(previousOffset.getParent, + (previousOffset.getName.toInt + 1).toString) + FileUtils.writeStringToFile(newOffsetFile, updated, Charset.defaultCharset()) + updated + } + + /** + * Write serialized offset content as a batch id for a particular checkpoint. + * @param checkpoint Checkpoint location + * @param batchId Target batch ID to write to + * @param offsetContent Offset content + */ + protected def manuallyCreateStreamingOffsetAtBatchId( + checkpoint: String, batchId: Long, offsetContent: String): Unit = { + // manually create another offset to latest version + val offsetDir = new File(checkpoint.stripPrefix("file:") + "/offsets") + val newOffsetFile = new File(offsetDir, batchId.toString) + FileUtils.writeStringToFile(newOffsetFile, offsetContent, Charset.defaultCharset()) + } + + /** + * Manually delete the latest offset + * @param checkpoint Checkpoint location + */ + protected def manuallyDeleteLatestBatchId(checkpoint: String): Unit = { + // manually create another offset to latest version + val offsetDir = new File(checkpoint.stripPrefix("file:") + "/offsets") + val latestOffsetFile = offsetDir.listFiles().filter(!_.getName.endsWith(".crc")) + .maxBy(_.getName.toInt) + latestOffsetFile.delete() + } + + testSchemaEvolution("schema log initialization with additive schema changes") { implicit log => + // Provide a schema log by default + def createNewDf(): DataFrame = + readStream(schemaLocation = Some(getDefaultSchemaLocation.toString)) + // Initialize snapshot schema same as latest, no need to fail stream + testStream(createNewDf())( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + CheckAnswer((-1 until 5).map(_.toString).map(i => (i, i)): _*) + ) + + val v0 = log.update().version + + // And schema log is initialized already, even though there aren't schema evolution exceptions + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.get.deltaCommitVersion == v0) + + // Add a column and some data + addColumn("c") + val v1 = log.update().version + + addData(5 until 10) + + // Update schema log to v1 + testStream(createNewDf())( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.get.deltaCommitVersion == v1) + + var v2: Long = -1 + testStream(createNewDf())( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + // Process successfully + CheckAnswer((5 until 10).map(_.toString).map(i => (i, i, i)): _*), + // Trigger additive schema change would evolve schema as 
well + Execute { _ => + addColumn("d") + v2 = log.update().version + }, + Execute { _ => addData(10 until 15) }, + ExpectMetadataEvolutionException, + AssertOnQuery { q => + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last) + offset.index == indexWhenSchemaLogIsUpdated + } + ) + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.get.deltaCommitVersion == v2) + testStream(createNewDf())( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + CheckAnswer((10 until 15).map(_.toString).map(i => (i, i, i, i)): _*) + ) + } + + testSchemaEvolution("detect incompatible schema change while streaming") { implicit log => + // Rename as part of initial snapshot + renameColumn("b", "c") + // Write more data + addData(5 until 10) + // Source df without schema location + val df = readStream() + var schemaChangeDeltaVersion: Long = -1 + testStream(df)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + // schema change inside initial snapshot should not throw error + CheckAnswer((-1 until 10).map(i => (i.toString, i.toString)): _*), + // This new rename should throw the legacy error because we have not provided a schema + // location + Execute { _ => + renameColumn("c", "d") + schemaChangeDeltaVersion = log.update().version + }, + // Add some data in new schema + Execute { _ => addData(10 until 15) }, + ProcessAllAvailableIgnoreError, + // No more data should've been processed + CheckAnswer((-1 until 10).map(i => (i.toString, i.toString)): _*), + // Detected by the in stream check + ExpectInStreamSchemaChangeFailure + ) + // Start the stream again with a schema location + val df2 = readStream(schemaLocation = Some(getDefaultSchemaLocation.toString)) + assert(getDefaultSchemaLog().getLatestMetadata.isEmpty) + testStream(df2)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + // No data should've been processed + CheckAnswer(Nil: _*), + // Schema evolution exception! + ExpectMetadataEvolutionExceptionFromInitialization + ) + // We should've updated the schema to the version just before the schema change version + // because that's the previous version's schema we left with. To be safe and in case there + // are more file actions to process, we saved that schema instead of the renamed schema. + // Also, since the previous batch was still on initial snapshot, the last file action was not + // bumped to the next version, so the schema initialization effectively did not consider the + // rename column schema change's version. 
+ assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == + schemaChangeDeltaVersion - 1) + // Start the stream again with the same schema location + val df3 = readStream(schemaLocation = Some(getDefaultSchemaLocation.toString)) + testStream(df3)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + // Again, no data should've been processed because the next version has a rename + CheckAnswer(Nil: _*), + // And schema will be evolved again + ExpectMetadataEvolutionException + ) + // Now finally the schema log is up to date + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == + schemaChangeDeltaVersion) + // Start the stream again should process the rest of the data without a problem + val df4 = readStream(schemaLocation = Some(getDefaultSchemaLocation.toString)) + val v1 = log.update().version + testStream(df4)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + CheckAnswer((10 until 15).map(i => (i.toString, i.toString)): _*), + AssertOnQuery { q => + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last) + // bumped from file action, no pending schema change + offset.reservoirVersion == v1 + 1 && + offset.index == DeltaSourceOffset.BASE_INDEX && + // BASE_INDEX is -100 but serialized form should use version 1 & index -1 for backward + // compatibility + offset.json.contains(s""""sourceVersion":1""") && + offset.json.contains(s""""index":-1""") + }, + // Trigger another schema change + Execute { _ => + addColumn("e") + addData(15 until 20) + }, + ProcessAllAvailableIgnoreError, + // No more new data + CheckAnswer((10 until 15).map(i => (i.toString, i.toString)): _*), + AssertOnQuery { q => + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last) + // latest offset should have a schema attached and evolved set to true + // note the reservoir version has not changed + offset.reservoirVersion == v1 + 1 && + offset.index == indexWhenSchemaLogIsUpdated + }, + ExpectMetadataEvolutionException + ) + + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v1 + 1) + + val df5 = readStream(schemaLocation = Some(getDefaultSchemaLocation.toString)) + // Process the rest + testStream(df5)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + CheckAnswer((15 until 20).map(i => (i.toString, i.toString, i.toString)): _*) + ) + } + + testSchemaEvolution("detect incompatible schema change during first getBatch") { implicit log => + renameColumn("b", "c") + val schemaChangeVersion = log.update().version + // Source df without schema location, and start at version 1 to ignore initial snapshot + // We also use maxFilePerTrigger=1 so that the first getBatch will conduct the check instead + // of latestOffset() scanning far ahead and throw the In-Stream version of the exception. 
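As an aside, the readStream helper used throughout these tests is only a thin wrapper over a plain Delta streaming read. A minimal sketch of the equivalent reader outside the test harness (the table path below is a placeholder, not part of the suite) could look like:

  // Sketch only: start reading at table version 1 and cap each micro-batch at one file,
  // mirroring the startingVersion / maxFilesPerTrigger options exercised by this test.
  val smallBatchStream = spark.readStream
    .format("delta")
    .option("startingVersion", "1")
    .option("maxFilesPerTrigger", "1")
    .load("/tmp/delta/source-table")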
+ val df = readStream(startingVersion = Some(1), maxFilesPerTrigger = Some(1)) + testStream(df)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + // Add more data + Execute { _ => addData(5 until 10) }, + // Try processing + ProcessAllAvailableIgnoreError, + // No data should've been processed :) + CheckAnswer(Nil: _*), + // The first getBatch should fail + if (isCdcTest) { + ExpectGenericSchemaIncompatibleFailure + } else { + ExpectStreamStartInCompatibleSchemaFailure + } + ) + // Restart with a schema location, note that maxFilePerTrigger is not needed now + // because a schema location is provided and any exception would evolve the schema. + val df2 = readStream(startingVersion = Some(1), + schemaLocation = Some(getDefaultSchemaLocation.toString)) + assert(getDefaultSchemaLog().getLatestMetadata.isEmpty) + testStream(df2)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + // Again, no data is processed + CheckLastBatch(Nil: _*), + // Schema evolution exception! + ExpectMetadataEvolutionExceptionFromInitialization + ) + // Since the error happened during the first getBatch, we initialize schema log to schema@v1 + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == 1) + // Restart again with a schema location + val df3 = readStream(startingVersion = Some(1), + schemaLocation = Some(getDefaultSchemaLocation.toString)) + testStream(df3)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + // Note that the default maxFilePerTrigger is 1000, so this shows that the batch has been + // split and the available data prior to schema change should've been served. + // Also since we started at v1, -1 is not included. + CheckAnswer((0 until 5).map(i => (i.toString, i.toString)): _*), + // Schema evolution exception! + ExpectMetadataEvolutionException + ) + // Now the schema is up to date + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == schemaChangeVersion) + // Restart again should pick up the new schema and process the rest without a problem. + // Note that startingVersion is ignored when we have existing progress to work with. + val df4 = readStream(startingVersion = Some(1), + schemaLocation = Some(getDefaultSchemaLocation.toString)) + testStream(df4)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + CheckAnswer((5 until 10).map(i => (i.toString, i.toString)): _*) + ) + } + + /** + * This test manually generates Delta source offsets that crosses non-additive schema change + * boundaries to test if the schema log initialization check logic can detect those changes and + * error out. + */ + protected def testDetectingInvalidOffsetDuringLogInit( + invalidAction: String, + readStreamWithSchemaLocation: => DataFrame, + expectedLogInitException: StreamAction)(implicit log: DeltaLog): Unit = { + // start a stream to initialize checkpoint + val ckpt = getDefaultCheckpoint.toString + val schemaLoc = getDefaultSchemaLocation.toString + val df = readStream(startingVersion = Some(1)) + testStream(df)( + StartStream(checkpointLocation = ckpt), + ProcessAllAvailable(), + CheckAnswer((0 until 5).map(i => (i.toString, i.toString)): _*), + StopStream + ) + // Add more data to create room for data offsets, so when the stream resumes, the latest + // committed offset if still in the old schema. 
+ addData(Seq(6)) + if (invalidAction == "rename") { + renameColumn("b", "c") + } else if (invalidAction == "drop") { + addColumn("c") + } + // write more data + addData(Seq(7)) + // Add a rename or drop commit that reverses the previous change, to ensure that our check + // has validated all the schema changes, instead of just checking the start schema. + if (invalidAction == "rename") { + renameColumn("c", "b") + } else if (invalidAction == "drop") { + dropColumn("c") + } else { + assert(false, s"unexpected action ${invalidAction}") + } + // write more data + addData(Seq(8)) + val latestVersion = log.update().version + // Manually create another offset to latest version to simulate the situation that an end + // offset is somehow generated that bypasses the block, e.g. they were upgrading from a + // super old version that did not have the block logic, and is left with a constructed + // batch that bypasses a schema change. + // There should be at MOST one such trailing batch as of today's streaming engine semantics. + val offsetContent = + manuallyCreateLatestStreamingOffsetUntilReservoirVersion(ckpt, latestVersion) + + // rerun the stream should detect that and fail, even with schema location + testStream(readStreamWithSchemaLocation)( + StartStream(checkpointLocation = ckpt), + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + expectedLogInitException + ) + + // Let's also test the case when we only have one offset in the checkpoint without any committed + // Clear existing checkpoint dir and schema log dir + FileUtils.deleteDirectory(new File(ckpt.stripPrefix("file:"))) + new File(ckpt.stripPrefix("file:")).mkdirs() + FileUtils.deleteDirectory(new File(schemaLoc.stripPrefix("file:"))) + + // Create a single offset that points to the latest version of the table. + manuallyCreateStreamingOffsetAtBatchId(ckpt, 0, offsetContent) + + // One more non additive schema change + if (invalidAction == "rename") { + renameColumn("a", "x") + } else if (invalidAction == "drop") { + dropColumn("b") + } + + addData(Seq(9)) + + val latestVersion2 = log.update().version + + // Create another offset point to the updated latest version + manuallyCreateLatestStreamingOffsetUntilReservoirVersion(ckpt, latestVersion2) + + // This should also fail because it crossed the new non-additive schema change above, note that + // since we didn't have a committed offset nor a user specified startingVersion, the first + // offset will re-read using latestVersion2 - 1 as the initial snapshot now. + // Without this new non-additive schema change the validation would actually pass. + testStream(readStreamWithSchemaLocation)( + StartStream(checkpointLocation = ckpt), + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + expectedLogInitException + ) + } + + Seq("rename", "drop").foreach { invalidAction => + testSchemaEvolution(s"detect invalid offset during initialization before " + + s"initializing schema log - $invalidAction") { implicit log => + def provideStreamingDf: DataFrame = + readStream(schemaLocation = Some(getDefaultSchemaLocation.toString)) + testDetectingInvalidOffsetDuringLogInit( + invalidAction, + provideStreamingDf, + ExpectSchemaLogInitializationFailedException + ) + } + } + + /** + * This test checks a corner case on the initialization of the schema log. + * When a log is initialized, we would check over ALL pending batches and their delta versions + * to ensure we have a safe schema to read all of them (i.e. no non-additive schema changes) + * within the range. 
+ * This test checks the case when the last version of the range is a non-additive schema change, + * but it does not need to be blocked because there's no data to be read during initialization. + */ + protected def testLogInitializationWithoutBlockingOnSchemaChangeInTheEnd( + readStreamWithSchemaLocation: => DataFrame, + expectLogInitException: StreamAction)(implicit log: DeltaLog): Unit = { + // Start a stream to initialize checkpoint + val ckpt = getDefaultCheckpoint.toString + val df = readStream(startingVersion = Some(1)) + testStream(df)( + StartStream(checkpointLocation = ckpt), + ProcessAllAvailable(), + CheckAnswer((0 until 5).map(i => (i.toString, i.toString)): _*), + StopStream + ) + val v0 = log.update().version + // The previous committed offset ends at (v0 + 1, -100). + + // Add more data + addData(Seq(5)) + // Non-additive schema change + renameColumn("b", "c") + val v1 = log.update().version + + // Manually create another offset ending on [v1, -100] + manuallyCreateLatestStreamingOffsetUntilReservoirVersion(ckpt, v1) + + // Start stream again would attempt to run the constructed batch first. + // Since the ending offset does not yet contain the metadata action, we won't need to block + // the schema log initialization + testStream(readStreamWithSchemaLocation)( + StartStream(checkpointLocation = ckpt), + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + expectLogInitException + ) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v0 + 1) + + testStream(readStreamWithSchemaLocation)( + StartStream(checkpointLocation = ckpt), + ProcessAllAvailableIgnoreError, + // Data processed + CheckAnswer(("5", "5")), + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v1) + } + + testSchemaEvolution(s"no need to block schema log initialization if " + + s"constructed batch ends on schema change") { implicit log => + def provideStreamingDf: DataFrame = + readStream(schemaLocation = Some(getDefaultSchemaLocation.toString)) + testLogInitializationWithoutBlockingOnSchemaChangeInTheEnd( + provideStreamingDf, + ExpectMetadataEvolutionExceptionFromInitialization + ) + } + + testSchemaEvolution("resolve the most encompassing schema during getBatch " + + "to initialize schema log") { implicit log => + // start a stream to initialize checkpoint + val ckpt = getDefaultCheckpoint.toString + val df = readStream(startingVersion = Some(1)) + testStream(df)( + StartStream(checkpointLocation = ckpt), + ProcessAllAvailable() + ) + val v1 = log.update().version + // add a new column + addColumn("c") + // write more data + addData(5 until 6) + // add another column + addColumn("d") + val secondAddColumnVersion = log.update().version + addData(6 until 10) + // add an invalid commit so we could fail directly + renameColumn("d", "d2") + val renamedVersion = log.update().version + // v2 should include the two add column change but not the renamed version + val v2 = v1 + 5 + // manually create another offset to latest version + manuallyCreateLatestStreamingOffsetUntilReservoirVersion(ckpt, v2, -1) + // rerun the stream should detect rename with the stream start check, but since within the + // offsets the schema changes are all additive, we could use the encompassing schema . 
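To make the idea of the "most encompassing schema" concrete, here is a small illustrative sketch (not part of the suite): for purely additive changes every later schema is a superset of the earlier ones, so the widest schema in the offset range can safely read all pending files, with missing columns surfacing as nulls.

  // Sketch only: additive evolution of the starter schema used in these tests.
  import org.apache.spark.sql.types.{StringType, StructType}

  val schemaAtV1 = new StructType().add("a", StringType).add("b", StringType)
  val afterAddC  = schemaAtV1.add("c", StringType)
  val afterAddD  = afterAddC.add("d", StringType)
  // Files written with schemaAtV1 or afterAddC can still be read with afterAddD;
  // the columns they lack simply come back as null, so no batch needs to be blocked.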
+    val schemaLocation = getDefaultSchemaLocation.toString
+    testStream(readStream(schemaLocation = Some(schemaLocation)))(
+      StartStream(checkpointLocation = ckpt),
+      ProcessAllAvailableIgnoreError,
+      CheckAnswer(Nil: _*),
+      // Schema can be evolved
+      ExpectMetadataEvolutionExceptionFromInitialization
+    )
+    // Schema log is ready and populated with
+    assert(getDefaultSchemaLog().getLatestMetadata.get.dataSchema.fieldNames
+      .sameElements(Array("a", "b", "c", "d")))
+    // ... which is the schema that should be valid until v2 - 1 (the batch end version).
+    // It is v2 - 1 because the latest offset sits on the BASE_INDEX of v2, which does not contain
+    // any data, so there's no need to consider that for schema change initialization.
+    assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v2 - 1)
+    // Keep going until rename is found
+    testStream(readStream(schemaLocation = Some(schemaLocation)))(
+      StartStream(checkpointLocation = ckpt),
+      ProcessAllAvailableIgnoreError,
+      CheckAnswer((Seq(5).map(i => (i.toString, i.toString, i.toString, null)) ++
+        (6 until 10).map(i => (i.toString, i.toString, i.toString, i.toString))): _*),
+      ExpectMetadataEvolutionException
+    )
+    // Schema log is evolved with
+    assert(getDefaultSchemaLog().getLatestMetadata.get.dataSchema.fieldNames
+      .sameElements(Array("a", "b", "c", "d2")))
+    // ... which is the renamed version
+    assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == renamedVersion)
+  }
+
+  test("trigger.Once with deferred commit should work") {
+    withStarterTable { implicit log =>
+      dropColumn("b")
+      val schemaChangeVersion = log.update().version
+      addData(5 until 10)
+
+      val ckpt = getDefaultCheckpoint.toString
+      val schemaLoc = getDefaultSchemaLocation.toString
+
+      // Use starting version to ignore initial snapshot
+      def read: DataFrame = readStream(schemaLocation = Some(schemaLoc), startingVersion = Some(1))
+
+      // Use once trigger to execute streaming one step at a time
+      val StartThisStream = StartStream(trigger = Trigger.Once, checkpointLocation = ckpt)
+      // This trigger:
+      // 1. The stream starts with an uninitialized schema log.
+      // 2. The stream schema is taken from the latest version of the Delta table.
+      // 3. The schema tracking log must be initialized immediately, in this case from latestOffset
+      //    because this is the first time the stream starts. The schema is initialized to the
+      //    schema at version 1.
+      // 4. Because the schema at version 1 is not equal to the stream schema, the stream must be
+      //    restarted.
+      testStream(read)(
+        StartThisStream,
+        AwaitTerminationIgnoreError,
+        CheckAnswer(Nil: _*),
+        ExpectMetadataEvolutionExceptionFromInitialization
+      )
+      // Latest schema in schema log has been initialized
+      assert(getDefaultSchemaLog().getLatestMetadata.exists(_.deltaCommitVersion == 1))
+
+      // This trigger:
+      // 1. Finds the latest offset that ends with the schema change
+      // 2. Serves all batches prior to the schema change
+      // Note that the schema has NOT evolved yet because the batch ending at the schema change has
+      // not been committed, and thus we have not triggered the schema evolution and will need an
+      // extra restart.
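Outside the test harness, the restart-on-evolution behavior exercised here is usually handled by wrapping a batch-style trigger in a retry loop. The sketch below is only an illustration: the paths, the retry bound, and the "schemaTrackingLocation" option key (the user-facing counterpart of the DeltaOptions.SCHEMA_TRACKING_LOCATION constant used by the readStream helper) are assumptions rather than part of this suite.

  // Sketch only: drive a schema-tracking Delta stream with Trigger.AvailableNow and
  // restart after each metadata-evolution failure. Paths, the option key string, and
  // the retry bound are assumptions for illustration.
  import org.apache.spark.sql.streaming.{StreamingQueryException, Trigger}

  def runUntilCaughtUp(src: String, ckpt: String, schemaLoc: String, dst: String): Unit = {
    var attemptsLeft = 3
    var done = false
    while (!done && attemptsLeft > 0) {
      attemptsLeft -= 1
      try {
        val q = spark.readStream
          .format("delta")
          .option("schemaTrackingLocation", schemaLoc)
          .load(src)
          .writeStream
          .format("delta")
          .option("checkpointLocation", ckpt)
          .trigger(Trigger.AvailableNow())
          .start(dst)
        q.awaitTermination()
        done = true
      } catch {
        // Each failure may have advanced the schema log, so the next attempt
        // restarts the stream with the evolved schema.
        case _: StreamingQueryException => ()
      }
    }
  }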
+ testStream(read)( + StartThisStream, + AwaitTerminationIgnoreError, + CheckAnswer((0 until 5).map(i => (i.toString, i.toString)): _*), + AssertOnQuery { q => + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last) + // bumped from file action + offset.reservoirVersion == schemaChangeVersion && + offset.index == DeltaSourceOffset.METADATA_CHANGE_INDEX && + // serialized as version 3 because METADATA_CHANGE_INDEX is only available in v3 + offset.json.contains(s""""sourceVersion":3""") + } + ) + assert(getDefaultSchemaLog().getLatestMetadata.exists(_.deltaCommitVersion == 1)) + // This trigger: + // 1. Finds a NEW latest offset that sets the dummy offset index post schema change + // 2. The previous valid batch can be committed + // 3. The commit evolves the schema and exit the stream. + testStream(read)( + StartThisStream, + AwaitTerminationIgnoreError, + CheckAnswer(Nil: _*), + AssertOnQuery { q => + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last) + // still stuck, but the pending schema change is marked as evolved + offset.reservoirVersion == schemaChangeVersion && + offset.index == DeltaSourceOffset.POST_METADATA_CHANGE_INDEX && + // serialized as version 3 because POST_METADATA_CHANGE_INDEX is only available in v3 + offset.json.contains(s""""sourceVersion":3""") + }, + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getLatestMetadata + .exists(_.deltaCommitVersion == schemaChangeVersion)) + + // This trigger: + // 1. GetBatch for the empty batch because it was constructed and now no schema mismatches + testStream(read)( + StartThisStream, + AwaitTermination, + CheckAnswer(Nil: _*) + ) + + // This trigger: + // 1. Find the latest offset till end of data + // 2. Commits the previous empty batch (with no schema change), so no schema evolution + // 3. GetBatch of all data + val v2 = log.update().version + testStream(read)( + StartThisStream, + AwaitTermination, + CheckAnswer((5 until 10).map(i => (i.toString)): _*), + AssertOnQuery { q => + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last) + // bumped by file action, and since it's an non schema change, just clear schema change + offset.reservoirVersion == v2 + 1 && + offset.index == DeltaSourceOffset.BASE_INDEX + } + ) + + // Create a new schema change + addColumn("b") + val v3 = log.update().version + addData(10 until 11) + + // This trigger: + // 1. Finds a new offset ending with the schema change index + // 2. Commits previous batch (no schema change, thus no schema evolution) + // 3. GetBatch of this empty batch + testStream(read)( + StartThisStream, + AwaitTermination, + CheckAnswer(Nil: _*), + AssertOnQuery { q => + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last) + offset.reservoirVersion == v2 + 1 && + offset.index == DeltaSourceOffset.METADATA_CHANGE_INDEX + } + ) + + // This trigger: + // 1. Again, finds an empty batch but now ending at the dummy post schema change index. + // 2. Commits the previous batch, evolve the schema and fail the stream. 
+ testStream(read)( + StartThisStream, + AwaitTerminationIgnoreError, + CheckAnswer(Nil: _*), + AssertOnQuery { q => + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last) + offset.reservoirVersion == v3 && + offset.index == DeltaSourceOffset.POST_METADATA_CHANGE_INDEX + }, + ExpectMetadataEvolutionException + ) + } + } + + test("trigger.AvailableNow should work") { + withStarterTable { implicit log => + dropColumn("b") + val schemaChangeVersion = log.update().version + addData(5 until 10) + + val ckpt = getDefaultCheckpoint.toString + val schemaLoc = getDefaultSchemaLocation.toString + + // Use starting version to ignore initial snapshot + def read: DataFrame = readStream(schemaLocation = Some(schemaLoc), startingVersion = Some(1)) + + // Use trigger available now + val StartThisStream = StartStream(trigger = Trigger.AvailableNow(), checkpointLocation = ckpt) + + // Similar to once trigger, this: + // 1. Detects the schema change right-away from computing latest offset + // 2. Initialize the schema log and exit stream + testStream(read)( + StartThisStream, + AwaitTerminationIgnoreError, + CheckAnswer(Nil: _*), + ExpectMetadataEvolutionExceptionFromInitialization + ) + // Latest schema in schema log has been updated + assert(getDefaultSchemaLog().getLatestMetadata.exists(_.deltaCommitVersion == 1)) + + // Now, this trigger: + // 1. Finds the latest offset RIGHT AT the schema change ending at schema change index + // 2. GetBatch till that offset + // 3. Finds ANOTHER the latest offset ending at the dummy post schema change index + // 4. GetBatch for this empty batch + // 5. Commits the previous batch + // 6. Triggers schema evolution + testStream(read)( + StartThisStream, + AwaitTerminationIgnoreError, + CheckAnswer((0 until 5).map(_.toString).map(i => (i, i)): _*), + AssertOnQuery { q => + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last) + offset.reservoirVersion == schemaChangeVersion && + // schema change marked as evolved + offset.index == DeltaSourceOffset.POST_METADATA_CHANGE_INDEX + }, + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getLatestMetadata + .exists(_.deltaCommitVersion == schemaChangeVersion)) + + // This trigger: + // 1. Finds the next latest offset, which is the end of data + // 2. Commit previous empty batch with no pending schema change + // 3. GetBatch with the remaining data + val latestVersion = log.update().version + testStream(read)( + StartThisStream, + AwaitTermination, + CheckAnswer((5 until 10).map(i => (i.toString)): _*), + AssertOnQuery { q => + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last) + // schema change cleared because it's a non-schema change offset + offset.reservoirVersion == latestVersion + 1 && + offset.index == DeltaSourceOffset.BASE_INDEX + } + ) + + // Create a new schema change + addColumn("b") + val v3 = log.update().version + addData(10 until 11) + + // This trigger: + // 1. Finds the latest offset, again ending at the schema change index + // 2. Commits previous batch + // 3. GetBatch with empty data and schema change ending offset + // 4. Finds another latest offset, ending at the dummy post schema change index + // 5. Commits the empty batch at 3, evolves schema log and restart stream. 
+ testStream(read)( + StartThisStream, + AwaitTerminationIgnoreError, + CheckAnswer(Nil: _*), + AssertOnQuery { q => + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last) + offset.reservoirVersion == v3 && + offset.index == DeltaSourceOffset.POST_METADATA_CHANGE_INDEX + }, + ExpectMetadataEvolutionException + ) + + // Finish the rest + testStream(read)( + StartThisStream, + AwaitTermination, + CheckAnswer((10 until 11).map(_.toString).map(i => (i, i)): _*) + ) + } + } + + testSchemaEvolution("consecutive schema evolutions without schema merging") { implicit log => + withSQLConf( + DeltaSQLConf.DELTA_STREAMING_ENABLE_SCHEMA_TRACKING_MERGE_CONSECUTIVE_CHANGES.key + -> "false") { + val v5 = log.update().version // v5 has an ADD file action with value (4, 4) + renameColumn("b", "c") // v6 + renameColumn("c", "b") // v7 + dropColumn("b") // v9 + addColumn("b") // v10 + + def df: DataFrame = readStream( + schemaLocation = Some(getDefaultSchemaLocation.toString), startingVersion = Some(v5)) + + // The schema log initializes @ v1 with schema + testStream(df)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + AssertOnQuery { q => + // initialization does not generate any offsets + q.availableOffsets.isEmpty + }, + ExpectMetadataEvolutionExceptionFromInitialization + ) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v5) + assert(getDefaultSchemaLog().getLatestMetadata.get.dataSchema.fieldNames + .sameElements(Array("a", "b"))) + // Encounter next schema change + testStream(df)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + CheckAnswer(Seq(4).map(_.toString).map(i => (i, i)): _*), + AssertOnQuery { q => + q.availableOffsets.size == 1 && { + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.head) + offset.reservoirVersion == v5 + 1 && offset.index == indexWhenSchemaLogIsUpdated + } + }, + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v5 + 1) + assert(getDefaultSchemaLog().getLatestMetadata.get.dataSchema.fieldNames + .sameElements(Array("a", "c"))) + // Encounter next schema change again + testStream(df)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + AssertOnQuery { q => + // size is 1 because commit removes previous offset + q.availableOffsets.size == 1 && { + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.head) + offset.reservoirVersion == v5 + 2 && offset.index == indexWhenSchemaLogIsUpdated + } + }, + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v5 + 2) + assert(getDefaultSchemaLog().getLatestMetadata.get.dataSchema.fieldNames + .sameElements(Array("a", "b"))) + // Encounter next schema change + testStream(df)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + AssertOnQuery { q => + q.availableOffsets.size == 1 && { + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.head) + offset.reservoirVersion == v5 + 3 && offset.index == indexWhenSchemaLogIsUpdated + } + }, + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v5 + 3) + assert(getDefaultSchemaLog().getLatestMetadata.get.dataSchema.fieldNames + .sameElements(Array("a"))) + // Encounter next schema change again + 
testStream(df)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + AssertOnQuery { q => + q.availableOffsets.size == 1 && { + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.head) + offset.reservoirVersion == v5 + 4 && offset.index == indexWhenSchemaLogIsUpdated + } + }, + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v5 + 4) + assert(getDefaultSchemaLog().getLatestMetadata.get.dataSchema.fieldNames + .sameElements(Array("a", "b"))) + } + } + + testSchemaEvolution("consecutive schema evolutions") { implicit log => + // By default we have consecutive schema merging turned on + val v5 = log.update().version // v5 has an ADD file action with value (4, 4) + renameColumn("b", "c") // v6 + renameColumn("c", "b") // v7 + dropColumn("b") // v9 + addColumn("b") // v10 + val v10 = log.update().version + // Write some more data post the consecutive schema changes + addData(5 until 6) + + def df: DataFrame = readStream( + schemaLocation = Some(getDefaultSchemaLocation.toString), startingVersion = Some(v5)) + + // The schema log initializes @ v1 with schema + testStream(df)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + AssertOnQuery { q => + // initialization does not generate any offsets + q.availableOffsets.isEmpty + }, + ExpectMetadataEvolutionExceptionFromInitialization + ) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v5) + assert(getDefaultSchemaLog().getLatestMetadata.get.dataSchema.fieldNames + .sameElements(Array("a", "b"))) + // Encounter next schema change + // This still fails schema evolution exception and won't scan ahead + testStream(df)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + CheckAnswer(Seq(4).map(_.toString).map(i => (i, i)): _*), + AssertOnQuery { q => + q.availableOffsets.size == 1 && { + val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.head) + offset.reservoirVersion == v5 + 1 && offset.index == indexWhenSchemaLogIsUpdated + } + }, + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v5 + 1) + assert(getDefaultSchemaLog().getLatestMetadata.get.dataSchema.fieldNames + .sameElements(Array("a", "c"))) + + // Now the next restart would scan over the consecutive schema changes and use the last one + // to initialize the schema again. + val latestDf = df + assert(latestDf.schema.fieldNames.sameElements(Array("a", "b"))) + // The analysis phase should've already updated schema log + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v10) + // Processing should ignore the intermediary schema changes and process the data using the + // merged schema. 
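+    // [Editor's aside: illustrative, not part of the original test.] The "scan over consecutive
+    // schema changes" behaviour relied on here is the default, i.e. the flag the previous test
+    // explicitly disabled. To opt out in a session one would flip that same conf, e.g.:
+    //
+    //   spark.conf.set(
+    //     DeltaSQLConf.DELTA_STREAMING_ENABLE_SCHEMA_TRACKING_MERGE_CONSECUTIVE_CHANGES.key,
+    //     "false")
+    //
+    // in which case each intermediate schema change above would require its own stream restart,
+    // as the previous test demonstrates.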
+    testStream(latestDf)(
+      StartStream(checkpointLocation = getDefaultCheckpoint.toString),
+      ProcessAllAvailable(),
+      CheckAnswer((5 until 6).map(i => (i.toString, i.toString)): _*)
+    )
+  }
+
+  testSchemaEvolution("upgrade and downgrade") { implicit log =>
+    val ckpt = getDefaultCheckpoint.toString
+    val df = readStream(startingVersion = Some(1))
+    val v0 = log.update().version
+    // Initialize a stream
+    testStream(df)(
+      StartStream(checkpointLocation = ckpt),
+      ProcessAllAvailable(),
+      CheckAnswer((0 until 5).map(_.toString).map(i => (i, i)): _*),
+      AssertOnQuery { q =>
+        assert(q.availableOffsets.size == 1)
+        val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last)
+        offset.reservoirVersion == v0 + 1 &&
+          offset.index == DeltaSourceOffset.BASE_INDEX
+      }
+    )
+
+    addData(Seq(5))
+    val v1 = log.update().version
+    dropColumn("b")
+    val v2 = log.update().version
+
+    // Restarting with a schema location should initialize the schema log
+    val df2 = readStream(schemaLocation = Some(getDefaultSchemaLocation.toString))
+    testStream(df2)(
+      StartStream(checkpointLocation = getDefaultCheckpoint.toString),
+      ProcessAllAvailableIgnoreError,
+      AssertOnQuery { q =>
+        // initialization does not generate any more offsets
+        q.availableOffsets.size <= 1
+      },
+      ExpectMetadataEvolutionExceptionFromInitialization
+    )
+    // The schema should be valid until v1 (the batch end version).
+    // It is v1 - 1 because the latest offset sits on the BASE_INDEX of v1, which does not contain
+    // any data, so there's no need to consider that for schema change initialization.
+    assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v1 - 1)
+
+    // Restarting again should be able to use the new offset version
+    val df3 = readStream(schemaLocation = Some(getDefaultSchemaLocation.toString))
+    val logAppenderUpgrade = new LogAppender("Should convert legacy offset", maxEvents = 1e6.toInt)
+    logAppenderUpgrade.setThreshold(Level.DEBUG)
+
+    withLogAppender(logAppenderUpgrade, level = Some(Level.DEBUG)) {
+      testStream(df3)(
+        StartStream(checkpointLocation = getDefaultCheckpoint.toString),
+        ProcessAllAvailableIgnoreError,
+        CheckAnswer(("5", "5")),
+        AssertOnQuery { q =>
+          val offset = DeltaSourceOffset(log.tableId, q.availableOffsets.values.last)
+          offset.reservoirVersion == v2 &&
+            offset.index == indexWhenSchemaLogIsUpdated
+        },
+        ExpectMetadataEvolutionException
+      )
+    }
+    assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v2)
+    // Should've upgraded the legacy offset
+    val target = logAppenderUpgrade.loggingEvents.find(
+      _.getMessage.toString.contains("upgrading offset "))
+    assert(target.isDefined)
+
+    // Add more data
+    addData(Seq(6))
+
+    // Suppose the user no longer wants to use schema tracking and wants to downgrade to using
+    // the latest schema again; they should be able to do that.
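+    // [Editor's aside: illustrative, not part of the original test.] "Downgrading" just means
+    // reading without a schema tracking location again. Because the table now has a
+    // column-mapping-incompatible change in its history (the DROP above), the plain read is only
+    // allowed with the two unsafe escape-hatch confs used below, roughly:
+    //
+    //   spark.conf.set(DeltaSQLConf
+    //     .DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_SCHEMA_CHANGES_DURING_STREAM_START.key,
+    //     "true")
+    //   spark.conf.set(DeltaSQLConf
+    //     .DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_COLUMN_MAPPING_SCHEMA_CHANGES.key, "true")
+    //   // and then a reader with no schema tracking location:
+    //   val plainDf = spark.readStream.format("delta").load(tablePath) // tablePath: hypothetical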
+    val df4 = readStream() // without schema location
+    val logAppenderDowngrade = new LogAppender("Should convert new offset", maxEvents = 1e6.toInt)
+    logAppenderDowngrade.setThreshold(Level.DEBUG)
+
+    withSQLConf(
+      DeltaSQLConf.DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_SCHEMA_CHANGES_DURING_STREAM_START
+        .key -> "true",
+      DeltaSQLConf.DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_COLUMN_MAPPING_SCHEMA_CHANGES
+        .key -> "true") {
+      withLogAppender(logAppenderDowngrade, level = Some(Level.DEBUG)) {
+        testStream(df4)(
+          StartStream(checkpointLocation = getDefaultCheckpoint.toString),
+          ProcessAllAvailable(),
+          // The next read just falls back to using the latest schema
+          CheckAnswer(("6"))
+        )
+      }
+    }
+  }
+
+  testSchemaEvolution("multiple sources with schema evolution") { implicit log =>
+    val v5 = log.update().version // v5 has an ADD file action with value (4, 4)
+    renameColumn("b", "c")
+    addData(5 until 10)
+
+    val schemaLog1Location = new Path(getDefaultCheckpoint, "_schema_log1").toString
+    val schemaLog2Location = new Path(getDefaultCheckpoint, "_schema_log2").toString
+
+    // Union two individual sources, each with its own schema log.
+    // Each source should return an identical batch, so the unioned output simply doubles every
+    // row; we only use the union to create a multi-source situation.
+    def df: DataFrame =
+      readStream(schemaLocation = Some(schemaLog1Location), startingVersion = Some(v5))
+        .unionByName(
+          readStream(schemaLocation = Some(schemaLog2Location), startingVersion = Some(v5)),
+          allowMissingColumns = true)
+
+    // Accessors for both schema logs
+    def schemaLog1: DeltaSourceMetadataTrackingLog = DeltaSourceMetadataTrackingLog.create(
+      spark, schemaLog1Location, log.update())
+    def schemaLog2: DeltaSourceMetadataTrackingLog = DeltaSourceMetadataTrackingLog.create(
+      spark, schemaLog2Location, log.update())
+
+    // One schema log initializes @ v5
+    testStream(df)(
+      StartStream(checkpointLocation = getDefaultCheckpoint.toString),
+      ProcessAllAvailableIgnoreError,
+      AssertOnQuery { q =>
+        // initialization does not generate any offsets
+        q.availableOffsets.isEmpty
+      },
+      ExpectMetadataEvolutionExceptionFromInitialization
+    )
+
+    // But it takes another restart for the other Delta source
+    testStream(df)(
+      StartStream(checkpointLocation = getDefaultCheckpoint.toString),
+      ProcessAllAvailableIgnoreError,
+      AssertOnQuery { q =>
+        // initialization does not generate any offsets
+        q.availableOffsets.isEmpty
+      },
+      ExpectMetadataEvolutionExceptionFromInitialization
+    )
+
+    // Both schema logs should be initialized
+    assert(schemaLog1.getCurrentTrackedMetadata.map(_.deltaCommitVersion) ==
+      schemaLog2.getCurrentTrackedMetadata.map(_.deltaCommitVersion))
+
+    // One of the sources will commit and fail
+    testStream(df)(
+      StartStream(checkpointLocation = getDefaultCheckpoint.toString),
+      ProcessAllAvailableIgnoreError,
+      // The data prior to the schema change is served
+      // Two rows in schema [a, b]
+      CheckAnswer(("4", "4"), ("4", "4")),
+      ExpectMetadataEvolutionException
+    )
+
+    // A restart should fail the other commit
+    testStream(df)(
+      StartStream(checkpointLocation = getDefaultCheckpoint.toString),
+      ProcessAllAvailableIgnoreError,
+      CheckAnswer(Nil: _*),
+      ExpectMetadataEvolutionException
+    )
+
+    assert(schemaLog1.getCurrentTrackedMetadata.map(_.deltaCommitVersion) ==
+      schemaLog2.getCurrentTrackedMetadata.map(_.deltaCommitVersion))
+
+    // Restarting the stream should proceed to load the rest of the data
+    testStream(df)(
+      StartStream(checkpointLocation =
getDefaultCheckpoint.toString), + ProcessAllAvailable(), + // Unioned data is served + // 10 rows in schema [a, c] + CheckAnswer((5 until 10).map(_.toString).flatMap(i => Seq((i, i), (i, i))): _*) + ) + + // Attempt to use the wrong schema log for each source will be detected + val wrongDf = readStream(schemaLocation = + // instead of using schemaLog1Location + Some(schemaLog2Location), + startingVersion = Some(v5)) + .unionByName( + readStream(schemaLocation = + // instead of using schemaLog2Location + Some(schemaLog1Location), + startingVersion = Some(v5)), allowMissingColumns = true) + + testStream(wrongDf)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + ExpectFailure[IllegalArgumentException](t => + assert(t.getMessage.contains("The Delta source metadata path used for execution"))) + ) + } + + testSchemaEvolution("schema evolution with Delta sink") { implicit log => + val v5 = log.update().version // v5 has an ADD file action with value (4) + renameColumn("b", "c") + val renameVersion1 = log.update().version + addData(5 until 10) + renameColumn("c", "b") + val renameVersion2 = log.update().version + addData(10 until 15) + dropColumn("b") + val dropVersion = log.update().version + addData(15 until 20) + addColumn("b") + val addVersion = log.update().version + addData(20 until 25) + + withTempDir { sink => + def writeStream(df: DataFrame): Unit = { + val q = df.writeStream + .format("delta") + .option("checkpointLocation", getDefaultCheckpoint.toString) + .option("mergeSchema", "true") // for automatically adding columns + .start(sink.getCanonicalPath) + q.processAllAvailable() + q.stop() + } + + def df: DataFrame = readStream( + schemaLocation = Some(getDefaultSchemaLocation.toString), startingVersion = Some(v5)) + def readSink: DataFrame = spark.read.format("delta").load(sink.getCanonicalPath) + + val e1 = ExceptionUtils.getRootCause { + intercept[StreamingQueryException] { + writeStream(df) + } + } + ExpectMetadataEvolutionExceptionFromInitialization.assertFailure(e1) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == v5) + + val e2 = ExceptionUtils.getRootCause { + intercept[StreamingQueryException] { + writeStream(df) + } + } + assert(readSink.schema.fieldNames sameElements Array("a", "b")) + checkAnswer(readSink, Seq(4).map(_.toString).map(i => Row(i, i))) + ExpectMetadataEvolutionException.assertFailure(e2) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == renameVersion1) + + val e3 = ExceptionUtils.getRootCause { + intercept[StreamingQueryException] { + writeStream(df) + } + } + // c added as a new column + assert(readSink.schema.fieldNames sameElements Array("a", "b", "c")) + checkAnswer(readSink, Seq(4).map(_.toString).map(i => Row(i, i, null)) ++ + (5 until 10).map(_.toString).map(i => Row(i, null, i))) + ExpectMetadataEvolutionException.assertFailure(e3) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == renameVersion2) + + val e4 = ExceptionUtils.getRootCause { + intercept[StreamingQueryException] { + writeStream(df) + } + } + // c was renamed to b, new data now writes to b + assert(readSink.schema.fieldNames sameElements Array("a", "b", "c")) + checkAnswer(readSink, Seq(4).map(_.toString).map(i => Row(i, i, null)) ++ + (5 until 10).map(_.toString).map(i => Row(i, null, i)) ++ + (10 until 15).map(_.toString).map(i => Row(i, i, null))) + ExpectMetadataEvolutionException.assertFailure(e4) + 
assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == dropVersion) + + val e5 = ExceptionUtils.getRootCause { + intercept[StreamingQueryException] { + writeStream(df) + } + } + // b was dropped, but sink remains the same + assert(readSink.schema.fieldNames sameElements Array("a", "b", "c")) + checkAnswer(readSink, Seq(4).map(_.toString).map(i => Row(i, i, null)) ++ + (5 until 10).map(_.toString).map(i => Row(i, null, i)) ++ + (10 until 15).map(_.toString).map(i => Row(i, i, null)) ++ + (15 until 20).map(_.toString).map(i => Row(i, null, null))) + ExpectMetadataEvolutionException.assertFailure(e5) + assert(getDefaultSchemaLog().getLatestMetadata.get.deltaCommitVersion == addVersion) + + // Finish the stream without errors + writeStream(df) + // b was added back, sink remains the same + assert(readSink.schema.fieldNames sameElements Array("a", "b", "c")) + checkAnswer(readSink, Seq(4).map(_.toString).map(i => Row(i, i, null)) ++ + (5 until 10).map(_.toString).map(i => Row(i, null, i)) ++ + (10 until 15).map(_.toString).map(i => Row(i, i, null)) ++ + (15 until 20).map(_.toString).map(i => Row(i, null, null)) ++ + (20 until 25).map(_.toString).map(i => Row(i, i, null))) + } + } + + testSchemaEvolution("latestOffset should not progress before schema evolved") { implicit log => + val s0 = log.update() + // Change schema + renameColumn("b", "c") + val v0 = log.update().version + addData(Seq(5)) + val v1 = log.update().version + + // Manually construct a Delta source since it's hard to test multiple (2+) latestOffset() calls + // with the current streaming engine without incurring the schema evolution failure. + def getSource: DeltaSource = DeltaSource( + spark, log, + new DeltaOptions(Map("startingVersion" -> "0"), spark.sessionState.conf), + log.update(), + metadataPath = "", + Some(getDefaultSchemaLog())) + + def getLatestOffset(source: DeltaSource, start: Option[Offset] = None): DeltaSourceOffset = + DeltaSourceOffset(log.tableId, + source.latestOffset(start.orNull, source.getDefaultReadLimit)) + + // Initialize the schema log to skip initialization failure + getDefaultSchemaLog().writeNewMetadata( + PersistedMetadata( + log.tableId, + 0L, + s0.metadata, + s0.protocol, + sourceMetadataPath = "" + ) + ) + + val source1 = getSource + + // 1st call, land at INDEX_SCHEMA_CHANGE + val ofs1 = getLatestOffset(source1) + assert(ofs1.index == DeltaSourceOffset.METADATA_CHANGE_INDEX) + source1.getBatch(startOffsetOption = None, ofs1) + // 2nd call, land at INDEX_POST_SCHEMA_CHANGE + val ofs2 = getLatestOffset(source1, Some(ofs1)) + assert(ofs2.index == DeltaSourceOffset.POST_METADATA_CHANGE_INDEX) + source1.getBatch(Some(ofs1), ofs2) + // 3rd call, still land at INDEX_POST_SCHEMA_CHANGE, because schema evolution has not happened + val ofs3 = getLatestOffset(source1, Some(ofs2)) + assert(ofs3.index == DeltaSourceOffset.POST_METADATA_CHANGE_INDEX) + // Commit and restart + val e = intercept[DeltaRuntimeException] { + source1.commit(ofs2) + } + ExpectMetadataEvolutionException.assertFailure(e) + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.get.deltaCommitVersion == v0) + + val source2 = getSource + // restore previousOffset + source2.getBatch(Some(ofs3), ofs3) + // 4th call, should move on to latest version + 1 (bumped by file action) + val ofs4 = getLatestOffset(source2, Some(ofs3)) + assert(ofs4.index == DeltaSourceOffset.BASE_INDEX && + ofs4.reservoirVersion == v1 + 1) + } + + protected def expectSqlConfException(opType: String, ver: Long, checkpointHash: Int) = { + 
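+    // [Editor's note: illustrative, not part of the original helper.] The error verified here
+    // instructs the user to set a per-checkpoint SQL conf before the stream may continue past a
+    // rename or drop. Based on the keys built further down in this suite, unblocking looks
+    // roughly like this (DeltaSQLConf.SQL_CONF_PREFIX is assumed to be the usual
+    // `spark.databricks.delta` prefix):
+    //
+    //   spark.conf.set(
+    //     s"${DeltaSQLConf.SQL_CONF_PREFIX}.streaming.allowSourceColumnRenameAndDrop" +
+    //       s".ckpt_$checkpointHash",
+    //     "always") // or the specific schema change version instead of "always"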
ExpectFailure[DeltaRuntimeException] { e => + val se = e.asInstanceOf[DeltaRuntimeException] + assert { + se.getErrorClass == "DELTA_STREAMING_CANNOT_CONTINUE_PROCESSING_POST_SCHEMA_EVOLUTION" && + se.messageParameters(0) == opType && se.messageParameters(2) == ver.toString && + se.messageParameters.exists(_.contains(checkpointHash.toString)) + } + } + } + + /** + * Initialize a simple streaming DF for a simple table with just one (0, 0) entry for schema + * We also prepare an initialized schema log to skip the initialization phase. + */ + protected def withSimpleStreamingDf(f: (() => DataFrame, DeltaLog) => Unit): Unit = { + withTempDir { dir => + val tablePath = dir.getCanonicalPath + Seq(("0", "0")).toDF("a", "b") + .write.mode("append").format("delta").save(tablePath) + implicit val log = DeltaLog.forTable(spark, dir.getCanonicalPath) + val s0 = log.update() + val schemaLog = getDefaultSchemaLog() + schemaLog.writeNewMetadata( + PersistedMetadata(log.tableId, s0.version, s0.metadata, s0.protocol, + sourceMetadataPath = "") + ) + + def read(): DataFrame = + readStream( + Some(getDefaultSchemaLocation.toString), + startingVersion = Some(s0.version)) + + // Initialize checkpoint + withSQLConf( + DeltaSQLConf.DELTA_STREAMING_SCHEMA_TRACKING_METADATA_PATH_CHECK_ENABLED.key -> "false") { + testStream(read())( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + CheckAnswer(("0", "0")), + StopStream + ) + f(read, log) + } + } + } + + testWithoutAllowStreamRestart("unblock with sql conf") { + def testStreamFlow( + changeSchema: DeltaLog => Unit, + schemaChangeType: String, + getConfKV: (Int, Long) => (String, String)): Unit = { + withSimpleStreamingDf { (readDf, log) => + val ckptHash = (getDefaultCheckpoint(log).toString + "/sources/0").hashCode + changeSchema(log) + val v1 = log.update().version + addData(Seq(1))(log) + // Encounter schema evolution exception + testStream(readDf())( + StartStream(checkpointLocation = getDefaultCheckpoint(log).toString), + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + ExpectMetadataEvolutionException + ) + // Restart would fail due to SQL conf validation + testStream(readDf())( + StartStream(checkpointLocation = getDefaultCheckpoint(log).toString), + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + expectSqlConfException(schemaChangeType, v1, ckptHash) + ) + // Another restart still fails + testStream(readDf())( + StartStream(checkpointLocation = getDefaultCheckpoint(log).toString), + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + expectSqlConfException(schemaChangeType, v1, ckptHash) + ) + // With SQL Conf set we can move on + val (k, v) = getConfKV(ckptHash, v1) + withSQLConf(k -> v) { + testStream(readDf())( + StartStream(checkpointLocation = getDefaultCheckpoint(log).toString), + ProcessAllAvailable() + ) + } + } + } + + // Test drop column + Seq("allowSourceColumnRenameAndDrop", "allowSourceColumnDrop").foreach { allow => + Seq( + ( + (log: DeltaLog) => { + dropColumn("a")(log) + // Revert the drop to test consecutive schema changes won't affect sql conf validation + // the new column will show up with different physical name so it can trigger the + // DROP COLUMN detection logic + addColumn("a")(log) + }, + (ckptHash: Int, _: Long) => + (s"${DeltaSQLConf.SQL_CONF_PREFIX}.streaming.$allow.ckpt_$ckptHash", "always") + ), + ( + (log: DeltaLog) => { + dropColumn("a")(log) + // Ditto + addColumn("a")(log) + }, + (ckptHash: Int, ver: Long) => + 
(s"${DeltaSQLConf.SQL_CONF_PREFIX}.streaming.$allow.ckpt_$ckptHash", ver.toString) + ) + ).foreach { case (changeSchema, getConfKV) => + testStreamFlow(changeSchema, NonAdditiveSchemaChangeTypes.SCHEMA_CHANGE_DROP, getConfKV) + } + } + + // Test rename column + Seq("allowSourceColumnRenameAndDrop", "allowSourceColumnRename").foreach { allow => + Seq( + ( + (log: DeltaLog) => { + renameColumn("b", "c")(log) + }, + (ckptHash: Int, _: Long) => + (s"${DeltaSQLConf.SQL_CONF_PREFIX}.streaming.$allow.ckpt_$ckptHash", "always") + ), + ( + (log: DeltaLog) => { + renameColumn("b", "c")(log) + }, + (ckptHash: Int, ver: Long) => + (s"${DeltaSQLConf.SQL_CONF_PREFIX}.streaming.$allow.ckpt_$ckptHash", ver.toString) + ) + ).foreach { case (changeSchema, getConfKV) => + testStreamFlow(changeSchema, NonAdditiveSchemaChangeTypes.SCHEMA_CHANGE_RENAME, getConfKV) + } + } + } + + testSchemaEvolution( + "schema tracking interacting with unsafe escape flag") { implicit log => + renameColumn("b", "c") + // Even when schema location is provided, it won't be initialized because the unsafe + // flag is turned on. + val df = readStream( + schemaLocation = Some(getDefaultSchemaLocation.toString), startingVersion = Some(1L)) + withSQLConf( + DeltaSQLConf.DELTA_STREAMING_UNSAFE_READ_ON_INCOMPATIBLE_COLUMN_MAPPING_SCHEMA_CHANGES.key + -> "true") { + testStream(df)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + CheckAnswer((0 until 5).map(_.toString).map(i => (i, i)): _*) + ) + } + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.isEmpty) + } + + testSchemaEvolution( + "streaming with a column mapping upgrade", columnMapping = false) { implicit log => + upgradeToNameMode + val v0 = log.update().version + renameColumn("b", "c") + val v1 = log.update().version + addData(5 until 10) + + // Start schema tracking from prior to upgrade + // Initialize schema tracking log + def readDf(): DataFrame = + readStream( + schemaLocation = Some(getDefaultSchemaLocation.toString), + startingVersion = Some(1)) + + testStream(readDf())( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + ExpectMetadataEvolutionExceptionFromInitialization + ) + assert { + val schemaEntry = getDefaultSchemaLog().getCurrentTrackedMetadata.get + schemaEntry.deltaCommitVersion == 1 && + // no physical name entry + !DeltaColumnMapping.hasPhysicalName(schemaEntry.dataSchema.head) + } + + testStream(readDf())( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + CheckAnswer((0 until 5).map(_.toString).map(i => (i, i)): _*), + ExpectMetadataEvolutionException + ) + assert { + val schemaEntry = getDefaultSchemaLog().getCurrentTrackedMetadata.get + // stopped at the upgrade commit + schemaEntry.deltaCommitVersion == v0 && + // now with physical name entry + DeltaColumnMapping.hasPhysicalName(schemaEntry.dataSchema.head) + } + + // Note that since we have schema merging, we won't need to fail again at the rename column + // schema change, the rest of the data can be served altogether. 
+ testStream(readDf())( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + CheckAnswer((5 until 10).map(_.toString).map(i => (i, i)): _*) + ) + + assert { + val schemaEntry = getDefaultSchemaLog().getCurrentTrackedMetadata.get + // schema log updated implicitly + schemaEntry.deltaCommitVersion == v1 && + schemaEntry.dataSchema.fieldNames.sameElements(Array("a", "c")) + } + + } + + test("backward-compat: latest version can read back older JSON") { + val serialized = JsonUtils.toJson { + OldPersistedSchema( + tableId = "test", + deltaCommitVersion = 1, + StructType.fromDDL("a INT").json, + StructType.fromDDL("a INT").json, + sourceMetadataPath = "" + ) + } + + val schemaFromJson = PersistedMetadata.fromJson(serialized) + assert(schemaFromJson == PersistedMetadata( + tableId = "test", + deltaCommitVersion = 1, + StructType.fromDDL("a INT").json, + StructType.fromDDL("a INT").json, + sourceMetadataPath = "", + tableConfigurations = None, + protocolJson = None, + previousMetadataSeqNum = None + )) + } + + test("forward-compat: older version can read back newer JSON") { + val newSchema = PersistedMetadata( + tableId = "test", + deltaCommitVersion = 1, + StructType.fromDDL("a INT").json, + StructType.fromDDL("a INT").json, + sourceMetadataPath = "/path", + tableConfigurations = Some(Map("a" -> "b")), + protocolJson = Some(Protocol(1, 2).json), + previousMetadataSeqNum = Some(1L) + ) + + assert { + JsonUtils.fromJson[OldPersistedSchema](JsonUtils.toJson(newSchema)) == OldPersistedSchema( + tableId = "test", + deltaCommitVersion = 1, + StructType.fromDDL("a INT").json, + StructType.fromDDL("a INT").json, + sourceMetadataPath = "/path" + ) + } + } + + testSchemaEvolution("partition evolution") { implicit log => + // Same schema but different partition + overwriteSchema(log.update().schema, partitionColumns = Seq("a")) + val v0 = log.update().version + addData(5 until 10) + overwriteSchema(log.update().schema, partitionColumns = Seq("b")) + val v1 = log.update().version + def readDf: DataFrame = + readStream(schemaLocation = Some(getDefaultSchemaLocation.toString), + startingVersion = Some(1), + // ignoreDeletes because overwriteSchema would generate RemoveFiles. + ignoreDeletes = Some(true)) + + // Init schema log + testStream(readDf)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + AwaitTerminationIgnoreError, + CheckAnswer(Nil: _*), + ExpectMetadataEvolutionExceptionFromInitialization + ) + // Latest schema in schema log has been updated + assert(getDefaultSchemaLog().getLatestMetadata.exists(_.deltaCommitVersion == 1)) + // Process the first batch before overwrite + testStream(readDf)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + CheckAnswer((0 until 5).map(_.toString).map(i => (i, i)): _*), + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getLatestMetadata.exists(_.deltaCommitVersion == v0)) + + // Process until the next overwrite + testStream(readDf)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + CheckAnswer( + // TODO: since we did an overwrite, the previous RemoveFiles are also captured, but they are + // using the old physical schema, we cannot read them back correctly. This is a corner case + // with schema overwrite + CDC, although technically CDC should not worry about overwrite + // because that means the downstream table needs to be truncated after applying CDC. 
+ // Note that since we support reuse physical name across overwrite, the value of partition + // can still be read. + (if (isCdcTest) (-1 until 5).map(_.toString).map(i => (null, i)) else Nil) ++ + (5 until 10).map(_.toString).map(i => (i, i)): _*), + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getLatestMetadata.exists(_.deltaCommitVersion == v1)) + } + + testSchemaEvolution("schema log replace current", columnMapping = false) { implicit log => + withSQLConf( + DeltaSQLConf.DELTA_STREAMING_SCHEMA_TRACKING_METADATA_PATH_CHECK_ENABLED.key -> "false") { + // Schema log's schema is respected + val schemaLog = getDefaultSchemaLog() + val s0 = PersistedMetadata(log.tableId, 0, + makeMetadata( + new StructType().add("a", StringType, true) + .add("b", StringType, true) + .add("c", StringType, true), + partitionSchema = new StructType() + ), + log.update().protocol, + sourceMetadataPath = "" + ) + // The `replaceCurrent` is noop because there is no previous schema. + schemaLog.writeNewMetadata(s0, replaceCurrent = true) + assert(schemaLog.getCurrentTrackedMetadata.contains(s0)) + assert(schemaLog.getPreviousTrackedMetadata.isEmpty) + + val s1 = s0.copy(deltaCommitVersion = 1L) + schemaLog.writeNewMetadata(s1) + assert(schemaLog.getCurrentTrackedMetadata.contains(s1)) + assert(schemaLog.getPreviousTrackedMetadata.contains(s0)) + + val s2 = s1.copy(deltaCommitVersion = 2L) + schemaLog.writeNewMetadata(s2, replaceCurrent = true) + assert(schemaLog.getCurrentTrackedMetadata.contains( + s2.copy(previousMetadataSeqNum = Some(0L)))) + assert(schemaLog.getPreviousTrackedMetadata.contains(s0)) + + val s3 = s2.copy(deltaCommitVersion = 3L) + schemaLog.writeNewMetadata(s3, replaceCurrent = true) + assert(schemaLog.getCurrentTrackedMetadata.contains( + s3.copy(previousMetadataSeqNum = Some(0L)))) + assert(schemaLog.getPreviousTrackedMetadata.contains(s0)) + + val s4 = s3.copy(deltaCommitVersion = 4L) + schemaLog.writeNewMetadata(s4) + assert(schemaLog.getCurrentTrackedMetadata.contains(s4)) + assert(schemaLog.getPreviousTrackedMetadata.contains( + s3.copy(previousMetadataSeqNum = Some(0L)))) + + val s5 = s4.copy(deltaCommitVersion = 5L) + schemaLog.writeNewMetadata(s5, replaceCurrent = true) + assert(schemaLog.getCurrentTrackedMetadata.contains( + s5.copy(previousMetadataSeqNum = Some(3L)))) + assert(schemaLog.getPreviousTrackedMetadata.contains( + s3.copy(previousMetadataSeqNum = Some(0L)))) + } + } +} + +// Needs to be top-level for serialization to work. 
+case class OldPersistedSchema( + tableId: String, + deltaCommitVersion: Long, + dataSchemaJson: String, + partitionSchemaJson: String, + sourceMetadataPath: String +) + +class DeltaSourceSchemaEvolutionNameColumnMappingSuite + extends StreamingSchemaEvolutionSuiteBase + with DeltaColumnMappingEnableNameMode { + override def isCdcTest: Boolean = false +} + +class DeltaSourceSchemaEvolutionIdColumnMappingSuite + extends StreamingSchemaEvolutionSuiteBase + with DeltaColumnMappingEnableIdMode { + override def isCdcTest: Boolean = false +} + +trait CDCStreamingSchemaEvolutionSuiteBase extends StreamingSchemaEvolutionSuiteBase { + override def isCdcTest: Boolean = true + + import testImplicits._ + + // This test will generate AddCDCFiles + test("CDC streaming with schema evolution") { + withTempDir { dir => + spark.range(10).toDF("id").write.format("delta").save(dir.getCanonicalPath) + implicit val log: DeltaLog = DeltaLog.forTable(spark, dir.getCanonicalPath) + + { + withTable("merge_source") { + spark.range(10).filter(_ % 2 == 0) + .toDF("id").withColumn("age", lit("string")) + .createOrReplaceTempView("data") + + spark.sql(s"CREATE TABLE merge_source USING delta AS SELECT * FROM data") + + // Use merge to trigger schema evolution as well (add column age) + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> "true") { + spark.sql( + s""" + |MERGE INTO delta.`${log.dataPath}` t + |USING merge_source s + |ON t.id = s.id + |WHEN MATCHED + | THEN UPDATE SET * + |WHEN NOT MATCHED + | THEN INSERT * + |""".stripMargin) + } + } + } + val v1 = log.update().version + + def readDf: DataFrame = + readStream(schemaLocation = Some(getDefaultSchemaLocation.toString), + startingVersion = Some(0)) + + // Init schema log + testStream(readDf)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + ExpectMetadataEvolutionExceptionFromInitialization + ) + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.get.deltaCommitVersion == 0L) + + // Streaming CDC until the MERGE invoked schema change + testStream(readDf)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + // The first 10 inserts + CheckAnswer((0L until 10L): _*), + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.get.deltaCommitVersion == v1 && + getDefaultSchemaLog().getCurrentTrackedMetadata.get.dataSchema.fieldNames.sameElements( + Array("id", "age"))) + + // Streaming CDC of the MERGE + testStream(readDf)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + CheckAnswer( + // odd numbers have UPDATE actions (preimage and postimage) + (0L until 10L).filter(_ % 2 == 0).flatMap(i => Seq((i, null), (i, "string"))): _* + ) + ) + } + } + + testSchemaEvolution( + "protocol and configuration evolution", columnMapping = false) { implicit log => + // Updates table properties / protocol + spark.sql( + s""" + |ALTER TABLE delta.`${log.dataPath}` + |SET TBLPROPERTIES ( + | 'delta.minReaderVersion' = 2, + | 'delta.minWriterVersion' = 5 + |) + |""".stripMargin) + val v1 = log.update().version + + addData(5 until 10) + // Update just delta table property + spark.sql( + s""" + |ALTER TABLE delta.`${log.dataPath}` + |SET TBLPROPERTIES ( + | 'delta.isolationLevel' = 'SERIALIZABLE' + |) + |""".stripMargin + ) + val v2 = log.update().version + + addData(10 until 13) + // Update non-delta property won't need stream stop + spark.sql( + s""" + |ALTER TABLE 
delta.`${log.dataPath}` + |SET TBLPROPERTIES ( + | 'hello' = 'its me' + |) + |""".stripMargin + ) + addData(13 until 15) + + def readDf: DataFrame = + readStream(schemaLocation = Some(getDefaultSchemaLocation.toString), + startingVersion = Some(1L)) + + // Init schema log + testStream(readDf)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + CheckAnswer(Nil: _*), + ExpectMetadataEvolutionExceptionFromInitialization + ) + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.get.deltaCommitVersion == 1L) + + // Reaching the first protocol change + testStream(readDf)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + CheckAnswer((0 until 5).map(_.toString).map(i => (i, i)): _*), + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.get.deltaCommitVersion == v1) + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.get.protocol.contains(Protocol(2, 5))) + + // Reaching the second property change + testStream(readDf)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailableIgnoreError, + CheckAnswer((5 until 10).map(_.toString).map(i => (i, i)): _*), + ExpectMetadataEvolutionException + ) + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.get.deltaCommitVersion == v2) + assert(getDefaultSchemaLog().getCurrentTrackedMetadata.get.tableConfigurations + .get.contains("delta.isolationLevel")) + + // The final property change won't stop stream because it's non delta + testStream(readDf)( + StartStream(checkpointLocation = getDefaultCheckpoint.toString), + ProcessAllAvailable(), + CheckAnswer((10 until 15).map(_.toString).map(i => (i, i)): _*) + ) + } +} + +class DeltaSourceSchemaEvolutionCDCNameColumnMappingSuite + extends CDCStreamingSchemaEvolutionSuiteBase + with DeltaColumnMappingEnableNameMode { + override def isCdcTest: Boolean = true +} + +class DeltaSourceSchemaEvolutionCDCIdColumnMappingSuite + extends CDCStreamingSchemaEvolutionSuiteBase + with DeltaColumnMappingEnableIdMode { + override def isCdcTest: Boolean = true +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuite.scala new file mode 100644 index 00000000000..f4092f03a64 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuite.scala @@ -0,0 +1,2520 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.{File, FileInputStream, OutputStream} +import java.net.URI +import java.util.UUID +import java.util.concurrent.TimeoutException + +import scala.concurrent.duration._ +import scala.language.implicitConversions + +import org.apache.spark.sql.delta.actions.{AddFile, Protocol} +import org.apache.spark.sql.delta.sources.{DeltaSourceOffset, DeltaSQLConf} +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} +import org.apache.commons.io.FileUtils +import org.apache.commons.lang3.exception.ExceptionUtils +import org.apache.hadoop.fs.{FileStatus, Path, RawLocalFileSystem} +import org.scalatest.time.{Seconds, Span} + +import org.apache.spark.SparkThrowable +import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.util.IntervalUtils +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.functions.when +import org.apache.spark.sql.streaming.{OutputMode, StreamingQuery, StreamingQueryException, Trigger} +import org.apache.spark.sql.streaming.util.StreamManualClock +import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.{ManualClock, Utils} + +class DeltaSourceSuite extends DeltaSourceSuiteBase + with DeltaColumnMappingTestUtils + with DeltaSQLCommandTest { + + import testImplicits._ + + test("no schema should throw an exception") { + withTempDir { inputDir => + new File(inputDir, "_delta_log").mkdir() + val e = intercept[AnalysisException] { + spark.readStream + .format("delta") + .load(inputDir.getCanonicalPath) + } + for (msg <- Seq("Table schema is not set", "CREATE TABLE")) { + assert(e.getMessage.contains(msg)) + } + } + } + + test("disallow user specified schema") { + withTempDir { inputDir => + new File(inputDir, "_delta_log").mkdir() + val e = intercept[AnalysisException] { + spark.readStream + .schema(StructType.fromDDL("a INT, b STRING")) + .format("delta") + .load(inputDir.getCanonicalPath) + } + for (msg <- Seq("Delta does not support specifying the schema at read time")) { + assert(e.getMessage.contains(msg)) + } + } + } + + test("basic") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + withMetadata(deltaLog, StructType.fromDDL("value STRING")) + + val df = spark.readStream + .format("delta") + .load(inputDir.getCanonicalPath) + .filter($"value" contains "keep") + + testStream(df)( + AddToReservoir(inputDir, Seq("keep1", "keep2", "drop3").toDF), + AssertOnQuery { q => q.processAllAvailable(); true }, + CheckAnswer("keep1", "keep2"), + StopStream, + AddToReservoir(inputDir, Seq("drop4", "keep5", "keep6").toDF), + StartStream(), + AssertOnQuery { q => q.processAllAvailable(); true }, + CheckAnswer("keep1", "keep2", "keep5", "keep6"), + AddToReservoir(inputDir, Seq("keep7", "drop8", "keep9").toDF), + AssertOnQuery { q => q.processAllAvailable(); true }, + CheckAnswer("keep1", "keep2", "keep5", "keep6", "keep7", "keep9") + ) + } + } + + test("initial snapshot ends at base index of next version") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + withMetadata(deltaLog, StructType.fromDDL("value STRING")) + // Add data before creating the stream, so that it becomes part of the initial snapshot. 
+ Seq("keep1", "keep2", "drop3").toDF.write + .format("delta").mode("append").save(inputDir.getAbsolutePath) + + val df = spark.readStream + .format("delta") + .load(inputDir.getCanonicalPath) + .filter($"value" contains "keep") + + testStream(df)( + AssertOnQuery { q => q.processAllAvailable(); true }, + AssertOnQuery { q => + val offset = q.committedOffsets.iterator.next()._2.asInstanceOf[DeltaSourceOffset] + assert(offset.reservoirVersion === 2) + assert(offset.index === DeltaSourceOffset.BASE_INDEX) + true + }, + CheckAnswer("keep1", "keep2"), + StopStream + ) + } + } + + test("allow to change schema before starting a streaming query") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 5).foreach { i => + val v = Seq(i.toString).toDF("id") + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + withMetadata(deltaLog, StructType.fromDDL("id STRING, value STRING")) + + (5 until 10).foreach { i => + val v = Seq(i.toString -> i.toString).toDF("id", "value") + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val df = spark.readStream + .format("delta") + .load(inputDir.getCanonicalPath) + + val expected = ( + (0 until 5).map(_.toString -> null) ++ (5 until 10).map(_.toString).map(x => x -> x) + ).toDF("id", "value").collect() + testStream(df)( + AssertOnQuery { q => q.processAllAvailable(); true }, + CheckAnswer(expected: _*) + ) + } + } + + testQuietly("disallow to change schema after starting a streaming query") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 5).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val df = spark.readStream + .format("delta") + .load(inputDir.getCanonicalPath) + + testStream(df)( + AssertOnQuery { q => q.processAllAvailable(); true }, + CheckAnswer((0 until 5).map(_.toString): _*), + AssertOnQuery { _ => + withMetadata(deltaLog, StructType.fromDDL("id int, value int")) + true + }, + ExpectFailure[DeltaIllegalStateException](t => + assert(t.getMessage.contains("Detected schema change"))) + ) + } + } + + test("maxFilesPerTrigger") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 5).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val q = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "1") + .load(inputDir.getCanonicalPath) + .writeStream + .format("memory") + .queryName("maxFilesPerTriggerTest") + .start() + try { + q.processAllAvailable() + val progress = q.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 5) + progress.foreach { p => + assert(p.numInputRows === 1) + } + checkAnswer(sql("SELECT * from maxFilesPerTriggerTest"), (0 until 5).map(_.toString).toDF) + } finally { + q.stop() + } + } + } + + test("maxFilesPerTrigger: metadata checkpoint") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 20).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val q = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "1") + .load(inputDir.getCanonicalPath) + .writeStream + .format("memory") + .queryName("maxFilesPerTriggerTest") + .start() + try { + 
q.processAllAvailable() + val progress = q.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 20) + progress.foreach { p => + assert(p.numInputRows === 1) + } + checkAnswer(sql("SELECT * from maxFilesPerTriggerTest"), (0 until 20).map(_.toString).toDF) + } finally { + q.stop() + } + } + } + + test("maxFilesPerTrigger: change and restart") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 10).foreach { i => + val v = Seq(i.toString).toDF() + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val q = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "1") + .load(inputDir.getCanonicalPath) + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .start(outputDir.getCanonicalPath) + try { + q.processAllAvailable() + val progress = q.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 10) + progress.foreach { p => + assert(p.numInputRows === 1) + } + checkAnswer( + spark.read.format("delta").load(outputDir.getAbsolutePath), + (0 until 10).map(_.toString).toDF()) + } finally { + q.stop() + } + + (10 until 20).foreach { i => + val v = Seq(i.toString).toDF() + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val q2 = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "2") + .load(inputDir.getCanonicalPath) + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .start(outputDir.getCanonicalPath) + try { + q2.processAllAvailable() + val progress = q2.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 5) + progress.foreach { p => + assert(p.numInputRows === 2) + } + + checkAnswer( + spark.read.format("delta").load(outputDir.getAbsolutePath), + (0 until 20).map(_.toString).toDF()) + } finally { + q2.stop() + } + } + } + + testQuietly("maxFilesPerTrigger: invalid parameter") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + withMetadata(deltaLog, StructType.fromDDL("value STRING")) + + Seq(0, -1, "string").foreach { invalidMaxFilesPerTrigger => + val e = intercept[StreamingQueryException] { + spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, invalidMaxFilesPerTrigger.toString) + .load(inputDir.getCanonicalPath) + .writeStream + .format("console") + .start() + .processAllAvailable() + } + assert(e.getCause.isInstanceOf[IllegalArgumentException]) + for (msg <- Seq("Invalid", DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "positive")) { + assert(e.getCause.getMessage.contains(msg)) + } + } + } + } + + test("maxFilesPerTrigger: ignored when using Trigger.Once") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 5).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + def runTriggerOnceAndVerifyResult(expected: Seq[Int]): Unit = { + val q = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "1") + .load(inputDir.getCanonicalPath) + .writeStream + .format("memory") + .trigger(Trigger.Once) + .queryName("triggerOnceTest") + .start() + try { + assert(q.awaitTermination(streamingTimeout.toMillis)) + assert(q.recentProgress.count(_.numInputRows != 0) == 1) // only one trigger was run + 
checkAnswer(sql("SELECT * from triggerOnceTest"), expected.map(_.toString).toDF) + } finally { + q.stop() + } + } + + runTriggerOnceAndVerifyResult(0 until 5) + + // Write more data and start a second batch. + (5 until 10).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + // Verify we can see all of latest data. + runTriggerOnceAndVerifyResult(0 until 10) + } + } + + test("maxFilesPerTrigger: Trigger.AvailableNow respects read limits") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaLog = DeltaLog.forTable(spark, inputDir) + // Write versions 0, 1, 2, 3, 4. + (0 to 4).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val stream = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "1") + .load(inputDir.getCanonicalPath) + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .trigger(Trigger.AvailableNow) + .queryName("maxFilesPerTriggerTest") + + var q = stream.start(outputDir.getCanonicalPath) + try { + assert(q.awaitTermination(streamingTimeout.toMillis)) + assert(q.recentProgress.length === 5) + // The first 5 versions each contain one file. They are processed as part of the initial + // snapshot (reservoir version 4) with one index per file. + (0 to 3).foreach { i => + val p = q.recentProgress(i) + assert(p.numInputRows === 1) + val endOffset = JsonUtils.fromJson[DeltaSourceOffset](p.sources.head.endOffset) + assert(endOffset == DeltaSourceOffset( + endOffset.reservoirId, reservoirVersion = 4, index = i, isInitialSnapshot = true)) + } + // The last batch ends at the base index of the next reservoir version (5). + val p4 = q.recentProgress(4) + assert(p4.numInputRows === 1) + val endOffset = JsonUtils.fromJson[DeltaSourceOffset](p4.sources.head.endOffset) + assert(endOffset == DeltaSourceOffset( + endOffset.reservoirId, + reservoirVersion = 5, + index = DeltaSourceOffset.BASE_INDEX, + isInitialSnapshot = false)) + + checkAnswer( + sql(s"SELECT * from delta.`${outputDir.getCanonicalPath}`"), + (0 to 4).map(_.toString).toDF) + + // Restarting the stream should immediately terminate with no progress because no more data + q = stream.start(outputDir.getCanonicalPath) + assert(q.awaitTermination(streamingTimeout.toMillis)) + // The streaming engine always reports one batch, even if it's empty. + assert(q.recentProgress.length === 1) + assert(q.recentProgress(0).sources.head.startOffset == + q.recentProgress(0).sources.head.endOffset) + + // Write versions 5, 6, 7. + (5 to 7).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + q = stream.start(outputDir.getCanonicalPath) + assert(q.awaitTermination(streamingTimeout.toMillis)) + // These versions are processed one by one outside the initial snapshot. 
+ assert(q.recentProgress.length === 3) + + (5 to 7).foreach { i => + val p = q.recentProgress(i - 5) + assert(p.numInputRows === 1) + val endOffset = JsonUtils.fromJson[DeltaSourceOffset](p.sources.head.endOffset) + assert(endOffset == DeltaSourceOffset( + endOffset.reservoirId, + reservoirVersion = i + 1, + index = DeltaSourceOffset.BASE_INDEX, + isInitialSnapshot = false)) + } + + // Restarting the stream should immediately terminate with no progress because no more data + q = stream.start(outputDir.getCanonicalPath) + assert(q.awaitTermination(streamingTimeout.toMillis)) + assert(q.recentProgress.length === 1) + assert(q.recentProgress(0).sources.head.startOffset == + q.recentProgress(0).sources.head.endOffset) + } finally { + q.stop() + } + } + } + + test("Trigger.AvailableNow with an empty table") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + sql(s"CREATE TABLE delta.`${inputDir.toURI}` (value STRING) USING delta") + + val stream = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "1") + .load(inputDir.getCanonicalPath) + .writeStream + .format("memory") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .trigger(Trigger.AvailableNow) + .queryName("emptyTableTriggerAvailableNow") + + var q = stream.start() + try { + assert(q.awaitTermination(10000)) + val progress = q.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 0) + } finally { + q.stop() + } + } + } + + test("maxBytesPerTrigger: process at least one file") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 5).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val q = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_BYTES_PER_TRIGGER_OPTION, "1b") + .load(inputDir.getCanonicalPath) + .writeStream + .format("memory") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .queryName("maxBytesPerTriggerTest") + .start() + try { + q.processAllAvailable() + val progress = q.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 5) + progress.foreach { p => + assert(p.numInputRows === 1) + } + checkAnswer(sql("SELECT * from maxBytesPerTriggerTest"), (0 until 5).map(_.toString).toDF) + } finally { + q.stop() + } + } + } + + test("maxBytesPerTrigger: metadata checkpoint") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 20).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val q = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_BYTES_PER_TRIGGER_OPTION, "1b") + .load(inputDir.getCanonicalPath) + .writeStream + .format("memory") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .queryName("maxBytesPerTriggerTest") + .start() + try { + q.processAllAvailable() + val progress = q.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 20) + progress.foreach { p => + assert(p.numInputRows === 1) + } + checkAnswer(sql("SELECT * from maxBytesPerTriggerTest"), (0 until 20).map(_.toString).toDF) + } finally { + q.stop() + } + } + } + + test("maxBytesPerTrigger: change and restart") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 10).foreach { i => + val v 
= Seq(i.toString).toDF() + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val q = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_BYTES_PER_TRIGGER_OPTION, "1b") + .load(inputDir.getCanonicalPath) + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .start(outputDir.getCanonicalPath) + try { + q.processAllAvailable() + val progress = q.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 10) + progress.foreach { p => + assert(p.numInputRows === 1) + } + checkAnswer( + spark.read.format("delta").load(outputDir.getAbsolutePath), + (0 until 10).map(_.toString).toDF()) + } finally { + q.stop() + } + + (10 until 20).foreach { i => + val v = Seq(i.toString).toDF() + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val q2 = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_BYTES_PER_TRIGGER_OPTION, "100g") + .load(inputDir.getCanonicalPath) + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .start(outputDir.getCanonicalPath) + try { + q2.processAllAvailable() + val progress = q2.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 1) + progress.foreach { p => + assert(p.numInputRows === 10) + } + + checkAnswer( + spark.read.format("delta").load(outputDir.getAbsolutePath), + (0 until 20).map(_.toString).toDF()) + } finally { + q2.stop() + } + } + } + + testQuietly("maxBytesPerTrigger: invalid parameter") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + withMetadata(deltaLog, StructType.fromDDL("value STRING")) + + Seq(0, -1, "string").foreach { invalidMaxBytesPerTrigger => + val e = intercept[StreamingQueryException] { + spark.readStream + .format("delta") + .option(DeltaOptions.MAX_BYTES_PER_TRIGGER_OPTION, invalidMaxBytesPerTrigger.toString) + .load(inputDir.getCanonicalPath) + .writeStream + .format("console") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .start() + .processAllAvailable() + } + assert(e.getCause.isInstanceOf[IllegalArgumentException]) + for (msg <- Seq("Invalid", DeltaOptions.MAX_BYTES_PER_TRIGGER_OPTION, "size")) { + assert(e.getCause.getMessage.contains(msg)) + } + } + } + } + + test("maxBytesPerTrigger: Trigger.AvailableNow respects read limits") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val deltaLog = DeltaLog.forTable(spark, inputDir) + (0 until 5).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val stream = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_BYTES_PER_TRIGGER_OPTION, "1b") + .load(inputDir.getCanonicalPath) + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .trigger(Trigger.AvailableNow) + .queryName("maxBytesPerTriggerTest") + + var q = stream.start(outputDir.getCanonicalPath) + try { + assert(q.awaitTermination(streamingTimeout.toMillis)) + val progress = q.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 5) + progress.foreach { p => + assert(p.numInputRows === 1) + } + checkAnswer( + sql(s"SELECT * from delta.`${outputDir.getCanonicalPath}`"), + (0 until 5).map(_.toString).toDF) + + // Restarting the stream should immediately terminate with no progress because no more data + q = stream.start(outputDir.getCanonicalPath) + 
assert(q.awaitTermination(streamingTimeout.toMillis)) + assert(q.recentProgress.length === 1) + assert(q.recentProgress(0).sources.head.startOffset == + q.recentProgress(0).sources.head.endOffset) + } finally { + q.stop() + } + } + } + + test("maxBytesPerTrigger: max bytes and max files together") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + (0 until 5).foreach { i => + val v = Seq(i.toString).toDF + v.write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + val q = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "1") // should process a file at a time + .option(DeltaOptions.MAX_BYTES_PER_TRIGGER_OPTION, "100gb") + .load(inputDir.getCanonicalPath) + .writeStream + .format("memory") + .queryName("maxBytesPerTriggerTest") + .start() + try { + q.processAllAvailable() + val progress = q.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 5) + progress.foreach { p => + assert(p.numInputRows === 1) + } + checkAnswer(sql("SELECT * from maxBytesPerTriggerTest"), (0 until 5).map(_.toString).toDF) + } finally { + q.stop() + } + + val q2 = spark.readStream + .format("delta") + .option(DeltaOptions.MAX_FILES_PER_TRIGGER_OPTION, "2") + .option(DeltaOptions.MAX_BYTES_PER_TRIGGER_OPTION, "1b") + .load(inputDir.getCanonicalPath) + .writeStream + .format("memory") + .queryName("maxBytesPerTriggerTest") + .start() + try { + q2.processAllAvailable() + val progress = q2.recentProgress.filter(_.numInputRows != 0) + assert(progress.length === 5) + progress.foreach { p => + assert(p.numInputRows === 1) + } + checkAnswer(sql("SELECT * from maxBytesPerTriggerTest"), (0 until 5).map(_.toString).toDF) + } finally { + q2.stop() + } + } + } + + test("unknown sourceVersion value") { + // Set unknown sourceVersion as the max allowed version plus 1. + val unknownVersion = 4 + + // Note: "isStartingVersion" corresponds to DeltaSourceOffset.isInitialSnapshot. + val json = + s""" + |{ + | "sourceVersion": $unknownVersion, + | "reservoirVersion": 1, + | "index": 1, + | "isStartingVersion": true + |} + """.stripMargin + val e = intercept[SparkThrowable] { + DeltaSourceOffset( + UUID.randomUUID().toString, + SerializedOffset(json) + ) + } + assert(e.getErrorClass == "DELTA_INVALID_FORMAT_FROM_SOURCE_VERSION") + assert(e.toString.contains("Please upgrade to newer version of Delta")) + } + + test("invalid sourceVersion value") { + // Note: "isStartingVersion" corresponds to DeltaSourceOffset.isInitialSnapshot. + val json = + """ + |{ + | "sourceVersion": "foo", + | "reservoirVersion": 1, + | "index": 1, + | "isStartingVersion": true + |} + """.stripMargin + val e = intercept[SparkThrowable] { + DeltaSourceOffset( + UUID.randomUUID().toString, + SerializedOffset(json) + ) + } + assert(e.getErrorClass == "DELTA_INVALID_SOURCE_OFFSET_FORMAT") + assert(e.toString.contains("source offset format is invalid")) + } + + test("missing sourceVersion") { + // Note: "isStartingVersion" corresponds to DeltaSourceOffset.isInitialSnapshot. 
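+    // [Editor's aside: illustrative.] For contrast, a well-formed serialized offset carries all
+    // five fields, e.g. (see the serialization tests below for the exact format):
+    //   {"reservoirId":"<uuid>","sourceVersion":1,"reservoirVersion":1,"index":1,"isStartingVersion":true}
+    // The JSON below deliberately omits "sourceVersion", which should surface as
+    // DELTA_INVALID_SOURCE_VERSION.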
+ val json = + """ + |{ + | "reservoirVersion": 1, + | "index": 1, + | "isStartingVersion": true + |} + """.stripMargin + val e = intercept[SparkThrowable] { + DeltaSourceOffset( + UUID.randomUUID().toString, + SerializedOffset(json) + ) + } + assert(e.getErrorClass == "DELTA_INVALID_SOURCE_VERSION") + for (msg <- "is invalid") { + assert(e.toString.contains(msg)) + } + } + + test("unmatched reservoir id") { + // Note: "isStartingVersion" corresponds to DeltaSourceOffset.isInitialSnapshot. + val json = + s""" + |{ + | "reservoirId": "${UUID.randomUUID().toString}", + | "sourceVersion": 1, + | "reservoirVersion": 1, + | "index": 1, + | "isStartingVersion": true + |} + """.stripMargin + val e = intercept[SparkThrowable] { + DeltaSourceOffset( + UUID.randomUUID().toString, + SerializedOffset(json) + ) + } + assert(e.getErrorClass == "DIFFERENT_DELTA_TABLE_READ_BY_STREAMING_SOURCE") + for (msg <- Seq("delete", "checkpoint", "restart")) { + assert(e.toString.contains(msg)) + } + } + + test("isInitialSnapshot serializes as isStartingVersion") { + for (isStartingVersion <- Seq(false, true)) { + // From serialized to object + val reservoirId = UUID.randomUUID().toString + val json = + s""" + |{ + | "reservoirId": "$reservoirId", + | "sourceVersion": 1, + | "reservoirVersion": 1, + | "index": 1, + | "isStartingVersion": $isStartingVersion + |} + """.stripMargin + val offsetDeserialized = DeltaSourceOffset(reservoirId, SerializedOffset(json)) + assert(offsetDeserialized.isInitialSnapshot === isStartingVersion) + + // From object to serialized + val offset = DeltaSourceOffset( + reservoirId = reservoirId, + reservoirVersion = 7, + index = 13, + isInitialSnapshot = isStartingVersion) + assert(offset.json.contains(s""""isStartingVersion":$isStartingVersion""")) + } + } + + test("DeltaSourceOffset deserialization") { + // Source version 1 with BASE_INDEX_V1 + val reservoirId = UUID.randomUUID().toString + val jsonV1 = + s""" + |{ + | "reservoirId": "$reservoirId", + | "sourceVersion": 1, + | "reservoirVersion": 3, + | "index": -1, + | "isStartingVersion": false + |} + """.stripMargin + val offsetDeserializedV1 = JsonUtils.fromJson[DeltaSourceOffset](jsonV1) + assert(offsetDeserializedV1 == + DeltaSourceOffset(reservoirId, 3, DeltaSourceOffset.BASE_INDEX, false)) + + // Source version 3 with BASE_INDEX_V3 + val jsonV3 = + s""" + |{ + | "reservoirId": "$reservoirId", + | "sourceVersion": 3, + | "reservoirVersion": 7, + | "index": -100, + | "isStartingVersion": false + |} + """.stripMargin + val offsetDeserializedV3 = JsonUtils.fromJson[DeltaSourceOffset](jsonV3) + assert(offsetDeserializedV3 == + DeltaSourceOffset(reservoirId, 7, DeltaSourceOffset.BASE_INDEX, false)) + + // Source version 3 with METADATA_CHANGE_INDEX + val jsonV3metadataChange = + s""" + |{ + | "reservoirId": "$reservoirId", + | "sourceVersion": 3, + | "reservoirVersion": 7, + | "index": -20, + | "isStartingVersion": false + |} + """.stripMargin + val offsetDeserializedV3metadataChange = + JsonUtils.fromJson[DeltaSourceOffset](jsonV3metadataChange) + assert(offsetDeserializedV3metadataChange == + DeltaSourceOffset(reservoirId, 7, DeltaSourceOffset.METADATA_CHANGE_INDEX, false)) + + // Source version 3 with regular index and isStartingVersion = true + val jsonV3start = + s""" + |{ + | "reservoirId": "$reservoirId", + | "sourceVersion": 3, + | "reservoirVersion": 9, + | "index": 23, + | "isStartingVersion": true + |} + """.stripMargin + val offsetDeserializedV3start = JsonUtils.fromJson[DeltaSourceOffset](jsonV3start) + 
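// The "unmatched reservoir id" case above guards against re-pointing an existing checkpoint
// at a different table. A minimal sketch of that guard, assuming a plain
// IllegalStateException in place of the real DIFFERENT_DELTA_TABLE_READ_BY_STREAMING_SOURCE
// error class; assertSameTable and its parameters are illustrative names only.
def assertSameTable(currentTableId: String, tableIdInOffset: String): Unit = {
  if (tableIdInOffset != currentTableId) {
    throw new IllegalStateException(
      s"The streaming checkpoint was written against table id $tableIdInOffset but the query " +
      s"now reads table id $currentTableId. Either delete the checkpoint and restart the " +
      "query, or point it back at the original table.")
  }
}
// The message deliberately mentions "delete", "checkpoint" and "restart", which is exactly
// what the assertions in the test above look for.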
assert(offsetDeserializedV3start == DeltaSourceOffset(reservoirId, 9, 23, true)) + } + + test("DeltaSourceOffset deserialization error") { + val reservoirId = UUID.randomUUID().toString + // This is missing a double quote so it's unbalanced. + val jsonV1 = + s""" + |{ + | "reservoirId": "$reservoirId", + | "sourceVersion": 23x, + | "reservoirVersion": 3, + | "index": -1, + | "isStartingVersion": false + |} + """.stripMargin + val e = intercept[SparkThrowable] { + JsonUtils.fromJson[DeltaSourceOffset](jsonV1) + } + assert(e.getErrorClass == "DELTA_INVALID_SOURCE_OFFSET_FORMAT") + } + + test("DeltaSourceOffset serialization") { + val reservoirId = UUID.randomUUID().toString + // BASE_INDEX is always serialized as V1. + val offsetV1 = DeltaSourceOffset(reservoirId, 3, DeltaSourceOffset.BASE_INDEX, false) + assert(JsonUtils.toJson(offsetV1) === + s"""{"sourceVersion":1,"reservoirId":"$reservoirId","reservoirVersion":3,"index":-1,""" + + s""""isStartingVersion":false}""" + ) + // The same serializer should be used by both methods. + assert(JsonUtils.toJson(offsetV1) === offsetV1.json) + + // METADATA_CHANGE_INDEX is always serialized as V3 + val offsetV3metadataChange = + DeltaSourceOffset(reservoirId, 7, DeltaSourceOffset.METADATA_CHANGE_INDEX, false) + assert(JsonUtils.toJson(offsetV3metadataChange) === + s"""{"sourceVersion":3,"reservoirId":"$reservoirId","reservoirVersion":7,"index":-20,""" + + s""""isStartingVersion":false}""" + ) + // The same serializer should be used by both methods. + assert(JsonUtils.toJson(offsetV3metadataChange) === offsetV3metadataChange.json) + + // Regular index and isStartingVersion = true, serialized as V1 + val offsetV1start = DeltaSourceOffset(reservoirId, 9, 23, true) + assert(JsonUtils.toJson(offsetV1start) === + s"""{"sourceVersion":1,"reservoirId":"$reservoirId","reservoirVersion":9,"index":23,""" + + s""""isStartingVersion":true}""" + ) + // The same serializer should be used by both methods. + assert(JsonUtils.toJson(offsetV1start) === offsetV1start.json) + } + + testQuietly("recreate the reservoir should fail the query") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + withMetadata(deltaLog, StructType.fromDDL("value STRING")) + + val df = spark.readStream + .format("delta") + .load(inputDir.getCanonicalPath) + .filter($"value" contains "keep") + + testStream(df)( + AddToReservoir(inputDir, Seq("keep1", "keep2", "drop3").toDF), + AssertOnQuery { q => q.processAllAvailable(); true }, + CheckAnswer("keep1", "keep2"), + StopStream, + AssertOnQuery { _ => + Utils.deleteRecursively(inputDir) + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + // All Delta tables in tests use the same tableId by default. 
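// The serialization tests above pin down a version-dependent wire format: BASE_INDEX is
// written as sourceVersion 1 with index -1, METADATA_CHANGE_INDEX needs sourceVersion 3 and
// is written as -20, and ordinary file indexes stay on version 1. A condensed model of that
// choice; the in-memory sentinel values below are taken from the V3 JSON fixtures in this
// suite (-100 and -20) and are not meant as the authoritative constants.
object OffsetWireFormatSketch {
  val BaseIndex: Long = -100L
  val MetadataChangeIndex: Long = -20L

  /** Returns (sourceVersion, on-the-wire index) for a given in-memory index. */
  def wireForm(index: Long): (Int, Long) = index match {
    case BaseIndex           => (1, -1L)  // oldest format that can express it, for compatibility
    case MetadataChangeIndex => (3, -20L) // only representable in the newer format
    case regular             => (1, regular)
  }

  assert(wireForm(BaseIndex) == (1, -1L))
  assert(wireForm(MetadataChangeIndex) == (3, -20L))
  assert(wireForm(23L) == (1, 23L))
}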
Here we pass a new tableId + // to simulate a new table creation in production + withMetadata(deltaLog, StructType.fromDDL("value STRING"), tableId = Some("tableId-1234")) + true + }, + StartStream(), + ExpectFailure[DeltaIllegalStateException] { e => + for (msg <- Seq("delete", "checkpoint", "restart")) { + assert(e.getMessage.contains(msg)) + } + } + ) + } + } + + test("excludeRegex works and doesn't mess up offsets across restarts - parquet version") { + withTempDir { inputDir => + val chk = new File(inputDir, "_checkpoint").toString + + def excludeReTest(s: Option[String], expected: String*): Unit = { + val dfr = spark.readStream + .format("delta") + s.foreach(regex => dfr.option(DeltaOptions.EXCLUDE_REGEX_OPTION, regex)) + val df = dfr.load(inputDir.getCanonicalPath).groupBy('value).count + testStream(df, OutputMode.Complete())( + StartStream(checkpointLocation = chk), + AssertOnQuery { sq => sq.processAllAvailable(); true }, + CheckLastBatch(expected.map((_, 1)): _*), + StopStream + ) + } + + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + + def writeFile(name: String, content: String): AddFile = { + FileUtils.write(new File(inputDir, name), content) + AddFile(name, Map.empty, content.length, System.currentTimeMillis(), dataChange = true) + } + + def commitFiles(files: AddFile*): Unit = { + deltaLog.startTransaction().commit(files, DeltaOperations.ManualUpdate) + } + + Seq("abc", "def").toDF().write.format("delta").save(inputDir.getAbsolutePath) + commitFiles( + writeFile("batch1-ignore-file1", "ghi"), + writeFile("batch1-ignore-file2", "jkl") + ) + excludeReTest(Some("ignore"), "abc", "def") + } + } + + testQuietly("excludeRegex throws good error on bad regex pattern") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + withMetadata(deltaLog, StructType.fromDDL("value STRING")) + + val e = intercept[StreamingQueryException] { + spark.readStream + .format("delta") + .option(DeltaOptions.EXCLUDE_REGEX_OPTION, "[abc") + .load(inputDir.getCanonicalPath) + .writeStream + .format("console") + .start() + .awaitTermination() + }.cause + assert(e.isInstanceOf[IllegalArgumentException]) + assert(e.getMessage.contains(DeltaOptions.EXCLUDE_REGEX_OPTION)) + } + } + + test("a fast writer should not starve a Delta source") { + val deltaPath = Utils.createTempDir().getCanonicalPath + val checkpointPath = Utils.createTempDir().getCanonicalPath + val writer = spark.readStream + .format("rate") + .load() + .writeStream + .format("delta") + .option("checkpointLocation", checkpointPath) + .start(deltaPath) + try { + eventually(timeout(streamingTimeout)) { + assert(spark.read.format("delta").load(deltaPath).count() > 0) + } + val testTableName = "delta_source_test" + withTable(testTableName) { + val reader = spark.readStream + .format("delta") + .load(deltaPath) + .writeStream + .format("memory") + .queryName(testTableName) + .start() + try { + eventually(timeout(streamingTimeout)) { + assert(spark.table(testTableName).count() > 0) + } + } finally { + reader.stop() + } + } + } finally { + writer.stop() + } + } + + test("start from corrupt checkpoint") { + withTempDir { inputDir => + val path = inputDir.getAbsolutePath + for (i <- 1 to 5) { + Seq(i).toDF("id").write.mode("append").format("delta").save(path) + } + val deltaLog = DeltaLog.forTable(spark, path) + deltaLog.checkpoint() + Seq(6).toDF("id").write.mode("append").format("delta").save(path) + val checkpoints = new File(deltaLog.logPath.toUri).listFiles() + .filter(f => 
FileNames.isCheckpointFile(new Path(f.getAbsolutePath))) + checkpoints.last.delete() + + val df = spark.readStream + .format("delta") + .load(inputDir.getCanonicalPath) + + testStream(df)( + AssertOnQuery { q => q.processAllAvailable(); true }, + CheckAnswer(1, 2, 3, 4, 5, 6), + StopStream + ) + } + } + + test("SC-11561: can consume new data without update") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + withMetadata(deltaLog, StructType.fromDDL("value STRING")) + + val df = spark.readStream.format("delta").load(inputDir.getCanonicalPath) + + // clear the cache so that the writer creates its own DeltaLog instead of reusing the reader's + DeltaLog.clearCache() + (0 until 3).foreach { i => + Seq(i.toString).toDF("value") + .write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + // check that reader consumed new data without updating its DeltaLog + testStream(df)( + AssertOnQuery { q => q.processAllAvailable(); true }, + CheckAnswer("0", "1", "2") + ) + assert(deltaLog.snapshot.version == 0) + + (3 until 5).foreach { i => + Seq(i.toString).toDF("value") + .write.mode("append").format("delta").save(deltaLog.dataPath.toString) + } + + // check that reader consumed new data without update despite checkpoint + val writersLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + writersLog.checkpoint() + testStream(df)( + AssertOnQuery { q => q.processAllAvailable(); true }, + CheckAnswer("0", "1", "2", "3", "4") + ) + assert(deltaLog.snapshot.version == 0) + } + } + + test( + "can delete old files of a snapshot without update" + ) { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + withMetadata(deltaLog, StructType.fromDDL("value STRING")) + + val df = spark.readStream.format("delta").load(inputDir.getCanonicalPath) + + // clear the cache so that the writer creates its own DeltaLog instead of reusing the reader's + DeltaLog.clearCache() + val clock = new ManualClock(System.currentTimeMillis()) + val writersLog = DeltaLog.forTable(spark, new Path(inputDir.toURI), clock) + (0 until 3).foreach { i => + Seq(i.toString).toDF("value") + .write.mode("append").format("delta").save(inputDir.getCanonicalPath) + } + + // Create a checkpoint so that logs before checkpoint can be expired and deleted + writersLog.checkpoint() + + testStream(df)( + StartStream(Trigger.ProcessingTime("10 seconds"), new StreamManualClock), + AdvanceManualClock(10 * 1000L), + CheckLastBatch("0", "1", "2"), + Assert { + val defaultLogRetentionMillis = DeltaConfigs.getMilliSeconds( + IntervalUtils.safeStringToInterval( + UTF8String.fromString(DeltaConfigs.LOG_RETENTION.defaultValue))) + clock.advance(defaultLogRetentionMillis + 100000000L) + + // Delete all logs before checkpoint + writersLog.cleanUpExpiredLogs(writersLog.snapshot) + + // Check that the first few log files have been deleted + val logPath = new File(inputDir, "_delta_log") + val logVersions = logPath.listFiles().map(_.getName) + .filter(_.endsWith(".json")) + .map(_.stripSuffix(".json").toInt) + + !logVersions.contains(0) && !logVersions.contains(1) + }, + Assert { + (3 until 5).foreach { i => + Seq(i.toString).toDF("value") + .write.mode("append").format("delta").save(inputDir.getCanonicalPath) + } + true + }, + // can process new data without update, despite that previous log files have been deleted + AdvanceManualClock(10 * 1000L), + AdvanceManualClock(10 * 1000L), + CheckNewAnswer("3", "4") + ) + assert(deltaLog.snapshot.version == 0) + } + 
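// Both tests above lean on the _delta_log layout: each commit is a "<version>.json" file and
// checkpoint files carry ".checkpoint" in their name, so once a checkpoint exists the commit
// JSONs before it are only needed until the retention period expires. A rough, file-name-only
// sketch of finding those cleanup candidates; the real cleanup also honours
// delta.logRetentionDuration and file timestamps, which this deliberately ignores.
def commitsCoveredByLatestCheckpoint(deltaLogDir: java.io.File): Seq[Long] = {
  val names = Option(deltaLogDir.listFiles()).map(_.toSeq).getOrElse(Seq.empty).map(_.getName)
  val commitVersions = names.collect {
    case n if n.endsWith(".json") && n.stripSuffix(".json").forall(_.isDigit) =>
      n.stripSuffix(".json").toLong
  }
  val checkpointVersions = names.collect {
    case n if n.contains(".checkpoint") => n.takeWhile(_.isDigit).toLong
  }
  checkpointVersions.reduceOption(_ max _) match {
    case Some(latestCheckpoint) => commitVersions.filter(_ < latestCheckpoint).sorted
    case None                   => Seq.empty // nothing can be cleaned up without a checkpoint
  }
}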
} + + test("Delta sources don't write offsets with null json") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + Seq(1, 2, 3).toDF("x").write.format("delta").save(inputDir.toString) + + val df = spark.readStream.format("delta").load(inputDir.toString) + val stream = df.writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.toString) + .start(outputDir.toString) + stream.processAllAvailable() + val offsetFile = checkpointDir.toString + "/offsets/0" + + // Make sure JsonUtils doesn't serialize it as null + val deltaSourceOffsetLine = + scala.io.Source.fromFile(offsetFile).getLines.toSeq.last + val deltaSourceOffset = JsonUtils.fromJson[DeltaSourceOffset](deltaSourceOffsetLine) + assert(deltaSourceOffset.json != null, "Delta sources shouldn't write null json field") + + // Make sure OffsetSeqLog won't choke on the offset we wrote + withTempDir { logPath => + new OffsetSeqLog(spark, logPath.toString) { + val offsetSeq = this.deserialize(new FileInputStream(offsetFile)) + val out = new OutputStream() { override def write(b: Int): Unit = { } } + this.serialize(offsetSeq, out) + } + } + + stream.stop() + } + } + + test("Delta source advances with non-data inserts and generates empty dataframe for " + + "non-data operations") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + // Version 0 + Seq(1L, 2L, 3L).toDF("x").write.format("delta").save(inputDir.toString) + + val df = spark.readStream.format("delta").load(inputDir.toString) + + val stream = df + .writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.toString) + .foreachBatch( + (outputDf: DataFrame, bid: Long) => { + // Apart from first batch, rest of batches work with non-data operations + // for which we expect an empty dataframe to be generated. 
+ if (bid > 0) { + assert(outputDf.isEmpty) + } + outputDf + .write + .format("delta") + .mode("append") + .save(outputDir.toString) + } + ) + .start() + + val deltaLog = DeltaLog.forTable(spark, inputDir.toString) + def expectLatestOffset(offset: DeltaSourceOffset) { + val lastOffset = DeltaSourceOffset( + deltaLog.tableId, + SerializedOffset(stream.lastProgress.sources.head.endOffset) + ) + + assert(lastOffset == offset) + } + + try { + stream.processAllAvailable() + expectLatestOffset(DeltaSourceOffset( + deltaLog.tableId, 1, DeltaSourceOffset.BASE_INDEX, isInitialSnapshot = false)) + + deltaLog.startTransaction().commit(Seq(), DeltaOperations.ManualUpdate) + stream.processAllAvailable() + expectLatestOffset(DeltaSourceOffset( + deltaLog.tableId, 2, DeltaSourceOffset.BASE_INDEX, isInitialSnapshot = false)) + + deltaLog.startTransaction().commit(Seq(), DeltaOperations.ManualUpdate) + stream.processAllAvailable() + expectLatestOffset(DeltaSourceOffset( + deltaLog.tableId, 3, DeltaSourceOffset.BASE_INDEX, isInitialSnapshot = false)) + } finally { + stream.stop() + } + } + } + + test("Rate limited Delta source advances with non-data inserts") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + // Version 0 + Seq(1L, 2L, 3L).toDF("x").write.format("delta").save(inputDir.toString) + + val df = spark.readStream.format("delta").load(inputDir.toString) + val stream = df.writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.toString) + .option("maxFilesPerTrigger", 2) + .start(outputDir.toString) + + try { + val deltaLog = DeltaLog.forTable(spark, inputDir.toString) + def waitForOffset(offset: DeltaSourceOffset) { + eventually(timeout(streamingTimeout)) { + val lastOffset = DeltaSourceOffset( + deltaLog.tableId, + SerializedOffset(stream.lastProgress.sources.head.endOffset) + ) + + assert(lastOffset == offset) + } + } + + // Process the initial snapshot (version 0) and end up at the start of version 1 which + // does not exist yet. + stream.processAllAvailable() + waitForOffset(DeltaSourceOffset(deltaLog.tableId, 1, DeltaSourceOffset.BASE_INDEX, false)) + + // Add Versions 1, 2, 3, and 4 + for(i <- 1 to 4) { + deltaLog.startTransaction().commit(Seq(), DeltaOperations.ManualUpdate) + } + + // The manual commits don't have any files in them, but they do have indexes: BASE_INDEX + // and END_INDEX. Neither of those indexes are counted for rate limiting. We end up at + // v4[END_INDEX] which is then rounded up to v5[BASE_INDEX] even though v5 does not exist + // yet. + stream.processAllAvailable() + waitForOffset(DeltaSourceOffset(deltaLog.tableId, 5, DeltaSourceOffset.BASE_INDEX, false)) + + // Add Version 5 + deltaLog.startTransaction().commit(Seq(), DeltaOperations.ManualUpdate) + + // The stream progresses to v5[END_INDEX] which is rounded up to v6[BASE_INDEX]. (In prior + // versions of the code we did not have END_INDEX. In that case the stream would not have + // moved forward from v5, because there were no indexes after v5[BASE_INDEX]. 
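// The comments above describe the "round up" that keeps a rate-limited stream moving past
// empty commits: once every index in reservoirVersion v has been consumed, the reported
// offset becomes the base of v + 1, even though v + 1 may not exist yet. A tiny model of
// that rule; the sentinel values below are placeholders, not the real DeltaSourceOffset
// constants (whose exact values are not shown in this suite).
final case class SketchOffset(reservoirVersion: Long, index: Long)
val SketchBaseIndex: Long = Long.MinValue // placeholder for BASE_INDEX
val SketchEndIndex: Long  = Long.MaxValue // placeholder for END_INDEX

def roundUpToNextVersion(o: SketchOffset): SketchOffset =
  if (o.index == SketchEndIndex) SketchOffset(o.reservoirVersion + 1, SketchBaseIndex) else o

assert(roundUpToNextVersion(SketchOffset(4, SketchEndIndex)) == SketchOffset(5, SketchBaseIndex))
assert(roundUpToNextVersion(SketchOffset(5, 2)) == SketchOffset(5, 2))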
+ stream.processAllAvailable() + waitForOffset(DeltaSourceOffset(deltaLog.tableId, 6, DeltaSourceOffset.BASE_INDEX, false)) + } finally { + stream.stop() + } + } + } + + testQuietly("Delta sources should verify the protocol reader version") { + withTempDir { tempDir => + spark.range(0).write.format("delta").save(tempDir.getCanonicalPath) + + val df = spark.readStream.format("delta").load(tempDir.getCanonicalPath) + val stream = df.writeStream + .format("console") + .start() + try { + stream.processAllAvailable() + + val deltaLog = DeltaLog.forTable(spark, tempDir) + deltaLog.store.write( + FileNames.deltaFile(deltaLog.logPath, deltaLog.snapshot.version + 1), + // Write a large reader version to fail the streaming query + Iterator(Protocol(minReaderVersion = Int.MaxValue).json), + overwrite = false, + deltaLog.newDeltaHadoopConf()) + + // The streaming query should fail because its version is too old + val e = intercept[StreamingQueryException] { + stream.processAllAvailable() + } + assert(e.getCause.isInstanceOf[InvalidProtocolVersionException]) + } finally { + stream.stop() + } + } + } + + /** Generate commits with the given timestamp in millis. */ + private def generateCommits(location: String, commits: Long*): Unit = { + val deltaLog = DeltaLog.forTable(spark, location) + var startVersion = deltaLog.snapshot.version + 1 + commits.foreach { ts => + val rangeStart = startVersion * 10 + val rangeEnd = rangeStart + 10 + spark.range(rangeStart, rangeEnd).write.format("delta").mode("append").save(location) + val file = new File(FileNames.deltaFile(deltaLog.logPath, startVersion).toUri) + file.setLastModified(ts) + startVersion += 1 + } + } + + private implicit def durationToLong(duration: FiniteDuration): Long = { + duration.toMillis + } + + /** Disable log cleanup to avoid deleting logs we are testing. 
*/ + private def disableLogCleanup(tablePath: String): Unit = { + sql(s"alter table delta.`$tablePath` " + + s"set tblproperties (${DeltaConfigs.ENABLE_EXPIRED_LOG_CLEANUP.key} = false)") + } + + testQuietly("startingVersion") { + withTempDir { tableDir => + val tablePath = tableDir.getCanonicalPath + val start = 1594795800000L + generateCommits(tablePath, start, start + 20.minutes) + + def testStartingVersion(startingVersion: Long): Unit = { + val q = spark.readStream + .format("delta") + .option("startingVersion", startingVersion) + .load(tablePath) + .writeStream + .format("memory") + .queryName("startingVersion_test") + .start() + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + + for ((startingVersion, expected) <- Seq( + 0 -> (0 until 20), + 1 -> (10 until 20)) + ) { + withTempView("startingVersion_test") { + testStartingVersion(startingVersion) + checkAnswer( + spark.table("startingVersion_test"), + expected.map(_.toLong).toDF()) + } + } + + assert(intercept[StreamingQueryException] { + testStartingVersion(-1) + }.getMessage.contains("Invalid value '-1' for option 'startingVersion'")) + assert(intercept[StreamingQueryException] { + testStartingVersion(2) + }.getMessage.contains("Cannot time travel Delta table to version 2")) + + // Create a checkpoint at version 2 and delete version 0 + disableLogCleanup(tablePath) + val deltaLog = DeltaLog.forTable(spark, tablePath) + assert(deltaLog.update().version == 2) + deltaLog.checkpoint() + new File(FileNames.deltaFile(deltaLog.logPath, 0).toUri).delete() + + // Cannot start from version 0 + assert(intercept[StreamingQueryException] { + testStartingVersion(0) + }.getMessage.contains("Cannot time travel Delta table to version 0")) + + // Can start from version 1 even if it's not recreatable + // TODO: currently we would error out if we couldn't construct the snapshot to check column + // mapping enable tables. Unblock this once we roll out the proper semantics. + withStreamingReadOnColumnMappingTableEnabled { + withTempView("startingVersion_test") { + testStartingVersion(1L) + checkAnswer( + spark.table("startingVersion_test"), + (10 until 20).map(_.toLong).toDF()) + } + } + } + } + + // Row tracking forces actions to appear after AddFiles within commits. This will verify that + // we correctly skip processed commits, even when an AddFile is not the last action within a + // commit. 
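// The startingVersion error cases exercised above amount to a small validation rule: the
// version must be non-negative, must not be newer than the table's latest commit, and its
// commit file must still exist (version 1 is fine after version 0's JSON was deleted,
// version 0 is not). A hedged sketch; the message strings here only loosely echo the real
// "Invalid value" / "Cannot time travel" errors.
def validateStartingVersion(
    requested: Long,
    latestVersion: Long,
    earliestCommitStillPresent: Long): Unit = {
  require(requested >= 0, s"Invalid value '$requested' for option 'startingVersion'")
  if (requested > latestVersion || requested < earliestCommitStillPresent) {
    throw new IllegalArgumentException(
      s"Cannot time travel Delta table to version $requested; commits " +
      s"[$earliestCommitStillPresent, $latestVersion] are available")
  }
}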
+ Seq(true, false).foreach { withRowTracking => + testQuietly(s"startingVersion should be ignored when restarting from a checkpoint, " + + s"withRowTracking = $withRowTracking") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + val start = 1594795800000L + withSQLConf( + DeltaConfigs.ROW_TRACKING_ENABLED.defaultTablePropertyKey -> withRowTracking.toString) { + generateCommits(inputDir.getCanonicalPath, start, start + 20.minutes) + } + + def testStartingVersion( + startingVersion: Long, + checkpointLocation: String = checkpointDir.getCanonicalPath): Unit = { + val q = spark.readStream + .format("delta") + .option("startingVersion", startingVersion) + .load(inputDir.getCanonicalPath) + .writeStream + .format("delta") + .option("checkpointLocation", checkpointLocation) + .start(outputDir.getCanonicalPath) + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + + testStartingVersion(1L) + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + (10 until 20).map(_.toLong).toDF()) + + // Add two new commits + generateCommits(inputDir.getCanonicalPath, start + 40.minutes) + disableLogCleanup(inputDir.getCanonicalPath) + val deltaLog = DeltaLog.forTable(spark, inputDir.getCanonicalPath) + assert(deltaLog.update().version == 3) + deltaLog.checkpoint() + + // Make the streaming query move forward. When we restart here, we still need to touch + // `DeltaSource.getStartingVersion` because the engine will call `getBatch` + // that was committed (start is None) during the restart. + testStartingVersion(1L) + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + (10 until 30).map(_.toLong).toDF()) + + // Add one commit and delete version 0 and version 1 + generateCommits(inputDir.getCanonicalPath, start + 60.minutes) + (0 to 1).foreach { v => + new File(FileNames.deltaFile(deltaLog.logPath, v).toUri).delete() + } + + // Although version 1 has been deleted, restarting the query should still work as we have + // processed files in version 1. + // In other words, query restart should ignore "startingVersion" + // TODO: currently we would error out if we couldn't construct the snapshot to check column + // mapping enable tables. Unblock this once we roll out the proper semantics. + withStreamingReadOnColumnMappingTableEnabled { + testStartingVersion(1L) + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + // the gap caused by "alter table" + ((10 until 30) ++ (40 until 50)).map(_.toLong).toDF()) + + // But if we start a new query, it should fail. 
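// What the restart test above pins down is a precedence rule: once the checkpoint holds a
// committed offset, that offset is the only source of truth and 'startingVersion' is
// ignored; the option only matters for a brand-new checkpoint. A small sketch with a
// hypothetical CommittedOffset standing in for the offset stored in the checkpoint.
final case class CommittedOffset(reservoirVersion: Long, index: Long)

def resolveStart(
    checkpointedOffset: Option[CommittedOffset],
    startingVersion: Option[Long]): Either[CommittedOffset, Option[Long]] =
  checkpointedOffset match {
    case Some(offset) => Left(offset)           // restart: resume exactly where the last run stopped
    case None         => Right(startingVersion) // fresh query: honour the option (or scan the snapshot)
  }

assert(resolveStart(Some(CommittedOffset(3, -1)), Some(1L)) == Left(CommittedOffset(3, -1)))
assert(resolveStart(None, Some(1L)) == Right(Some(1L)))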
+ val newCheckpointDir = Utils.createTempDir() + try { + assert(intercept[StreamingQueryException] { + testStartingVersion(1L, newCheckpointDir.getCanonicalPath) + }.getMessage.contains("[2, 4]")) + } finally { + Utils.deleteRecursively(newCheckpointDir) + } + } + } + } + } + + testQuietly("startingTimestamp") { + withTempDir { tableDir => + val tablePath = tableDir.getCanonicalPath + val start = 1594795800000L // 2020-07-14 23:50:00 PDT + generateCommits(tablePath, start, start + 20.minutes) + + def testStartingTimestamp(startingTimestamp: String): Unit = { + val q = spark.readStream + .format("delta") + .option("startingTimestamp", startingTimestamp) + .load(tablePath) + .writeStream + .format("memory") + .queryName("startingTimestamp_test") + .start() + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + + for ((startingTimestamp, expected) <- Seq( + "2020-07-14" -> (0 until 20), + "2020-07-14 23:40:00" -> (0 until 20), + "2020-07-14 23:50:00" -> (0 until 20), // the timestamp of version 0 + "2020-07-14 23:50:01" -> (10 until 20), + "2020-07-15" -> (10 until 20), + "2020-07-15 00:00:00" -> (10 until 20), + "2020-07-15 00:10:00" -> (10 until 20)) // the timestamp of version 1 + ) { + withTempView("startingTimestamp_test") { + testStartingTimestamp(startingTimestamp) + checkAnswer( + spark.table("startingTimestamp_test"), + expected.map(_.toLong).toDF()) + } + } + assert(intercept[StreamingQueryException] { + testStartingTimestamp("2020-07-15 00:10:01") + }.getMessage.contains("The provided timestamp (2020-07-15 00:10:01.0) " + + "is after the latest version")) + assert(intercept[StreamingQueryException] { + testStartingTimestamp("2020-07-16") + }.getMessage.contains("The provided timestamp (2020-07-16 00:00:00.0) " + + "is after the latest version")) + assert(intercept[StreamingQueryException] { + testStartingTimestamp("i am not a timestamp") + }.getMessage.contains("The provided timestamp ('i am not a timestamp') " + + "cannot be converted to a valid timestamp")) + + // With non-strict parsing this produces null when casted to a timestamp and then parses + // to 1970-01-01 (unix time 0). + withSQLConf(DeltaSQLConf.DELTA_TIME_TRAVEL_STRICT_TIMESTAMP_PARSING.key -> "false") { + withTempView("startingTimestamp_test") { + testStartingTimestamp("i am not a timestamp") + checkAnswer( + spark.table("startingTimestamp_test"), + (0L until 20L).toDF()) + } + } + + // Create a checkpoint at version 2 and delete version 0 + disableLogCleanup(tablePath) + val deltaLog = DeltaLog.forTable(spark, tablePath) + assert(deltaLog.update().version == 2) + deltaLog.checkpoint() + new File(FileNames.deltaFile(deltaLog.logPath, 0).toUri).delete() + + // Can start from version 1 even if it's not recreatable + // TODO: currently we would error out if we couldn't construct the snapshot to check column + // mapping enable tables. Unblock this once we roll out the proper semantics. 
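// The startingTimestamp expectations above reduce to: start at the first commit whose
// timestamp is at or after the requested time, and fail when the request is newer than every
// commit. A simplified resolver over a version -> commit-time-millis map; the real
// implementation also deals with time zones, strict parsing and snapshot reconstruction.
def resolveStartingTimestamp(requestedMillis: Long, commitTimes: Map[Long, Long]): Long = {
  val candidates = commitTimes.collect { case (version, ts) if ts >= requestedMillis => version }
  if (candidates.isEmpty) {
    throw new IllegalArgumentException(
      s"The provided timestamp ($requestedMillis) is after the latest version of this table")
  }
  candidates.min
}
// With version 0 committed at 23:50:00 and version 1 at 00:10:00 (as generated above),
// 23:50:00 resolves to version 0, 23:50:01 resolves to version 1, and anything after
// 00:10:00 throws.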
+ withStreamingReadOnColumnMappingTableEnabled { + withTempView("startingTimestamp_test") { + testStartingTimestamp("2020-07-14") + checkAnswer( + spark.table("startingTimestamp_test"), + (10 until 20).map(_.toLong).toDF()) + } + } + } + } + + testQuietly("startingVersion and startingTimestamp are both set") { + withTempDir { tableDir => + val tablePath = tableDir.getCanonicalPath + generateCommits(tablePath, 0) + val q = spark.readStream + .format("delta") + .option("startingVersion", 0L) + .option("startingTimestamp", "2020-07-15") + .load(tablePath) + .writeStream + .format("console") + .start() + try { + assert(intercept[StreamingQueryException] { + q.processAllAvailable() + }.getMessage.contains("Please either provide 'startingVersion' or 'startingTimestamp'")) + } finally { + q.stop() + } + } + } + + test("startingVersion: user defined start works with mergeSchema") { + withTempDir { inputDir => + withTempView("startingVersionTest") { + spark.range(10) + .write + .format("delta") + .mode("append") + .save(inputDir.getCanonicalPath) + + // Change schema at version 1 + spark.range(10, 20) + .withColumn("id2", 'id) + .write + .option("mergeSchema", "true") + .format("delta") + .mode("append") + .save(inputDir.getCanonicalPath) + + // Change schema at version 2 + spark.range(20, 30) + .withColumn("id2", 'id) + .withColumn("id3", 'id) + .write + .option("mergeSchema", "true") + .format("delta") + .mode("append") + .save(inputDir.getCanonicalPath) + + // check answer from version 1 + val q = spark.readStream + .format("delta") + .option("startingVersion", "1") + .load(inputDir.getCanonicalPath) + .writeStream + .format("memory") + .queryName("startingVersionTest") + .start() + try { + q.processAllAvailable() + checkAnswer( + sql("select * from startingVersionTest"), + ((10 until 20).map(x => (x.toLong, x.toLong, None.toString)) ++ + (20 until 30).map(x => (x.toLong, x.toLong, x.toString))) + .toDF("id", "id2", "id3") + .selectExpr("id", "id2", "cast(id3 as long) as id3") + ) + } finally { + q.stop() + } + } + } + } + + test("startingVersion latest") { + withTempDir { dir => + withTempView("startingVersionTest") { + val path = dir.getAbsolutePath + spark.range(0, 10).write.format("delta").save(path) + val q = spark.readStream + .format("delta") + .option("startingVersion", "latest") + .load(path) + .writeStream + .format("memory") + .queryName("startingVersionLatest") + .start() + try { + // Starting from latest shouldn't include any data at first, even the most recent version. + q.processAllAvailable() + checkAnswer(sql("select * from startingVersionLatest"), Seq.empty) + + // After we add some batches the stream should continue as normal. + spark.range(10, 15).write.format("delta").mode("append").save(path) + q.processAllAvailable() + checkAnswer(sql("select * from startingVersionLatest"), (10 until 15).map(Row(_))) + spark.range(15, 20).write.format("delta").mode("append").save(path) + spark.range(20, 25).write.format("delta").mode("append").save(path) + q.processAllAvailable() + checkAnswer(sql("select * from startingVersionLatest"), (10 until 25).map(Row(_))) + } finally { + q.stop() + } + } + } + } + + test("startingVersion latest defined before started") { + withTempDir { dir => + withTempView("startingVersionTest") { + val path = dir.getAbsolutePath + spark.range(0, 10).write.format("delta").save(path) + // Define the stream, but don't start it, before a second write. 
The startingVersion + // latest should be resolved when the query *starts*, so there'll be no data even though + // some was added after the stream was defined. + val streamDef = spark.readStream + .format("delta") + .option("startingVersion", "latest") + .load(path) + .writeStream + .format("memory") + .queryName("startingVersionLatest") + spark.range(10, 20).write.format("delta").mode("append").save(path) + val q = streamDef.start() + + try { + q.processAllAvailable() + checkAnswer(sql("select * from startingVersionLatest"), Seq.empty) + spark.range(20, 25).write.format("delta").mode("append").save(path) + q.processAllAvailable() + checkAnswer(sql("select * from startingVersionLatest"), (20 until 25).map(Row(_))) + } finally { + q.stop() + } + } + } + } + + test("startingVersion latest works on defined but empty table") { + withTempDir { dir => + withTempView("startingVersionTest") { + val path = dir.getAbsolutePath + spark.range(0).write.format("delta").save(path) + val streamDef = spark.readStream + .format("delta") + .option("startingVersion", "latest") + .load(path) + .writeStream + .format("memory") + .queryName("startingVersionLatest") + val q = streamDef.start() + + try { + q.processAllAvailable() + checkAnswer(sql("select * from startingVersionLatest"), Seq.empty) + spark.range(0, 5).write.format("delta").mode("append").save(path) + q.processAllAvailable() + checkAnswer(sql("select * from startingVersionLatest"), (0 until 5).map(Row(_))) + } finally { + q.stop() + } + } + } + } + + test("startingVersion latest calls update when starting") { + withTempDir { dir => + withTempView("startingVersionTest") { + val path = dir.getAbsolutePath + spark.range(0).write.format("delta").save(path) + + val streamDef = spark.readStream + .format("delta") + .option("startingVersion", "latest") + .load(path) + .writeStream + .format("memory") + .queryName("startingVersionLatest") + val log = DeltaLog.forTable(spark, path) + val originalSnapshot = log.snapshot + val timestamp = System.currentTimeMillis() + + // We write out some new data, and then do a dirty reflection hack to produce an un-updated + // Delta log. The stream should still update when started and not produce any data. + spark.range(10).write.format("delta").mode("append").save(path) + // The field is actually declared in the SnapshotManagement trait, but because traits don't + // exist in the JVM DeltaLog is where it ends up in reflection. 
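// These "startingVersion = latest" tests fix two behaviours: the keyword is resolved against
// the table version at the moment the query *starts* (not when the stream is defined), and
// nothing already committed is replayed. A rough model of the option handling; the sealed
// trait and function names are illustrative, not Delta's actual option parser.
sealed trait StartingVersionSpec
case object StartAtLatest extends StartingVersionSpec
final case class StartAtVersion(version: Long) extends StartingVersionSpec

def parseStartingVersion(raw: String): StartingVersionSpec =
  if (raw.equalsIgnoreCase("latest")) StartAtLatest else StartAtVersion(raw.toLong)

def firstVersionToStream(spec: StartingVersionSpec, latestVersionAtStart: Long): Long =
  spec match {
    case StartAtLatest           => latestVersionAtStart + 1 // skip everything already committed
    case StartAtVersion(version) => version
  }
// Defining the stream while the table is at version 0 but starting it at version 1 therefore
// begins streaming from version 2, which is why writes made before start() never show up.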
+ val snapshotField = classOf[DeltaLog].getDeclaredField("currentSnapshot") + snapshotField.setAccessible(true) + snapshotField.set(log, CapturedSnapshot(originalSnapshot, timestamp)) + + val q = streamDef.start() + + try { + q.processAllAvailable() + checkAnswer(sql("select * from startingVersionLatest"), Seq.empty) + } finally { + q.stop() + } + } + } + } + + test("startingVersion should work with rate time") { + withTempDir { dir => + withTempView("startingVersionWithRateLimit") { + val path = dir.getAbsolutePath + // Create version 0 and version 1 and each version has two files + spark.range(0, 5).repartition(2).write.mode("append").format("delta").save(path) + spark.range(5, 10).repartition(2).write.mode("append").format("delta").save(path) + + val q = spark.readStream + .format("delta") + .option("startingVersion", 1) + .option("maxFilesPerTrigger", 1) + .load(path) + .writeStream + .format("memory") + .queryName("startingVersionWithRateLimit") + .start() + try { + q.processAllAvailable() + checkAnswer(sql("select * from startingVersionWithRateLimit"), (5 until 10).map(Row(_))) + val id = DeltaLog.forTable(spark, path).snapshot.metadata.id + val endOffsets = q.recentProgress + .map(_.sources(0).endOffset) + .map(offsetJson => DeltaSourceOffset( + id, + SerializedOffset(offsetJson) + )) + assert(endOffsets.toList == + DeltaSourceOffset(id, 1, 0, isInitialSnapshot = false) + // When we reach the end of version 1, we will jump to version 2 with index -1 + :: DeltaSourceOffset(id, 2, DeltaSourceOffset.BASE_INDEX, isInitialSnapshot = false) + :: Nil) + } finally { + q.stop() + } + } + } + } + + testQuietly("SC-46515: deltaSourceIgnoreChangesError contains removeFile, version, tablePath") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + Seq(1, 2, 3).toDF("x").write.format("delta").save(inputDir.toString) + val df = spark.readStream.format("delta").load(inputDir.toString) + df.writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.toString) + .start(outputDir.toString) + .processAllAvailable() + + // Overwrite values, causing AddFile & RemoveFile actions to be triggered + Seq(1, 2, 3).toDF("x") + .write + .mode("overwrite") + .format("delta") + .save(inputDir.toString) + + val e = intercept[StreamingQueryException] { + val q = df.writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.toString) + // DeltaOptions.IGNORE_CHANGES_OPTION is false by default + .start(outputDir.toString) + + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + + assert(e.getCause.isInstanceOf[UnsupportedOperationException]) + assert(e.getCause.getMessage.contains( + "This is currently not supported. 
If you'd like to ignore updates, set the option " + + "'skipChangeCommits' to 'true'.")) + assert(e.getCause.getMessage.contains("for example")) + assert(e.getCause.getMessage.contains("version")) + assert(e.getCause.getMessage.matches(s".*$inputDir.*")) + } + } + + testQuietly("SC-46515: deltaSourceIgnoreDeleteError contains removeFile, version, tablePath") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + Seq(1, 2, 3).toDF("x").write.format("delta").save(inputDir.toString) + val df = spark.readStream.format("delta").load(inputDir.toString) + df.writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.toString) + .start(outputDir.toString) + .processAllAvailable() + + // Delete the table, causing only RemoveFile (not AddFile) actions to be triggered + io.delta.tables.DeltaTable.forPath(spark, inputDir.getAbsolutePath).delete() + + val e = intercept[StreamingQueryException] { + val q = df.writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.toString) + // DeltaOptions.IGNORE_DELETES_OPTION is false by default + .start(outputDir.toString) + + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + + assert(e.getCause.isInstanceOf[UnsupportedOperationException]) + assert(e.getCause.getMessage.contains( + "This is currently not supported. If you'd like to ignore deletes, set the option " + + "'ignoreDeletes' to 'true'.")) + assert(e.getCause.getMessage.contains("for example")) + assert(e.getCause.getMessage.contains("version")) + assert(e.getCause.getMessage.matches(s".*$inputDir.*")) + } + } + + test("fail on data loss - starting from missing files") { + withTempDirs { case (srcData, targetData, chkLocation) => + def addData(): Unit = { + spark.range(10).write.format("delta").mode("append").save(srcData.getCanonicalPath) + } + + addData() + val df = spark.readStream.format("delta").load(srcData.getCanonicalPath) + + val q = df.writeStream.format("delta") + .option("checkpointLocation", chkLocation.getCanonicalPath) + .start(targetData.getCanonicalPath) + q.processAllAvailable() + q.stop() + + addData() + addData() + addData() + + val srcLog = DeltaLog.forTable(spark, srcData) + // Create a checkpoint so that we can create a snapshot without json files before version 3 + srcLog.checkpoint() + // Delete the first file + assert(new File(FileNames.deltaFile(srcLog.logPath, 1).toUri).delete()) + + val e = intercept[StreamingQueryException] { + val q = df.writeStream.format("delta") + .option("checkpointLocation", chkLocation.getCanonicalPath) + .start(targetData.getCanonicalPath) + q.processAllAvailable() + } + assert(e.getCause.getMessage === DeltaErrors.failOnDataLossException(1L, 2L).getMessage) + } + } + + test("fail on data loss - gaps of files") { + withTempDirs { case (srcData, targetData, chkLocation) => + def addData(): Unit = { + spark.range(10).write.format("delta").mode("append").save(srcData.getCanonicalPath) + } + + addData() + val df = spark.readStream.format("delta").load(srcData.getCanonicalPath) + + val q = df.writeStream.format("delta") + .option("checkpointLocation", chkLocation.getCanonicalPath) + .start(targetData.getCanonicalPath) + q.processAllAvailable() + q.stop() + + addData() + addData() + addData() + + val srcLog = DeltaLog.forTable(spark, srcData) + // Create a checkpoint so that we can create a snapshot without json files before version 3 + srcLog.checkpoint() + // Delete the second file + assert(new File(FileNames.deltaFile(srcLog.logPath, 2).toUri).delete()) + + val e = 
intercept[StreamingQueryException] { + val q = df.writeStream.format("delta") + .option("checkpointLocation", chkLocation.getCanonicalPath) + .start(targetData.getCanonicalPath) + q.processAllAvailable() + } + assert(e.getCause.getMessage === DeltaErrors.failOnDataLossException(2L, 3L).getMessage) + } + } + + test("fail on data loss - starting from missing files with option off") { + withTempDirs { case (srcData, targetData, chkLocation) => + def addData(): Unit = { + spark.range(10).write.format("delta").mode("append").save(srcData.getCanonicalPath) + } + + addData() + val df = spark.readStream.format("delta").option("failOnDataLoss", "false") + .load(srcData.getCanonicalPath) + + val q = df.writeStream.format("delta") + .option("checkpointLocation", chkLocation.getCanonicalPath) + .start(targetData.getCanonicalPath) + q.processAllAvailable() + q.stop() + + addData() + addData() + addData() + + val srcLog = DeltaLog.forTable(spark, srcData) + // Create a checkpoint so that we can create a snapshot without json files before version 3 + srcLog.checkpoint() + // Delete the first file + assert(new File(FileNames.deltaFile(srcLog.logPath, 1).toUri).delete()) + + val q2 = df.writeStream.format("delta") + .option("checkpointLocation", chkLocation.getCanonicalPath) + .start(targetData.getCanonicalPath) + q2.processAllAvailable() + q2.stop() + + assert(spark.read.format("delta").load(targetData.getCanonicalPath).count() === 30) + } + } + + test("fail on data loss - gaps of files with option off") { + withTempDirs { case (srcData, targetData, chkLocation) => + def addData(): Unit = { + spark.range(10).write.format("delta").mode("append").save(srcData.getCanonicalPath) + } + + addData() + val df = spark.readStream.format("delta").option("failOnDataLoss", "false") + .load(srcData.getCanonicalPath) + + val q = df.writeStream.format("delta") + .option("checkpointLocation", chkLocation.getCanonicalPath) + .start(targetData.getCanonicalPath) + q.processAllAvailable() + q.stop() + + addData() + addData() + addData() + + val srcLog = DeltaLog.forTable(spark, srcData) + // Create a checkpoint so that we can create a snapshot without json files before version 3 + srcLog.checkpoint() + // Delete the second file + assert(new File(FileNames.deltaFile(srcLog.logPath, 2).toUri).delete()) + + val q2 = df.writeStream.format("delta") + .option("checkpointLocation", chkLocation.getCanonicalPath) + .start(targetData.getCanonicalPath) + q2.processAllAvailable() + q2.stop() + + assert(spark.read.format("delta").load(targetData.getCanonicalPath).count() === 30) + } + } + + test("make sure that the delta sources works fine") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + + import io.delta.implicits._ + + Seq(1, 2, 3).toDF().write.delta(inputDir.toString) + + val df = spark.readStream.delta(inputDir.toString) + + val stream = df.writeStream + .option("checkpointLocation", checkpointDir.toString) + .delta(outputDir.toString) + + stream.processAllAvailable() + stream.stop() + + val writtenStreamDf = spark.read.delta(outputDir.toString) + val expectedRows = Seq(Row(1), Row(2), Row(3)) + + checkAnswer(writtenStreamDf, expectedRows) + } + } + + + test("should not attempt to read a non exist version") { + withTempDirs { (inputDir1, inputDir2, checkpointDir) => + spark.range(1, 2).write.format("delta").save(inputDir1.getCanonicalPath) + spark.range(1, 2).write.format("delta").save(inputDir2.getCanonicalPath) + + def startQuery(): StreamingQuery = { + val df1 = spark.readStream + .format("delta") + 
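// A condensed model of the behaviour covered by the four "fail on data loss" tests above: if
// the commit the stream expects next has already been deleted, the source either fails
// (failOnDataLoss = true, the default) or silently skips ahead to the earliest commit that
// still exists. The exception here is a stand-in for DeltaErrors.failOnDataLossException.
def nextVersionToRead(
    expectedVersion: Long,
    earliestAvailableVersion: Long,
    failOnDataLoss: Boolean): Long = {
  if (earliestAvailableVersion <= expectedVersion) {
    expectedVersion
  } else if (failOnDataLoss) {
    throw new IllegalStateException(
      s"Expected to read version $expectedVersion but the earliest available version is " +
      s"$earliestAvailableVersion; set failOnDataLoss to false to skip the missing data.")
  } else {
    earliestAvailableVersion
  }
}

assert(nextVersionToRead(1L, 2L, failOnDataLoss = false) == 2L)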
.load(inputDir1.getCanonicalPath) + val df2 = spark.readStream + .format("delta") + .load(inputDir2.getCanonicalPath) + df1.union(df2).writeStream + .format("noop") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .start() + } + + var q = startQuery() + try { + q.processAllAvailable() + // current offsets: + // source1: DeltaSourceOffset(reservoirVersion=1,index=0,isInitialSnapshot=true) + // source2: DeltaSourceOffset(reservoirVersion=1,index=0,isInitialSnapshot=true) + + spark.range(1, 2).write.format("delta").mode("append").save(inputDir1.getCanonicalPath) + spark.range(1, 2).write.format("delta").mode("append").save(inputDir2.getCanonicalPath) + q.processAllAvailable() + // current offsets: + // source1: DeltaSourceOffset(reservoirVersion=2,index=-1,isInitialSnapshot=false) + // source2: DeltaSourceOffset(reservoirVersion=2,index=-1,isInitialSnapshot=false) + // Note: version 2 doesn't exist in source1 + + spark.range(1, 2).write.format("delta").mode("append").save(inputDir2.getCanonicalPath) + q.processAllAvailable() + // current offsets: + // source1: DeltaSourceOffset(reservoirVersion=2,index=-1,isInitialSnapshot=false) + // source2: DeltaSourceOffset(reservoirVersion=3,index=-1,isInitialSnapshot=false) + // Note: version 2 doesn't exist in source1 + + q.stop() + // Restart the query. It will call `getBatch` on the previous two offsets of `source1` which + // are both DeltaSourceOffset(reservoirVersion=2,index=-1,isInitialSnapshot=false) + // As version 2 doesn't exist, we should not try to load version 2 in this case. + q = startQuery() + q.processAllAvailable() + } finally { + q.stop() + } + } + } + + test("DeltaSourceOffset.validateOffsets") { + DeltaSourceOffset.validateOffsets( + previousOffset = DeltaSourceOffset( + reservoirId = "foo", + reservoirVersion = 4, + index = 10, + isInitialSnapshot = false), + currentOffset = DeltaSourceOffset( + reservoirId = "foo", + reservoirVersion = 4, + index = 10, + isInitialSnapshot = false) + ) + DeltaSourceOffset.validateOffsets( + previousOffset = DeltaSourceOffset( + reservoirId = "foo", + reservoirVersion = 4, + index = 10, + isInitialSnapshot = false), + currentOffset = DeltaSourceOffset( + reservoirId = "foo", + reservoirVersion = 5, + index = 1, + isInitialSnapshot = false) + ) + + assert(intercept[IllegalStateException] { + DeltaSourceOffset.validateOffsets( + previousOffset = DeltaSourceOffset( + reservoirId = "foo", + reservoirVersion = 4, + index = 10, + isInitialSnapshot = false), + currentOffset = DeltaSourceOffset( + reservoirId = "foo", + reservoirVersion = 4, + index = 10, + isInitialSnapshot = true) + ) + }.getMessage.contains("Found invalid offsets: 'isInitialSnapshot' flipped incorrectly.")) + assert(intercept[IllegalStateException] { + DeltaSourceOffset.validateOffsets( + previousOffset = DeltaSourceOffset( + reservoirId = "foo", + reservoirVersion = 4, + index = 10, + isInitialSnapshot = false), + currentOffset = DeltaSourceOffset( + reservoirId = "foo", + reservoirVersion = 1, + index = 10, + isInitialSnapshot = false) + ) + }.getMessage.contains("Found invalid offsets: 'reservoirVersion' moved back.")) + assert(intercept[IllegalStateException] { + DeltaSourceOffset.validateOffsets( + previousOffset = DeltaSourceOffset( + reservoirId = "foo", + reservoirVersion = 4, + index = 10, + isInitialSnapshot = false), + currentOffset = DeltaSourceOffset( + reservoirId = "foo", + reservoirVersion = 4, + index = 9, + isInitialSnapshot = false) + ) + }.getMessage.contains("Found invalid offsets. 
'index' moved back.")) + } + + test("self union a Delta table should pass the catalog table assert") { + withTable("self_union_delta") { + spark.range(10).write.format("delta").saveAsTable("self_union_delta") + val df = spark.readStream.format("delta").table("self_union_delta") + val q = df.union(df).writeStream.format("noop").start() + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + } + + test("ES-445863: delta source should not hang or reprocess data when using AvailableNow") { + withTempDirs { (inputDir, outputDir, checkpointDir) => + def runQuery(): Unit = { + val q = spark.readStream + .format("delta") + .load(inputDir.getCanonicalPath) + // Require a partition filter. The max index of files matching the partition filter must + // be less than the number of files in the second commit. + .where("part = 0") + .writeStream + .format("delta") + .trigger(Trigger.AvailableNow) + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .start(outputDir.getCanonicalPath) + try { + if (!q.awaitTermination(60000)) { + throw new TimeoutException("the query didn't stop in 60 seconds") + } + } finally { + q.stop() + } + } + + spark.range(0, 1) + .selectExpr("id", "id as part") + .repartition(10) + .write + .partitionBy("part") + .format("delta") + .mode("append") + .save(inputDir.getCanonicalPath) + runQuery() + + spark.range(1, 10) + .selectExpr("id", "id as part") + .repartition(9) + .write + .partitionBy("part") + .format("delta") + .mode("append") + .save(inputDir.getCanonicalPath) + runQuery() + + checkAnswer( + spark.read.format("delta").load(outputDir.getCanonicalPath), + Row(0, 0) :: Nil) + } + } + + test("restarting a query should pick up latest table schema and recover") { + withTempDir { inputDir => + withTempDir { checkpointDir => + spark.range(10) + .write + .format("delta") + .mode("append") + .save(inputDir.getCanonicalPath) + + def startQuery(): StreamingQuery = { + spark.readStream.format("delta") + .load(inputDir.getCanonicalPath) + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("noop") + .start() + } + + var q = startQuery() + try { + q.processAllAvailable() + + // Clear delta log cache + DeltaLog.clearCache() + // Change the table schema using the non-cached `DeltaLog` to mimic the case that the + // table schema change happens on a different cluster + sql(s"ALTER TABLE delta.`${inputDir.getCanonicalPath}` ADD COLUMN newcol STRING") + + // The streaming query should fail when detecting a schema change + val e = intercept[StreamingQueryException] { + q.processAllAvailable() + } + assert(e.getMessage.contains("Detected schema change")) + + // Restarting the query should recover from the schema change error + q = startQuery() + q.processAllAvailable() + } finally { + q.stop() + } + } + } + } + + test("handling nullability schema changes") { + withTable("srcTable") { + withTempDirs { case (srcTblDir, checkpointDir, checkpointDir2) => + def readStream(startingVersion: Option[Long] = None): DataFrame = { + var dsr = spark.readStream + startingVersion.foreach { v => + dsr = dsr.option("startingVersion", v) + } + dsr.table("srcTable") + } + + sql(s""" + |CREATE TABLE srcTable ( + | a STRING NOT NULL, + | b STRING NOT NULL + |) USING DELTA LOCATION '${srcTblDir.getCanonicalPath}' + |""".stripMargin) + sql(""" + |INSERT INTO srcTable + | VALUES ("a", "b") + |""".stripMargin) + + // Initialize the stream to pass the initial snapshot + testStream(readStream())( + StartStream(checkpointLocation = 
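// The DeltaSourceOffset.validateOffsets contract exercised a little earlier boils down to
// three monotonicity rules across consecutive offsets: isInitialSnapshot may never flip back
// to true, the reservoir version may never decrease, and within the same version the index
// may never decrease. A condensed restatement using an illustrative case class:
final case class Off(version: Long, index: Long, isInitialSnapshot: Boolean)

def validateMonotonic(prev: Off, cur: Off): Unit = {
  if (!prev.isInitialSnapshot && cur.isInitialSnapshot)
    throw new IllegalStateException("Found invalid offsets: 'isInitialSnapshot' flipped incorrectly.")
  if (cur.version < prev.version)
    throw new IllegalStateException("Found invalid offsets: 'reservoirVersion' moved back.")
  if (cur.version == prev.version && cur.index < prev.index)
    throw new IllegalStateException("Found invalid offsets. 'index' moved back.")
}

validateMonotonic(Off(4, 10, isInitialSnapshot = false), Off(4, 10, isInitialSnapshot = false))
validateMonotonic(Off(4, 10, isInitialSnapshot = false), Off(5, 1, isInitialSnapshot = false))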
checkpointDir.getCanonicalPath), + ProcessAllAvailable(), + CheckAnswer(("a", "b")) + ) + + // It is ok to relax nullability during streaming post analysis, and restart would fix it. + var v1 = 0L + val clock = new StreamManualClock(System.currentTimeMillis()) + testStream(readStream())( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath, + trigger = ProcessingTimeTrigger(1000), triggerClock = clock), + ProcessAllAvailable(), + // Write more data and drop NOT NULL constraint + Execute { _ => + // A batch of Delta actions + sql(""" + |INSERT INTO srcTable + |VALUES ("c", "d") + |""".stripMargin) + sql("ALTER TABLE srcTable ALTER COLUMN a DROP NOT NULL") + sql(""" + |INSERT INTO srcTable + |VALUES ("e", "f") + |""".stripMargin) + v1 = DeltaLog.forTable(spark, TableIdentifier("srcTable")).update().version + }, + // Process next trigger + AdvanceManualClock(1 * 1000L), + // The query would fail because the read schema has nullable=false but the schema change + // tries to relax it, we cannot automatically move ahead with it. + ExpectFailure[DeltaIllegalStateException](t => + assert(t.getMessage.contains("Detected schema change"))), + Execute { q => + assert(!q.isActive) + }, + // Upon restart, the backfill can work with relaxed nullability read schema + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + ProcessAllAvailable(), + // See how it loads data from across the nullability change without a problem + CheckAnswer(("c", "d"), ("e", "f")) + ) + + // However, it is NOT ok to read data with relaxed nullability during backfill, and restart + // would NOT fix it. + val deltaLog = DeltaLog.forTable(spark, TableIdentifier("srcTable")) + deltaLog.withNewTransaction { txn => + val schema = txn.snapshot.metadata.schema + val newSchema = StructType(schema("a").copy(nullable = false) :: schema("b") :: Nil) + txn.commit(txn.metadata.copy(schemaString = newSchema.json) :: Nil, + DeltaOperations.ManualUpdate) + } + sql(""" + |INSERT INTO srcTable + |VALUES ("g", "h") + |""".stripMargin) + // Backfill from the ADD file action prior to the nullable=false, the latest schema has + // nullable = false, but the ADD file has nullable = true, which is not allowed as we don't + // want to show any nulls. + // It queries [INSERT (e, f), nullable=false schema change, INSERT (g, h)] + testStream(readStream(startingVersion = Some(v1)))( + StartStream(checkpointLocation = checkpointDir2.getCanonicalPath), + // See how it is: + // 1. a non-retryable exception as it is a backfill. + // 2. it comes from the new stream start check we added, before this, verifyStreamHygiene + // could not detect because the most recent schema change looks exactly like the latest + // schema. 
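// The nullability scenario above comes down to one compatibility rule: data whose schema
// allows nulls must never be served through a read schema that forbids them, while the
// opposite direction (the read schema is more permissive) is fine and only needs a restart.
// A minimal per-column sketch using a hypothetical Col record instead of Spark's StructField:
final case class Col(name: String, nullable: Boolean)

def canServe(dataSchema: Seq[Col], readSchema: Seq[Col]): Boolean = {
  val readNullable = readSchema.map(c => c.name -> c.nullable).toMap
  dataSchema.forall { c =>
    // A column missing from the read schema is simply not selected, so it cannot leak nulls.
    readNullable.get(c.name).forall(readIsNullable => readIsNullable || !c.nullable)
  }
}

assert(canServe(Seq(Col("a", nullable = false)), Seq(Col("a", nullable = true))))  // relaxing: ok
assert(!canServe(Seq(Col("a", nullable = true)), Seq(Col("a", nullable = false)))) // tightening: not ok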
+ ExpectFailure[DeltaIllegalStateException](t => + assert(t.getMessage.contains("Detected schema change") && + t.getStackTrace.exists( + _.toString.contains("checkReadIncompatibleSchemaChangeOnStreamStartOnce")))) + ) + } + } + } + + test("skip change commits") { + withTempDir { inputDir => + val deltaLog = DeltaLog.forTable(spark, new Path(inputDir.toURI)) + withMetadata(deltaLog, StructType.fromDDL("value STRING")) + + val df = spark.readStream + .format("delta") + .option(DeltaOptions.SKIP_CHANGE_COMMITS_OPTION, value = true) + .load(inputDir.getCanonicalPath) + + testStream(df)( + // Add data to source table + AddToReservoir(inputDir, Seq("keep1", "update1", "drop1").toDF()), + AssertOnQuery { q => q.processAllAvailable(); true }, + CheckAnswer("keep1", "update1", "drop1"), + + // Update and delete rows + UpdateReservoir( + inputDir, + Map("value" -> when($"value" === "update1", "updated1").otherwise($"value")) + ), + DeleteFromReservoir(inputDir, $"value" === "drop1"), + CheckAnswer("keep1", "update1", "drop1"), + + // Merge data into source table + MergeIntoReservoir( + inputDir, + dfToMerge = Seq("keep1", "keep2", "keep3").toDF().as("merge1"), + mergeCondition = $"table.value" === $"merge1.value", + Map.empty + ), + MergeIntoReservoir( + inputDir, + dfToMerge = Seq("updated1", "keep4", "keep5").toDF().as("merge2"), + mergeCondition = $"table.value" === $"merge2.value", + Map("table.value" -> when($"table.value" === "updated1", "newlyUpdated1") + .otherwise($"table.value")) + ), + CheckAnswer( + "keep1", "update1", "drop1", "keep2", "keep3" + ) + ) + } + } +} + +/** + * A FileSystem implementation that returns monotonically increasing timestamps for file creation. + * Note that we may return a different timestamp for the same file. This is okay for the tests + * where we use this though. + */ +class MonotonicallyIncreasingTimestampFS extends RawLocalFileSystem { + private var time: Long = System.currentTimeMillis() + + override def getScheme: String = MonotonicallyIncreasingTimestampFS.scheme + + override def getUri: URI = { + URI.create(s"$getScheme:///") + } + + override def getFileStatus(f: Path): FileStatus = { + val original = super.getFileStatus(f) + time += 1000L + new FileStatus(original.getLen, original.isDirectory, 0, 0, time, f) + } +} + +object MonotonicallyIncreasingTimestampFS { + val scheme = s"MonotonicallyIncreasingTimestampFS" +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuiteBase.scala new file mode 100644 index 00000000000..4a5e353c8ff --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuiteBase.scala @@ -0,0 +1,142 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File + +import org.apache.spark.sql.delta.actions.Format +import org.apache.spark.sql.delta.schema.{SchemaMergingUtils, SchemaUtils} + +import org.apache.spark.sql.{Column, DataFrame} +import org.apache.spark.sql.streaming.StreamTest +import org.apache.spark.sql.types.StructType + +trait DeltaSourceSuiteBase extends StreamTest { + + /** + * Creates 3 temporary directories for use within a function. + * @param f function to be run with created temp directories + */ + protected def withTempDirs(f: (File, File, File) => Unit): Unit = { + withTempDir { file1 => + withTempDir { file2 => + withTempDir { file3 => + f(file1, file2, file3) + } + } + } + } + + /** + * Copy metadata for fields in newSchema from currentSchema + * @param newSchema new schema + * @param currentSchema current schema to reference + * @param columnMappingMode mode for column mapping + * @return updated new schema + */ + protected def copyOverMetadata( + newSchema: StructType, + currentSchema: StructType, + columnMappingMode: DeltaColumnMappingMode): StructType = { + SchemaMergingUtils.transformColumns(newSchema) { (path, field, _) => + val fullName = path :+ field.name + val inSchema = SchemaUtils.findNestedFieldIgnoreCase( + currentSchema, fullName, includeCollections = true + ) + inSchema.map { refField => + val sparkMetadata = DeltaColumnMapping.getColumnMappingMetadata(refField, columnMappingMode) + field.copy(metadata = sparkMetadata) + }.getOrElse { + field + } + } + } + + protected def withMetadata( + deltaLog: DeltaLog, + schema: StructType, + format: String = "parquet", + tableId: Option[String] = None): Unit = { + val txn = deltaLog.startTransaction() + val baseMetadata = tableId.map { tId => txn.metadata.copy(id = tId) }.getOrElse(txn.metadata) + // We need to fill up the missing id/physical name in column mapping mode + // while maintaining existing metadata if there is any + val updatedMetadata = copyOverMetadata( + schema, baseMetadata.schema, + baseMetadata.columnMappingMode) + txn.commit( + DeltaColumnMapping.assignColumnIdAndPhysicalName( + baseMetadata.copy( + schemaString = updatedMetadata.json, + format = Format(format)), + baseMetadata, + isChangingModeOnExistingTable = false, + isOverwritingSchema = false) :: Nil, DeltaOperations.ManualUpdate) + } + + object AddToReservoir { + def apply(path: File, data: DataFrame): AssertOnQuery = + AssertOnQuery { _ => + data.write.format("delta").mode("append").save(path.getAbsolutePath) + true + } + } + + object UpdateReservoir { + def apply(path: File, updateExpression: Map[String, Column]): AssertOnQuery = + AssertOnQuery { _ => + io.delta.tables.DeltaTable.forPath(path.getAbsolutePath).update(updateExpression) + true + } + } + + object DeleteFromReservoir { + def apply(path: File, deleteCondition: Column): AssertOnQuery = + AssertOnQuery { _ => + io.delta.tables.DeltaTable.forPath(path.getAbsolutePath).delete(deleteCondition) + true + } + } + + object MergeIntoReservoir { + def apply(path: File, dfToMerge: DataFrame, mergeCondition: Column, + updateExpression: Map[String, Column]): AssertOnQuery = + AssertOnQuery { _ => + io.delta.tables.DeltaTable + .forPath(path.getAbsolutePath) + .as("table") + .merge(dfToMerge, mergeCondition) + .whenMatched() + .update(updateExpression) + .whenNotMatched() + .insertAll() + .execute() + true + } + } + + object CheckProgress { + def apply(rowsPerBatch: Seq[Int]): AssertOnQuery = + Execute { q => + val progress = q.recentProgress.filter(_.numInputRows != 0) + 
assert(progress.length === rowsPerBatch.size, "Expected batches don't match") + progress.zipWithIndex.foreach { case (p, i) => + assert(p.numInputRows === rowsPerBatch(i), s"Expected rows in batch $i does not match ") + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceTableAPISuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceTableAPISuite.scala new file mode 100644 index 00000000000..35b8a60151a --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceTableAPISuite.scala @@ -0,0 +1,252 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File + +import scala.language.implicitConversions + +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.{AnalysisException, Dataset} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.SessionCatalog.DEFAULT_DATABASE +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.streaming.{StreamingQuery, StreamTest} +import org.apache.spark.util.Utils + +class DeltaSourceTableAPISuite extends StreamTest + with DeltaSQLCommandTest { + + override def beforeAll(): Unit = { + super.beforeAll() + + } + + import testImplicits._ + test("table API") { + withTempDir { tempDir => + val tblName = "my_table" + val dir = tempDir.getAbsolutePath + withTable(tblName) { + spark.range(3).write.format("delta").option("path", dir).saveAsTable(tblName) + + testStream(spark.readStream.table(tblName))( + ProcessAllAvailable(), + CheckAnswer(0, 1, 2) + ) + } + } + } + + test("table API with database") { + withTempDir { tempDir => + val tblName = "my_table" + val dir = tempDir.getAbsolutePath + withTempDatabase { db => + withTable(tblName) { + spark.sql(s"USE $db") + spark.range(3).write.format("delta").option("path", dir).saveAsTable(tblName) + spark.sql(s"USE $DEFAULT_DATABASE") + + testStream(spark.readStream.table(s"$db.$tblName"))( + ProcessAllAvailable(), + CheckAnswer(0, 1, 2) + ) + } + } + } + } + + private def startTableStream( + ds: Dataset[_], + tableName: String, + baseDir: Option[File] = None, + partitionColumns: Seq[String] = Nil, + format: String = "delta"): StreamingQuery = { + val checkpoint = baseDir.map(new File(_, "_checkpoint")) + .getOrElse(Utils.createTempDir().getCanonicalFile) + val dsw = ds.writeStream.format(format).partitionBy(partitionColumns: _*) + baseDir.foreach { output => + dsw.option("path", output.getCanonicalPath) + } + dsw.option("checkpointLocation", checkpoint.getCanonicalPath).toTable(tableName) + } + + test("writeStream.table - create new external table") { + withTempDir { dir => + val memory = MemoryStream[Int] + val tableName = "stream_test" + withTable(tableName) { + val sq = startTableStream(memory.toDS(), tableName, Some(dir)) + memory.addData(1, 2, 3) + sq.processAllAvailable() + + checkDatasetUnorderly( + 
spark.table(tableName).as[Int], + 1, 2, 3) + + checkDatasetUnorderly( + spark.read.format("delta").load(dir.getCanonicalPath).as[Int], + 1, 2, 3) + } + } + } + + test("writeStream.table - create new managed table") { + val memory = MemoryStream[Int] + val tableName = "stream_test" + withTable(tableName) { + val sq = startTableStream(memory.toDS(), tableName, None) + memory.addData(1, 2, 3) + sq.processAllAvailable() + + checkDatasetUnorderly( + spark.table(tableName).as[Int], + 1, 2, 3) + + val path = spark.sessionState.catalog.getTableRawMetadata(TableIdentifier(tableName)).location + checkDatasetUnorderly( + spark.read.format("delta").load(new File(path).getCanonicalPath).as[Int], + 1, 2, 3) + } + } + + test("writeStream.table - create new managed table with database") { + val memory = MemoryStream[Int] + val db = "my_db" + val tableName = s"$db.stream_test" + withDatabase(db) { + sql(s"create database $db") + withTable(tableName) { + val sq = startTableStream(memory.toDS(), tableName, None) + memory.addData(1, 2, 3) + sq.processAllAvailable() + + checkDatasetUnorderly( + spark.table(tableName).as[Int], + 1, 2, 3) + + val path = spark.sessionState.catalog.getTableRawMetadata( + spark.sessionState.sqlParser.parseTableIdentifier(tableName)).location + checkDatasetUnorderly( + spark.read.format("delta").load(new File(path).getCanonicalPath).as[Int], + 1, 2, 3) + } + } + } + + test("writeStream.table - create table from existing output") { + withTempDir { dir => + Seq(4, 5, 6).toDF("value").write.format("delta").save(dir.getCanonicalPath) + val memory = MemoryStream[Int] + val tableName = "stream_test" + withTable(tableName) { + val sq = startTableStream(memory.toDS(), tableName, Some(dir)) + memory.addData(1, 2, 3) + sq.processAllAvailable() + + checkDatasetUnorderly( + spark.table(tableName).as[Int], + 1, 2, 3, 4, 5, 6) + + checkDatasetUnorderly( + spark.read.format("delta").load(dir.getCanonicalPath).as[Int], + 1, 2, 3, 4, 5, 6) + } + } + } + + test("writeStream.table - fail writing into a view") { + val memory = MemoryStream[Int] + val tableName = "stream_test" + withTable(tableName) { + val viewName = tableName + "_view" + withView(viewName) { + Seq(4, 5, 6).toDF("value").write.saveAsTable(tableName) + sql(s"create view $viewName as select * from $tableName") + val e = intercept[AnalysisException] { + startTableStream(memory.toDS(), viewName, None) + } + assert(e.getMessage.contains("views")) + } + } + } + + test("writeStream.table - fail due to different schema than existing Delta table") { + withTempDir { dir => + Seq(4, 5, 6).toDF("id").write.format("delta").save(dir.getCanonicalPath) + val memory = MemoryStream[Int] + val tableName = "stream_test" + withTable(tableName) { + val e = intercept[Exception] { + val sq = startTableStream(memory.toDS(), tableName, Some(dir)) + memory.addData(1, 2, 3) + sq.processAllAvailable() + } + assert(e.getMessage.contains("The specified schema does not match the existing schema")) + } + } + } + + test("writeStream.table - fail due to different partitioning on existing Delta table") { + withTempDir { dir => + Seq(4 -> "a").toDF("id", "key").write.format("delta").save(dir.getCanonicalPath) + val memory = MemoryStream[(Int, String)] + val tableName = "stream_test" + withTable(tableName) { + val e = intercept[Exception] { + val sq = startTableStream( + memory.toDS().toDF("id", "key"), tableName, Some(dir), Seq("key")) + memory.addData(1 -> "a") + sq.processAllAvailable() + } + assert(e.getMessage.contains( + "The specified partitioning does not match the 
existing partitioning")) + } + } + } + + test("writeStream.table - fail writing into an external nonDelta table") { + withTempDir { dir => + val memory = MemoryStream[(Int, String)] + val tableName = "stream_test" + withTable(tableName) { + Seq(1).toDF("value").write.format("parquet") + .option("path", dir.getCanonicalPath).saveAsTable(tableName) + val e = intercept[AnalysisException] { + startTableStream(memory.toDS(), tableName, Some(dir)) + } + assert(e.getMessage.contains("delta")) + } + } + } + + test("writeStream.table - fail writing into an external nonDelta path") { + withTempDir { dir => + val memory = MemoryStream[Int] + val tableName = "stream_test" + withTable(tableName) { + Seq(1).toDF("value").write.mode("append").parquet(dir.getCanonicalPath) + val e = intercept[AnalysisException] { + startTableStream(memory.toDS(), tableName, Some(dir)) + } + assert(e.getMessage.contains("Delta")) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala new file mode 100644 index 00000000000..717cd1fbc5f --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala @@ -0,0 +1,3189 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.{File, FileNotFoundException} +import java.util.concurrent.atomic.AtomicInteger + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.actions.{Action, TableFeatureProtocolUtils} +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.files.TahoeLogFileIndex +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.{DeltaFileOperations, FileNames} +import org.apache.spark.sql.delta.util.FileNames.deltaFile +import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, Path, PathHandle} + +import org.apache.spark.SparkException +import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.expressions.InSet +import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral +import org.apache.spark.sql.catalyst.plans.logical.Filter +import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.functions.{asc, col, expr, lit, map_values, struct} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.StreamingQuery +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.util.Utils + +class DeltaSuite extends QueryTest + with SharedSparkSession + with DeltaColumnMappingTestUtils + with SQLTestUtils + with DeltaSQLCommandTest { + + import testImplicits._ + + private def tryDeleteNonRecursive(fs: FileSystem, path: Path): Boolean = { + try fs.delete(path, false) catch { + case _: FileNotFoundException => true + } + } + + test("handle partition filters and data filters") { + withTempDir { inputDir => + val testPath = inputDir.getCanonicalPath + spark.range(10) + .map(_.toInt) + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .mode("append") + .save(testPath) + + val ds = spark.read.format("delta").load(testPath).as[(Int, Int)] + // partition filter + checkDatasetUnorderly( + ds.where("part = 1"), + 1 -> 1, 3 -> 1, 5 -> 1, 7 -> 1, 9 -> 1) + checkDatasetUnorderly( + ds.where("part = 0"), + 0 -> 0, 2 -> 0, 4 -> 0, 6 -> 0, 8 -> 0) + // data filter + checkDatasetUnorderly( + ds.where("value >= 5"), + 5 -> 1, 6 -> 0, 7 -> 1, 8 -> 0, 9 -> 1) + checkDatasetUnorderly( + ds.where("value < 5"), + 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 0) + // partition filter + data filter + checkDatasetUnorderly( + ds.where("part = 1 and value >= 5"), + 5 -> 1, 7 -> 1, 9 -> 1) + checkDatasetUnorderly( + ds.where("part = 1 and value < 5"), + 1 -> 1, 3 -> 1) + } + } + + test("query with predicates should skip partitions") { + withTempDir { tempDir => + val testPath = tempDir.getCanonicalPath + + // Generate two files in two partitions + spark.range(2) + .withColumn("part", $"id" % 2) + .write + .format("delta") + .partitionBy("part") + .mode("append") + .save(testPath) + + // Read only one partition + val query = spark.read.format("delta").load(testPath).where("part = 1") + val fileScans = query.queryExecution.executedPlan.collect { + case f: FileSourceScanExec => f + } + + // Force 
the query to read files and generate metrics + query.queryExecution.executedPlan.execute().count() + + // Verify only one file was read + assert(fileScans.size == 1) + val numFilesAferPartitionSkipping = fileScans.head.metrics.get("numFiles") + assert(numFilesAferPartitionSkipping.nonEmpty) + assert(numFilesAferPartitionSkipping.get.value == 1) + checkAnswer(query, Seq(Row(1, 1))) + } + } + + test("partition column location should not impact table schema") { + val tableColumns = Seq("c1", "c2") + for (partitionColumn <- tableColumns) { + withTempDir { inputDir => + val testPath = inputDir.getCanonicalPath + Seq(1 -> "a", 2 -> "b").toDF(tableColumns: _*) + .write + .format("delta") + .partitionBy(partitionColumn) + .save(testPath) + val ds = spark.read.format("delta").load(testPath).as[(Int, String)] + checkDatasetUnorderly(ds, 1 -> "a", 2 -> "b") + } + } + } + + test("SC-8078: read deleted directory") { + val tempDir = Utils.createTempDir() + val path = new Path(tempDir.getCanonicalPath) + Seq(1).toDF().write.format("delta").save(tempDir.toString) + + val df = spark.read.format("delta").load(tempDir.toString) + // scalastyle:off deltahadoopconfiguration + val fs = path.getFileSystem(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + fs.delete(path, true) + + val e = intercept[AnalysisException] { + withSQLConf(DeltaSQLConf.DELTA_ASYNC_UPDATE_STALENESS_TIME_LIMIT.key -> "0s") { + checkAnswer(df, Row(1) :: Nil) + } + }.getMessage + assert(e.contains("The schema of your Delta table has changed")) + val e2 = intercept[AnalysisException] { + withSQLConf(DeltaSQLConf.DELTA_ASYNC_UPDATE_STALENESS_TIME_LIMIT.key -> "0s") { + // Define new DataFrame + spark.read.format("delta").load(tempDir.toString).collect() + } + }.getMessage + assert(e2.contains("Path does not exist")) + } + + test("SC-70676: directory deleted before first DataFrame is defined") { + val tempDir = Utils.createTempDir() + val path = new Path(tempDir.getCanonicalPath) + Seq(1).toDF().write.format("delta").save(tempDir.toString) + + // scalastyle:off deltahadoopconfiguration + val fs = path.getFileSystem(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + fs.delete(path, true) + + val e = intercept[AnalysisException] { + spark.read.format("delta").load(tempDir.toString).collect() + }.getMessage + assert(e.contains("Path does not exist")) + } + + test("append then read") { + val tempDir = Utils.createTempDir() + Seq(1).toDF().write.format("delta").save(tempDir.toString) + Seq(2, 3).toDF().write.format("delta").mode("append").save(tempDir.toString) + + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + checkAnswer(data, Row(1) :: Row(2) :: Row(3) :: Nil) + + // append more + Seq(4, 5, 6).toDF().write.format("delta").mode("append").save(tempDir.toString) + checkAnswer(data.toDF(), Row(1) :: Row(2) :: Row(3) :: Row(4) :: Row(5) :: Row(6) :: Nil) + } + + test("partitioned append - nulls") { + val tempDir = Utils.createTempDir() + Seq(Some(1), None).toDF() + .withColumn("is_odd", $"value" % 2 === 1) + .write + .format("delta") + .partitionBy("is_odd") + .save(tempDir.toString) + + val df = spark.read.format("delta").load(tempDir.toString) + + // Verify the correct partitioning schema is picked up + val hadoopFsRelations = df.queryExecution.analyzed.collect { + case LogicalRelation(baseRelation, _, _, _) if + baseRelation.isInstanceOf[HadoopFsRelation] => + baseRelation.asInstanceOf[HadoopFsRelation] + } + assert(hadoopFsRelations.size === 1) + 
assert(hadoopFsRelations.head.partitionSchema.exists(_.name == "is_odd")) + assert(hadoopFsRelations.head.dataSchema.exists(_.name == "value")) + + checkAnswer(df.where("is_odd = true"), Row(1, true) :: Nil) + checkAnswer(df.where("is_odd IS NULL"), Row(null, null) :: Nil) + } + + test("input files should be absolute paths") { + withTempDir { dir => + val basePath = dir.getAbsolutePath + spark.range(10).withColumn("part", 'id % 3) + .write.format("delta").partitionBy("part").save(basePath) + + val df1 = spark.read.format("delta").load(basePath) + val df2 = spark.read.format("delta").load(basePath).where("part = 1") + val df3 = spark.read.format("delta").load(basePath).where("part = 1").limit(3) + + assert(df1.inputFiles.forall(_.contains(basePath))) + assert(df2.inputFiles.forall(_.contains(basePath))) + assert(df3.inputFiles.forall(_.contains(basePath))) + } + } + + test("invalid replaceWhere") { + Seq(true, false).foreach { enabled => + withSQLConf(DeltaSQLConf.REPLACEWHERE_DATACOLUMNS_ENABLED.key -> enabled.toString) { + val tempDir = Utils.createTempDir() + Seq(1, 2, 3, 4).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .write + .format("delta") + .partitionBy("is_odd") + .save(tempDir.toString) + val e1 = intercept[AnalysisException] { + Seq(6).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "is_odd = true") + .save(tempDir.toString) + }.getMessage + assert(e1.contains("does not conform to partial table overwrite condition or constraint")) + + val e2 = intercept[AnalysisException] { + Seq(true).toDF("is_odd") + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "is_odd = true") + .save(tempDir.toString) + }.getMessage + assert(e2.contains( + "Data written into Delta needs to contain at least one non-partitioned")) + + val e3 = intercept[AnalysisException] { + Seq(6).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "not_a_column = true") + .save(tempDir.toString) + }.getMessage + if (enabled) { + assert(e3.contains( + "or function parameter with name `not_a_column` cannot be resolved") || + e3.contains("Column 'not_a_column' does not exist. Did you mean one of " + + "the following? [value, is_odd]")) + } else { + assert(e3.contains( + "Predicate references non-partition column 'not_a_column'. Only the " + + "partition columns may be referenced: [is_odd]")) + } + + val e4 = intercept[AnalysisException] { + Seq(6).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "value = 1") + .save(tempDir.toString) + }.getMessage + if (enabled) { + assert(e4.contains( + "Written data does not conform to partial table overwrite condition " + + "or constraint 'value = 1'")) + } else { + assert(e4.contains("Predicate references non-partition column 'value'. 
Only the " + + "partition columns may be referenced: [is_odd]")) + } + + val e5 = intercept[AnalysisException] { + Seq(6).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "") + .save(tempDir.toString) + }.getMessage + assert(e5.contains("Cannot recognize the predicate ''")) + } + } + } + + test("replaceWhere with rearrangeOnly") { + withTempDir { dir => + Seq(1, 2, 3, 4).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .write + .format("delta") + .partitionBy("is_odd") + .save(dir.toString) + + // dataFilter non empty + val e = intercept[AnalysisException] { + Seq(9).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "is_odd = true and value < 2") + .option(DeltaOptions.DATA_CHANGE_OPTION, "false") + .save(dir.toString) + }.getMessage + assert(e.contains( + "'replaceWhere' cannot be used with data filters when 'dataChange' is set to false")) + + Seq(9).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "is_odd = true") + .option(DeltaOptions.DATA_CHANGE_OPTION, "false") + .save(dir.toString) + checkAnswer( + spark.read.format("delta").load(dir.toString), + Seq(2, 4, 9).toDF().withColumn("is_odd", $"value" % 2 =!= 0)) + } + } + + test("valid replaceWhere") { + Seq(true, false).foreach { enabled => + withSQLConf(DeltaSQLConf.REPLACEWHERE_DATACOLUMNS_ENABLED.key -> enabled.toString) { + Seq(true, false).foreach { partitioned => + // Skip when it's not enabled and not partitioned. + if (enabled || partitioned) { + withTempDir { dir => + val writer = Seq(1, 2, 3, 4).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0) + .write + .format("delta") + + if (partitioned) { + writer.partitionBy("is_odd").save(dir.toString) + } else { + writer.save(dir.toString) + } + + def data: DataFrame = spark.read.format("delta").load(dir.toString) + + Seq(5, 7).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "is_odd = true") + .save(dir.toString) + checkAnswer( + data, + Seq(2, 4, 5, 7).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0)) + + // replaceWhere on non-partitioning columns if enabled. + if (enabled) { + Seq(6, 8).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "is_even = true") + .save(dir.toString) + checkAnswer( + data, + Seq(5, 6, 7, 8).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0)) + + // nothing to be replaced because the condition is false. + Seq(10, 12).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "1 = 2") + .save(dir.toString) + checkAnswer( + data, + Seq(5, 6, 7, 8, 10, 12).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0) + ) + + // replace the whole thing because the condition is true. 
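+              // (Contrast with the '1 = 2' case above: a trivially true predicate matches every
+              // existing row, so this overwrite behaves like a full-table overwrite.)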
+ Seq(10, 12).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "1 = 1") + .save(dir.toString) + checkAnswer( + data, + Seq(10, 12).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .withColumn("is_even", $"value" % 2 === 0) + ) + } + } + } + } + } + } + } + + Seq(false, true).foreach { replaceWhereInDataColumn => + test(s"valid replaceWhere with cdf enabled, " + + s"replaceWhereInDataColumn = $replaceWhereInDataColumn") { + testReplaceWhereWithCdf( + replaceWhereInDataColumn) + } + } + + def testReplaceWhereWithCdf( + replaceWhereInDataColumn: Boolean): Unit = { + withSQLConf( + DeltaSQLConf.REPLACEWHERE_DATACOLUMNS_ENABLED.key -> replaceWhereInDataColumn.toString, + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true") { + withTempDir { dir => + Seq(1, 2, 3, 4).map(i => (i, i + 2)).toDF("key", "value.1") + .withColumn("is_odd", $"`value.1`" % 2 =!= 0) + .withColumn("is_even", $"`value.1`" % 2 === 0) + .coalesce(1) + .write + .format("delta") + .partitionBy("is_odd").save(dir.toString) + + checkAnswer( + CDCReader.changesToBatchDF(DeltaLog.forTable(spark, dir), 0, 0, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(1, 3, true, false, "insert", 0) :: Row(3, 5, true, false, "insert", 0) :: + Row(2, 4, false, true, "insert", 0) :: Row(4, 6, false, true, "insert", 0) :: Nil) + + def data: DataFrame = spark.read.format("delta").load(dir.toString) + + Seq(5, 7).map(i => (i, i + 2)).toDF("key", "value.1") + .withColumn("is_odd", $"`value.1`" % 2 =!= 0) + .withColumn("is_even", $"`value.1`" % 2 === 0) + .coalesce(1) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "is_odd = true") + .save(dir.toString) + checkAnswer( + data, + Seq(2, 4, 5, 7).map(i => (i, i + 2)).toDF("key", "value.1") + .withColumn("is_odd", $"`value.1`" % 2 =!= 0) + .withColumn("is_even", $"`value.1`" % 2 === 0)) + + checkAnswer( + CDCReader.changesToBatchDF(DeltaLog.forTable(spark, dir), 1, 1, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(1, 3, true, false, "delete", 1) :: Row(3, 5, true, false, "delete", 1) :: + Row(5, 7, true, false, "insert", 1) :: Row(7, 9, true, false, "insert", 1) :: Nil) + + if (replaceWhereInDataColumn) { + // replaceWhere on non-partitioning columns if enabled. 
+ Seq((4, 8)).toDF("key", "value.1") + .withColumn("is_odd", $"`value.1`" % 2 =!= 0) + .withColumn("is_even", $"`value.1`" % 2 === 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "key = 4") + .save(dir.toString) + checkAnswer( + data, + Seq((2, 4), (4, 8), (5, 7), (7, 9)).toDF("key", "value.1") + .withColumn("is_odd", $"`value.1`" % 2 =!= 0) + .withColumn("is_even", $"`value.1`" % 2 === 0)) + + checkAnswer( + CDCReader.changesToBatchDF(DeltaLog.forTable(spark, dir), 2, 2, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(4, 6, false, true, "delete", 2) :: Row(4, 8, false, true, "insert", 2) :: Nil) + } + } + } + } + + test("replace arbitrary with multiple references") { + withTempDir { dir => + def data: DataFrame = spark.read.format("delta").load(dir.toString) + + Seq((1, 3, 8), (1, 5, 9)).toDF("a", "b", "c") + .write + .format("delta") + .mode("overwrite") + .save(dir.toString) + + Seq((2, 4, 6)).toDF("a", "b", "c") + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "a + c < 10") + .save(dir.toString) + + checkAnswer( + data, + Seq((1, 5, 9), (2, 4, 6)).toDF("a", "b", "c")) + } + } + + test("replaceWhere with constraint check disabled") { + withSQLConf(DeltaSQLConf.REPLACEWHERE_CONSTRAINT_CHECK_ENABLED.key -> "false") { + withTempDir { dir => + Seq(1, 2, 3, 4).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .write + .format("delta") + .partitionBy("is_odd") + .save(dir.toString) + + def data: DataFrame = spark.read.format("delta").load(dir.toString) + + Seq(6).toDF() + .withColumn("is_odd", $"value" % 2 =!= 0) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "is_odd = true") + .save(dir.toString) + + checkAnswer(data, Seq(2, 4, 6).toDF().withColumn("is_odd", $"value" % 2 =!= 0)) + } + } + } + + Seq(true, false).foreach { p => + test(s"replaceWhere user defined _change_type column doesn't get dropped - partitioned=$p") { + withTable("tab") { + sql( + s"""CREATE TABLE tab USING DELTA + |${if (p) "PARTITIONED BY (part) " else ""} + |TBLPROPERTIES (delta.enableChangeDataFeed = false) + |AS SELECT id, floor(id / 10) AS part, 'foo' as _change_type + |FROM RANGE(1000) + |""".stripMargin) + Seq(33L).map(id => id * 42).toDF("id") + .withColumn("part", expr("floor(id / 10)")) + .withColumn("_change_type", lit("bar")) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "id % 7 = 0") + .saveAsTable("tab") + + sql("SELECT id, _change_type FROM tab").collect().foreach { row => + val _change_type = row.getString(1) + assert(_change_type === "foo" || _change_type === "bar", + s"Invalid _change_type for id=${row.get(0)}") + } + } + } + } + + test("move delta table") { + val tempDir = Utils.createTempDir() + Seq(1, 2, 3).toDS().write.format("delta").mode("append").save(tempDir.toString) + + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + checkAnswer(data.toDF(), Row(1) :: Row(2) :: Row(3) :: Nil) + + // Append files in log path should use relative paths and should work with file renaming. 
+ val targetDir = new File(Utils.createTempDir(), "target") + assert(tempDir.renameTo(targetDir)) + + def data2: DataFrame = spark.read.format("delta").load(targetDir.toString) + checkDatasetUnorderly(data2.toDF().as[Int], 1, 2, 3) + } + + test("append table to itself") { + val tempDir = Utils.createTempDir() + Seq(1, 2, 3).toDS().write.format("delta").mode("append").save(tempDir.toString) + + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + checkDatasetUnorderly(data.toDF.as[Int], 1, 2, 3) + data.write.format("delta").mode("append").save(tempDir.toString) + + checkDatasetUnorderly(data.toDF.as[Int], 1, 1, 2, 2, 3, 3) + } + + test("missing partition columns") { + val tempDir = Utils.createTempDir() + Seq(1, 2, 3).toDF() + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .save(tempDir.toString) + + val e = intercept[Exception] { + Seq(1, 2, 3).toDF() + .write + .format("delta") + .mode("append") + .save(tempDir.toString) + } + assert(e.getMessage contains "Partition column") + assert(e.getMessage contains "part") + assert(e.getMessage contains "not found") + } + + test("batch write: append, overwrite") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(1, 2, 3).toDF + .write + .format("delta") + .mode("append") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.toDF.as[Int], 1, 2, 3) + + Seq(4, 5, 6).toDF + .write + .format("delta") + .mode("overwrite") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.toDF.as[Int], 4, 5, 6) + } + } + + test("batch write: overwrite an empty directory with replaceWhere") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq (1, 3, 5).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .mode("overwrite") + .partitionBy("part") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "part = 1") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.toDF.as[(Int, Int)], 1 -> 1, 3 -> 1, 5 -> 1) + } + } + + test("batch write: append, overwrite where") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq (1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq(1, 5).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "part=1") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.toDF.select($"value".as[Int]), 1, 2, 5) + } + } + + test("batch write: append, dynamic partition overwrite integer partition column") { + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq(1, 5).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value").as[Int], 1, 2, 5) + } + } + } + + test("batch write: append, dynamic partition overwrite string partition column") { + 
withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(("a", "x"), ("b", "y"), ("c", "x")).toDF("value", "part") + .write + .format("delta") + .partitionBy("part") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq(("a", "x"), ("d", "x")).toDF("value", "part") + .write + .format("delta") + .partitionBy("part") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value").as[String], "a", "b", "d") + } + } + } + + test("batch write: append, dynamic partition overwrite string and integer partition column") { + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq((1, "x"), (2, "y"), (3, "z")).toDF("value", "part2") + .withColumn("part1", $"value" % 2) + .write + .format("delta") + .partitionBy("part1", "part2") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq((5, "x"), (7, "y")).toDF("value", "part2") + .withColumn("part1", $"value" % 2) + .write + .format("delta") + .partitionBy("part1", "part2") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value").as[Int], 2, 3, 5, 7) + } + } + } + + test("batch write: append, dynamic partition overwrite overwrites nothing") { + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(("a", "x"), ("b", "y"), ("c", "x")).toDF("value", "part") + .write + .format("delta") + .partitionBy("part") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq(("d", "z")).toDF("value", "part") + .write + .format("delta") + .partitionBy("part") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value", "part").as[(String, String)], + ("a", "x"), ("b", "y"), ("c", "x"), ("d", "z")) + } + } + } + + test("batch write: append, dynamic partition overwrite multiple partition columns") { + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(("a", "x", 1), ("b", "y", 2), ("c", "x", 3)).toDF("part1", "part2", "value") + .write + .format("delta") + .partitionBy("part1", "part2") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq(("a", "x", 4), ("d", "x", 5)).toDF("part1", "part2", "value") + .write + .format("delta") + .partitionBy("part1", "part2") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("part1", "part2", "value").as[(String, String, Int)], + ("a", "x", 4), ("b", "y", 2), ("c", "x", 3), ("d", "x", 5)) + } + } + } + + test("batch write: append, dynamic partition overwrite without partitionBy") { + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + 
.mode("append") + .save(tempDir.getCanonicalPath) + + Seq(1, 5).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value").as[Int], 1, 2, 5) + } + } + } + + test("batch write: append, dynamic partition overwrite conf, replaceWhere takes precedence") { + // when dynamic partition overwrite mode is enabled in the spark configuration, and a + // replaceWhere expression is provided, we delete data according to the replaceWhere expression + withSQLConf( + DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true", + SQLConf.PARTITION_OVERWRITE_MODE.key -> "dynamic") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq((1, "x"), (2, "y"), (3, "z")).toDF("value", "part2") + .withColumn("part1", $"value" % 2) + .write + .format("delta") + .partitionBy("part1", "part2") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq((5, "x")).toDF("value", "part2") + .withColumn("part1", $"value" % 2) + .write + .format("delta") + .partitionBy("part1", "part2") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "part1 = 1") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select($"value").as[Int], 2, 5) + } + } + } + + test("batch write: append, replaceWhere + dynamic partition overwrite enabled in options") { + // when dynamic partition overwrite mode is enabled in the DataFrameWriter options, and + // a replaceWhere expression is provided, we throw an error + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + Seq((1, "x"), (2, "y"), (3, "z")).toDF("value", "part2") + .withColumn("part1", $"value" % 2) + .write + .format("delta") + .partitionBy("part1", "part2") + .mode("append") + .save(tempDir.getCanonicalPath) + + val e = intercept[IllegalArgumentException] { + Seq((3, "x"), (5, "x")).toDF("value", "part2") + .withColumn("part1", $"value" % 2) + .write + .format("delta") + .partitionBy("part1", "part2") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "part1 = 1") + .save(tempDir.getCanonicalPath) + } + assert(e.getMessage === "[DELTA_REPLACE_WHERE_WITH_DYNAMIC_PARTITION_OVERWRITE] " + + "A 'replaceWhere' expression and " + + "'partitionOverwriteMode'='dynamic' cannot both be set in the DataFrameWriter options.") + } + } + } + + test("batch write: append, dynamic partition overwrite set via conf") { + withSQLConf( + DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true", + SQLConf.PARTITION_OVERWRITE_MODE.key -> "dynamic") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq(1, 5).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .mode("overwrite") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value").as[Int], 1, 2, 5) + } + } + } + + test("batch write: append, dynamic partition overwrite set via conf and overridden via option") { + withSQLConf( + DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true", + SQLConf.PARTITION_OVERWRITE_MODE.key -> "dynamic") { + withTempDir { tempDir => + def data: 
DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq(1, 5).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .partitionBy("part") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "static") + .mode("overwrite") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value").as[Int], 1, 5) + } + } + } + + test("batch write: append, overwrite without partitions should ignore partition overwrite mode") { + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq(1, 5).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value").as[Int], 1, 5) + } + } + } + + test("batch write: append, overwrite non-partitioned table with replaceWhere ignores partition " + + "overwrite mode option") { + // we check here that setting both replaceWhere and dynamic partition overwrite in the + // DataFrameWriter options is allowed for a non-partitioned table + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(1, 2, 3).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq(1, 5).toDF + .withColumn("part", $"value" % 2) + .write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "part = 1") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value").as[Int], 1, 2, 5) + } + } + } + + test("batch write: append, dynamic partition with 'partitionValues' column") { + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(1, 2, 3).toDF + .withColumn("partitionValues", $"value" % 2) + .write + .format("delta") + .partitionBy("partitionValues") + .mode("append") + .save(tempDir.getCanonicalPath) + + Seq(1, 5).toDF + .withColumn("partitionValues", $"value" % 2) + .write + .format("delta") + .partitionBy("partitionValues") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("value").as[Int], 1, 2, 5) + } + } + } + + test("batch write: ignore") { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(1, 2, 3).toDF + .write + .format("delta") + .mode("ignore") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.toDF.as[Int], 1, 2, 3) + + // The following data will be ignored + Seq(4, 5, 6).toDF + .write + .format("delta") + .mode("ignore") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.toDF.as[Int], 1, 2, 3) + } + } + + test("batch write: error") { + withTempDir { tempDir => + def data: DataFrame = 
spark.read.format("delta").load(tempDir.toString) + + Seq(1, 2, 3).toDF + .write + .format("delta") + .mode("error") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.toDF.as[Int], 1, 2, 3) + + val e = intercept[AnalysisException] { + Seq(4, 5, 6).toDF + .write + .format("delta") + .mode("error") + .save(tempDir.getCanonicalPath) + } + assert(e.getMessage.contains("Cannot write to already existent path")) + } + } + + testQuietly("creating log should not create the log directory") { + withTempDir { tempDir => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + + // Creating an empty log should not create the directory + assert(!tempDir.exists()) + + // Writing to table should create the directory + Seq(1, 2, 3).toDF + .write + .format("delta") + .save(tempDir.getCanonicalPath) + + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + checkDatasetUnorderly(data.toDF.as[Int], 1, 2, 3) + } + } + + test("read via data source API when the directory doesn't exist") { + withTempDir { tempDir => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + + // a batch query should fail at once + var e = intercept[AnalysisException] { + spark.read + .format("delta") + .load(tempDir.getCanonicalPath) + .show() + } + + assert(e.getMessage.contains("Path does not exist")) + assert(e.getMessage.contains(tempDir.getCanonicalPath)) + + assert(!tempDir.exists()) + + // a streaming query will also fail but it's because there is no schema + e = intercept[AnalysisException] { + spark.readStream + .format("delta") + .load(tempDir.getCanonicalPath) + } + assert(e.getMessage.contains("Table schema is not set")) + assert(e.getMessage.contains("CREATE TABLE")) + } + } + + test("write via data source API when the directory doesn't exist") { + withTempDir { tempDir => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + + // a batch query should create the output directory automatically + Seq(1, 2, 3).toDF + .write + .format("delta").save(tempDir.getCanonicalPath) + checkDatasetUnorderly( + spark.read.format("delta").load(tempDir.getCanonicalPath).as[Int], + 1, 2, 3) + + Utils.deleteRecursively(tempDir) + assert(!tempDir.exists()) + + // a streaming query should create the output directory automatically + val input = MemoryStream[Int] + val q = input.toDF + .writeStream + .format("delta") + .option( + "checkpointLocation", + Utils.createTempDir(namePrefix = "tahoe-test").getCanonicalPath) + .start(tempDir.getCanonicalPath) + try { + input.addData(1, 2, 3) + q.processAllAvailable() + checkDatasetUnorderly( + spark.read.format("delta").load(tempDir.getCanonicalPath).as[Int], + 1, 2, 3) + } finally { + q.stop() + } + } + } + + test("support partitioning with batch data source API - append") { + withTempDir { tempDir => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + + spark.range(100).select('id, 'id % 4 as "by4", 'id % 8 as "by8") + .write + .format("delta") + .partitionBy("by4", "by8") + .save(tempDir.toString) + + val files = spark.read.format("delta").load(tempDir.toString).inputFiles + + val deltaLog = loadDeltaLog(tempDir.getAbsolutePath) + assertPartitionExists("by4", deltaLog, files) + assertPartitionExists("by8", deltaLog, files) + } + } + + test("support removing partitioning") { + withTempDir { tempDir => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + + spark.range(100).select('id, 'id % 4 as "by4") + .write + .format("delta") + .partitionBy("by4") + .save(tempDir.toString) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + 
assert(deltaLog.snapshot.metadata.partitionColumns === Seq("by4")) + + spark.read.format("delta").load(tempDir.toString).write + .option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .format("delta") + .mode(SaveMode.Overwrite) + .save(tempDir.toString) + + assert(deltaLog.snapshot.metadata.partitionColumns === Nil) + } + } + + test("columns with commas as partition columns") { + withTempDir { tempDir => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + + val dfw = spark.range(100).select('id, 'id % 4 as "by,4") + .write + .format("delta") + .partitionBy("by,4") + + // if in column mapping mode, we should not expect invalid character errors + if (!columnMappingEnabled) { + val e = intercept[AnalysisException] { + dfw.save(tempDir.toString) + } + assert(e.getMessage.contains("invalid character(s)")) + } + + withSQLConf(DeltaSQLConf.DELTA_PARTITION_COLUMN_CHECK_ENABLED.key -> "false") { + dfw.save(tempDir.toString) + } + + // Note: although we are able to write, we cannot read the table with Spark 3.2+ with + // OSS Delta 1.1.0+ because SPARK-36271 adds a column name check in the read path. + } + } + + test("throw exception when users are trying to write in batch with different partitioning") { + withTempDir { tempDir => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + + spark.range(100).select('id, 'id % 4 as "by4", 'id % 8 as "by8") + .write + .format("delta") + .partitionBy("by4", "by8") + .save(tempDir.toString) + + val e = intercept[AnalysisException] { + spark.range(100).select('id, 'id % 4 as "by4") + .write + .format("delta") + .partitionBy("by4") + .mode("append") + .save(tempDir.toString) + } + assert(e.getMessage.contains("Partition columns do not match")) + } + } + + test("incompatible schema merging throws errors") { + withTempDir { tempDir => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + + spark.range(100).select('id, ('id * 3).cast("string") as "value") + .write + .format("delta") + .save(tempDir.toString) + + val e = intercept[AnalysisException] { + spark.range(100).select('id, 'id * 3 as "value") + .write + .format("delta") + .mode("append") + .save(tempDir.toString) + } + assert(e.getMessage.contains("incompatible")) + } + } + + test("support partitioning with batch data source API - overwrite") { + withTempDir { tempDir => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + + spark.range(100).select('id, 'id % 4 as "by4") + .write + .format("delta") + .partitionBy("by4") + .save(tempDir.toString) + + val files = spark.read.format("delta").load(tempDir.toString).inputFiles + + val deltaLog = loadDeltaLog(tempDir.getAbsolutePath) + assertPartitionExists("by4", deltaLog, files) + + spark.range(101, 200).select('id, 'id % 4 as "by4", 'id % 8 as "by8") + .write + .format("delta") + .option(DeltaOptions.MERGE_SCHEMA_OPTION, "true") + .mode("overwrite") + .save(tempDir.toString) + + checkAnswer( + spark.read.format("delta").load(tempDir.toString), + spark.range(101, 200).select('id, 'id % 4 as "by4", 'id % 8 as "by8")) + } + } + + test("overwrite and replaceWhere should check partitioning compatibility") { + withTempDir { tempDir => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + + spark.range(100).select('id, 'id % 4 as "by4") + .write + .format("delta") + .partitionBy("by4") + .save(tempDir.toString) + + val files = spark.read.format("delta").load(tempDir.toString).inputFiles + + val deltaLog = loadDeltaLog(tempDir.getAbsolutePath) + assertPartitionExists("by4", deltaLog, files) + + val e = intercept[AnalysisException] { + 
spark.range(101, 200).select('id, 'id % 4 as "by4", 'id % 8 as "by8") + .write + .format("delta") + .partitionBy("by4", "by8") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "by4 > 0") + .mode("overwrite") + .save(tempDir.toString) + } + assert(e.getMessage.contains("Partition columns do not match")) + } + } + + test("can't write out with all columns being partition columns") { + withTempDir { tempDir => + SaveMode.values().foreach { mode => + if (tempDir.exists()) { + assert(tempDir.delete()) + } + + val e = intercept[AnalysisException] { + spark.range(100).select('id, 'id % 4 as "by4") + .write + .format("delta") + .partitionBy("by4", "id") + .mode(mode) + .save(tempDir.toString) + } + assert(e.getMessage.contains("Cannot use all columns for partition columns")) + } + } + } + + test("SC-8727 - default snapshot num partitions") { + withTempDir { tempDir => + spark.range(10).write.format("delta").save(tempDir.toString) + val deltaLog = DeltaLog.forTable(spark, tempDir) + val numParts = spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_SNAPSHOT_PARTITIONS).get + assert(deltaLog.snapshot.stateDS.rdd.getNumPartitions == numParts) + } + } + + test("SC-8727 - can't set negative num partitions") { + withTempDir { tempDir => + val caught = intercept[IllegalArgumentException] { + withSQLConf(("spark.databricks.delta.snapshotPartitions", "-1")) {} + } + + assert(caught.getMessage.contains("Delta snapshot partition number must be positive.")) + } + } + + test("SC-8727 - reconfigure num partitions") { + withTempDir { tempDir => + withSQLConf(("spark.databricks.delta.snapshotPartitions", "410")) { + spark.range(10).write.format("delta").save(tempDir.toString) + val deltaLog = DeltaLog.forTable(spark, tempDir) + assert(deltaLog.snapshot.stateDS.rdd.getNumPartitions == 410) + } + } + } + + test("SC-8727 - can't set zero num partitions") { + withTempDir { tempDir => + val caught = intercept[IllegalArgumentException] { + withSQLConf(("spark.databricks.delta.snapshotPartitions", "0")) {} + } + + assert(caught.getMessage.contains("Delta snapshot partition number must be positive.")) + } + } + + testQuietly("SC-8810: skip deleted file") { + withSQLConf( + ("spark.sql.files.ignoreMissingFiles", "true")) { + withTempDir { tempDir => + val tempDirPath = new Path(tempDir.getCanonicalPath) + Seq(1).toDF().write.format("delta").mode("append").save(tempDir.toString) + Seq(2, 2).toDF().write.format("delta").mode("append").save(tempDir.toString) + Seq(4).toDF().write.format("delta").mode("append").save(tempDir.toString) + Seq(5).toDF().write.format("delta").mode("append").save(tempDir.toString) + + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + val deltaLog = DeltaLog.forTable(spark, tempDir) + + // The file names are opaque. To identify which one we're deleting, we ensure that only one + // append has 2 partitions, and give them the same value so we know what was deleted. + val inputFiles = TahoeLogFileIndex(spark, deltaLog).inputFiles.toSeq + assert(inputFiles.size == 5) + + val filesToDelete = inputFiles.filter(_.split("/").last.startsWith("part-00001")) + assert(filesToDelete.size == 1) + filesToDelete.foreach { f => + val deleted = tryDeleteNonRecursive( + tempDirPath.getFileSystem(deltaLog.newDeltaHadoopConf()), + new Path(tempDirPath, f)) + assert(deleted) + } + + // The single 2 that we deleted should be missing, with the rest of the data still present. 
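+        // (spark.sql.files.ignoreMissingFiles=true makes the scan skip the deleted file
+        // instead of failing the query.)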
+ checkAnswer(data.toDF(), Row(1) :: Row(2) :: Row(4) :: Row(5) :: Nil) + } + } + } + + + testQuietly("SC-8810: skipping deleted file still throws on corrupted file") { + withSQLConf(("spark.sql.files.ignoreMissingFiles", "true")) { + withTempDir { tempDir => + val tempDirPath = new Path(tempDir.getCanonicalPath) + Seq(1).toDF().write.format("delta").mode("append").save(tempDir.toString) + Seq(2, 2).toDF().write.format("delta").mode("append").save(tempDir.toString) + Seq(4).toDF().write.format("delta").mode("append").save(tempDir.toString) + Seq(5).toDF().write.format("delta").mode("append").save(tempDir.toString) + + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + val deltaLog = DeltaLog.forTable(spark, tempDir) + + // The file names are opaque. To identify which one we're deleting, we ensure that only one + // append has 2 partitions, and give them the same value so we know what was deleted. + val inputFiles = TahoeLogFileIndex(spark, deltaLog).inputFiles.toSeq + assert(inputFiles.size == 5) + + val filesToCorrupt = inputFiles.filter(_.split("/").last.startsWith("part-00001")) + assert(filesToCorrupt.size == 1) + val fs = tempDirPath.getFileSystem(deltaLog.newDeltaHadoopConf()) + filesToCorrupt.foreach { f => + val filePath = new Path(tempDirPath, f) + fs.create(filePath, true).close() + } + + val thrown = intercept[SparkException] { + data.toDF().collect() + } + assert(thrown.getMessage.contains("is not a Parquet file")) + } + } + } + + testQuietly("SC-8810: skip multiple deleted files") { + withSQLConf(("spark.sql.files.ignoreMissingFiles", "true")) { + withTempDir { tempDir => + val tempDirPath = new Path(tempDir.getCanonicalPath) + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + val deltaLog = DeltaLog.forTable(spark, tempDir) + + Range(0, 10).foreach(n => + Seq(n).toDF().write.format("delta").mode("append").save(tempDir.toString)) + + val inputFiles = TahoeLogFileIndex(spark, deltaLog).inputFiles.toSeq + + val filesToDelete = inputFiles.take(4) + filesToDelete.foreach { f => + val deleted = tryDeleteNonRecursive( + tempDirPath.getFileSystem(deltaLog.newDeltaHadoopConf()), + new Path(tempDirPath, f)) + assert(deleted) + } + + // We don't have a good way to tell which specific values got deleted, so just check that + // the right number remain. (Note that this works because there's 1 value per append, which + // means 1 value per file.) 
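+        // 10 single-row appends minus the 4 deleted files leaves 6 rows.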
+ assert(data.toDF().collect().size == 6) + } + } + } + + testQuietly("deleted files cause failure by default") { + withTempDir { tempDir => + val tempDirPath = new Path(tempDir.getCanonicalPath) + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + val deltaLog = DeltaLog.forTable(spark, tempDir) + + Range(0, 10).foreach(n => + Seq(n).toDF().write.format("delta").mode("append").save(tempDir.toString)) + + val inputFiles = TahoeLogFileIndex(spark, deltaLog).inputFiles.toSeq + val fileToDelete = inputFiles.head + val pathToDelete = new Path(tempDirPath, fileToDelete) + val deleted = tryDeleteNonRecursive( + tempDirPath.getFileSystem(deltaLog.newDeltaHadoopConf()), pathToDelete) + assert(deleted) + + val thrown = intercept[SparkException] { + data.toDF().collect() + } + assert(thrown.getMessage.contains("FileNotFound")) + } + } + + + test("ES-4716: Delta shouldn't be broken when users turn on case sensitivity") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + withTempDir { tempDir => + // We use a column with the weird name just to make sure that customer configurations still + // work. The original bug was within the `Snapshot` code, where we referred to `metaData` + // as `metadata`. + Seq(1, 2, 3).toDF("aBc").write.format("delta").mode("append").save(tempDir.toString) + + def testDf(columnName: Symbol): Unit = { + DeltaLog.clearCache() + val df = spark.read.format("delta").load(tempDir.getCanonicalPath).select(columnName) + checkDatasetUnorderly(df.as[Int], 1, 2, 3) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + testDf('aBc) + + intercept[AnalysisException] { + testDf('abc) + } + } + testDf('aBc) + testDf('abc) + } + } + } + + test("special chars in base path") { + withTempDir { dir => + val basePath = new File(new File(dir, "some space"), "and#spec*al+ch@rs") + spark.range(10).write.format("delta").save(basePath.getCanonicalPath) + checkAnswer( + spark.read.format("delta").load(basePath.getCanonicalPath), + spark.range(10).toDF() + ) + } + } + + test("get touched files for update, delete and merge") { + withTempDir { dir => + val directory = new File(dir, "test with space") + val df = Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value") + val writer = df.write.format("delta").mode("append") + writer.save(directory.getCanonicalPath) + spark.sql(s"UPDATE delta.`${directory.getCanonicalPath}` SET value = value + 10") + spark.sql(s"DELETE FROM delta.`${directory.getCanonicalPath}` WHERE key = 4") + Seq((3, 30)).toDF("key", "value").createOrReplaceTempView("inbound") + spark.sql(s"""|MERGE INTO delta.`${directory.getCanonicalPath}` AS base + |USING inbound + |ON base.key = inbound.key + |WHEN MATCHED THEN UPDATE SET base.value = + |base.value+inbound.value""".stripMargin) + spark.sql(s"UPDATE delta.`${directory.getCanonicalPath}` SET value = 40 WHERE key = 1") + spark.sql(s"DELETE FROM delta.`${directory.getCanonicalPath}` WHERE key = 2") + checkAnswer( + spark.read.format("delta").load(directory.getCanonicalPath), + Seq((1, 40), (3, 70)).toDF("key", "value") + ) + } + } + + + test("all operations with special characters in path") { + withTempDir { dir => + val directory = new File(dir, "test with space") + val df = Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value") + val writer = df.write.format("delta").mode("append") + writer.save(directory.getCanonicalPath) + + // UPDATE and DELETE + spark.sql(s"UPDATE delta.`${directory.getCanonicalPath}` SET value = 99") + spark.sql(s"DELETE FROM delta.`${directory.getCanonicalPath}` WHERE 
key = 4") + spark.sql(s"DELETE FROM delta.`${directory.getCanonicalPath}` WHERE key = 3") + checkAnswer( + spark.read.format("delta").load(directory.getCanonicalPath), + Seq((1, 99), (2, 99)).toDF("key", "value") + ) + + // INSERT + spark.sql(s"INSERT INTO delta.`${directory.getCanonicalPath}` VALUES (5, 50)") + spark.sql(s"INSERT INTO delta.`${directory.getCanonicalPath}` VALUES (5, 50)") + checkAnswer( + spark.read.format("delta").load(directory.getCanonicalPath), + Seq((1, 99), (2, 99), (5, 50), (5, 50)).toDF("key", "value") + ) + + // MERGE + Seq((1, 1), (3, 88), (5, 88)).toDF("key", "value").createOrReplaceTempView("inbound") + spark.sql( + s"""|MERGE INTO delta.`${directory.getCanonicalPath}` AS base + |USING inbound + |ON base.key = inbound.key + |WHEN MATCHED THEN DELETE + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + checkAnswer( + spark.read.format("delta").load(directory.getCanonicalPath), + Seq((2, 99), (3, 88)).toDF("key", "value") + ) + + // DELETE and INSERT again + spark.sql(s"DELETE FROM delta.`${directory.getCanonicalPath}` WHERE key = 3") + spark.sql(s"INSERT INTO delta.`${directory.getCanonicalPath}` VALUES (5, 99)") + checkAnswer( + spark.read.format("delta").load(directory.getCanonicalPath), + Seq((2, 99), (5, 99)).toDF("key", "value") + ) + + // VACUUM + withSQLConf(DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED.key -> "false") { + spark.sql(s"VACUUM delta.`${directory.getCanonicalPath}` RETAIN 0 HOURS") + } + checkAnswer( + spark.sql(s"SELECT * FROM delta.`${directory.getCanonicalPath}@v8`"), + Seq((2, 99), (5, 99)).toDF("key", "value") + ) + // Version 0 should be lost, as version 1 rewrites the whole file + val ex = intercept[Exception] { + checkAnswer( + spark.sql(s"SELECT * FROM delta.`${directory.getCanonicalPath}@v0`"), + spark.emptyDataFrame + ) + } + var cause = ex.getCause + while (cause.getCause != null) { + cause = cause.getCause + } + assert(cause.getMessage.contains(".parquet does not exist")) + } + } + + test("can't create zero-column table with a write") { + withTempDir { dir => + intercept[AnalysisException] { + Seq(1).toDF("a").drop("a").write.format("delta").save(dir.getAbsolutePath) + } + } + } + + test("SC-10573: InSet operator prunes partitions properly") { + withTempDir { dir => + val path = dir.getCanonicalPath + Seq((1, 1L, "1")).toDS() + .write + .format("delta") + .partitionBy("_2", "_3") + .save(path) + val df = spark.read.format("delta").load(path) + .where("_2 IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)").select("_1") + val condition = df.queryExecution.optimizedPlan.collectFirst { + case f: Filter => f.condition + } + assert(condition.exists(_.isInstanceOf[InSet])) + checkAnswer(df, Row(1)) + } + } + + test("SC-24886: partition columns have correct datatype in metadata scans") { + withTempDir { inputDir => + Seq(("foo", 2019)).toDF("name", "y") + .write.format("delta").partitionBy("y").mode("overwrite") + .save(inputDir.getAbsolutePath) + + // Before the fix, this query would fail because it tried to read strings from the metadata + // partition values as the LONG type that the actual partition columns are. This works now + // because we added a cast. 
+ val df = spark.read.format("delta") + .load(inputDir.getAbsolutePath) + .where( + """cast(format_string("%04d-01-01 12:00:00", y) as timestamp) is not null""".stripMargin) + assert(df.collect().length == 1) + } + } + + test("SC-11332: session isolation for cached delta logs") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + val oldSession = spark + val deltaLog = DeltaLog.forTable(spark, path) + val maxSLL = deltaLog.maxSnapshotLineageLength + + val activeSession = oldSession.newSession() + SparkSession.setActiveSession(activeSession) + activeSession.sessionState.conf.setConf( + DeltaSQLConf.DELTA_MAX_SNAPSHOT_LINEAGE_LENGTH, maxSLL + 1) + + // deltaLog fetches conf from active session + assert(deltaLog.maxSnapshotLineageLength == maxSLL + 1) + + // new session confs don't propagate to old session + assert(maxSLL == + oldSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_MAX_SNAPSHOT_LINEAGE_LENGTH)) + } + } + + test("SC-11198: global configs - save to path") { + withTempDir { dir => + val path = dir.getCanonicalPath + withSQLConf("spark.databricks.delta.properties.defaults.dataSkippingNumIndexedCols" -> "1") { + spark.range(5).write.format("delta").save(path) + + val tableConfigs = DeltaLog.forTable(spark, path).update().metadata.configuration + assert(tableConfigs.get("delta.dataSkippingNumIndexedCols") == Some("1")) + } + } + } + + test("SC-24982 - initial snapshot has zero partitions") { + withTempDir { tempDir => + val deltaLog = DeltaLog.forTable(spark, tempDir) + assert(deltaLog.snapshot.stateDS.rdd.getNumPartitions == 0) + } + } + + test("SC-24982 - initial snapshot does not trigger jobs") { + val jobCount = new AtomicInteger(0) + val listener = new SparkListener { + override def onJobStart(jobStart: SparkListenerJobStart): Unit = { + // Spark will always log a job start/end event even when the job does not launch any task. 
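+ // Count a job only when at least one of its stages actually schedules tasks, so that
+ // such task-less bookkeeping jobs do not trip the zero-job assertion at the end of
+ // this test.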
+ if (jobStart.stageInfos.exists(_.numTasks > 0)) { + jobCount.incrementAndGet() + } + } + } + sparkContext.listenerBus.waitUntilEmpty(15000) + sparkContext.addSparkListener(listener) + try { + withTempDir { tempDir => + val files = DeltaLog.forTable(spark, tempDir).snapshot.stateDS.collect() + assert(files.isEmpty) + } + sparkContext.listenerBus.waitUntilEmpty(15000) + assert(jobCount.get() == 0) + } finally { + sparkContext.removeSparkListener(listener) + } + } + + def lastDeltaHistory(dir: String): DeltaHistory = + io.delta.tables.DeltaTable.forPath(spark, dir).history(1).as[DeltaHistory].head + + test("history includes user-defined metadata for DataFrame.Write API") { + val tempDir = Utils.createTempDir().toString + val df = Seq(2).toDF().write.format("delta").mode("overwrite") + + df.option("userMetadata", "meta1") + .save(tempDir) + + assert(lastDeltaHistory(tempDir).userMetadata === Some("meta1")) + + df.option("userMetadata", "meta2") + .save(tempDir) + + assert(lastDeltaHistory(tempDir).userMetadata === Some("meta2")) + } + + test("history includes user-defined metadata for SQL API") { + val tempDir = Utils.createTempDir().toString + val tblName = "tblName" + + withTable(tblName) { + withSQLConf(DeltaSQLConf.DELTA_USER_METADATA.key -> "meta1") { + spark.sql(s"CREATE TABLE $tblName (data STRING) USING delta LOCATION '$tempDir';") + } + assert(lastDeltaHistory(tempDir).userMetadata === Some("meta1")) + + withSQLConf(DeltaSQLConf.DELTA_USER_METADATA.key -> "meta2") { + spark.sql(s"INSERT INTO $tblName VALUES ('test');") + } + assert(lastDeltaHistory(tempDir).userMetadata === Some("meta2")) + + withSQLConf(DeltaSQLConf.DELTA_USER_METADATA.key -> "meta3") { + spark.sql(s"INSERT INTO $tblName VALUES ('test2');") + } + assert(lastDeltaHistory(tempDir).userMetadata === Some("meta3")) + } + } + + test("history includes user-defined metadata for DF.Write API and config setting") { + val tempDir = Utils.createTempDir().toString + val df = Seq(2).toDF().write.format("delta").mode("overwrite") + + withSQLConf(DeltaSQLConf.DELTA_USER_METADATA.key -> "meta1") { + df.save(tempDir) + } + assert(lastDeltaHistory(tempDir).userMetadata === Some("meta1")) + + withSQLConf(DeltaSQLConf.DELTA_USER_METADATA.key -> "meta2") { + df.option("userMetadata", "optionMeta2") + .save(tempDir) + } + assert(lastDeltaHistory(tempDir).userMetadata === Some("optionMeta2")) + } + + test("history includes user-defined metadata for SQL + DF.Write API") { + val tempDir = Utils.createTempDir().toString + val df = Seq(2).toDF().write.format("delta").mode("overwrite") + + // metadata given in `option` should beat config + withSQLConf(DeltaSQLConf.DELTA_USER_METADATA.key -> "meta1") { + df.option("userMetadata", "optionMeta1") + .save(tempDir) + } + assert(lastDeltaHistory(tempDir).userMetadata === Some("optionMeta1")) + + withSQLConf(DeltaSQLConf.DELTA_USER_METADATA.key -> "meta2") { + df.option("userMetadata", "optionMeta2") + .save(tempDir) + } + assert(lastDeltaHistory(tempDir).userMetadata === Some("optionMeta2")) + } + + test("SC-77958 - history includes user-defined metadata for createOrReplace") { + withTable("tbl") { + spark.range(10).writeTo("tbl").using("delta").option("userMetadata", "meta").createOrReplace() + + val history = sql("DESCRIBE HISTORY tbl LIMIT 1").as[DeltaHistory].head() + assert(history.userMetadata === Some("meta")) + } + } + + test("SC-77958 - history includes user-defined metadata for saveAsTable") { + withTable("tbl") { + spark.range(10).write.format("delta").option("userMetadata", "meta1") + 
.mode("overwrite").saveAsTable("tbl") + + val history = sql("DESCRIBE HISTORY tbl LIMIT 1").as[DeltaHistory].head() + assert(history.userMetadata === Some("meta1")) + } + } + + test("lastCommitVersionInSession - init") { + spark.sessionState.conf.unsetConf(DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION) + withTempDir { tempDir => + + assert(spark.conf.get(DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION) === None) + + Seq(1).toDF + .write + .format("delta") + .save(tempDir.getCanonicalPath) + + assert(spark.conf.get(DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION) === Some(0)) + } + } + + test("lastCommitVersionInSession - SQL") { + spark.sessionState.conf.unsetConf(DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION) + withTempDir { tempDir => + + val k = DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION.key + assert(sql(s"SET $k").head().get(1) === "") + + Seq(1).toDF + .write + .format("delta") + .save(tempDir.getCanonicalPath) + + assert(sql(s"SET $k").head().get(1) === "0") + } + } + + test("lastCommitVersionInSession - SQL only") { + spark.sessionState.conf.unsetConf(DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION) + withTable("test_table") { + val k = DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION.key + assert(sql(s"SET $k").head().get(1) === "") + + sql("CREATE TABLE test_table USING delta AS SELECT * FROM range(10)") + assert(sql(s"SET $k").head().get(1) === "0") + } + } + + test("lastCommitVersionInSession - CONVERT TO DELTA") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + "/table" + spark.range(10).write.format("parquet").save(path) + convertToDelta(s"parquet.`$path`") + + // In column mapping (name mode), we perform convertToDelta with a CONVERT and an ALTER, + // so the version has been updated + val commitVersion = if (columnMappingEnabled) 1 else 0 + assert(spark.conf.get(DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION) === + Some(commitVersion)) + } + } + + test("lastCommitVersionInSession - many writes") { + withTempDir { tempDir => + + for (i <- 0 until 10) { + Seq(i).toDF + .write + .mode("overwrite") + .format("delta") + .save(tempDir.getCanonicalPath) + } + + Seq(10).toDF + .write + .format("delta") + .mode("append") + .save(tempDir.getCanonicalPath) + + assert(spark.conf.get(DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION) === Some(10)) + } + } + + test("lastCommitVersionInSession - new thread writes") { + withTempDir { tempDir => + + Seq(1).toDF + .write + .format("delta") + .mode("overwrite") + .save(tempDir.getCanonicalPath) + + val t = new Thread { + override def run(): Unit = { + Seq(2).toDF + .write + .format("delta") + .mode("overwrite") + .save(tempDir.getCanonicalPath) + } + } + + t.start + t.join + assert(spark.conf.get(DeltaSQLConf.DELTA_LAST_COMMIT_VERSION_IN_SESSION) === Some(1)) + } + } + + test("An external write should be reflected during analysis of a path based query") { + val tempDir = Utils.createTempDir().toString + spark.range(10).coalesce(1).write.format("delta").mode("append").save(tempDir) + spark.range(10, 20).coalesce(1).write.format("delta").mode("append").save(tempDir) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val hadoopConf = deltaLog.newDeltaHadoopConf() + val snapshot = deltaLog.snapshot + val files = snapshot.allFiles.collect() + + // assign physical name to new schema + val newMetadata = if (columnMappingEnabled) { + DeltaColumnMapping.assignColumnIdAndPhysicalName( + snapshot.metadata.copy(schemaString = new StructType().add("data", "bigint").json), + snapshot.metadata, + 
isChangingModeOnExistingTable = false, + isOverwritingSchema = false) + } else { + snapshot.metadata.copy(schemaString = new StructType().add("data", "bigint").json) + } + + // Now make a commit that comes from an "external" writer that deletes existing data and + // changes the schema + val actions = Seq(Action.supportedProtocolVersion(), newMetadata) ++ files.map(_.remove) + deltaLog.store.write( + FileNames.deltaFile(deltaLog.logPath, snapshot.version + 1), + actions.map(_.json).iterator, + overwrite = false, + hadoopConf) + + deltaLog.store.write( + FileNames.deltaFile(deltaLog.logPath, snapshot.version + 2), + files.take(1).map(_.json).iterator, + overwrite = false, + hadoopConf) + + // Since the column `data` doesn't exist in our old files, we read it as null. + checkAnswer( + spark.read.format("delta").load(tempDir), + Seq.fill(10)(Row(null)) + ) + } + + test("isBlindAppend with save and saveAsTable") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + withTable("blind_append") { + sql(s"CREATE TABLE blind_append(value INT) USING delta LOCATION '$path'") // version = 0 + sql("INSERT INTO blind_append VALUES(1)") // version = 1 + spark.read.format("delta").load(path) + .where("value = 1") + .write.mode("append").format("delta").save(path) // version = 2 + checkAnswer(spark.table("blind_append"), Row(1) :: Row(1) :: Nil) + assert(sql("desc history blind_append") + .select("version", "isBlindAppend").head == Row(2, false)) + spark.table("blind_append").where("value = 1").write.mode("append").format("delta") + .saveAsTable("blind_append") // version = 3 + checkAnswer(spark.table("blind_append"), Row(1) :: Row(1) :: Row(1) :: Row(1) :: Nil) + assert(sql("desc history blind_append") + .select("version", "isBlindAppend").head == Row(3, false)) + } + } + } + + test("isBlindAppend with DataFrameWriterV2") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + withTable("blind_append") { + sql(s"CREATE TABLE blind_append(value INT) USING delta LOCATION '$path'") // version = 0 + sql("INSERT INTO blind_append VALUES(1)") // version = 1 + spark.read.format("delta").load(path) + .where("value = 1") + .writeTo("blind_append").append() // version = 2 + checkAnswer(spark.table("blind_append"), Row(1) :: Row(1) :: Nil) + assert(sql("desc history blind_append") + .select("version", "isBlindAppend").head == Row(2, false)) + } + } + } + + test("isBlindAppend with RTAS") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + withTable("blind_append") { + sql(s"CREATE TABLE blind_append(value INT) USING delta LOCATION '$path'") // version = 0 + sql("INSERT INTO blind_append VALUES(1)") // version = 1 + sql("REPLACE TABLE blind_append USING delta AS SELECT * FROM blind_append") // version = 2 + checkAnswer(spark.table("blind_append"), Row(1) :: Nil) + assert(sql("desc history blind_append") + .select("version", "isBlindAppend").head == Row(2, false)) + } + } + } + + test("replaceWhere should support backtick when flag is disabled") { + val table = "replace_where_backtick" + withSQLConf(DeltaSQLConf.REPLACEWHERE_DATACOLUMNS_ENABLED.key -> "false") { + withTable(table) { + // The STRUCT column is added to prevent us from introducing any ambiguity in future + sql(s"CREATE TABLE $table(`a.b` STRING, `c.d` STRING, a STRUCT)" + + s"USING delta PARTITIONED BY (`a.b`)") + Seq(("a", "b", "c")) + .toDF("a.b", "c.d", "ab") + .withColumn("a", struct($"ab".alias("b"))) + .drop("ab") + .write + .format("delta") + // "replaceWhere" should support backtick and remove it 
correctly. Technically, + // "a.b" is not correct, but some users may already use it, + // so we keep supporting both. This is not ambiguous since "replaceWhere" only + // supports partition columns and it doesn't support struct type or map type. + .option("replaceWhere", "`a.b` = 'a' AND a.b = 'a'") + .mode("overwrite") + .saveAsTable(table) + checkAnswer(sql(s"SELECT `a.b`, `c.d`, a.b from $table"), Row("a", "b", "c") :: Nil) + } + } + } + + test("replaceArbitrary should enforce proper usage of backtick") { + val table = "replace_where_backtick" + withTable(table) { + sql(s"CREATE TABLE $table(`a.b` STRING, `c.d` STRING, a STRUCT)" + + s"USING delta PARTITIONED BY (`a.b`)") + + // User has to use backtick properly. If they want to use a.b to match on `a.b`, + // error will be thrown if `a.b` doesn't have the value. + val e = intercept[AnalysisException] { + Seq(("a", "b", "c")) + .toDF("a.b", "c.d", "ab") + .withColumn("a", struct($"ab".alias("b"))) + .drop("ab") + .write + .format("delta") + .option("replaceWhere", "a.b = 'a' AND `a.b` = 'a'") + .mode("overwrite") + .saveAsTable(table) + } + assert(e.getMessage.startsWith("[DELTA_REPLACE_WHERE_MISMATCH] " + + "Written data does not conform to partial table overwrite condition or constraint")) + + Seq(("a", "b", "c"), ("d", "e", "f")) + .toDF("a.b", "c.d", "ab") + .withColumn("a", struct($"ab".alias("b"))) + .drop("ab") + .write + .format("delta") + .mode("overwrite") + .saveAsTable(table) + + // Use backtick properly for `a.b` + Seq(("a", "h", "c")) + .toDF("a.b", "c.d", "ab") + .withColumn("a", struct($"ab".alias("b"))) + .drop("ab") + .write + .format("delta") + .option("replaceWhere", "`a.b` = 'a'") + .mode("overwrite") + .saveAsTable(table) + + checkAnswer(sql(s"SELECT `a.b`, `c.d`, a.b from $table"), + Row("a", "h", "c") :: Row("d", "e", "f") :: Nil) + + // struct field can only be referred by "a.b". + Seq(("a", "b", "c")) + .toDF("a.b", "c.d", "ab") + .withColumn("a", struct($"ab".alias("b"))) + .drop("ab") + .write + .format("delta") + .option("replaceWhere", "a.b = 'c'") + .mode("overwrite") + .saveAsTable(table) + checkAnswer(sql(s"SELECT `a.b`, `c.d`, a.b from $table"), + Row("a", "b", "c") :: Row("d", "e", "f") :: Nil) + } + } + + test("need to update DeltaLog on DataFrameReader.load() code path") { + // Due to possible race conditions (like in mounting/unmounting paths) there might be an initial + // snapshot that gets cached for a table that should have a valid (non-initial) snapshot. In + // such a case we need to call deltaLog.update() in the DataFrame read paths to update the + // initial snapshot to a valid one. + // + // We simulate a cached InitialSnapshot + valid delta table by creating an empty DeltaLog + // (which creates an InitialSnapshot cached for that path) then move an actual Delta table's + // transaction log into the path for the empty log. 
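+ // Concretely: dir1 starts without a _delta_log, so DeltaLog.forTable caches an initial
+ // (empty) snapshot for that path; dir2 then receives a real table and its directory is
+ // renamed to dir1, leaving the cached entry for dir1 stale until the read path calls
+ // update().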
+ val dir1 = Utils.createTempDir() + val dir2 = Utils.createTempDir() + val log = DeltaLog.forTable(spark, dir1) + assert(!log.tableExists) + spark.range(10).write.format("delta").save(dir2.getCanonicalPath) + // rename dir2 to dir1 then read + dir2.renameTo(dir1) + checkAnswer(spark.read.format("delta").load(dir1.getCanonicalPath), spark.range(10).toDF) + } + + test("set metadata upon write") { + withTempDir { inputDir => + val testPath = inputDir.getCanonicalPath + spark.range(10) + .map(_.toInt) + .withColumn("part", $"value" % 2) + .write + .format("delta") + .option("delta.logRetentionDuration", "123 days") + .option("mergeSchema", "true") + .partitionBy("part") + .mode("append") + .save(testPath) + + val deltaLog = DeltaLog.forTable(spark, testPath) + // We need to drop default properties set by subclasses to make this test pass in them + assert(deltaLog.snapshot.metadata.configuration + .filterKeys(!_.startsWith("delta.columnMapping.")).toMap === + Map("delta.logRetentionDuration" -> "123 days")) + } + } + + test("idempotent write: idempotent DataFrame insert") { + withTempDir { tableDir => + spark.conf.set("spark.databricks.delta.write.txnAppId", "insertTest") + + io.delta.tables.DeltaTable.createOrReplace(spark) + .addColumn("col1", "INT") + .addColumn("col2", "INT") + .location(tableDir.getCanonicalPath) + .execute() + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tableDir.getCanonicalPath) + + def runInsert(data: (Int, Int)): Unit = { + Seq(data).toDF("col1", "col2") + .write + .format("delta") + .mode("append") + .save(tableDir.getCanonicalPath) + } + + def assertTable(numRows: Int): Unit = { + val count = deltaTable.toDF.count() + assert(count == numRows) + } + + // run insert (1,1), table should have 1 row (1,1) + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runInsert((1, 1)) + assertTable(1) + // run insert (2,2), table should have 2 rows (1,1),(2,2) + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runInsert((2, 2)) + assertTable(2) + // retry update 2, table should have 2 rows (1,1),(2,2) + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runInsert((2, 2)) + assertTable(2) + // run insert (3,3), table should have 3 rows (1,1),(2,2),(3,3) + spark.conf.set("spark.databricks.delta.write.txnVersion", "3") + runInsert((3, 3)) + assertTable(3) + + // clean up + spark.conf.unset("spark.databricks.delta.write.txnAppId") + spark.conf.unset("spark.databricks.delta.write.txnVersion") + } + } + + test("idempotent write: idempotent SQL insert") { + withTempDir { tableDir => + val tableName = "myInsertTable" + spark.conf.set("spark.databricks.delta.write.txnAppId", "insertTestSQL") + + spark.sql(s"CREATE TABLE $tableName (col1 INT, col2 INT) USING DELTA LOCATION '" + + tableDir.getCanonicalPath + "'") + + def runInsert(data: (Int, Int)): Unit = { + spark.sql(s"INSERT INTO $tableName (col1, col2) VALUES (${data._1}, ${data._2})") + } + + def assertTable(numRows: Int): Unit = { + val count = spark.sql(s"SELECT * FROM $tableName").count() + assert(count == numRows) + } + + // run insert (1,1), table should have 1 row (1,1) + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runInsert((1, 1)) + assertTable(1) + // run insert (2,2), table should have 2 rows (1,1),(2,2) + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runInsert((2, 2)) + assertTable(2) + // retry update 2, table should have 2 rows (1,1),(2,2) + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runInsert((2, 2)) + 
assertTable(2) + // run insert (3,3), table should have 3 rows (1,1),(2,2),(3,3) + spark.conf.set("spark.databricks.delta.write.txnVersion", "3") + runInsert((3, 3)) + assertTable(3) + + // clean up + spark.conf.unset("spark.databricks.delta.write.txnAppId") + spark.conf.unset("spark.databricks.delta.write.txnVersion") + } + } + + test("idempotent write: idempotent DeltaTable merge") { + withTempDir { tableDir => + spark.conf.set("spark.databricks.delta.write.txnAppId", "mergeTest") + + io.delta.tables.DeltaTable.createOrReplace(spark) + .addColumn("col1", "INT") + .addColumn("col2", "INT") + .location(tableDir.getCanonicalPath) + .execute() + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tableDir.getCanonicalPath) + + def runMerge(data: (Int, Int)): Unit = { + val df = Seq(data).toDF("col1", "col2") + deltaTable.as("t") + .merge( + df.as("s"), + "t.col1 = s.col1") + .whenMatched.updateExpr(Map("t.col2" -> "t.col2 + s.col2")) + .whenNotMatched().insertAll() + .execute() + } + + def assertTable(col2Val: Int, numRows: Int): Unit = { + val res1 = deltaTable.toDF.select("col2").where("col1 = 1").collect() + assert(res1.length == numRows) + assert(res1(0).getInt(0) == col2Val) + } + + // merge (1,0) into empty table, table should have 1 row (1,0) + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runMerge((1, 0)) + assertTable(0, 1) + // merge (1,2) into table, table should have 1 row (1,2) + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runMerge((1, 2)) + assertTable(2, 1) + // retry merge 2, table should have 1 row (1,2) + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runMerge((1, 2)) + assertTable(2, 1) + // merge (1,3) into table, table should have 1 row (1,5) + spark.conf.set("spark.databricks.delta.write.txnVersion", "3") + runMerge((1, 3)) + assertTable(5, 1) + + // clean up + spark.conf.unset("spark.databricks.delta.write.txnAppId") + spark.conf.unset("spark.databricks.delta.write.txnVersion") + } + } + + test("idempotent write: idempotent SQL merge") { + def withTempDirs(f: (File, File) => Unit): Unit = { + withTempDir { file1 => + withTempDir { file2 => + f(file1, file2) + } + } + } + + withTempDirs { (tableDir, updateTableDir) => + val targetTableName = "myMergeTable" + val sourceTableName = "updates" + spark.conf.set("spark.databricks.delta.write.txnAppId", "mergeTestSQL") + + spark.sql(s"CREATE TABLE $targetTableName (col1 INT, col2 INT) USING DELTA LOCATION '" + + tableDir.getCanonicalPath + "'") + spark.sql(s"CREATE TABLE $sourceTableName (col1 INT, col2 INT) USING DELTA LOCATION '" + + updateTableDir.getCanonicalPath + "'") + + def runMerge(data: (Int, Int), txnVersion: Int): Unit = { + val df = Seq(data).toDF("col1", "col2") + spark.conf.set("spark.databricks.delta.write.txnVersion", s"$txnVersion") + df.write.format("delta").mode("overwrite").save(updateTableDir.getCanonicalPath) + spark.conf.set("spark.databricks.delta.write.txnVersion", s"$txnVersion") + spark.sql(s""" + |MERGE INTO $targetTableName AS t USING $sourceTableName AS s + | ON t.col1 = s.col1 + | WHEN MATCHED THEN UPDATE SET t.col2 = t.col2 + s.col2 + | WHEN NOT MATCHED THEN INSERT (col1, col2) VALUES (col1, col2) + |""".stripMargin) + } + + def assertTable(col2Val: Int, numRows: Int): Unit = { + val res1 = spark.sql(s"SELECT col2 FROM $targetTableName WHERE col1 = 1").collect() + assert(res1.length == numRows) + assert(res1(0).getInt(0) == col2Val) + } + + // merge (1,0) into empty table, table should have 1 row (1,0) + runMerge((1, 0), 1) + 
assertTable(0, 1) + // merge (1,2) into table, table should have 1 row (1,2) + runMerge((1, 2), 2) + assertTable(2, 1) + // retry merge 2, table should have 1 row (1,2) + runMerge((1, 2), 2) + assertTable( 2, 1) + // merge (1,3) into table, table should have 1 row (1,5) + runMerge((1, 3), 3) + assertTable(5, 1) + + // clean up + spark.conf.unset("spark.databricks.delta.write.txnAppId") + spark.conf.unset("spark.databricks.delta.write.txnVersion") + } + } + + test("idempotent write: idempotent DeltaTable update") { + withTempDir { tableDir => + spark.conf.set("spark.databricks.delta.write.txnAppId", "updateTest") + + io.delta.tables.DeltaTable.createOrReplace(spark) + .addColumn("col1", "INT") + .addColumn("col2", "INT") + .location(tableDir.getCanonicalPath) + .execute() + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tableDir.getCanonicalPath) + spark.conf.set("spark.databricks.delta.write.txnVersion", "0") + Seq((1, 0)).toDF("col1", "col2") + .write.format("delta").mode("append").save(tableDir.getCanonicalPath) + + def runUpdate(data: (Int, Int)): Unit = { + deltaTable.update( + condition = expr(s"col1 == ${data._1}"), + set = Map("col2" -> expr(s"col2 + ${data._2}")) + ) + } + + def assertTable(col2Val: Int, numRows: Int): Unit = { + val res1 = deltaTable.toDF.select("col2").where("col1 = 1").collect() + assert(res1.length == numRows) + assert(res1(0).getInt(0) == col2Val) + } + + // run update (1,1), table should have 1 row (1,1) + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runUpdate((1, 1)) + assertTable(1, 1) + // run update (1,2), table should have 1 row (1,3) + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runUpdate((1, 2)) + assertTable(3, 1) + // retry update 2, table should have 1 row (1,3) + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runUpdate((1, 2)) + assertTable(3, 1) + // retry update 1, table should have 1 row (1,3) + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runUpdate((1, 1)) + assertTable(3, 1) + // run update (1,3) into table, table should have 1 row (1,6) + spark.conf.set("spark.databricks.delta.write.txnVersion", "3") + runUpdate((1, 3)) + assertTable(6, 1) + + // clean up + spark.conf.unset("spark.databricks.delta.write.txnAppId") + spark.conf.unset("spark.databricks.delta.write.txnVersion") + } + } + + test("idempotent write: idempotent SQL update") { + withTempDir { tableDir => + val tableName = "myUpdateTable" + spark.conf.set("spark.databricks.delta.write.txnAppId", "updateTestSQL") + + spark.sql(s"CREATE TABLE $tableName (col1 INT, col2 INT) USING DELTA LOCATION '" + + tableDir.getCanonicalPath + "'") + spark.conf.set("spark.databricks.delta.write.txnVersion", "0") + spark.sql(s"INSERT INTO $tableName (col1, col2) VALUES (1, 0)") + + def runUpdate(data: (Int, Int)): Unit = { + spark.sql(s""" + |UPDATE $tableName SET + | col2 = col2 + ${data._2} WHERE col1 = ${data._1} + """.stripMargin) + } + + def assertTable(col2Val: Int, numRows: Int): Unit = { + val res1 = spark.sql(s"SELECT col2 FROM $tableName WHERE col1 = 1").collect() + assert(res1.length == numRows) + assert(res1(0).getInt(0) == col2Val) + } + + // run update (1,1), table should have 1 row (1,1) + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runUpdate((1, 1)) + assertTable(1, 1) + // run update (1,2), table should have 1 row (1,3) + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runUpdate((1, 2)) + assertTable(3, 1) + // retry update 2, table should have 1 row (1,3) + 
spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runUpdate((1, 2)) + assertTable(3, 1) + // retry update 1, table should have 1 row (1,3) + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runUpdate((1, 1)) + assertTable(3, 1) + // run update (1,3) into table, table should have 1 row (1,6) + spark.conf.set("spark.databricks.delta.write.txnVersion", "3") + runUpdate((1, 3)) + assertTable(6, 1) + + // clean up + spark.conf.unset("spark.databricks.delta.write.txnAppId") + spark.conf.unset("spark.databricks.delta.write.txnVersion") + } + } + + test("idempotent write: idempotent DeltaTable delete") { + withTempDir { tableDir => + spark.conf.set("spark.databricks.delta.write.txnAppId", "deleteTest") + + io.delta.tables.DeltaTable.createOrReplace(spark) + .addColumn("col1", "INT") + .addColumn("col2", "INT") + .location(tableDir.getCanonicalPath) + .execute() + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tableDir.getCanonicalPath) + spark.conf.set("spark.databricks.delta.write.txnVersion", "0") + Seq((1, 0), (2, 0), (3, 0), (4, 0)).toDF("col1", "col2") + .write.format("delta").mode("append").save(tableDir.getCanonicalPath) + + def runDelete(toDelete: Int): Unit = { + deltaTable.delete(s"col1 = $toDelete") + } + + def assertTable(numRows: Int): Unit = { + val rows = deltaTable.toDF.count() + assert(rows == numRows) + } + + // run delete (1), table should have 3 rows (2,0),(3,0),(4,0) + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runDelete(1) + assertTable(3) + // add (1,0) back to table + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + Seq((1, 0)).toDF("col1", "col2") + .write.format("delta").mode("append").save(tableDir.getCanonicalPath) + assertTable(4) + // retry delete 1, table should have 4 rows (2,0),(3,0),(4,0) + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runDelete(1) + assertTable(4) + // run delete (1), table should have 3 rows (2,0),(3,0),(4,0) + spark.conf.set("spark.databricks.delta.write.txnVersion", "3") + runDelete(1) + assertTable(3) + + // clean up + spark.conf.unset("spark.databricks.delta.write.txnAppId") + spark.conf.unset("spark.databricks.delta.write.txnVersion") + } + } + + test("idempotent write: idempotent SQL delete") { + withTempDir { tableDir => + val tableName = "myDeleteTable" + spark.conf.set("spark.databricks.delta.write.txnAppId", "deleteTestSQL") + + spark.sql(s"CREATE TABLE $tableName (col1 INT, col2 INT) USING DELTA LOCATION '" + + tableDir.getCanonicalPath + "'") + spark.conf.set("spark.databricks.delta.write.txnVersion", "0") + spark.sql(s"INSERT INTO $tableName (col1, col2) VALUES (1, 0), (2, 0), (3, 0), (4, 0)") + + def runDelete(toDelete: Int): Unit = { + spark.sql(s"DELETE FROM $tableName WHERE col1 = $toDelete") + } + + def assertTable(numRows: Long): Unit = { + val res1 = spark.sql(s"SELECT COUNT(*) FROM $tableName").collect() + assert(res1.length == 1) + assert(res1(0).getLong(0) == numRows) + } + + // run delete (1), table should have 3 rows (2,0),(3,0),(4,0) + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runDelete(1) + assertTable(3) + // add (1,0) back to table + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + spark.sql(s"INSERT INTO $tableName (col1, col2) VALUES (1, 0)") + assertTable(4) + // retry delete (1), table should have 4 rows (2,0),(3,0),(4,0) + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runDelete(1) + assertTable(4) + // run delete (1), table should have 3 rows 
(2,0),(3,0),(4,0) + spark.conf.set("spark.databricks.delta.write.txnVersion", "3") + runDelete(1) + assertTable(3) + + // clean up + spark.conf.unset("spark.databricks.delta.write.txnAppId") + spark.conf.unset("spark.databricks.delta.write.txnVersion") + } + } + + test("idempotent write: valid txnVersion") { + spark.conf.set("spark.databricks.delta.write.txnAppId", "deleteTestSQL") + val e = intercept[IllegalArgumentException] { + spark.sessionState.conf.setConfString( + "spark.databricks.delta.write.txnVersion", "someVersion") + } + assert(e.getMessage == "spark.databricks.delta.write.txnVersion should be " + + "long, but was someVersion") + + // clean up + spark.conf.unset("spark.databricks.delta.write.txnAppId") + spark.conf.unset("spark.databricks.delta.write.txnVersion") + } + + Seq("REPLACE", "CREATE OR REPLACE").foreach { command => + test(s"Idempotent $command command") { + withTempDir { tableDir => + val tableName = "myIdempotentReplaceTable" + withTable(tableName) { + spark.conf.set("spark.databricks.delta.write.txnAppId", "replaceTestSQL") + spark.sql(s"CREATE TABLE $tableName(c1 INT, c2 INT, c3 INT)" + + s"USING DELTA LOCATION '" + tableDir.getCanonicalPath + "'") + + def runReplace(data: (Int, Int, Int)): Unit = { + spark.sql(s"$command table $tableName USING DELTA " + + s"as SELECT ${data._1} as c1, ${data._2} as c2, ${data._3} as c3") + } + + def assertTable(numRows: Int, commitVersion: Int, data: (Int, Int, Int)): Unit = { + val count = spark.sql(s"SELECT * FROM $tableName").count() + assert(count == numRows) + val snapshot = DeltaLog.forTable(spark, tableDir.getCanonicalPath).update() + assert(snapshot.version == commitVersion) + val tableContent = spark.sql(s"SELECT * FROM $tableName").collect().head + assert(tableContent.getInt(0) == data._1) + assert(tableContent.getInt(1) == data._2) + assert(tableContent.getInt(2) == data._3) + } + + // run replace (1,1,1) with version 1, table should have 1 row (1,1,1). + spark.conf.set("spark.databricks.delta.write.txnVersion", "1") + runReplace((1, 1, 1)) + assertTable(1, 1, (1, 1, 1)) + // run replace (2,2,2) with version 2, table should have 1 row (2,2,2) + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runReplace((2, 2, 2)) + assertTable(1, 2, (2, 2, 2)) + // retry replace (3,3,3) with version 2, table should have 1 row (2,2,2). + spark.conf.set("spark.databricks.delta.write.txnVersion", "2") + runReplace((3, 3, 3)) + assertTable(1, 2, (2, 2, 2)) + // run replace (4,4,4) with version 3, table should have 1 row (4,4,4). + spark.conf.set("spark.databricks.delta.write.txnVersion", "3") + runReplace((4, 4, 4)) + assertTable(1, 3, (4, 4, 4)) + // run replace (5,5,5) with version 3, table should have 1 row (4,4,4). 
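+ // Version 3 was already recorded for this appId by the previous command, and a write is
+ // only applied when its txnVersion is strictly greater than the last recorded version
+ // for the same appId, so this REPLACE is expected to be a no-op.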
+ spark.conf.set("spark.databricks.delta.write.txnVersion", "3") + runReplace((5, 5, 5)) + assertTable(1, 3, (4, 4, 4)) + // clean up + spark.conf.unset("spark.databricks.delta.write.txnAppId") + spark.conf.unset("spark.databricks.delta.write.txnVersion") + } + } + } + } + + test("idempotent write: auto reset txnVersion") { + withTempDir { tableDir => + val tableName = "myAutoResetTable" + spark.conf.set("spark.databricks.delta.write.txnAppId", "autoReset") + spark.sql(s"CREATE TABLE $tableName (col1 INT, col2 INT) USING DELTA LOCATION '" + + tableDir.getCanonicalPath + "'") + + // this write is done with txn version 0 + spark.conf.set("spark.databricks.delta.write.txnVersion", "0") + spark.sql(s"INSERT INTO $tableName (col1, col2) VALUES (1, 0)") + // this write should be skipped as the version is not reset so it will be applied + // with the same version + spark.sql(s"INSERT INTO $tableName (col1, col2) VALUES (2, 0)") + assert(spark.sql(s"SELECT * FROM $tableName").count() == 1) + + // now enable auto reset + spark.conf.set("spark.databricks.delta.write.txnVersion.autoReset.enabled", "true") + + // this write should be skipped as it is using the same txnVersion as the first write + spark.conf.set("spark.databricks.delta.write.txnVersion", "0") + spark.sql(s"INSERT INTO $tableName (col1, col2) VALUES (3, 0)") + // this should throw an exception as the txn version is automatically reset + val e1 = intercept[IllegalArgumentException] { + spark.sql(s"INSERT INTO $tableName (col1, col2) VALUES (4, 0)") + } + assert(e1.getMessage == "[DELTA_INVALID_IDEMPOTENT_WRITES_OPTIONS] " + + "Invalid options for idempotent Dataframe writes: " + + "Both spark.databricks.delta.write.txnAppId and spark.databricks.delta.write.txnVersion " + + "must be specified for idempotent Delta writes") + // this write should succeed as it's using a newer version than the latest + spark.conf.set("spark.databricks.delta.write.txnVersion", "10") + spark.sql(s"INSERT INTO $tableName (col1, col2) VALUES (2, 0)") + // this should throw an exception as the txn version is automatically reset + val e2 = intercept[IllegalArgumentException] { + spark.sql(s"INSERT INTO $tableName (col1, col2) VALUES (3, 0)") + } + assert(e2.getMessage == "[DELTA_INVALID_IDEMPOTENT_WRITES_OPTIONS] " + + "Invalid options for idempotent Dataframe writes: " + + "Both spark.databricks.delta.write.txnAppId and spark.databricks.delta.write.txnVersion " + + "must be specified for idempotent Delta writes") + + val res = spark.sql(s"SELECT col1 FROM $tableName") + .orderBy(asc("col1")) + .collect() + assert(res.length == 2) + assert(res(0).getInt(0) == 1) + assert(res(1).getInt(0) == 2) + + // clean up + spark.conf.unset("spark.databricks.delta.write.txnAppId") + spark.conf.unset("spark.databricks.delta.write.txnVersion") + } + } + + def idempotentWrite( + mode: String, + appId: String, + seq: DataFrame, + path: String, + name: String, + version: Long, + expectedCount: Long, + commitVersion: Int, + isSaveAsTable: Boolean = true): Unit = { + val df = seq.write.format("delta") + .option(DeltaOptions.TXN_VERSION, version) + .option(DeltaOptions.TXN_APP_ID, appId) + .mode(mode) + if (isSaveAsTable) { + df.option("path", path).saveAsTable(name) + } else { + df.save(path) + } + val i = spark.read.format("delta").load(path).count() + assert(i == expectedCount) + val snapshot = DeltaLog.forTable(spark, path).update() + assert(snapshot.version == (commitVersion - 1)) + } + + Seq((true, true), (true, false), (false, true), (false, false)) + .foreach {case 
(isSaveAsTable, isLegacy) => + val op = if (isSaveAsTable) "saveAsTable" else "save" + val version = if (isLegacy) "legacy" else "non-legacy" + val appId1 = "myAppId1" + val appId2 = "myAppId2" + val confs = if (isLegacy) Seq(SQLConf.USE_V1_SOURCE_LIST.key -> "tahoe,delta") else Seq.empty + + if (!(isSaveAsTable && isLegacy)) { + test(s"Idempotent $version Dataframe $op: append") { + withSQLConf(confs: _*) { + withTempDir { dir => + val path = dir.getCanonicalPath + val name = "append_table_t1" + val mode = "append" + sql("DROP TABLE IF EXISTS append_table_t1") + val df = Seq((1, 2, 3), (4, 5, 6), (7, 8, 9)).toDF("a", "b", "c") + // The first 2 runs must succeed increasing the expected count. + idempotentWrite(mode, appId1, df, path, name, 1, 3, 1, isSaveAsTable) + idempotentWrite(mode, appId1, df, path, name, 2, 6, 2, isSaveAsTable) + + // Even if the version is not consecutive, higher versions should commit successfully. + idempotentWrite(mode, appId1, df, path, name, 5, 9, 3, isSaveAsTable) + + // This run should be ignored because it uses an older version. + idempotentWrite(mode, appId1, df, path, name, 5, 9, 3, isSaveAsTable) + + // Use a different app ID, but same version. This should succeed. + idempotentWrite(mode, appId2, df, path, name, 5, 12, 4, isSaveAsTable) + idempotentWrite(mode, appId2, df, path, name, 5, 12, 4, isSaveAsTable) + + // Verify that specifying only one of the options -- either appId or version -- fails. + val e1 = intercept[Exception] { + val stage = df.write.format("delta").option(DeltaOptions.TXN_APP_ID, 1).mode(mode) + if (isSaveAsTable) { + stage.option("path", path).saveAsTable(name) + } else { + stage.save(path) + } + } + assert(e1.getMessage.contains("Invalid options for idempotent Dataframe writes")) + val e2 = intercept[Exception] { + val stage = df.write.format("delta").option(DeltaOptions.TXN_VERSION, 1).mode(mode) + if (isSaveAsTable) { + stage.option("path", path).saveAsTable(name) + } else { + stage.save(path) + } + } + assert(e2.getMessage.contains("Invalid options for idempotent Dataframe writes")) + } + } + } + } + + test(s"Idempotent $version Dataframe $op: overwrite") { + withSQLConf(confs: _*) { + withTempDir { dir => + val path = dir.getCanonicalPath + val name = "overwrite_table_t1" + val mode = "overwrite" + sql("DROP TABLE IF EXISTS overwrite_table_t1") + val df = Seq((1, 2, 3), (4, 5, 6), (7, 8, 9)).toDF("a", "b", "c") + // The first 2 runs must succeed increasing the expected count. + idempotentWrite(mode, appId1, df, path, name, 1, 3, 1, isSaveAsTable) + idempotentWrite(mode, appId1, df, path, name, 2, 3, 2, isSaveAsTable) + + // Even if the version is not consecutive, higher versions should commit successfully. + idempotentWrite(mode, appId1, df, path, name, 5, 3, 3, isSaveAsTable) + + // This run should be ignored because it uses an older version. + idempotentWrite(mode, appId1, df, path, name, 5, 3, 3, isSaveAsTable) + + // Use a different app ID, but same version. This should succeed. + idempotentWrite(mode, appId2, df, path, name, 5, 3, 4, isSaveAsTable) + idempotentWrite(mode, appId2, df, path, name, 5, 3, 4, isSaveAsTable) + + // Verify that specifying only one of the options -- either appId or version -- fails. 
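+ // txnAppId and txnVersion identify a transaction only as a pair, so supplying just one
+ // of them is rejected with an invalid-options error rather than being silently ignored.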
+ val e1 = intercept[Exception] { + val stage = df.write.format("delta").option(DeltaOptions.TXN_APP_ID, 1).mode(mode) + if (isSaveAsTable) stage.option("path", path).saveAsTable(name) else stage.save(path) + } + assert(e1.getMessage.contains("Invalid options for idempotent Dataframe writes")) + val e2 = intercept[Exception] { + val stage = df.write.format("delta").option(DeltaOptions.TXN_VERSION, 1).mode(mode) + if (isSaveAsTable) stage.option("path", path).saveAsTable(name) else stage.save(path) + } + assert(e2.getMessage.contains("Invalid options for idempotent Dataframe writes")) + } + } + } + } + + test("idempotent writes in streaming foreachBatch") { + // Function to get a checkpoint location and 2 table locations. + def withTempDirs(f: (File, File, File) => Unit): Unit = { + withTempDir { file1 => + withTempDir { file2 => + withTempDir { file3 => + f(file1, file2, file3) + } + } + } + } + + // In this test, we are going to run a streaming query in a deterministic way. + // This streaming query uses foreachBatch to append data to two tables, and + // depending on a boolean flag, the query can fail between the two table writes. + // By setting this flag, we will test whether both tables are consistenly updated + // when query resumes after failure - no duplicates, no data missing. + + withTempDirs { (checkpointDir, table1Dir, table2Dir) => + @volatile var shouldFail = false + + /* Function to write a batch's data to 2 tables */ + def runBatch(batch: DataFrame, appId: String, batchId: Long): Unit = { + // Append to table 1 + batch.write.format("delta") + .option(DeltaOptions.TXN_VERSION, batchId) + .option(DeltaOptions.TXN_APP_ID, appId) + .mode("append").save(table1Dir.getCanonicalPath) + if (shouldFail) { + throw new Exception("Terminating execution") + } else { + // Append to table 2 + batch.write.format("delta") + .option(DeltaOptions.TXN_VERSION, batchId) + .option(DeltaOptions.TXN_APP_ID, appId) + .mode("append").save(table2Dir.getCanonicalPath) + } + } + + @volatile var query: StreamingQuery = null + + // Prepare a streaming query + val inputData = MemoryStream[Int] + val df = inputData.toDF() + val streamWriter = df.writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { (batch: DataFrame, id: Long) => { + runBatch(batch, query.id.toString, id) } + } + + /* Add data and run streaming query, then verify # rows in 2 tables */ + def runQuery(dataToAdd: Int, expectedTable1Count: Int, expectedTable2Count: Int): Unit = { + inputData.addData(dataToAdd) + query = streamWriter.start() + try { + query.processAllAvailable() + } catch { + case e: Exception => + assert(e.getMessage.contains("Terminating execution")) + } finally { + query.stop() + } + val t1Count = spark.read.format("delta").load(table1Dir.getCanonicalPath).count() + assert(t1Count == expectedTable1Count) + val t2Count = spark.read.format("delta").load(table2Dir.getCanonicalPath).count() + assert(t2Count == expectedTable2Count) + } + + // Run the query 3 times. First time without failure, both the output tables are updated. + shouldFail = false + runQuery(dataToAdd = 0, expectedTable1Count = 1, expectedTable2Count = 1) + // Second time with failure. Only one of the tables should be updated. + shouldFail = true + runQuery(dataToAdd = 1, expectedTable1Count = 2, expectedTable2Count = 1) + // Third time without failure. Both the tables should be consistently updated. 
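+ // On restart the stream first re-runs the batch that failed: the table 1 write is
+ // skipped because that (appId, batchId) pair has already been committed there, while
+ // table 2 now applies the same batch, bringing both tables back in sync before the
+ // newly added data is processed.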
+ shouldFail = false + runQuery(dataToAdd = 2, expectedTable1Count = 3, expectedTable2Count = 3) + } + } + + + test("parsing table name and alias using test helper") { + import DeltaTestUtils.parseTableAndAlias + // Parse table name from path and optional alias. + assert(parseTableAndAlias("delta.`store_sales`") === "delta.`store_sales`" -> None) + assert(parseTableAndAlias("delta.`store sales`") === "delta.`store sales`" -> None) + assert(parseTableAndAlias("delta.`store_sales` s") === "delta.`store_sales`" -> Some("s")) + assert(parseTableAndAlias("delta.`store sales` as s") === "delta.`store sales`" -> Some("s")) + assert(parseTableAndAlias("delta.`store%sales` AS s") === "delta.`store%sales`" -> Some("s")) + + // Parse table name and optional alias. + assert(parseTableAndAlias("store_sales") === "store_sales" -> None) + assert(parseTableAndAlias("store sales") === "store" -> Some("sales")) + assert(parseTableAndAlias("store_sales s") === "store_sales" -> Some("s")) + assert(parseTableAndAlias("'store sales' as s") === "'store sales'" -> Some("s")) + assert(parseTableAndAlias("'store%sales' AS s") === "'store%sales'" -> Some("s")) + + // Not properly supported: ambiguous without special handling for escaping. + assert(parseTableAndAlias("'store sales'") === "'store" -> Some("sales'")) + } +} + + +class DeltaNameColumnMappingSuite extends DeltaSuite + with DeltaColumnMappingEnableNameMode { + + import testImplicits._ + + override protected def runOnlyTests = Seq( + "handle partition filters and data filters", + "query with predicates should skip partitions", + "valid replaceWhere", + "batch write: append, overwrite where", + "get touched files for update, delete and merge", + "isBlindAppend with save and saveAsTable" + ) + + + test( + "dynamic partition overwrite with conflicting logical vs. physical named partition columns") { + // It isn't sufficient to just test with column mapping enabled because the physical names are + // generated automatically and thus are unique w.r.t. the logical names. 
+ // Instead we need to have: ColA.logicalName = ColB.physicalName, + // which means we need to start with columnMappingMode=None, and then upgrade to + // columnMappingMode=name and rename our columns + + withSQLConf(DeltaSQLConf.DYNAMIC_PARTITION_OVERWRITE_ENABLED.key -> "true", + DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey-> NoMapping.name) { + withTempDir { tempDir => + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + Seq(("a", "x", 1), ("b", "y", 2), ("c", "x", 3)).toDF("part1", "part2", "value") + .write + .format("delta") + .partitionBy("part1", "part2") + .mode("append") + .save(tempDir.getCanonicalPath) + + val protocol = DeltaLog.forTable(spark, tempDir).snapshot.protocol + val (r, w) = if (protocol.supportsReaderFeatures || protocol.supportsWriterFeatures) { + (TableFeatureProtocolUtils.TABLE_FEATURES_MIN_READER_VERSION, + TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION) + } else { + (ColumnMappingTableFeature.minReaderVersion, ColumnMappingTableFeature.minWriterVersion) + } + + spark.sql( + s""" + |ALTER TABLE delta.`${tempDir.getCanonicalPath}` SET TBLPROPERTIES ( + | 'delta.minReaderVersion' = '$r', + | 'delta.minWriterVersion' = '$w', + | 'delta.columnMapping.mode' = 'name' + |) + |""".stripMargin) + + spark.sql( + s""" + |ALTER TABLE delta.`${tempDir.getCanonicalPath}` RENAME COLUMN part1 TO temp + |""".stripMargin) + spark.sql( + s""" + |ALTER TABLE delta.`${tempDir.getCanonicalPath}` RENAME COLUMN part2 TO part1 + |""".stripMargin) + spark.sql( + s""" + |ALTER TABLE delta.`${tempDir.getCanonicalPath}` RENAME COLUMN temp TO part2 + |""".stripMargin) + + Seq(("a", "x", 4), ("d", "x", 5)).toDF("part2", "part1", "value") + .write + .format("delta") + .partitionBy("part2", "part1") + .mode("overwrite") + .option(DeltaOptions.PARTITION_OVERWRITE_MODE_OPTION, "dynamic") + .save(tempDir.getCanonicalPath) + checkDatasetUnorderly(data.select("part2", "part1", "value").as[(String, String, Int)], + ("a", "x", 4), ("b", "y", 2), ("c", "x", 3), ("d", "x", 5)) + } + } + } + + test("replaceWhere dataframe V2 API with less than predicate") { + withTempDir { dir => + val insertedDF = spark.range(10).toDF() + + insertedDF.write.format("delta").save(dir.toString) + + val otherDF = spark.range(start = 0, end = 4).toDF() + otherDF.writeTo(s"delta.`${dir.toString}`").overwrite(col("id") < 6) + checkAnswer(spark.read.load(dir.toString), + insertedDF.filter(col("id") >= 6).union(otherDF)) + } + } + + test("replaceWhere SQL - partition column - dynamic filter") { + withTempDir { dir => + // create partitioned table + spark.range(100).withColumn("part", 'id % 10) + .write + .format("delta") + .partitionBy("part") + .save(dir.toString) + + // ans will be used to replace the entire contents of the table + val ans = spark.range(10) + .withColumn("part", lit(0)) + + ans.createOrReplaceTempView("replace") + sql(s"INSERT INTO delta.`${dir.toString}` REPLACE WHERE part >=0 SELECT * FROM replace") + checkAnswer(spark.read.format("delta").load(dir.toString), ans) + } + } + + test("replaceWhere SQL - partition column - static filter") { + withTable("tbl") { + // create partitioned table + spark.range(100).withColumn("part", lit(0)) + .write + .format("delta") + .partitionBy("part") + .saveAsTable("tbl") + + val partEq1DF = spark.range(10, 20) + .withColumn("part", lit(1)) + partEq1DF.write.format("delta").mode("append").saveAsTable("tbl") + + + val replacer = spark.range(10) + .withColumn("part", lit(0)) + + replacer.createOrReplaceTempView("replace") + 
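+ // INSERT INTO ... REPLACE WHERE deletes the rows matching the predicate and inserts the
+ // SELECT result in a single commit, so only the part=0 data is rewritten here and the
+ // part=1 rows appended above survive, as the checkAnswer below verifies. The DataFrame
+ // writer form used elsewhere in this suite is roughly equivalent, e.g.
+ // replacer.write.format("delta").mode("overwrite")
+ //   .option("replaceWhere", "part = 0").saveAsTable("tbl")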
sql(s"INSERT INTO tbl REPLACE WHERE part=0 SELECT * FROM replace") + checkAnswer(spark.read.format("delta").table("tbl"), replacer.union(partEq1DF)) + } + } + + test("replaceWhere SQL - data column - dynamic") { + withTable("tbl") { + // write table + spark.range(100).withColumn("col", lit(1)) + .write + .format("delta") + .saveAsTable("tbl") + + val colGt2DF = spark.range(100, 200) + .withColumn("col", lit(3)) + + colGt2DF.write + .format("delta") + .mode("append") + .saveAsTable("tbl") + + val replacer = spark.range(10) + .withColumn("col", lit(1)) + + replacer.createOrReplaceTempView("replace") + sql(s"INSERT INTO tbl REPLACE WHERE col < 2 SELECT * FROM replace") + checkAnswer( + spark.read.format("delta").table("tbl"), + replacer.union(colGt2DF) + ) + } + } + + test("replaceWhere SQL - data column - static") { + withTempDir { dir => + // write table + spark.range(100).withColumn("col", lit(2)) + .write + .format("delta") + .save(dir.toString) + + val colEq2DF = spark.range(100, 200) + .withColumn("col", lit(1)) + + colEq2DF.write + .format("delta") + .mode("append") + .save(dir.toString) + + val replacer = spark.range(10) + .withColumn("col", lit(2)) + + replacer.createOrReplaceTempView("replace") + sql(s"INSERT INTO delta.`${dir.toString}` REPLACE WHERE col = 2 SELECT * FROM replace") + checkAnswer( + spark.read.format("delta").load(dir.toString), + replacer.union(colEq2DF) + ) + } + } + + test("replaceWhere SQL - multiple predicates - static") { + withTempDir { dir => + // write table + spark.range(100).withColumn("col", lit(2)) + .write + .format("delta") + .save(dir.toString) + + spark.range(100, 200).withColumn("col", lit(5)) + .write + .format("delta") + .mode("append") + .save(dir.toString) + + val colEq2DF = spark.range(100, 200) + .withColumn("col", lit(1)) + + colEq2DF.write + .format("delta") + .mode("append") + .save(dir.toString) + + val replacer = spark.range(10) + .withColumn("col", lit(2)) + + replacer.createOrReplaceTempView("replace") + sql(s"INSERT INTO delta.`${dir.toString}` REPLACE WHERE col = 2 OR col = 5 " + + s"SELECT * FROM replace") + checkAnswer( + spark.read.format("delta").load(dir.toString), + replacer.union(colEq2DF) + ) + } + } + + test("replaceWhere with less than predicate") { + withTempDir { dir => + val insertedDF = spark.range(10).toDF() + + insertedDF.write.format("delta").save(dir.toString) + + val otherDF = spark.range(start = 0, end = 4).toDF() + otherDF.write.format("delta").mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "id < 6") + .save(dir.toString) + checkAnswer(spark.read.load(dir.toString), + insertedDF.filter(col("id") >= 6).union(otherDF)) + } + } + + test("replaceWhere SQL with less than predicate") { + withTempDir { dir => + val insertedDF = spark.range(10).toDF() + + insertedDF.write.format("delta").save(dir.toString) + + val otherDF = spark.range(start = 0, end = 4).toDF() + otherDF.createOrReplaceTempView("replace") + + sql( + s""" + |INSERT INTO delta.`${dir.getAbsolutePath}` + |REPLACE WHERE id < 6 + |SELECT * FROM replace + |""".stripMargin) + checkAnswer(spark.read.load(dir.toString), + insertedDF.filter(col("id") >= 6).union(otherDF)) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTableCreationTests.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTableCreationTests.scala new file mode 100644 index 00000000000..143628e8268 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTableCreationTests.scala @@ -0,0 +1,2442 @@ +/* + * Copyright (2021) The 
Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.util.Locale + +// scalastyle:off import.ordering.noEmptyLine +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.language.implicitConversions + +import org.apache.spark.sql.delta.DeltaOperations.ManualUpdate +import org.apache.spark.sql.delta.actions.Metadata +import org.apache.spark.sql.delta.actions.Protocol +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.{DeltaColumnMappingSelectedTestMixin, DeltaSQLCommandTest} +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row, SaveMode} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, ExternalCatalogUtils, SessionCatalog} +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, Table, TableCatalog} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{MetadataBuilder, StructType} +import org.apache.spark.util.Utils + +trait DeltaTableCreationTests + extends QueryTest + with SharedSparkSession + with DeltaColumnMappingTestUtils { + + import testImplicits._ + + val format = "delta" + + override protected def sparkConf: SparkConf = { + super.sparkConf + // to make compatible with existing empty schema fail tests + .set(DeltaSQLConf.DELTA_ALLOW_CREATE_EMPTY_SCHEMA_TABLE.key, "false") + } + + private def createDeltaTableByPath( + path: File, + df: DataFrame, + tableName: String, + partitionedBy: Seq[String] = Nil): Unit = { + df.write + .partitionBy(partitionedBy: _*) + .mode(SaveMode.Append) + .format(format) + .save(path.getCanonicalPath) + + sql(s""" + |CREATE TABLE delta_test + |USING delta + |LOCATION '${path.getCanonicalPath}' + """.stripMargin) + } + + private implicit def toTableIdentifier(tableName: String): TableIdentifier = { + spark.sessionState.sqlParser.parseTableIdentifier(tableName) + } + + protected def getTablePath(tableName: String): String = { + new Path(spark.sessionState.catalog.getTableMetadata(tableName).location).toString + } + + protected def getDefaultTablePath(tableName: String): String = { + new Path(spark.sessionState.catalog.defaultTablePath(tableName)).toString + } + + protected def getPartitioningColumns(tableName: String): Seq[String] = { + spark.sessionState.catalog.getTableMetadata(tableName).partitionColumnNames + } + + protected def getSchema(tableName: String): StructType = { + 
spark.sessionState.catalog.getTableMetadata(tableName).schema + } + + protected def getTableProperties(tableName: String): Map[String, String] = { + spark.sessionState.catalog.getTableMetadata(tableName).properties + } + + private def getDeltaLog(table: CatalogTable): DeltaLog = { + getDeltaLog(new Path(table.storage.locationUri.get)) + } + + private def getDeltaLog(tableName: String): DeltaLog = { + getDeltaLog(spark.sessionState.catalog.getTableMetadata(tableName)) + } + + protected def getDeltaLog(path: Path): DeltaLog = { + DeltaLog.forTable(spark, path) + } + + protected def verifyTableInCatalog(catalog: SessionCatalog, table: String): Unit = { + val externalTable = + catalog.externalCatalog.getTable("default", table) + assertEqual(externalTable.schema, new StructType()) + assert(externalTable.partitionColumnNames.isEmpty) + } + + protected def checkResult( + result: DataFrame, + expected: Seq[Any], + columns: Seq[String]): Unit = { + checkAnswer( + result.select(columns.head, columns.tail: _*), + Seq(Row(expected: _*)) + ) + } + + Seq("partitioned" -> Seq("v2"), "non-partitioned" -> Nil).foreach { case (isPartitioned, cols) => + SaveMode.values().foreach { saveMode => + test(s"saveAsTable to a new table (managed) - $isPartitioned, saveMode: $saveMode") { + val tbl = "delta_test" + withTable(tbl) { + Seq(1L -> "a").toDF("v1", "v2") + .write + .partitionBy(cols: _*) + .mode(saveMode) + .format(format) + .saveAsTable(tbl) + + checkDatasetUnorderly(spark.table(tbl).as[(Long, String)], 1L -> "a") + assert(getTablePath(tbl) === getDefaultTablePath(tbl), "Table path is wrong") + assert(getPartitioningColumns(tbl) === cols, "Partitioning columns don't match") + } + } + + test(s"saveAsTable to a new table (managed) - $isPartitioned," + + s" saveMode: $saveMode (empty df)") { + val tbl = "delta_test" + withTable(tbl) { + Seq(1L -> "a").toDF("v1", "v2").where("false") + .write + .partitionBy(cols: _*) + .mode(saveMode) + .format(format) + .saveAsTable(tbl) + + checkDatasetUnorderly(spark.table(tbl).as[(Long, String)]) + assert(getTablePath(tbl) === getDefaultTablePath(tbl), "Table path is wrong") + assert(getPartitioningColumns(tbl) === cols, "Partitioning columns don't match") + } + } + } + + SaveMode.values().foreach { saveMode => + test(s"saveAsTable to a new table (external) - $isPartitioned, saveMode: $saveMode") { + withTempDir { dir => + val tbl = "delta_test" + withTable(tbl) { + Seq(1L -> "a").toDF("v1", "v2") + .write + .partitionBy(cols: _*) + .mode(saveMode) + .format(format) + .option("path", dir.getCanonicalPath) + .saveAsTable(tbl) + + checkDatasetUnorderly(spark.table(tbl).as[(Long, String)], 1L -> "a") + assert(getTablePath(tbl) === new Path(dir.toURI).toString.stripSuffix("/"), + "Table path is wrong") + assert(getPartitioningColumns(tbl) === cols, "Partitioning columns don't match") + } + } + } + + test(s"saveAsTable to a new table (external) - $isPartitioned," + + s" saveMode: $saveMode (empty df)") { + withTempDir { dir => + val tbl = "delta_test" + withTable(tbl) { + Seq(1L -> "a").toDF("v1", "v2").where("false") + .write + .partitionBy(cols: _*) + .mode(saveMode) + .format(format) + .option("path", dir.getCanonicalPath) + .saveAsTable(tbl) + + checkDatasetUnorderly(spark.table(tbl).as[(Long, String)]) + assert(getTablePath(tbl) === new Path(dir.toURI).toString.stripSuffix("/"), + "Table path is wrong") + assert(getPartitioningColumns(tbl) === cols, "Partitioning columns don't match") + } + } + } + } + + test(s"saveAsTable (append) to an existing table - $isPartitioned") { 
+ withTempDir { dir => + val tbl = "delta_test" + withTable(tbl) { + createDeltaTableByPath(dir, Seq(1L -> "a").toDF("v1", "v2"), tbl, cols) + + Seq(2L -> "b").toDF("v1", "v2") + .write + .partitionBy(cols: _*) + .mode(SaveMode.Append) + .format(format) + .saveAsTable(tbl) + + checkDatasetUnorderly(spark.table(tbl).as[(Long, String)], 1L -> "a", 2L -> "b") + } + } + } + + test(s"saveAsTable (overwrite) to an existing table - $isPartitioned") { + withTempDir { dir => + val tbl = "delta_test" + withTable(tbl) { + createDeltaTableByPath(dir, Seq(1L -> "a").toDF("v1", "v2"), tbl, cols) + + Seq(2L -> "b").toDF("v1", "v2") + .write + .partitionBy(cols: _*) + .mode(SaveMode.Overwrite) + .format(format) + .saveAsTable(tbl) + + checkDatasetUnorderly(spark.table(tbl).as[(Long, String)], 2L -> "b") + } + } + } + + test(s"saveAsTable (ignore) to an existing table - $isPartitioned") { + withTempDir { dir => + val tbl = "delta_test" + withTable(tbl) { + createDeltaTableByPath(dir, Seq(1L -> "a").toDF("v1", "v2"), tbl, cols) + + Seq(2L -> "b").toDF("v1", "v2") + .write + .partitionBy(cols: _*) + .mode(SaveMode.Ignore) + .format(format) + .saveAsTable(tbl) + + checkDatasetUnorderly(spark.table(tbl).as[(Long, String)], 1L -> "a") + } + } + } + + test(s"saveAsTable (error if exists) to an existing table - $isPartitioned") { + withTempDir { dir => + val tbl = "delta_test" + withTable(tbl) { + createDeltaTableByPath(dir, Seq(1L -> "a").toDF("v1", "v2"), tbl, cols) + + val e = intercept[AnalysisException] { + Seq(2L -> "b").toDF("v1", "v2") + .write + .partitionBy(cols: _*) + .mode(SaveMode.ErrorIfExists) + .format(format) + .saveAsTable(tbl) + } + assert(e.getMessage.contains(tbl)) + assert(e.getMessage.contains("already exists")) + + checkDatasetUnorderly(spark.table(tbl).as[(Long, String)], 1L -> "a") + } + } + } + } + + test("saveAsTable (append) + insert to a table created without a schema") { + withTempDir { dir => + withTable("delta_test") { + Seq(1L -> "a").toDF("v1", "v2") + .write + .mode(SaveMode.Append) + .partitionBy("v2") + .format(format) + .option("path", dir.getCanonicalPath) + .saveAsTable("delta_test") + + // Out of order + Seq("b" -> 2L).toDF("v2", "v1") + .write + .partitionBy("v2") + .mode(SaveMode.Append) + .format(format) + .saveAsTable("delta_test") + + Seq(3L -> "c").toDF("v1", "v2") + .write + .format(format) + .insertInto("delta_test") + + checkDatasetUnorderly( + spark.table("delta_test").as[(Long, String)], 1L -> "a", 2L -> "b", 3L -> "c") + } + } + } + + test("saveAsTable to a table created with an invalid partitioning column") { + withTempDir { dir => + withTable("delta_test") { + Seq(1L -> "a").toDF("v1", "v2") + .write + .mode(SaveMode.Append) + .partitionBy("v2") + .format(format) + .option("path", dir.getCanonicalPath) + .saveAsTable("delta_test") + checkDatasetUnorderly(spark.table("delta_test").as[(Long, String)], 1L -> "a") + + var ex = intercept[Exception] { + Seq("b" -> 2L).toDF("v2", "v1") + .write + .partitionBy("v1") + .mode(SaveMode.Append) + .format(format) + .saveAsTable("delta_test") + }.getMessage + assert(ex.contains("not match")) + assert(ex.contains("partition")) + checkDatasetUnorderly(spark.table("delta_test").as[(Long, String)], 1L -> "a") + + ex = intercept[Exception] { + Seq("b" -> 2L).toDF("v3", "v1") + .write + .partitionBy("v1") + .mode(SaveMode.Append) + .format(format) + .saveAsTable("delta_test") + }.getMessage + assert(ex.contains("not match")) + assert(ex.contains("partition")) + checkDatasetUnorderly(spark.table("delta_test").as[(Long, String)], 
1L -> "a") + + Seq("b" -> 2L).toDF("v1", "v3") + .write + .partitionBy("v1") + .mode(SaveMode.Ignore) + .format(format) + .saveAsTable("delta_test") + checkDatasetUnorderly(spark.table("delta_test").as[(Long, String)], 1L -> "a") + + ex = intercept[AnalysisException] { + Seq("b" -> 2L).toDF("v1", "v3") + .write + .partitionBy("v1") + .mode(SaveMode.ErrorIfExists) + .format(format) + .saveAsTable("delta_test") + }.getMessage + assert(ex.contains("delta_test")) + assert(ex.contains("already exists")) + checkDatasetUnorderly(spark.table("delta_test").as[(Long, String)], 1L -> "a") + } + } + } + + testQuietly("create delta table with spaces in column names") { + val tableName = "delta_test" + + val tableLoc = + new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier(tableName))) + Utils.deleteRecursively(tableLoc) + + def createTableUsingDF: Unit = { + Seq(1, 2, 3).toDF("a column name with spaces") + .write + .format(format) + .mode(SaveMode.Overwrite) + .saveAsTable(tableName) + } + + def createTableUsingSQL: DataFrame = { + sql(s"CREATE TABLE $tableName(`a column name with spaces` LONG, b String) USING delta") + } + + withTable(tableName) { + if (!columnMappingEnabled) { + val ex = intercept[AnalysisException] { + createTableUsingDF + } + assert( + ex.getMessage.contains("[INVALID_COLUMN_NAME_AS_PATH]") || + ex.getMessage.contains("invalid character(s)") + ) + assert(!tableLoc.exists()) + } else { + // column mapping modes support creating table with arbitrary col names + createTableUsingDF + assert(tableLoc.exists()) + } + } + + withTable(tableName) { + if (!columnMappingEnabled) { + val ex2 = intercept[AnalysisException] { + createTableUsingSQL + } + assert( + ex2.getMessage.contains("[INVALID_COLUMN_NAME_AS_PATH]") || + ex2.getMessage.contains("invalid character(s)") + ) + assert(!tableLoc.exists()) + } else { + // column mapping modes support creating table with arbitrary col names + createTableUsingSQL + assert(tableLoc.exists()) + } + } + } + + testQuietly("cannot create delta table when using buckets") { + withTable("bucketed_table") { + val e = intercept[AnalysisException] { + Seq(1L -> "a").toDF("i", "j").write + .format(format) + .partitionBy("i") + .bucketBy(numBuckets = 8, "j") + .saveAsTable("bucketed_table") + } + assert(e.getMessage.toLowerCase(Locale.ROOT).contains( + "is not supported for delta tables")) + } + } + + test("save without a path") { + val e = intercept[IllegalArgumentException] { + Seq(1L -> "a").toDF("i", "j").write + .format(format) + .partitionBy("i") + .save() + } + assert(e.getMessage.toLowerCase(Locale.ROOT).contains("'path' is not specified")) + } + + test("save with an unknown partition column") { + withTempDir { dir => + val path = dir.getCanonicalPath + val e = intercept[AnalysisException] { + Seq(1L -> "a").toDF("i", "j").write + .format(format) + .partitionBy("unknownColumn") + .save(path) + } + assert(e.getMessage.contains("unknownColumn")) + } + } + + test("create a table with special column names") { + withTable("t") { + Seq(1 -> "a").toDF("x.x", "y.y").write.format(format).saveAsTable("t") + Seq(2 -> "b").toDF("x.x", "y.y").write.format(format).mode("append").saveAsTable("t") + checkAnswer(spark.table("t"), Row(1, "a") :: Row(2, "b") :: Nil) + } + } + + testQuietly("saveAsTable (overwrite) to a non-partitioned table created with different paths") { + withTempDir { dir1 => + withTempDir { dir2 => + withTable("delta_test") { + Seq(1L -> "a").toDF("v1", "v2") + .write + .mode(SaveMode.Append) + .format(format) + .option("path", 
dir1.getCanonicalPath) + .saveAsTable("delta_test") + + val ex = intercept[AnalysisException] { + Seq((3L, "c")).toDF("v1", "v2") + .write + .mode(SaveMode.Overwrite) + .format(format) + .option("path", dir2.getCanonicalPath) + .saveAsTable("delta_test") + }.getMessage + assert(ex.contains("The location of the existing table")) + assert(ex.contains("`default`.`delta_test`")) + checkAnswer( + spark.table("delta_test"), Row(1L, "a") :: Nil) + } + } + } + } + + test("saveAsTable (append) to a non-partitioned table created without path") { + withTempDir { dir => + withTable("delta_test") { + Seq(1L -> "a").toDF("v1", "v2") + .write + .mode(SaveMode.Overwrite) + .format(format) + .option("path", dir.getCanonicalPath) + .saveAsTable("delta_test") + + Seq((3L, "c")).toDF("v1", "v2") + .write + .mode(SaveMode.Append) + .format(format) + .saveAsTable("delta_test") + + checkAnswer( + spark.table("delta_test"), Row(1L, "a") :: Row(3L, "c") :: Nil) + } + } + } + + test("saveAsTable (append) to a non-partitioned table created with identical paths") { + withTempDir { dir => + withTable("delta_test") { + Seq(1L -> "a").toDF("v1", "v2") + .write + .mode(SaveMode.Overwrite) + .format(format) + .option("path", dir.getCanonicalPath) + .saveAsTable("delta_test") + + Seq((3L, "c")).toDF("v1", "v2") + .write + .mode(SaveMode.Append) + .format(format) + .option("path", dir.getCanonicalPath) + .saveAsTable("delta_test") + + checkAnswer( + spark.table("delta_test"), Row(1L, "a") :: Row(3L, "c") :: Nil) + } + } + } + + test("overwrite mode saveAsTable without path shouldn't create managed table") { + withTempDir { dir => + withTable("delta_test") { + sql( + s"""CREATE TABLE delta_test + |USING delta + |LOCATION '${dir.getAbsolutePath}' + |AS SELECT 1 as a + """.stripMargin) + val deltaLog = DeltaLog.forTable(spark, dir) + assert(deltaLog.snapshot.version === 0, "CTAS should be a single commit") + + checkAnswer(spark.table("delta_test"), Row(1) :: Nil) + + Seq((2, "key")).toDF("a", "b") + .write + .mode(SaveMode.Overwrite) + .option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .format(format) + .saveAsTable("delta_test") + + assert(deltaLog.snapshot.version === 1, "Overwrite mode shouldn't create new managed table") + + checkAnswer(spark.table("delta_test"), Row(2, "key") :: Nil) + + } + } + } + + testQuietly("reject table creation with column names that only differ by case") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + withTempDir { dir => + withTable("delta_test") { + intercept[AnalysisException] { + sql( + s"""CREATE TABLE delta_test + |USING delta + |LOCATION '${dir.getAbsolutePath}' + |AS SELECT 1 as a, 2 as A + """.stripMargin) + } + + intercept[AnalysisException] { + sql( + s"""CREATE TABLE delta_test( + | a string, + | A string + |) + |USING delta + |LOCATION '${dir.getAbsolutePath}' + """.stripMargin) + } + + intercept[ParseException] { + sql( + s"""CREATE TABLE delta_test( + | a string, + | b string + |) + |partitioned by (a, a) + |USING delta + |LOCATION '${dir.getAbsolutePath}' + """.stripMargin) + } + } + } + } + } + + testQuietly("saveAsTable into a view throws exception around view definition") { + withTempDir { dir => + val viewName = "delta_test" + withView(viewName) { + Seq((1, "key")).toDF("a", "b").write.format(format).save(dir.getCanonicalPath) + sql(s"create view $viewName as select * from delta.`${dir.getCanonicalPath}`") + val e = intercept[AnalysisException] { + Seq((2, "key")).toDF("a", "b").write.format(format).mode("append").saveAsTable(viewName) + } + 
assert(e.getMessage.contains("a view")) + } + } + } + + testQuietly("saveAsTable into a parquet table throws exception around format") { + withTempPath { dir => + val tabName = "delta_test" + withTable(tabName) { + Seq((1, "key")).toDF("a", "b").write.format("parquet") + .option("path", dir.getCanonicalPath).saveAsTable(tabName) + intercept[AnalysisException] { + Seq((2, "key")).toDF("a", "b").write.format("delta").mode("append").saveAsTable(tabName) + } + } + } + } + + test("create table with schema and path") { + withTempDir { dir => + withTable("delta_test") { + sql( + s""" + |CREATE TABLE delta_test(a LONG, b String) + |USING delta + |OPTIONS('path'='${dir.getCanonicalPath}')""".stripMargin) + sql("INSERT INTO delta_test SELECT 1, 'a'") + checkDatasetUnorderly( + sql("SELECT * FROM delta_test").as[(Long, String)], + 1L -> "a") + + } + } + } + + protected def createTableWithEmptySchemaQuery( + tableName: String, + provider: String = "delta", + location: Option[String] = None): String = { + var query = s"CREATE TABLE $tableName USING $provider" + if (location.nonEmpty) { + query = s"$query LOCATION '${location.get}'" + } + query + } + + testQuietly("failed to create a table and then able to recreate it") { + withTable("delta_test") { + val createEmptySchemaQuery = createTableWithEmptySchemaQuery("delta_test") + val e = intercept[AnalysisException] { + sql(createEmptySchemaQuery) + }.getMessage + assert(e.contains("but the schema is not specified")) + + sql("CREATE TABLE delta_test(a LONG, b String) USING delta") + + sql("INSERT INTO delta_test SELECT 1, 'a'") + + checkDatasetUnorderly( + sql("SELECT * FROM delta_test").as[(Long, String)], + 1L -> "a") + } + } + + test("create external table without schema") { + withTempDir { dir => + withTable("delta_test", "delta_test1") { + Seq(1L -> "a").toDF() + .selectExpr("_1 as v1", "_2 as v2") + .write + .mode("append") + .partitionBy("v2") + .format("delta") + .save(dir.getCanonicalPath) + + sql(s""" + |CREATE TABLE delta_test + |USING delta + |OPTIONS('path'='${dir.getCanonicalPath}') + """.stripMargin) + + spark.catalog.createTable("delta_test1", dir.getCanonicalPath, "delta") + + checkDatasetUnorderly( + sql("SELECT * FROM delta_test").as[(Long, String)], + 1L -> "a") + + checkDatasetUnorderly( + sql("SELECT * FROM delta_test1").as[(Long, String)], + 1L -> "a") + } + } + } + + testQuietly("create managed table without schema") { + withTable("delta_test") { + val createEmptySchemaQuery = createTableWithEmptySchemaQuery("delta_test") + val e = intercept[AnalysisException] { + sql(createEmptySchemaQuery) + }.getMessage + assert(e.contains("but the schema is not specified")) + } + } + + testQuietly("reject creating a delta table pointing to non-delta files") { + withTempPath { dir => + withTable("delta_test") { + val path = dir.getCanonicalPath + Seq(1L -> "a").toDF("col1", "col2").write.parquet(path) + val e = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE delta_test (col1 int, col2 string) + |USING delta + |LOCATION '$path' + """.stripMargin) + }.getMessage + var catalogPrefix = "" + assert(e.contains( + s"Cannot create table ('$catalogPrefix`default`.`delta_test`'). 
The associated location")) + } + } + } + + testQuietly("create external table without schema but using non-delta files") { + withTempDir { dir => + withTable("delta_test") { + Seq(1L -> "a").toDF().selectExpr("_1 as v1", "_2 as v2").write + .mode("append").partitionBy("v2").format("parquet").save(dir.getCanonicalPath) + + val createEmptySchemaQuery = createTableWithEmptySchemaQuery( + "delta_test", location = Some(dir.getCanonicalPath)) + val e = intercept[AnalysisException] { + sql(createEmptySchemaQuery) + }.getMessage + assert(e.contains("but there is no transaction log")) + } + } + } + + testQuietly("create external table without schema and input files") { + withTempDir { dir => + withTable("delta_test") { + val createEmptySchemaQuery = createTableWithEmptySchemaQuery( + "delta_test", location = Some(dir.getCanonicalPath)) + val e = intercept[AnalysisException] { + sql(createEmptySchemaQuery) + }.getMessage + assert(e.contains("but the schema is not specified") && e.contains("input path is empty")) + } + } + } + + test("create and drop delta table - external") { + val catalog = spark.sessionState.catalog + withTempDir { tempDir => + withTable("delta_test") { + sql("CREATE TABLE delta_test(a LONG, b String) USING delta " + + s"OPTIONS (path='${tempDir.getCanonicalPath}')") + val table = catalog.getTableMetadata(TableIdentifier("delta_test")) + assert(table.tableType == CatalogTableType.EXTERNAL) + assert(table.provider.contains("delta")) + + // Query the data and the metadata directly via the DeltaLog + val deltaLog = getDeltaLog(table) + + assertEqual( + deltaLog.snapshot.schema, new StructType().add("a", "long").add("b", "string")) + assertEqual( + deltaLog.snapshot.metadata.partitionSchema, new StructType()) + + assertEqual(deltaLog.snapshot.schema, getSchema("delta_test")) + assert(getPartitioningColumns("delta_test").isEmpty) + + // External catalog does not contain the schema and partition column names. + verifyTableInCatalog(catalog, "delta_test") + + sql("INSERT INTO delta_test SELECT 1, 'a'") + checkDatasetUnorderly( + sql("SELECT * FROM delta_test").as[(Long, String)], + 1L -> "a") + + sql("DROP TABLE delta_test") + intercept[NoSuchTableException](catalog.getTableMetadata(TableIdentifier("delta_test"))) + // Verify that the underlying location is not deleted for an external table + checkAnswer(spark.read.format("delta") + .load(new Path(tempDir.getCanonicalPath).toString), Seq(Row(1L, "a"))) + } + } + } + + test("create and drop delta table - managed") { + val catalog = spark.sessionState.catalog + withTable("delta_test") { + sql("CREATE TABLE delta_test(a LONG, b String) USING delta") + val table = catalog.getTableMetadata(TableIdentifier("delta_test")) + assert(table.tableType == CatalogTableType.MANAGED) + assert(table.provider.contains("delta")) + + // Query the data and the metadata directly via the DeltaLog + val deltaLog = getDeltaLog(table) + + assertEqual( + deltaLog.snapshot.schema, new StructType().add("a", "long").add("b", "string")) + assertEqual( + deltaLog.snapshot.metadata.partitionSchema, new StructType()) + + assertEqual(deltaLog.snapshot.schema, getSchema("delta_test")) + assert(getPartitioningColumns("delta_test").isEmpty) + assertEqual(getSchema("delta_test"), new StructType().add("a", "long").add("b", "string")) + + // External catalog does not contain the schema and partition column names. 
+ verifyTableInCatalog(catalog, "delta_test") + + sql("INSERT INTO delta_test SELECT 1, 'a'") + checkDatasetUnorderly( + sql("SELECT * FROM delta_test").as[(Long, String)], + 1L -> "a") + + sql("DROP TABLE delta_test") + intercept[NoSuchTableException](catalog.getTableMetadata(TableIdentifier("delta_test"))) + // Verify that the underlying location is deleted for a managed table + assert(!new File(table.location).exists()) + } + } + + test("create table using - with partitioned by") { + val catalog = spark.sessionState.catalog + withTable("delta_test") { + sql("CREATE TABLE delta_test(a LONG, b String) USING delta PARTITIONED BY (a)") + val table = catalog.getTableMetadata(TableIdentifier("delta_test")) + assert(table.tableType == CatalogTableType.MANAGED) + assert(table.provider.contains("delta")) + + + // Query the data and the metadata directly via the DeltaLog + val deltaLog = getDeltaLog(table) + + assertEqual( + deltaLog.snapshot.schema, new StructType().add("a", "long").add("b", "string")) + assertEqual( + deltaLog.snapshot.metadata.partitionSchema, new StructType().add("a", "long")) + + assertEqual(deltaLog.snapshot.schema, getSchema("delta_test")) + assert(getPartitioningColumns("delta_test") == Seq("a")) + assertEqual(getSchema("delta_test"), new StructType().add("a", "long").add("b", "string")) + + // External catalog does not contain the schema and partition column names. + verifyTableInCatalog(catalog, "delta_test") + + sql("INSERT INTO delta_test SELECT 1, 'a'") + + assertPartitionWithValueExists("a", "1", deltaLog) + + checkDatasetUnorderly( + sql("SELECT * FROM delta_test").as[(Long, String)], + 1L -> "a") + } + } + + test("CTAS a managed table with the existing empty directory") { + val tableLoc = new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier("tab1"))) + try { + tableLoc.mkdir() + withTable("tab1") { + sql("CREATE TABLE tab1 USING delta AS SELECT 2, 'b'") + checkAnswer(spark.table("tab1"), Row(2, "b")) + } + } finally { + waitForTasksToFinish() + Utils.deleteRecursively(tableLoc) + } + } + + test("create a managed table with the existing empty directory") { + val tableLoc = new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier("tab1"))) + try { + tableLoc.mkdir() + withTable("tab1") { + sql("CREATE TABLE tab1 (col1 int, col2 string) USING delta") + sql("INSERT INTO tab1 VALUES (2, 'B')") + checkAnswer(spark.table("tab1"), Row(2, "B")) + } + } finally { + waitForTasksToFinish() + Utils.deleteRecursively(tableLoc) + } + } + + testQuietly( + "create a managed table with the existing non-empty directory") { + withTable("tab1") { + val tableLoc = new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier("tab1"))) + try { + // create an empty hidden file + tableLoc.mkdir() + val hiddenGarbageFile = new File(tableLoc.getCanonicalPath, ".garbage") + hiddenGarbageFile.createNewFile() + var ex = intercept[AnalysisException] { + sql("CREATE TABLE tab1 USING delta AS SELECT 2, 'b'") + }.getMessage + assert(ex.contains("Cannot create table")) + + ex = intercept[AnalysisException] { + sql("CREATE TABLE tab1 (col1 int, col2 string) USING delta") + }.getMessage + assert(ex.contains("Cannot create table")) + } finally { + waitForTasksToFinish() + Utils.deleteRecursively(tableLoc) + } + } + } + + test("create table with table properties") { + withTable("delta_test") { + sql(s""" + |CREATE TABLE delta_test(a LONG, b String) + |USING delta + |TBLPROPERTIES( + | 'delta.logRetentionDuration' = '2 weeks', + | 'delta.checkpointInterval' = '20', + | 
'key' = 'value' + |) + """.stripMargin) + + val deltaLog = getDeltaLog("delta_test") + + val snapshot = deltaLog.update() + assertEqual(snapshot.metadata.configuration, Map( + "delta.logRetentionDuration" -> "2 weeks", + "delta.checkpointInterval" -> "20", + "key" -> "value")) + assert(deltaLog.deltaRetentionMillis(snapshot.metadata) == 2 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot.metadata) == 20) + } + } + + test("create table with table properties - case insensitivity") { + withTable("delta_test") { + sql(s""" + |CREATE TABLE delta_test(a LONG, b String) + |USING delta + |TBLPROPERTIES( + | 'dEltA.lOgrEteNtiOndURaTion' = '2 weeks', + | 'DelTa.ChEckPoiNtinTervAl' = '20' + |) + """.stripMargin) + + val deltaLog = getDeltaLog("delta_test") + + val snapshot = deltaLog.update() + assertEqual(snapshot.metadata.configuration, + Map("delta.logRetentionDuration" -> "2 weeks", "delta.checkpointInterval" -> "20")) + assert(deltaLog.deltaRetentionMillis(snapshot.metadata) == 2 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot.metadata) == 20) + } + } + + test( + "create table with table properties - case insensitivity with existing configuration") { + withTempDir { tempDir => + withTable("delta_test") { + val path = tempDir.getCanonicalPath + + val deltaLog = getDeltaLog(new Path(path)) + + val txn = deltaLog.startTransaction() + txn.commit(Seq(Metadata( + schemaString = new StructType().add("a", "long").add("b", "string").json, + configuration = Map( + "delta.logRetentionDuration" -> "2 weeks", + "delta.checkpointInterval" -> "20", + "key" -> "value"))), + ManualUpdate) + + sql(s""" + |CREATE TABLE delta_test(a LONG, b String) + |USING delta LOCATION '$path' + |TBLPROPERTIES( + | 'dEltA.lOgrEteNtiOndURaTion' = '2 weeks', + | 'DelTa.ChEckPoiNtinTervAl' = '20', + | 'key' = "value" + |) + """.stripMargin) + + val snapshot = deltaLog.update() + assertEqual(snapshot.metadata.configuration, Map( + "delta.logRetentionDuration" -> "2 weeks", + "delta.checkpointInterval" -> "20", + "key" -> "value")) + assert(deltaLog.deltaRetentionMillis(snapshot.metadata) == 2 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot.metadata) == 20) + } + } + } + + + test("schema mismatch between DDL and table location should throw an error") { + withTempDir { tempDir => + withTable("delta_test") { + val deltaLog = getDeltaLog(new Path(tempDir.getCanonicalPath)) + + val txn = deltaLog.startTransaction() + txn.commit( + Seq(Metadata(schemaString = new StructType().add("a", "long").add("b", "long").json)), + DeltaOperations.ManualUpdate) + + val ex = intercept[AnalysisException] { + sql("CREATE TABLE delta_test(a LONG, b String)" + + s" USING delta OPTIONS (path '${tempDir.getCanonicalPath}')") + } + assert(ex.getMessage.contains("The specified schema does not match the existing schema")) + assert(ex.getMessage.contains("Specified type for b is different")) + + val ex1 = intercept[AnalysisException] { + sql("CREATE TABLE delta_test(a LONG)" + + s" USING delta OPTIONS (path '${tempDir.getCanonicalPath}')") + } + assert(ex1.getMessage.contains("The specified schema does not match the existing schema")) + assert(ex1.getMessage.contains("Specified schema is missing field")) + + val ex2 = intercept[AnalysisException] { + sql("CREATE TABLE delta_test(a LONG, b String, c INT, d LONG)" + + s" USING delta OPTIONS (path '${tempDir.getCanonicalPath}')") + } + assert(ex2.getMessage.contains("The specified schema does not match the existing schema")) + 
assert(ex2.getMessage.contains("Specified schema has additional field")) + assert(ex2.getMessage.contains("Specified type for b is different")) + } + } + } + + test( + "schema metadata mismatch between DDL and table location should throw an error") { + withTempDir { tempDir => + withTable("delta_test") { + val deltaLog = getDeltaLog(new Path(tempDir.getCanonicalPath)) + + val txn = deltaLog.startTransaction() + txn.commit( + Seq(Metadata(schemaString = new StructType().add("a", "long") + .add("b", "string", nullable = true, + new MetadataBuilder().putBoolean("pii", value = true).build()).json)), + DeltaOperations.ManualUpdate) + val ex = intercept[AnalysisException] { + sql("CREATE TABLE delta_test(a LONG, b String)" + + s" USING delta OPTIONS (path '${tempDir.getCanonicalPath}')") + } + assert(ex.getMessage.contains("The specified schema does not match the existing schema")) + assert(ex.getMessage.contains("metadata for field b is different")) + } + } + } + + test( + "partition schema mismatch between DDL and table location should throw an error") { + withTempDir { tempDir => + withTable("delta_test") { + val deltaLog = getDeltaLog(new Path(tempDir.getCanonicalPath)) + + val txn = deltaLog.startTransaction() + txn.commit( + Seq(Metadata( + schemaString = new StructType().add("a", "long").add("b", "string").json, + partitionColumns = Seq("a"))), + DeltaOperations.ManualUpdate) + val ex = intercept[AnalysisException](sql("CREATE TABLE delta_test(a LONG, b String)" + + s" USING delta PARTITIONED BY(b) LOCATION '${tempDir.getCanonicalPath}'")) + assert(ex.getMessage.contains( + "The specified partitioning does not match the existing partitioning")) + } + } + } + + testQuietly("create table with unknown table properties should throw an error") { + withTempDir { tempDir => + withTable("delta_test") { + val ex = intercept[AnalysisException](sql( + s""" + |CREATE TABLE delta_test(a LONG, b String) + |USING delta LOCATION '${tempDir.getCanonicalPath}' + |TBLPROPERTIES('delta.key' = 'value') + """.stripMargin)) + assert(ex.getMessage.contains( + "Unknown configuration was specified: delta.key")) + } + } + } + + testQuietly("create table with invalid table properties should throw an error") { + withTempDir { tempDir => + withTable("delta_test") { + val ex1 = intercept[IllegalArgumentException](sql( + s""" + |CREATE TABLE delta_test(a LONG, b String) + |USING delta LOCATION '${tempDir.getCanonicalPath}' + |TBLPROPERTIES('delta.randomPrefixLength' = '-1') + """.stripMargin)) + assert(ex1.getMessage.contains( + "randomPrefixLength needs to be greater than 0.")) + + val ex2 = intercept[IllegalArgumentException](sql( + s""" + |CREATE TABLE delta_test(a LONG, b String) + |USING delta LOCATION '${tempDir.getCanonicalPath}' + |TBLPROPERTIES('delta.randomPrefixLength' = 'value') + """.stripMargin)) + assert(ex2.getMessage.contains( + "randomPrefixLength needs to be greater than 0.")) + } + } + } + + test( + "table properties mismatch between DDL and table location should throw an error") { + withTempDir { tempDir => + withTable("delta_test") { + val deltaLog = getDeltaLog(new Path(tempDir.getCanonicalPath)) + + val txn = deltaLog.startTransaction() + txn.commit( + Seq(Metadata( + schemaString = new StructType().add("a", "long").add("b", "string").json)), + DeltaOperations.ManualUpdate) + val ex = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE delta_test(a LONG, b String) + |USING delta LOCATION '${tempDir.getCanonicalPath}' + |TBLPROPERTIES('delta.randomizeFilePrefixes' = 'true') + 
""".stripMargin) + } + + assert(ex.getMessage.contains( + "The specified properties do not match the existing properties")) + } + } + } + + test("create table on an existing table location") { + val catalog = spark.sessionState.catalog + withTempDir { tempDir => + withTable("delta_test") { + val deltaLog = getDeltaLog(new Path(tempDir.getCanonicalPath)) + + val txn = deltaLog.startTransaction() + txn.commit( + Seq(Metadata( + schemaString = new StructType().add("a", "long").add("b", "string").json, + partitionColumns = Seq("b"))), + DeltaOperations.ManualUpdate) + sql("CREATE TABLE delta_test(a LONG, b String) USING delta " + + s"OPTIONS (path '${tempDir.getCanonicalPath}') PARTITIONED BY(b)") + val table = catalog.getTableMetadata(TableIdentifier("delta_test")) + assert(table.tableType == CatalogTableType.EXTERNAL) + assert(table.provider.contains("delta")) + + // Query the data and the metadata directly via the DeltaLog + val deltaLog2 = getDeltaLog(table) + + // Since we manually committed Metadata without schema, we won't have column metadata in + // the latest deltaLog snapshot + assert( + deltaLog2.snapshot.schema == new StructType().add("a", "long").add("b", "string")) + assert( + deltaLog2.snapshot.metadata.partitionSchema == new StructType().add("b", "string")) + + assert(getSchema("delta_test") === deltaLog2.snapshot.schema) + assert(getPartitioningColumns("delta_test") === Seq("b")) + + // External catalog does not contain the schema and partition column names. + verifyTableInCatalog(catalog, "delta_test") + } + } + } + + test("create datasource table with a non-existing location") { + withTempPath { dir => + withTable("t") { + spark.sql(s"CREATE TABLE t(a int, b int) USING delta LOCATION '${dir.toURI}'") + + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(table.location == makeQualifiedPath(dir.getAbsolutePath)) + + spark.sql("INSERT INTO TABLE t SELECT 1, 2") + assert(dir.exists()) + + checkDatasetUnorderly( + sql("SELECT * FROM t").as[(Int, Int)], + 1 -> 2) + } + } + + // partition table + withTempPath { dir => + withTable("t1") { + spark.sql( + s"CREATE TABLE t1(a int, b int) USING delta PARTITIONED BY(a) LOCATION '${dir.toURI}'") + + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1")) + assert(table.location == makeQualifiedPath(dir.getAbsolutePath)) + + Seq((1, 2)).toDF("a", "b") + .write.format("delta").mode("append").save(table.location.toString) + val read = spark.read.format("delta").load(table.location.toString) + checkAnswer(read, Seq(Row(1, 2))) + + val deltaLog = loadDeltaLog(table.location.toString) + assertPartitionWithValueExists("a", "1", deltaLog) + } + } + } + + Seq(true, false).foreach { shouldDelete => + val tcName = if (shouldDelete) "non-existing" else "existing" + test(s"CTAS for external data source table with $tcName location") { + val catalog = spark.sessionState.catalog + withTable("t", "t1") { + withTempDir { dir => + if (shouldDelete) dir.delete() + spark.sql( + s""" + |CREATE TABLE t + |USING delta + |LOCATION '${dir.toURI}' + |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(table.tableType == CatalogTableType.EXTERNAL) + assert(table.provider.contains("delta")) + assert(table.location == makeQualifiedPath(dir.getAbsolutePath)) + + // Query the data and the metadata directly via the DeltaLog + val deltaLog = getDeltaLog(table) + + assertEqual(deltaLog.snapshot.schema, new 
StructType() + .add("a", "integer").add("b", "integer") + .add("c", "integer").add("d", "integer")) + assertEqual( + deltaLog.snapshot.metadata.partitionSchema, new StructType()) + + assertEqual(getSchema("t"), deltaLog.snapshot.schema) + assert(getPartitioningColumns("t").isEmpty) + + // External catalog does not contain the schema and partition column names. + verifyTableInCatalog(catalog, "t") + + // Query the table + checkAnswer(spark.table("t"), Row(3, 4, 1, 2)) + + // Directly query the reservoir + checkAnswer(spark.read.format("delta") + .load(new Path(table.storage.locationUri.get).toString), Seq(Row(3, 4, 1, 2))) + } + // partition table + withTempDir { dir => + if (shouldDelete) dir.delete() + spark.sql( + s""" + |CREATE TABLE t1 + |USING delta + |PARTITIONED BY(a, b) + |LOCATION '${dir.toURI}' + |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1")) + assert(table.tableType == CatalogTableType.EXTERNAL) + assert(table.provider.contains("delta")) + assert(table.location == makeQualifiedPath(dir.getAbsolutePath)) + + // Query the data and the metadata directly via the DeltaLog + val deltaLog = getDeltaLog(table) + + assertEqual(deltaLog.snapshot.schema, new StructType() + .add("a", "integer").add("b", "integer") + .add("c", "integer").add("d", "integer")) + assertEqual( + deltaLog.snapshot.metadata.partitionSchema, new StructType() + .add("a", "integer").add("b", "integer")) + + assertEqual(getSchema("t1"), deltaLog.snapshot.schema) + assert(getPartitioningColumns("t1") == Seq("a", "b")) + + // External catalog does not contain the schema and partition column names. + verifyTableInCatalog(catalog, "t1") + + // Query the table + checkAnswer(spark.table("t1"), Row(3, 4, 1, 2)) + + // Directly query the reservoir + checkAnswer(spark.read.format("delta") + .load(new Path(table.storage.locationUri.get).toString), Seq(Row(3, 4, 1, 2))) + } + } + } + } + + test("CTAS with table properties") { + withTable("delta_test") { + sql( + s""" + |CREATE TABLE delta_test + |USING delta + |TBLPROPERTIES( + | 'delta.logRetentionDuration' = '2 weeks', + | 'delta.checkpointInterval' = '20', + | 'key' = 'value' + |) + |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d + """.stripMargin) + + val deltaLog = getDeltaLog("delta_test") + + val snapshot = deltaLog.update() + assertEqual(snapshot.metadata.configuration, Map( + "delta.logRetentionDuration" -> "2 weeks", + "delta.checkpointInterval" -> "20", + "key" -> "value")) + assert(deltaLog.deltaRetentionMillis(snapshot.metadata) == 2 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot.metadata) == 20) + } + } + + test("CTAS with table properties - case insensitivity") { + withTable("delta_test") { + sql( + s""" + |CREATE TABLE delta_test + |USING delta + |TBLPROPERTIES( + | 'dEltA.lOgrEteNtiOndURaTion' = '2 weeks', + | 'DelTa.ChEckPoiNtinTervAl' = '20' + |) + |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d + """.stripMargin) + + val deltaLog = getDeltaLog("delta_test") + + val snapshot = deltaLog.update() + assertEqual(snapshot.metadata.configuration, + Map("delta.logRetentionDuration" -> "2 weeks", "delta.checkpointInterval" -> "20")) + assert(deltaLog.deltaRetentionMillis(snapshot.metadata) == 2 * 7 * 24 * 60 * 60 * 1000) + assert(deltaLog.checkpointInterval(snapshot.metadata) == 20) + } + } + + testQuietly("CTAS external table with existing data should fail") { + withTable("t") { + withTempDir { dir => + dir.delete() + Seq((3, 4)).toDF("a", "b") + 
.write.format("delta") + .save(dir.toString) + val ex = intercept[AnalysisException](spark.sql( + s""" + |CREATE TABLE t + |USING delta + |LOCATION '${dir.toURI}' + |AS SELECT 1 as a, 2 as b + """.stripMargin)) + assert(ex.getMessage.contains("Cannot create table")) + } + } + + withTable("t") { + withTempDir { dir => + dir.delete() + Seq((3, 4)).toDF("a", "b") + .write.format("parquet") + .save(dir.toString) + val ex = intercept[AnalysisException](spark.sql( + s""" + |CREATE TABLE t + |USING delta + |LOCATION '${dir.toURI}' + |AS SELECT 1 as a, 2 as b + """.stripMargin)) + assert(ex.getMessage.contains("Cannot create table")) + } + } + } + + testQuietly("CTAS with unknown table properties should throw an error") { + withTempDir { tempDir => + withTable("delta_test") { + val ex = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE delta_test + |USING delta + |LOCATION '${tempDir.getCanonicalPath}' + |TBLPROPERTIES('delta.key' = 'value') + |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d + """.stripMargin) + } + assert(ex.getMessage.contains( + "Unknown configuration was specified: delta.key")) + } + } + } + + testQuietly("CTAS with invalid table properties should throw an error") { + withTempDir { tempDir => + withTable("delta_test") { + val ex1 = intercept[IllegalArgumentException] { + sql( + s""" + |CREATE TABLE delta_test + |USING delta + |LOCATION '${tempDir.getCanonicalPath}' + |TBLPROPERTIES('delta.randomPrefixLength' = '-1') + |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d + """.stripMargin) + } + assert(ex1.getMessage.contains( + "randomPrefixLength needs to be greater than 0.")) + + val ex2 = intercept[IllegalArgumentException] { + sql( + s""" + |CREATE TABLE delta_test + |USING delta + |LOCATION '${tempDir.getCanonicalPath}' + |TBLPROPERTIES('delta.randomPrefixLength' = 'value') + |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d + """.stripMargin) + } + assert(ex2.getMessage.contains( + "randomPrefixLength needs to be greater than 0.")) + } + } + } + + Seq("a:b", "a%b").foreach { specialChars => + test(s"data source table:partition column name containing $specialChars") { + // On Windows, it looks colon in the file name is illegal by default. See + // https://support.microsoft.com/en-us/help/289627 + assume(!Utils.isWindows || specialChars != "a:b") + + withTable("t") { + withTempDir { dir => + spark.sql( + s""" + |CREATE TABLE t(a string, `$specialChars` string) + |USING delta + |PARTITIONED BY(`$specialChars`) + |LOCATION '${dir.toURI}' + """.stripMargin) + + assert(dir.listFiles().forall(_.toString.contains("_delta_log"))) + spark.sql(s"INSERT INTO TABLE t SELECT 1, 2") + + val deltaLog = loadDeltaLog(dir.toString) + assertPartitionWithValueExists(specialChars, "2", deltaLog) + + checkAnswer(spark.table("t"), Row("1", "2") :: Nil) + } + } + } + } + + Seq("a b", "a:b", "a%b").foreach { specialChars => + test(s"location uri contains $specialChars for datasource table") { + // On Windows, it looks colon in the file name is illegal by default. See + // https://support.microsoft.com/en-us/help/289627 + assume(!Utils.isWindows || specialChars != "a:b") + + withTable("t", "t1") { + withTempDir { dir => + val loc = new File(dir, specialChars) + loc.mkdir() + // The parser does not recognize the backslashes on Windows as they are. + // These currently should be escaped. 
+ val escapedLoc = loc.getAbsolutePath.replace("\\", "\\\\") + spark.sql( + s""" + |CREATE TABLE t(a string) + |USING delta + |LOCATION '$escapedLoc' + """.stripMargin) + + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(table.location == makeQualifiedPath(loc.getAbsolutePath)) + assert(new Path(table.location).toString.contains(specialChars)) + + assert(loc.listFiles().forall(_.toString.contains("_delta_log"))) + spark.sql("INSERT INTO TABLE t SELECT 1") + assert(!loc.listFiles().forall(_.toString.contains("_delta_log"))) + checkAnswer(spark.table("t"), Row("1") :: Nil) + } + + withTempDir { dir => + val loc = new File(dir, specialChars) + loc.mkdir() + // The parser does not recognize the backslashes on Windows as they are. + // These currently should be escaped. + val escapedLoc = loc.getAbsolutePath.replace("\\", "\\\\") + spark.sql( + s""" + |CREATE TABLE t1(a string, b string) + |USING delta + |PARTITIONED BY(b) + |LOCATION '$escapedLoc' + """.stripMargin) + + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1")) + assert(table.location == makeQualifiedPath(loc.getAbsolutePath)) + assert(new Path(table.location).toString.contains(specialChars)) + + assert(loc.listFiles().forall(_.toString.contains("_delta_log"))) + spark.sql("INSERT INTO TABLE t1 SELECT 1, 2") + + checkAnswer(spark.table("t1"), Row("1", "2") :: Nil) + + if (columnMappingEnabled) { + // column mapping always use random file prefixes so we can't compare path + val deltaLog = loadDeltaLog(loc.getCanonicalPath) + val partPaths = getPartitionFilePathsWithValue("b", "2", deltaLog) + assert(partPaths.nonEmpty) + assert(partPaths.forall { p => + val parentPath = new File(p).getParentFile + !parentPath.listFiles().forall(_.toString.contains("_delta_log")) + }) + + // In column mapping mode, as we are using random file prefixes, + // this partition value is valid + spark.sql("INSERT INTO TABLE t1 SELECT 1, '2017-03-03 12:13%3A14'") + assertPartitionWithValueExists("b", "2017-03-03 12:13%3A14", deltaLog) + checkAnswer( + spark.table("t1"), Row("1", "2") :: Row("1", "2017-03-03 12:13%3A14") :: Nil) + } else { + val partFile = new File(loc, "b=2") + assert(!partFile.listFiles().forall(_.toString.contains("_delta_log"))) + spark.sql("INSERT INTO TABLE t1 SELECT 1, '2017-03-03 12:13%3A14'") + val partFile1 = new File(loc, "b=2017-03-03 12:13%3A14") + assert(!partFile1.exists()) + + if (!Utils.isWindows) { + // Actual path becomes "b=2017-03-03%2012%3A13%253A14" on Windows. + val partFile2 = new File(loc, "b=2017-03-03 12%3A13%253A14") + assert(!partFile2.listFiles().forall(_.toString.contains("_delta_log"))) + checkAnswer( + spark.table("t1"), Row("1", "2") :: Row("1", "2017-03-03 12:13%3A14") :: Nil) + } + } + } + } + } + } + + test("the qualified path of a delta table is stored in the catalog") { + withTempDir { dir => + withTable("t", "t1") { + assert(!dir.getAbsolutePath.startsWith("file:/")) + // The parser does not recognize the backslashes on Windows as they are. + // These currently should be escaped. 
+ val escapedDir = dir.getAbsolutePath.replace("\\", "\\\\") + spark.sql( + s""" + |CREATE TABLE t(a string) + |USING delta + |LOCATION '$escapedDir' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(table.location.toString.startsWith("file:/")) + } + } + + withTempDir { dir => + withTable("t", "t1") { + assert(!dir.getAbsolutePath.startsWith("file:/")) + // The parser does not recognize the backslashes on Windows as they are. + // These currently should be escaped. + val escapedDir = dir.getAbsolutePath.replace("\\", "\\\\") + spark.sql( + s""" + |CREATE TABLE t1(a string, b string) + |USING delta + |PARTITIONED BY(b) + |LOCATION '$escapedDir' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1")) + assert(table.location.toString.startsWith("file:/")) + } + } + } + + testQuietly("CREATE TABLE with existing data path") { + // Re-use `filterV2TableProperties()` from `SQLTestUtils` as soon as it will be released. + def isReservedProperty(propName: String): Boolean = { + CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(propName) || + propName.startsWith(TableCatalog.OPTION_PREFIX) || + propName == TableCatalog.PROP_EXTERNAL + } + def filterV2TableProperties(properties: Map[String, String]): Map[String, String] = { + properties.filterNot(kv => isReservedProperty(kv._1)) + } + + withTempPath { path => + withTable("src", "t1", "t2", "t3", "t4", "t5", "t6") { + sql("CREATE TABLE src(i int, p string) USING delta PARTITIONED BY (p) " + + "TBLPROPERTIES('delta.randomizeFilePrefixes' = 'true') " + + s"LOCATION '${path.getAbsolutePath}'") + sql("INSERT INTO src SELECT 1, 'a'") + + // CREATE TABLE without specifying anything works + sql(s"CREATE TABLE t1 USING delta LOCATION '${path.getAbsolutePath}'") + checkAnswer(spark.table("t1"), Row(1, "a")) + + // CREATE TABLE with the same schema and partitioning but no properties works + sql(s"CREATE TABLE t2(i int, p string) USING delta PARTITIONED BY (p) " + + s"LOCATION '${path.getAbsolutePath}'") + checkAnswer(spark.table("t2"), Row(1, "a")) + // Table properties should not be changed to empty. + assert(filterV2TableProperties(getTableProperties("t2")) == + Map("delta.randomizeFilePrefixes" -> "true")) + + // CREATE TABLE with the same schema but no partitioning fails. 
+ val e0 = intercept[AnalysisException] { + sql(s"CREATE TABLE t3(i int, p string) USING delta LOCATION '${path.getAbsolutePath}'") + } + assert(e0.message.contains("The specified partitioning does not match the existing")) + + // CREATE TABLE with different schema fails + val e1 = intercept[AnalysisException] { + sql(s"CREATE TABLE t4(j int, p string) USING delta LOCATION '${path.getAbsolutePath}'") + } + assert(e1.message.contains("The specified schema does not match the existing")) + + // CREATE TABLE with different partitioning fails + val e2 = intercept[AnalysisException] { + sql(s"CREATE TABLE t5(i int, p string) USING delta PARTITIONED BY (i) " + + s"LOCATION '${path.getAbsolutePath}'") + } + assert(e2.message.contains("The specified partitioning does not match the existing")) + + // CREATE TABLE with different table properties fails + val e3 = intercept[AnalysisException] { + sql(s"CREATE TABLE t6 USING delta " + + "TBLPROPERTIES ('delta.randomizeFilePrefixes' = 'false') " + + s"LOCATION '${path.getAbsolutePath}'") + } + assert(e3.message.contains("The specified properties do not match the existing")) + } + } + } + + test("CREATE TABLE on existing data should not commit metadata") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath() + val df = Seq(1, 2, 3, 4, 5).toDF() + df.write.format("delta").save(path) + val deltaLog = getDeltaLog(new Path(path)) + + val oldVersion = deltaLog.snapshot.version + sql(s"CREATE TABLE table USING delta LOCATION '$path'") + assert(oldVersion == deltaLog.snapshot.version) + } + } +} + +class DeltaTableCreationSuite + extends DeltaTableCreationTests + with DeltaSQLCommandTest { + + private def loadTable(tableName: String): Table = { + val ti = spark.sessionState.sqlParser.parseMultipartIdentifier(tableName) + val namespace = if (ti.length == 1) Array("default") else ti.init.toArray + spark.sessionState.catalogManager.currentCatalog.asInstanceOf[TableCatalog] + .loadTable(Identifier.of(namespace, ti.last)) + } + + override protected def getPartitioningColumns(tableName: String): Seq[String] = { + loadTable(tableName).partitioning() + .map(_.references().head.fieldNames().mkString(".")) + } + + override def getSchema(tableName: String): StructType = { + loadTable(tableName).schema() + } + + override protected def getTableProperties(tableName: String): Map[String, String] = { + loadTable(tableName).properties().asScala.toMap + .filterKeys(!CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(_)) + .filterKeys(!TableFeatureProtocolUtils.isTableProtocolProperty(_)) + .toMap + } + + testQuietly("REPLACE TABLE") { + withTempDir { dir => + withTable("delta_test") { + sql( + s"""CREATE TABLE delta_test + |USING delta + |LOCATION '${dir.getAbsolutePath}' + |AS SELECT 1 as a + """.stripMargin) + val deltaLog = DeltaLog.forTable(spark, dir) + assert(deltaLog.snapshot.version === 0, "CTAS should be a single commit") + + sql( + s"""REPLACE TABLE delta_test (col string) + |USING delta + |LOCATION '${dir.getAbsolutePath}' + """.stripMargin) + assert(deltaLog.snapshot.version === 1) + assertEqual( + deltaLog.snapshot.schema, new StructType().add("col", "string")) + + + val e2 = intercept[AnalysisException] { + sql( + s"""REPLACE TABLE delta_test + |USING delta + |LOCATION '${dir.getAbsolutePath}' + """.stripMargin) + } + assert(e2.getMessage.contains("schema is not provided")) + } + } + } + + testQuietly("CREATE OR REPLACE TABLE on table without schema") { + withTempDir { dir => + withTable("delta_test") { + 
spark.range(10).write.format("delta").option("path", dir.getCanonicalPath) + .saveAsTable("delta_test") + // We need the schema + val e = intercept[AnalysisException] { + sql(s"""CREATE OR REPLACE TABLE delta_test + |USING delta + |LOCATION '${dir.getAbsolutePath}' + """.stripMargin) + } + assert(e.getMessage.contains("schema is not provided")) + } + } + } + + testQuietly("CREATE OR REPLACE TABLE on non-empty directory") { + withTempDir { dir => + spark.range(10).write.format("delta").save(dir.getCanonicalPath) + withTable("delta_test") { + // We need the schema + val e = intercept[AnalysisException] { + sql(s"""CREATE OR REPLACE TABLE delta_test + |USING delta + |LOCATION '${dir.getAbsolutePath}' + """.stripMargin) + } + assert(e.getMessage.contains("schema is not provided")) + } + } + } + + testQuietly( + "REPLACE TABLE on non-empty directory") { + withTempDir { dir => + spark.range(10).write.format("delta").save(dir.getCanonicalPath) + withTable("delta_test") { + val e = intercept[AnalysisException] { + sql( + s"""REPLACE TABLE delta_test + |USING delta + |LOCATION '${dir.getAbsolutePath}' + """.stripMargin) + } + assert(e.getMessage.contains("cannot be replaced as it did not exist") || + e.getMessage.contains(s"table or view `default`.`delta_test` cannot be found")) + } + } + } + + test("Create a table without comment") { + withTempDir { dir => + val table = "delta_without_comment" + withTable(table) { + sql(s"CREATE TABLE $table (col string) USING delta LOCATION '${dir.getAbsolutePath}'") + checkResult( + sql(s"DESCRIBE DETAIL $table"), + Seq("delta", null), + Seq("format", "description")) + } + } + } + + protected def withEmptySchemaTable(emptyTableName: String)(f: => Unit): Unit = { + def getDeltaLog: DeltaLog = + DeltaLog.forTable(spark, TableIdentifier(emptyTableName)) + + // create using SQL API + withTable(emptyTableName) { + sql(s"CREATE TABLE $emptyTableName USING delta") + assert(getDeltaLog.snapshot.schema.isEmpty) + f + + // just make sure this statement runs + sql(s"CREATE TABLE IF NOT EXISTS $emptyTableName USING delta") + } + + // create using Delta table API (creates v1 table) + withTable(emptyTableName) { + io.delta.tables.DeltaTable + .create(spark) + .tableName(emptyTableName) + .execute() + assert(getDeltaLog.snapshot.schema.isEmpty) + f + io.delta.tables.DeltaTable + .createIfNotExists(spark) + .tableName(emptyTableName) + .execute() + } + + } + + test("Create an empty table without schema - unsupported cases") { + import testImplicits._ + + withSQLConf(DeltaSQLConf.DELTA_ALLOW_CREATE_EMPTY_SCHEMA_TABLE.key -> "true") { + val emptySchemaTableName = "t1" + + // TODO: support CREATE OR REPLACE code path if needed in the future + intercept[AnalysisException] { + sql(s"CREATE OR REPLACE TABLE $emptySchemaTableName USING delta") + } + + // similarly blocked using Delta Table API + withTable(emptySchemaTableName) { + intercept[AnalysisException] { + io.delta.tables.DeltaTable + .createOrReplace(spark) + .tableName(emptySchemaTableName) + .execute() + } + } + + withTable(emptySchemaTableName) { + io.delta.tables.DeltaTable + .create(spark) + .tableName(emptySchemaTableName) + .execute() + + intercept[AnalysisException] { + io.delta.tables.DeltaTable + .replace(spark) + .tableName(emptySchemaTableName) + .execute() + } + } + + // external table with an invalid location it shouldn't work (e.g. 
no transaction log present) + withTable(emptySchemaTableName) { + withTempDir { dir => + Seq(1, 2, 3).toDF().write.format("delta").save(dir.getAbsolutePath) + Utils.deleteRecursively(new File(dir, "_delta_log")) + val e = intercept[AnalysisException] { + sql(s"CREATE TABLE $emptySchemaTableName USING delta LOCATION '${dir.getAbsolutePath}'") + } + assert(e.getErrorClass == "DELTA_CREATE_EXTERNAL_TABLE_WITHOUT_TXN_LOG") + } + } + + // CTAS from an empty schema dataframe should be blocked + intercept[AnalysisException] { + withTable(emptySchemaTableName) { + val df = spark.emptyDataFrame + df.createOrReplaceTempView("empty_df") + sql(s"CREATE TABLE $emptySchemaTableName USING delta AS SELECT * FROM empty_df") + } + } + + // create empty schema table using dataframe api should be blocked + intercept[AnalysisException] { + withTable(emptySchemaTableName) { + spark.emptyDataFrame + .write.format("delta") + .saveAsTable(emptySchemaTableName) + } + } + + intercept[AnalysisException] { + withTable(emptySchemaTableName) { + spark.emptyDataFrame + .writeTo(emptySchemaTableName) + .using("delta") + .create() + } + } + + def assertFailToRead(f: => Any): Unit = { + try f catch { + case e: AnalysisException => + assert(e.getMessage.contains("that does not have any columns.")) + } + } + + def assertSchemaEvolutionRequired(f: => Any): Unit = { + val e = intercept[AnalysisException] { + f + } + assert(e.getMessage.contains("A schema mismatch detected when writing to the Delta")) + } + + // data reading or writing without mergeSchema should fail + withEmptySchemaTable(emptySchemaTableName) { + assertFailToRead { + spark.read.table(emptySchemaTableName).collect() + } + + assertFailToRead { + sql(s"SELECT * FROM $emptySchemaTableName").collect() + } + + assertSchemaEvolutionRequired { + sql(s"INSERT INTO $emptySchemaTableName VALUES (1,2,3)") + } + + // but enabling auto merge should make insert work + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> "true") { + sql(s"INSERT INTO $emptySchemaTableName VALUES (1,2,3)") + checkAnswer(spark.read.table(emptySchemaTableName), Seq(Row(1, 2, 3))) + } + } + + // allows drop and recreate the same table with empty schema + withTempDir { dir => + withTable(emptySchemaTableName) { + sql(s"CREATE TABLE $emptySchemaTableName USING delta LOCATION '${dir.getCanonicalPath}'") + val snapshot = DeltaLog.forTable(spark, TableIdentifier(emptySchemaTableName)).update() + assert(snapshot.schema.isEmpty && snapshot.version == 0) + assertFailToRead { + sql(s"SELECT * FROM $emptySchemaTableName") + } + // drop the table + sql(s"DROP TABLE $emptySchemaTableName") + // recreate the table again should work + sql(s"CREATE TABLE $emptySchemaTableName USING delta LOCATION '${dir.getCanonicalPath}'") + assertFailToRead { + sql(s"SELECT * FROM $emptySchemaTableName") + } + // write some data to it + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> "true") { + sql(s"INSERT INTO $emptySchemaTableName VALUES (1,2,3)") + checkAnswer(spark.read.table(emptySchemaTableName), Seq(Row(1, 2, 3))) + } + // drop again + sql(s"DROP TABLE $emptySchemaTableName") + // recreate the table again should work + sql(s"CREATE TABLE $emptySchemaTableName USING delta LOCATION '${dir.getCanonicalPath}'") + checkAnswer(spark.read.table(emptySchemaTableName), Seq(Row(1, 2, 3))) + } + } + } + } + + test("Create an empty table without schema - supported cases") { + import testImplicits._ + + withSQLConf(DeltaSQLConf.DELTA_ALLOW_CREATE_EMPTY_SCHEMA_TABLE.key -> "true") { + val emptyTableName = "t1" + + 
def getDeltaLog: DeltaLog = DeltaLog.forTable(spark, TableIdentifier(emptyTableName)) + + // yet CTAS should be allowed + withTable(emptyTableName) { + sql(s"CREATE TABLE $emptyTableName USING delta AS SELECT 1") + assert(getDeltaLog.snapshot.schema.size == 1) + } + + // and create Delta table using existing valid location should work without () + withTable(emptyTableName) { + withTempDir { dir => + Seq(1, 2, 3).toDF().write.format("delta").save(dir.getAbsolutePath) + sql(s"CREATE TABLE $emptyTableName USING delta LOCATION '${dir.getAbsolutePath}'") + assert(getDeltaLog.snapshot.schema.size == 1) + } + } + + // checkpointing should work + withEmptySchemaTable(emptyTableName) { + getDeltaLog.checkpoint() + assert(getDeltaLog.readLastCheckpointFile().exists(_.version == 0)) + // run some operations + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> "true") { + sql(s"INSERT INTO $emptyTableName VALUES (1,2,3)") + checkAnswer(spark.read.table(emptyTableName), Seq(Row(1, 2, 3))) + } + getDeltaLog.checkpoint() + assert(getDeltaLog.readLastCheckpointFile().exists(_.version == 1)) + } + + withEmptySchemaTable(emptyTableName) { + // TODO: possibly support MERGE into the future + try { + val source = "t2" + withTable(source) { + sql(s"CREATE TABLE $source USING delta AS SELECT 1") + sql( + s""" + |MERGE INTO $emptyTableName + |USING $source + |ON FALSE + |WHEN NOT MATCHED + | THEN INSERT * + |""".stripMargin) + } + } catch { + case _: AssertionError | _: SparkException => + } + } + + // Delta specific DMLs should work, though they should basically be noops + withEmptySchemaTable(emptyTableName) { + sql(s"OPTIMIZE $emptyTableName") + sql(s"VACUUM $emptyTableName") + + assert(getDeltaLog.snapshot.schema.isEmpty) + } + + // metadata DDL should work + withEmptySchemaTable(emptyTableName) { + sql(s"ALTER TABLE $emptyTableName SET TBLPROPERTIES ('a' = 'b')") + assert(DeltaLog.forTable(spark, + TableIdentifier(emptyTableName)).snapshot.metadata.configuration.contains("a")) + + checkAnswer( + sql(s"COMMENT ON TABLE $emptyTableName IS 'My Empty Cool Table'"), Nil) + assert(sql(s"DESCRIBE TABLE $emptyTableName").collect().length == 0) + + // create table, alter tbl property, tbl comment + assert(sql(s"DESCRIBE HISTORY $emptyTableName").collect().length == 3) + + checkAnswer(sql(s"SHOW COLUMNS IN $emptyTableName"), Nil) + } + + // schema evolution ddl should work + withEmptySchemaTable(emptyTableName) { + sql(s"ALTER TABLE $emptyTableName ADD COLUMN (id long COMMENT 'haha')") + assert(getDeltaLog.snapshot.schema.size == 1) + } + + withEmptySchemaTable(emptyTableName) { + sql(s"ALTER TABLE $emptyTableName ADD COLUMNS (id long, id2 long)") + assert(getDeltaLog.snapshot.schema.size == 2) + } + + // schema evolution through df should work + // - v1 api + withEmptySchemaTable(emptyTableName) { + Seq(1, 2, 3).toDF() + .write.format("delta") + .mode("append") + .option("mergeSchema", "true") + .saveAsTable(emptyTableName) + + assert(getDeltaLog.snapshot.schema.size == 1) + } + + withEmptySchemaTable(emptyTableName) { + Seq(1, 2, 3).toDF() + .write.format("delta") + .mode("overwrite") + .option("overwriteSchema", "true") + .saveAsTable(emptyTableName) + + assert(getDeltaLog.snapshot.schema.size == 1) + } + + // - v2 api + withEmptySchemaTable(emptyTableName) { + Seq(1, 2, 3).toDF() + .writeTo(emptyTableName) + .option("mergeSchema", "true") + .append() + + assert(getDeltaLog.snapshot.schema.size == 1) + } + + withEmptySchemaTable(emptyTableName) { + Seq(1, 2, 3).toDF() + .writeTo(emptyTableName) + 
.using("delta") + .replace() + + assert(getDeltaLog.snapshot.schema.size == 1) + } + + + } + } + + test("Create a table with comment") { + val table = "delta_with_comment" + withTempDir { dir => + withTable(table) { + sql( + s""" + |CREATE TABLE $table (col string) + |USING delta + |COMMENT 'This is my table' + |LOCATION '${dir.getAbsolutePath}' + """.stripMargin) + checkResult( + sql(s"DESCRIBE DETAIL $table"), + Seq("delta", "This is my table"), + Seq("format", "description")) + } + } + } + + test("Replace a table without comment") { + withTempDir { dir => + val table = "replace_table_without_comment" + val location = dir.getAbsolutePath + withTable(table) { + sql(s"CREATE TABLE $table (col string) USING delta COMMENT 'Table' LOCATION '$location'") + sql(s"REPLACE TABLE $table (col string) USING delta LOCATION '$location'") + checkResult( + sql(s"DESCRIBE DETAIL $table"), + Seq("delta", null), + Seq("format", "description")) + } + } + } + + test("Replace a table with comment") { + withTempDir { dir => + val table = "replace_table_with_comment" + val location = dir.getAbsolutePath + withTable(table) { + sql(s"CREATE TABLE $table (col string) USING delta LOCATION '$location'") + sql( + s""" + |REPLACE TABLE $table (col string) + |USING delta + |COMMENT 'This is my table' + |LOCATION '$location' + """.stripMargin) + checkResult( + sql(s"DESCRIBE DETAIL $table"), + Seq("delta", "This is my table"), + Seq("format", "description")) + } + } + } + + test("CTAS a table without comment") { + val table = "ctas_without_comment" + withTable(table) { + sql(s"CREATE TABLE $table USING delta AS SELECT * FROM range(10)") + checkResult( + sql(s"DESCRIBE DETAIL $table"), + Seq("delta", null), + Seq("format", "description")) + } + } + + test("CTAS a table with comment") { + val table = "ctas_with_comment" + withTable(table) { + sql( + s"""CREATE TABLE $table + |USING delta + |COMMENT 'This table is created with existing data' + |AS SELECT * FROM range(10) + """.stripMargin) + checkResult( + sql(s"DESCRIBE DETAIL $table"), + Seq("delta", "This table is created with existing data"), + Seq("format", "description")) + } + } + + test("Replace CTAS a table without comment") { + val table = "replace_ctas_without_comment" + withTable(table) { + sql( + s"""CREATE TABLE $table + |USING delta + |COMMENT 'This table is created with existing data' + |AS SELECT * FROM range(10) + """.stripMargin) + sql(s"REPLACE TABLE $table USING delta AS SELECT * FROM range(10)") + checkResult( + sql(s"DESCRIBE DETAIL $table"), + Seq("delta", null), + Seq("format", "description")) + } + } + + test("Replace CTAS a table with comment") { + val table = "replace_ctas_with_comment" + withTable(table) { + sql(s"CREATE TABLE $table USING delta COMMENT 'a' AS SELECT * FROM range(10)") + sql( + s"""REPLACE TABLE $table + |USING delta + |COMMENT 'This table is created with existing data' + |AS SELECT * FROM range(10) + """.stripMargin) + checkResult( + sql(s"DESCRIBE DETAIL $table"), + Seq("delta", "This table is created with existing data"), + Seq("format", "description")) + } + } + + /** + * Verifies that the correct table properties are stored in the transaction log as well as the + * catalog. 
+ */ + private def verifyTableProperties( + tableName: String, + deltaLogPropertiesContains: Seq[String], + deltaLogPropertiesMissing: Seq[String], + catalogStorageProps: Seq[String] = Nil): Unit = { + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) + + if (catalogStorageProps.isEmpty) { + assert(table.storage.properties.isEmpty) + } else { + assert(catalogStorageProps.forall(table.storage.properties.contains), + s"Catalog didn't contain properties: ${catalogStorageProps}.\n" + + s"Catalog: ${table.storage.properties}") + } + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tableName)) + + deltaLogPropertiesContains.foreach { prop => + assert(deltaLog.snapshot.getProperties.contains(prop)) + } + + deltaLogPropertiesMissing.foreach { prop => + assert(!deltaLog.snapshot.getProperties.contains(prop)) + } + } + + test("do not store write options in the catalog - DataFrameWriter") { + withTempDir { dir => + withTable("t") { + spark.range(10).write.format("delta") + .option("path", dir.getCanonicalPath) + .option("mergeSchema", "true") + .option("delta.appendOnly", "true") + .saveAsTable("t") + + verifyTableProperties( + "t", + // Still allow delta prefixed confs + Seq("delta.appendOnly"), + Seq("mergeSchema") + ) + // Sanity check that table is readable + checkAnswer(spark.table("t"), spark.range(10).toDF()) + } + } + } + + + test("do not store write options in the catalog - DataFrameWriterV2") { + withTempDir { dir => + withTable("t") { + spark.range(10).writeTo("t").using("delta") + .option("path", dir.getCanonicalPath) + .option("mergeSchema", "true") + .option("delta.appendOnly", "true") + .tableProperty("key", "value") + .create() + + verifyTableProperties( + "t", + Seq( + "delta.appendOnly", // Still allow delta prefixed confs + "key" // Explicit properties should work + ), + Seq("mergeSchema") + ) + // Sanity check that table is readable + checkAnswer(spark.table("t"), spark.range(10).toDF()) + } + } + } + + test( + "do not store write options in the catalog - legacy flag") { + withTempDir { dir => + withTable("t") { + withSQLConf(DeltaSQLConf.DELTA_LEGACY_STORE_WRITER_OPTIONS_AS_PROPS.key -> "true") { + spark.range(10).write.format("delta") + .option("path", dir.getCanonicalPath) + .option("mergeSchema", "true") + .option("delta.appendOnly", "true") + .saveAsTable("t") + + verifyTableProperties( + "t", + // Everything gets stored in the transaction log + Seq("delta.appendOnly", "mergeSchema"), + Nil, + // Things get stored in the catalog props as well + Seq("delta.appendOnly", "mergeSchema") + ) + + checkAnswer(spark.table("t"), spark.range(10).toDF()) + } + } + } + } + + test("create table using varchar at the same location should succeed") { + withTempDir { location => + withTable("t1", "t2") { + sql(s""" + |create table t1 + |(colourID string, colourName varchar(128), colourGroupID string) + |USING delta LOCATION '$location'""".stripMargin) + sql( + s""" + |insert into t1 (colourID, colourName, colourGroupID) + |values ('1', 'RED', 'a'), ('2', 'BLUE', 'b') + |""".stripMargin) + sql(s""" + |create table t2 + |(colourID string, colourName varchar(128), colourGroupID string) + |USING delta LOCATION '$location'""".stripMargin) + // Verify that select from the second table should be the same as inserted + val readout = sql( + s""" + |select * from t2 order by colourID + |""".stripMargin).collect() + assert(readout.length == 2) + assert(readout(0).get(0) == "1") + assert(readout(0).get(1) == "RED") + assert(readout(1).get(0) == "2") + 
assert(readout(1).get(1) == "BLUE") + } + } + } + + test("CREATE OR REPLACE TABLE on a catalog table where the backing " + + "directory has been deleted") { + val tbl = "delta_tbl" + withTempDir { dir => + withTable(tbl) { + val subdir = new File(dir, "subdir") + sql(s"CREATE OR REPLACE table $tbl (id String) USING delta " + + s"LOCATION '${subdir.getCanonicalPath}'") + val tableIdentifier = + spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl)).identifier + val tableName = tableIdentifier.copy(catalog = None).toString + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tbl)) + sql(s"INSERT INTO $tbl VALUES ('1')") + FileUtils.deleteDirectory(subdir) + val e = intercept[DeltaIllegalStateException] { + sql( + s"CREATE OR REPLACE table $tbl (id String) USING delta" + + s" LOCATION '${subdir.getCanonicalPath}'") + } + checkError( + exception = e, + errorClass = "DELTA_METADATA_ABSENT_EXISTING_CATALOG_TABLE", + parameters = Map( + "tableName" -> tableName, + "tablePath" -> deltaLog.logPath.toString, + "tableNameForDropCmd" -> tableName + )) + + // Table creation should work after running DROP TABLE. + sql(s"DROP table ${e.getMessageParameters().get("tableNameForDropCmd")}") + sql(s"CREATE OR REPLACE table $tbl (id String) USING delta " + + s"LOCATION '${subdir.getCanonicalPath}'") + sql(s"INSERT INTO $tbl VALUES ('21')") + val data = sql(s"SELECT * FROM $tbl").collect() + assert(data.length == 1) + } + } + } +} + +trait DeltaTableCreationColumnMappingSuiteBase extends DeltaColumnMappingSelectedTestMixin { + override protected def runOnlyTests: Seq[String] = Seq( + "create table with schema and path", + "create external table without schema", + "REPLACE TABLE", + "CREATE OR REPLACE TABLE on non-empty directory" + ) ++ Seq("partitioned" -> Seq("v2"), "non-partitioned" -> Nil) + .flatMap { case (isPartitioned, cols) => + SaveMode.values().flatMap { saveMode => + Seq( + s"saveAsTable to a new table (managed) - $isPartitioned, saveMode: $saveMode", + s"saveAsTable to a new table (external) - $isPartitioned, saveMode: $saveMode") + } + } ++ Seq("a b", "a:b", "a%b").map { specialChars => + s"location uri contains $specialChars for datasource table" + } +} + +class DeltaTableCreationIdColumnMappingSuite extends DeltaTableCreationSuite + with DeltaColumnMappingEnableIdMode { + override protected def getTableProperties(tableName: String): Map[String, String] = { + // ignore comparing column mapping properties + dropColumnMappingConfigurations(super.getTableProperties(tableName)) + } +} + +class DeltaTableCreationNameColumnMappingSuite extends DeltaTableCreationSuite + with DeltaColumnMappingEnableNameMode { + override protected def getTableProperties(tableName: String): Map[String, String] = { + // ignore comparing column mapping properties + dropColumnMappingConfigurations(super.getTableProperties(tableName)) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTableFeatureSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTableFeatureSuite.scala new file mode 100644 index 00000000000..46f31d151d0 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTableFeatureSuite.scala @@ -0,0 +1,499 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File + +import scala.collection.mutable + +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames.deltaFile + +import org.apache.spark.SparkConf +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType + +class DeltaTableFeatureSuite + extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + + private lazy val testTableSchema = spark.range(1).schema + + // This is solely a test hook. Users cannot create new Delta tables with protocol lower than + // that of their current version. + protected def createTableWithProtocol( + protocol: Protocol, + path: File, + schema: StructType = testTableSchema): DeltaLog = { + val log = DeltaLog.forTable(spark, path) + log.ensureLogDirectoryExist() + log.store.write( + deltaFile(log.logPath, 0), + Iterator(Metadata(schemaString = schema.json).json, protocol.json), + overwrite = false, + log.newDeltaHadoopConf()) + log.update() + log + } + + test("all defined table features are registered") { + import scala.reflect.runtime.{universe => ru} + + val subClassNames = mutable.Set[String]() + def collect(clazz: ru.Symbol): Unit = { + val collected = clazz.asClass.knownDirectSubclasses + // add only table feature objects to the result set + subClassNames ++= collected.filter(_.isModuleClass).map(_.name.toString) + collected.filter(_.isAbstract).foreach(collect) + } + collect(ru.typeOf[TableFeature].typeSymbol) + + val registeredFeatures = TableFeature.allSupportedFeaturesMap.values + .map(_.getClass.getSimpleName.stripSuffix("$")) // remove '$' from object names + .toSet + val notRegisteredFeatures = subClassNames.diff(registeredFeatures) + + assert( + notRegisteredFeatures.isEmpty, + "Expecting all defined table features are registered (either as prod or testing-only) " + + s"but the followings are not: $notRegisteredFeatures") + } + + test("adding feature requires supported protocol version") { + assert( + intercept[DeltaTableFeatureException] { + Protocol(1, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestLegacyReaderWriterFeature) + }.getMessage.contains("Unable to enable table feature testLegacyReaderWriter because it " + + "requires a higher reader protocol version")) + + assert(intercept[DeltaTableFeatureException] { + Protocol(TABLE_FEATURES_MIN_READER_VERSION, 6) + }.getMessage.contains("Unable to upgrade only the reader protocol version")) + + assert( + Protocol(2, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(AppendOnlyTableFeature) + .readerAndWriterFeatureNames === Set(AppendOnlyTableFeature.name)) + + assert( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestReaderWriterFeature) + 
.readerAndWriterFeatureNames === Set(TestReaderWriterFeature.name)) + } + + test("adding feature automatically adds all dependencies") { + assert( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestFeatureWithDependency) + .readerAndWriterFeatureNames === + Set(TestFeatureWithDependency.name, TestReaderWriterFeature.name)) + + assert( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeature(TestFeatureWithTransitiveDependency) + .readerAndWriterFeatureNames === + Set( + TestFeatureWithTransitiveDependency.name, + TestFeatureWithDependency.name, + TestReaderWriterFeature.name)) + + // Validate new protocol has required features enabled when a writer feature requires a + // reader/write feature. + val metadata = Metadata( + configuration = Map( + TableFeatureProtocolUtils.propertyKey(TestWriterFeatureWithTransitiveDependency) -> + TableFeatureProtocolUtils.FEATURE_PROP_SUPPORTED)) + assert( + Protocol + .forNewTable( + spark, + Some(metadata)) + .readerAndWriterFeatureNames === + Set( + TestWriterFeatureWithTransitiveDependency.name, + TestFeatureWithDependency.name, + TestReaderWriterFeature.name)) + } + + test("implicitly-enabled features") { + assert( + Protocol(2, 6).implicitlySupportedFeatures === Set( + AppendOnlyTableFeature, + ColumnMappingTableFeature, + InvariantsTableFeature, + CheckConstraintsTableFeature, + ChangeDataFeedTableFeature, + GeneratedColumnsTableFeature, + TestLegacyWriterFeature, + TestLegacyReaderWriterFeature, + TestRemovableLegacyWriterFeature, + TestRemovableLegacyReaderWriterFeature)) + assert( + Protocol(2, 5).implicitlySupportedFeatures === Set( + AppendOnlyTableFeature, + ColumnMappingTableFeature, + InvariantsTableFeature, + CheckConstraintsTableFeature, + ChangeDataFeedTableFeature, + GeneratedColumnsTableFeature, + TestLegacyWriterFeature, + TestLegacyReaderWriterFeature, + TestRemovableLegacyWriterFeature, + TestRemovableLegacyReaderWriterFeature)) + assert(Protocol(2, TABLE_FEATURES_MIN_WRITER_VERSION).implicitlySupportedFeatures === Set()) + assert( + Protocol( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION).implicitlySupportedFeatures === Set()) + } + + test("implicit feature listing") { + assert( + intercept[DeltaTableFeatureException] { + Protocol(1, 4).withFeature(TestLegacyReaderWriterFeature) + }.getMessage.contains( + "Unable to enable table feature testLegacyReaderWriter because it requires a higher " + + "reader protocol version (current 1)")) + + assert( + intercept[DeltaTableFeatureException] { + Protocol(2, 4).withFeature(TestLegacyReaderWriterFeature) + }.getMessage.contains( + "Unable to enable table feature testLegacyReaderWriter because it requires a higher " + + "writer protocol version (current 4)")) + + assert( + intercept[DeltaTableFeatureException] { + Protocol(1, TABLE_FEATURES_MIN_WRITER_VERSION).withFeature(TestLegacyReaderWriterFeature) + }.getMessage.contains( + "Unable to enable table feature testLegacyReaderWriter because it requires a higher " + + "reader protocol version (current 1)")) + + val protocol = + Protocol(2, TABLE_FEATURES_MIN_WRITER_VERSION).withFeature(TestLegacyReaderWriterFeature) + assert(!protocol.readerFeatures.isDefined) + assert( + protocol.writerFeatures.get === Set(TestLegacyReaderWriterFeature.name)) + } + + test("merge protocols") { + val tfProtocol1 = Protocol(1, TABLE_FEATURES_MIN_WRITER_VERSION) + val tfProtocol2 = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, 
TABLE_FEATURES_MIN_WRITER_VERSION) + + assert( + tfProtocol1.merge(Protocol(1, 2)) === + tfProtocol1.withFeatures(Seq(AppendOnlyTableFeature, InvariantsTableFeature))) + assert( + tfProtocol2.merge(Protocol(2, 6)) === + tfProtocol2.withFeatures(Set( + AppendOnlyTableFeature, + InvariantsTableFeature, + ColumnMappingTableFeature, + ChangeDataFeedTableFeature, + CheckConstraintsTableFeature, + GeneratedColumnsTableFeature, + TestLegacyWriterFeature, + TestLegacyReaderWriterFeature, + TestRemovableLegacyWriterFeature, + TestRemovableLegacyReaderWriterFeature))) + } + + test("protocol upgrade compatibility") { + assert(Protocol(1, 1).canUpgradeTo(Protocol(1, 1))) + assert(Protocol(1, 1).canUpgradeTo(Protocol(2, 1))) + assert(!Protocol(1, 2).canUpgradeTo(Protocol(1, 1))) + assert(!Protocol(2, 2).canUpgradeTo(Protocol(2, 1))) + assert( + Protocol(1, 1).canUpgradeTo( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION))) + assert( + !Protocol(2, 3).canUpgradeTo( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION))) + assert( + !Protocol(2, 6).canUpgradeTo( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures( + Seq( + // With one feature not referenced, `canUpgradeTo` must be `false`. + // AppendOnlyTableFeature, + InvariantsTableFeature, + CheckConstraintsTableFeature, + ChangeDataFeedTableFeature, + GeneratedColumnsTableFeature, + ColumnMappingTableFeature, + TestLegacyWriterFeature, + TestLegacyReaderWriterFeature, + TestRemovableLegacyWriterFeature, + TestRemovableLegacyReaderWriterFeature)))) + assert( + Protocol(2, 6).canUpgradeTo( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures(Seq( + AppendOnlyTableFeature, + InvariantsTableFeature, + CheckConstraintsTableFeature, + ChangeDataFeedTableFeature, + GeneratedColumnsTableFeature, + ColumnMappingTableFeature, + TestLegacyWriterFeature, + TestLegacyReaderWriterFeature, + TestRemovableLegacyWriterFeature, + TestRemovableLegacyReaderWriterFeature)))) + // Features are identical but protocol versions are lower, thus `canUpgradeTo` is `false`. + assert( + !Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .canUpgradeTo(Protocol(1, 1))) + } + + test("protocol downgrade compatibility") { + val tableFeatureProtocol = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + // Cannot downgrade when the original protocol does not support at a minimum writer features. + assert(!Protocol(1, 6).canDowngradeTo(Protocol(1, 6), droppedFeatureName = "")) + assert(tableFeatureProtocol.withFeature(TestWriterFeature) + .canDowngradeTo(Protocol(1, 1), droppedFeatureName = TestWriterFeature.name)) + assert(Protocol(1, 7).withFeature(TestWriterFeature) + .canDowngradeTo(Protocol(1, 1), droppedFeatureName = TestWriterFeature.name)) + for (n <- 1 to 3) { + assert( + !Protocol(n, 7) + .withFeatures(Seq(TestWriterFeature, AppendOnlyTableFeature)) + .canDowngradeTo(Protocol(1, 2), droppedFeatureName = TestWriterFeature.name)) + assert( + Protocol(n, 7) + .withFeatures(Seq(TestWriterFeature, AppendOnlyTableFeature, InvariantsTableFeature)) + .canDowngradeTo(Protocol(1, 2), droppedFeatureName = TestWriterFeature.name)) + } + // When there are no explicit features the protocol versions need to be downgraded + // below table features. 
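+ // For example, after dropping the only explicit feature the protocol must fall back to a legacy + // protocol such as (1, 1); staying on the table-features protocol or on (2, 7) is not a valid downgrade.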
+ assert(!tableFeatureProtocol.withFeature(TestWriterFeature) + .canDowngradeTo(tableFeatureProtocol, droppedFeatureName = TestWriterFeature.name)) + assert(!tableFeatureProtocol.withFeature(TestWriterFeature) + .canDowngradeTo(Protocol(2, 7), droppedFeatureName = TestWriterFeature.name)) + // Only one non-legacy writer feature per time. + assert(!tableFeatureProtocol.withFeatures(Seq(TestWriterFeature, TestRemovableWriterFeature)) + .canDowngradeTo(tableFeatureProtocol, droppedFeatureName = TestWriterFeature.name)) + // Remove reader+writer feature. + assert(tableFeatureProtocol.withFeatures(Seq(TestReaderWriterFeature)) + .canDowngradeTo(Protocol(1, 1), droppedFeatureName = TestReaderWriterFeature.name)) + // Only one non-legacy feature at a time - multiple reader+writer features. + assert( + !tableFeatureProtocol + .withFeatures(Seq(TestReaderWriterFeature, TestReaderWriterMetadataAutoUpdateFeature)) + .canDowngradeTo(tableFeatureProtocol, droppedFeatureName = "")) + assert( + tableFeatureProtocol + .merge(Protocol(2, 5)) + .withFeatures(Seq(TestReaderWriterFeature, TestRemovableLegacyReaderWriterFeature)) + .canDowngradeTo(Protocol(2, 5), droppedFeatureName = TestReaderWriterFeature.name)) + // Only one feature at a time - mix of reader+writer and writer features. + assert(!tableFeatureProtocol.withFeatures(Seq(TestWriterFeature, TestReaderWriterFeature)) + .canDowngradeTo(tableFeatureProtocol, droppedFeatureName = TestWriterFeature.name)) + // Downgraded protocol must be able to support all legacy table features. + assert( + !tableFeatureProtocol + .withFeatures(Seq(TestWriterFeature, AppendOnlyTableFeature, ColumnMappingTableFeature)) + .canDowngradeTo(Protocol(2, 4), droppedFeatureName = TestWriterFeature.name)) + assert( + tableFeatureProtocol + .merge(Protocol(2, 5)) + .withFeatures(Seq(TestWriterFeature, AppendOnlyTableFeature, ColumnMappingTableFeature)) + .canDowngradeTo(Protocol(2, 5), droppedFeatureName = TestWriterFeature.name)) + } + + test("add reader and writer feature descriptors") { + var p = Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + val name = AppendOnlyTableFeature.name + p = p.withReaderFeatures(Seq(name)) + assert(p.readerFeatures === Some(Set(name))) + assert(p.writerFeatures === Some(Set.empty)) + p = p.withWriterFeatures(Seq(name)) + assert(p.readerFeatures === Some(Set(name))) + assert(p.writerFeatures === Some(Set(name))) + } + + test("native automatically-enabled feature can't be implicitly enabled") { + val p = Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + assert(p.implicitlySupportedFeatures.isEmpty) + } + + test("Table features are not automatically enabled by default table property settings") { + withTable("tbl") { + spark.range(10).write.format("delta").saveAsTable("tbl") + val metadata = DeltaLog.forTable(spark, TableIdentifier("tbl")).update().metadata + TableFeature.allSupportedFeaturesMap.values.foreach { + case feature: FeatureAutomaticallyEnabledByMetadata => + assert( + !feature.metadataRequiresFeatureToBeEnabled(metadata, spark), + s""" + |${feature.name} is automatically enabled by the default metadata. This will lead to + |the inability of reading existing tables that do not have the feature enabled and + |should not reach production! If this is only for testing purposes, ignore this test. 
+ """.stripMargin) + case _ => + } + } + } + + test("Can enable legacy metadata table feature by setting default table property key") { + withSQLConf( + s"$DEFAULT_FEATURE_PROP_PREFIX${TestWriterFeature.name}" -> "enabled", + DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey -> "name") { + withTable("tbl") { + spark.range(10).write.format("delta").saveAsTable("tbl") + val log = DeltaLog.forTable(spark, TableIdentifier("tbl")) + val protocol = log.update().protocol + assert(protocol.readerAndWriterFeatureNames === Set( + ColumnMappingTableFeature.name, + TestWriterFeature.name)) + } + } + } + + test("CLONE does not take into account default table features") { + withTable("tbl") { + spark.range(0).write.format("delta").saveAsTable("tbl") + val log = DeltaLog.forTable(spark, TableIdentifier("tbl")) + val protocolBefore = log.update().protocol + withSQLConf(defaultPropertyKey(TestWriterFeature) -> "enabled") { + sql(buildTablePropertyModifyingCommand( + commandName = "CLONE", targetTableName = "tbl", sourceTableName = "tbl") + ) + } + val protocolAfter = log.update().protocol + assert(protocolBefore === protocolAfter) + } + } + + test("CLONE only enables enabled metadata table features") { + withTable("src", "target") { + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> + TABLE_FEATURES_MIN_WRITER_VERSION.toString, + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> + TABLE_FEATURES_MIN_READER_VERSION.toString, + DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey -> "name") { + spark.range(0).write.format("delta").saveAsTable("src") + } + sql(buildTablePropertyModifyingCommand( + commandName = "CLONE", targetTableName = "target", sourceTableName = "src")) + val targetLog = DeltaLog.forTable(spark, TableIdentifier("target")) + val protocol = targetLog.update().protocol + assert(protocol.readerAndWriterFeatureNames === Set( + ColumnMappingTableFeature.name)) + } + } + + for(commandName <- Seq("ALTER", "REPLACE", "CREATE OR REPLACE", "CLONE")) { + test(s"Can enable legacy metadata table feature during $commandName TABLE") { + withSQLConf( + s"${defaultPropertyKey(TestWriterFeature)}" -> "enabled") { + withTable("tbl") { + spark.range(0).write.format("delta").saveAsTable("tbl") + val log = DeltaLog.forTable(spark, TableIdentifier("tbl")) + + val tblProperties = Seq("'delta.enableChangeDataFeed' = true") + sql(buildTablePropertyModifyingCommand( + commandName, targetTableName = "tbl", sourceTableName = "tbl", tblProperties)) + val protocol = log.update().protocol + assert(protocol.readerAndWriterFeatureNames === Set( + ChangeDataFeedTableFeature.name, + TestWriterFeature.name)) + } + } + } + } + + for(commandName <- Seq("ALTER", "CLONE", "REPLACE", "CREATE OR REPLACE")) { + test("Enabling table feature on already existing table enables all table features " + + s"up to the table's protocol version during $commandName TABLE") { + withSQLConf(DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey -> "name") { + withTable("tbl") { + spark.range(0).write.format("delta").saveAsTable("tbl") + val log = DeltaLog.forTable(spark, TableIdentifier("tbl")) + val protocol = log.update().protocol + assert(protocol.minReaderVersion === 2) + assert(protocol.minWriterVersion === 5) + val tblProperties = Seq(s"'$FEATURE_PROP_PREFIX${TestWriterFeature.name}' = 'enabled'", + s"'delta.minWriterVersion' = $TABLE_FEATURES_MIN_WRITER_VERSION") + sql(buildTablePropertyModifyingCommand( + commandName, targetTableName = "tbl", sourceTableName = "tbl", tblProperties)) + val 
newProtocol = log.update().protocol + assert(newProtocol.readerAndWriterFeatureNames === Set( + AppendOnlyTableFeature.name, + ColumnMappingTableFeature.name, + InvariantsTableFeature.name, + CheckConstraintsTableFeature.name, + ChangeDataFeedTableFeature.name, + GeneratedColumnsTableFeature.name, + TestWriterFeature.name, + TestLegacyWriterFeature.name, + TestLegacyReaderWriterFeature.name, + TestRemovableLegacyWriterFeature.name, + TestRemovableLegacyReaderWriterFeature.name)) + } + } + } + } + + private def buildTablePropertyModifyingCommand( + commandName: String, + targetTableName: String, + sourceTableName: String, + tblProperties: Seq[String] = Seq.empty): String = { + val commandStr = if (commandName == "CLONE") { + "CREATE OR REPLACE" + } else { + commandName + } + + val cloneClause = if (commandName == "CLONE") { + s"SHALLOW CLONE $sourceTableName" + } else { + "" + } + + val (usingDeltaClause, dataSourceClause) = if ("ALTER" != commandName && + "CLONE" != commandName) { + ("USING DELTA", s"AS SELECT * FROM $sourceTableName") + } else { + ("", "") + } + var tblPropertiesClause = "" + if (tblProperties.nonEmpty) { + if (commandName == "ALTER") { + tblPropertiesClause += "SET " + } + tblPropertiesClause += s"TBLPROPERTIES ${tblProperties.mkString("(", ",", ")")}" + } + s"""$commandStr TABLE $targetTableName + |$usingDeltaClause + |$cloneClause + |$tblPropertiesClause + |$dataSourceClause + |""".stripMargin + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTableUtilsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTableUtilsSuite.scala new file mode 100644 index 00000000000..3532f9208ad --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTableUtilsSuite.scala @@ -0,0 +1,86 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.net.URI + +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.hadoop.fs.{Path, RawLocalFileSystem} + +import org.apache.spark.SparkConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ + +class DeltaTableUtilsSuite extends SharedSparkSession with DeltaSQLCommandTest { + + override protected def sparkConf: SparkConf = super.sparkConf + .set("spark.hadoop.fs.s3.impl", classOf[MockS3FileSystem].getCanonicalName) + + test("findDeltaTableRoot correctly combines paths") { + val path1 = new Path("s3://my-bucket") + assert(DeltaTableUtils.findDeltaTableRoot(spark, path1).isEmpty) + val path2 = new Path("s3://my-bucket/") + assert(DeltaTableUtils.findDeltaTableRoot(spark, path2).isEmpty) + withTempDir { dir => + sql(s"CREATE TABLE myTable (id INT) USING DELTA LOCATION '${dir.getAbsolutePath}'") + val path = new Path(s"file://${dir.getAbsolutePath}") + assert(DeltaTableUtils.findDeltaTableRoot(spark, path).contains(path)) + } + } + + test("safeConcatPaths") { + val basePath = new Path("s3://my-bucket/subfolder") + val basePathEmpty = new Path("s3://my-bucket") + assert(DeltaTableUtils.safeConcatPaths(basePath, "_delta_log") == + new Path("s3://my-bucket/subfolder/_delta_log")) + assert(DeltaTableUtils.safeConcatPaths(basePathEmpty, "_delta_log") == + new Path("s3://my-bucket/_delta_log")) + } + + test("removeInternalMetadata") { + for (flag <- BOOLEAN_DOMAIN) { + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_REMOVE_SPARK_INTERNAL_METADATA.key -> flag.toString) { + for (internalMetadataKey <- DeltaTableUtils.SPARK_INTERNAL_METADATA_KEYS) { + val metadata = new MetadataBuilder() + .putString(internalMetadataKey, "foo") + .putString("other", "bar") + .build() + val schema = StructType(Seq(StructField("foo", StringType, metadata = metadata))) + val newSchema = DeltaTableUtils.removeInternalMetadata(spark, schema) + newSchema.foreach { f => + if (flag) { + // Flag on: should remove internal metadata + assert(!f.metadata.contains(internalMetadataKey)) + // Should reserve non internal metadata + assert(f.metadata.contains("other")) + } else { + // Flag off: no-op + assert(f.metadata == metadata) + } + } + } + } + } + } +} + +private class MockS3FileSystem extends RawLocalFileSystem { + override def getScheme: String = "s3" + override def getUri: URI = URI.create("s3://my-bucket") +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTestUtils.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTestUtils.scala new file mode 100644 index 00000000000..0d684300bfc --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTestUtils.scala @@ -0,0 +1,549 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.util.Locale +import java.util.concurrent.ConcurrentHashMap + +import scala.collection.JavaConverters._ +import scala.collection.concurrent +import scala.reflect.ClassTag +import scala.util.matching.Regex + +import org.apache.spark.sql.delta.DeltaTestUtils.Plans +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.util.FileNames +import io.delta.tables.{DeltaTable => IODeltaTable} +import org.apache.hadoop.fs.Path +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark.SparkContext +import org.apache.spark.SparkFunSuite +import org.apache.spark.scheduler.{JobFailed, SparkListener, SparkListenerJobEnd, SparkListenerJobStart} +import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.execution.{FileSourceScanExec, QueryExecution, RDDScanExec, SparkPlan, WholeStageCodegenExec} +import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.util.QueryExecutionListener +import org.apache.spark.util.Utils + +trait DeltaTestUtilsBase { + import DeltaTestUtils.TableIdentifierOrPath + + final val BOOLEAN_DOMAIN: Seq[Boolean] = Seq(true, false) + + class PlanCapturingListener() extends QueryExecutionListener { + + private[this] var capturedPlans = List.empty[Plans] + + def plans: Seq[Plans] = capturedPlans.reverse + + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { + capturedPlans ::= Plans( + qe.analyzed, + qe.optimizedPlan, + qe.sparkPlan, + qe.executedPlan) + } + + override def onFailure( + funcName: String, qe: QueryExecution, error: Exception): Unit = {} + } + + /** + * Run a thunk with physical plans for all queries captured and passed into a provided buffer. + */ + def withLogicalPlansCaptured[T]( + spark: SparkSession, + optimizedPlan: Boolean)( + thunk: => Unit): Seq[LogicalPlan] = { + val planCapturingListener = new PlanCapturingListener + + spark.sparkContext.listenerBus.waitUntilEmpty(15000) + spark.listenerManager.register(planCapturingListener) + try { + thunk + spark.sparkContext.listenerBus.waitUntilEmpty(15000) + planCapturingListener.plans.map { plans => + if (optimizedPlan) plans.optimized else plans.analyzed + } + } finally { + spark.listenerManager.unregister(planCapturingListener) + } + } + + /** + * Run a thunk with physical plans for all queries captured and passed into a provided buffer. + */ + def withPhysicalPlansCaptured[T]( + spark: SparkSession)( + thunk: => Unit): Seq[SparkPlan] = { + val planCapturingListener = new PlanCapturingListener + + spark.sparkContext.listenerBus.waitUntilEmpty(15000) + spark.listenerManager.register(planCapturingListener) + try { + thunk + spark.sparkContext.listenerBus.waitUntilEmpty(15000) + planCapturingListener.plans.map(_.sparkPlan) + } finally { + spark.listenerManager.unregister(planCapturingListener) + } + } + + /** + * Run a thunk with logical and physical plans for all queries captured and passed + * into a provided buffer. 
+ */ + def withAllPlansCaptured[T]( + spark: SparkSession)( + thunk: => Unit): Seq[Plans] = { + val planCapturingListener = new PlanCapturingListener + + spark.sparkContext.listenerBus.waitUntilEmpty(15000) + spark.listenerManager.register(planCapturingListener) + try { + thunk + spark.sparkContext.listenerBus.waitUntilEmpty(15000) + planCapturingListener.plans + } finally { + spark.listenerManager.unregister(planCapturingListener) + } + } + + def countSparkJobs(sc: SparkContext, f: => Unit): Int = { + val jobs: concurrent.Map[Int, Long] = new ConcurrentHashMap[Int, Long]().asScala + val listener = new SparkListener { + override def onJobStart(jobStart: SparkListenerJobStart): Unit = { + jobs.put(jobStart.jobId, jobStart.stageInfos.map(_.numTasks).sum) + } + override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = jobEnd.jobResult match { + case JobFailed(_) => jobs.remove(jobEnd.jobId) + case _ => // On success, do nothing. + } + } + sc.addSparkListener(listener) + try { + sc.listenerBus.waitUntilEmpty(15000) + f + sc.listenerBus.waitUntilEmpty(15000) + } finally { + sc.removeSparkListener(listener) + } + // Spark will always log a job start/end event even when the job does not launch any task. + jobs.values.count(_ > 0) + } + + protected def getfindTouchedFilesJobPlans(plans: Seq[Plans]): SparkPlan = { + // The expected plan for touched file computation is of the format below. + // The data column should be pruned from both leaves. + // HashAggregate(output=[count#3463L]) + // +- HashAggregate(output=[count#3466L]) + // +- Project + // +- Filter (isnotnull(count#3454L) AND (count#3454L > 1)) + // +- HashAggregate(output=[count#3454L]) + // +- HashAggregate(output=[_row_id_#3418L, sum#3468L]) + // +- Project [_row_id_#3418L, UDF(_file_name_#3422) AS one#3448] + // +- BroadcastHashJoin [id#3342L], [id#3412L], Inner, BuildLeft + // :- Project [id#3342L] + // : +- Filter isnotnull(id#3342L) + // : +- FileScan parquet [id#3342L,part#3343L] + // +- Filter isnotnull(id#3412L) + // +- Project [...] + // +- Project [...] + // +- FileScan parquet [id#3412L,part#3413L] + // Note: It can be RDDScanExec instead of FileScan if the source was materialized. + // We pick the first plan starting from FileScan and ending in HashAggregate as a + // stable heuristic for the one we want. + plans.map(_.executedPlan) + .filter { + case WholeStageCodegenExec(hash: HashAggregateExec) => + hash.collectLeaves().size == 2 && + hash.collectLeaves() + .forall { s => + s.isInstanceOf[FileSourceScanExec] || + s.isInstanceOf[RDDScanExec] + } + case _ => false + }.head + } + + /** + * Separate name- from path-based SQL table identifiers. + */ + def getTableIdentifierOrPath(sqlIdentifier: String): TableIdentifierOrPath = { + // Match: delta.`path`[[ as] alias] or tahoe.`path`[[ as] alias] + val pathMatcher: Regex = raw"(?:delta|tahoe)\.`([^`]+)`(?:(?: as)? (.+))?".r + // Match: db.table[[ as] alias] + val qualifiedDbMatcher: Regex = raw"`?([^\.` ]+)`?\.`?([^\.` ]+)`?(?:(?: as)? (.+))?".r + // Match: table[[ as] alias] + val unqualifiedNameMatcher: Regex = raw"([^ ]+)(?:(?: as)? 
(.+))?".r + sqlIdentifier match { + case pathMatcher(path, alias) => + TableIdentifierOrPath.Path(path, Option(alias)) + case qualifiedDbMatcher(dbName, tableName, alias) => + TableIdentifierOrPath.Identifier(TableIdentifier(tableName, Some(dbName)), Option(alias)) + case unqualifiedNameMatcher(tableName, alias) => + TableIdentifierOrPath.Identifier(TableIdentifier(tableName), Option(alias)) + } + } + + /** + * Produce a DeltaTable instance given a `TableIdentifierOrPath` instance. + */ + def getDeltaTableForIdentifierOrPath( + spark: SparkSession, + identifierOrPath: TableIdentifierOrPath): IODeltaTable = { + identifierOrPath match { + case TableIdentifierOrPath.Identifier(id, optionalAlias) => + val table = IODeltaTable.forName(spark, id.unquotedString) + optionalAlias.map(table.as(_)).getOrElse(table) + case TableIdentifierOrPath.Path(path, optionalAlias) => + val table = IODeltaTable.forPath(spark, path) + optionalAlias.map(table.as(_)).getOrElse(table) + } + } + + @deprecated("Use checkError() instead") + protected def errorContains(errMsg: String, str: String): Unit = { + assert(errMsg.toLowerCase(Locale.ROOT).contains(str.toLowerCase(Locale.ROOT))) + } + + /** Utility method to check exception `e` is of type `E` or a cause of it is of type `E` */ + def findIfResponsible[E <: Throwable: ClassTag](e: Throwable): Option[E] = e match { + case culprit: E => Some(culprit) + case _ => + val children = Option(e.getCause).iterator ++ e.getSuppressed.iterator + children + .map(findIfResponsible[E](_)) + .collectFirst { case Some(culprit) => culprit } + } +} + +trait DeltaCheckpointTestUtils + extends DeltaTestUtilsBase { self: SparkFunSuite with SharedSparkSession => + + def testDifferentCheckpoints(testName: String, quiet: Boolean = false) + (f: (CheckpointPolicy.Policy, Option[V2Checkpoint.Format]) => Unit): Unit = { + test(s"$testName [Checkpoint V1]") { + def testFunc(): Unit = { + withSQLConf(DeltaConfigs.CHECKPOINT_POLICY.defaultTablePropertyKey -> + CheckpointPolicy.Classic.name) { + f(CheckpointPolicy.Classic, None) + } + } + if (quiet) quietly { testFunc() } else testFunc() + } + for (checkpointFormat <- V2Checkpoint.Format.ALL) + test(s"$testName [Checkpoint V2, format: ${checkpointFormat.name}]") { + def testFunc(): Unit = { + withSQLConf( + DeltaConfigs.CHECKPOINT_POLICY.defaultTablePropertyKey -> CheckpointPolicy.V2.name, + DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key -> checkpointFormat.name + ) { + f(CheckpointPolicy.V2, Some(checkpointFormat)) + } + } + if (quiet) quietly { testFunc() } else testFunc() + } + } + + /** + * Helper method to get the dataframe corresponding to the files which has the file actions for a + * given checkpoint. 
+ */ + def getCheckpointDfForFilesContainingFileActions( + log: DeltaLog, + checkpointFile: Path): DataFrame = { + val ci = CheckpointInstance.apply(checkpointFile) + val allCheckpointFiles = log + .listFrom(ci.version) + .filter(FileNames.isCheckpointFile) + .filter(f => CheckpointInstance(f.getPath) == ci) + .toSeq + val fileActionsFileIndex = ci.format match { + case CheckpointInstance.Format.V2 => + val incompleteCheckpointProvider = ci.getCheckpointProvider(log, allCheckpointFiles) + val df = log.loadIndex(incompleteCheckpointProvider.topLevelFileIndex.get, Action.logSchema) + val sidecarFileStatuses = df.as[SingleAction].collect().map(_.unwrap).collect { + case sf: SidecarFile => sf + }.map(sf => sf.toFileStatus(log.logPath)) + DeltaLogFileIndex(DeltaLogFileIndex.CHECKPOINT_FILE_FORMAT_PARQUET, sidecarFileStatuses) + case CheckpointInstance.Format.SINGLE | CheckpointInstance.Format.WITH_PARTS => + DeltaLogFileIndex(DeltaLogFileIndex.CHECKPOINT_FILE_FORMAT_PARQUET, + allCheckpointFiles.toArray) + case _ => + throw new Exception(s"Unexpected checkpoint format for file $checkpointFile") + } + fileActionsFileIndex.files + .map(fileStatus => spark.read.parquet(fileStatus.getPath.toString)) + .reduce(_.union(_)) + } +} + +object DeltaTestUtils extends DeltaTestUtilsBase { + + sealed trait TableIdentifierOrPath + object TableIdentifierOrPath { + case class Identifier(id: TableIdentifier, alias: Option[String]) + extends TableIdentifierOrPath + case class Path(path: String, alias: Option[String]) extends TableIdentifierOrPath + } + + case class Plans( + analyzed: LogicalPlan, + optimized: LogicalPlan, + sparkPlan: SparkPlan, + executedPlan: SparkPlan) + + /** + * Creates an AddFile that can be used for tests where the exact parameters do not matter. + */ + def createTestAddFile( + path: String = "foo", + partitionValues: Map[String, String] = Map.empty, + size: Long = 1L, + modificationTime: Long = 1L, + dataChange: Boolean = true, + stats: String = "{\"numRecords\": 1}"): AddFile = { + AddFile(path, partitionValues, size, modificationTime, dataChange, stats) + } + + /** + * Extracts the table name and alias (if any) from the given string. Correctly handles whitespaces + * in table name but doesn't support whitespaces in alias. + */ + def parseTableAndAlias(table: String): (String, Option[String]) = { + // Matches 'delta.`path` AS alias' (case insensitive). + val deltaPathWithAsAlias = raw"(?i)(delta\.`.+`)(?: AS) (\S+)".r + // Matches 'delta.`path` alias'. + val deltaPathWithAlias = raw"(delta\.`.+`) (\S+)".r + // Matches 'delta.`path`'. + val deltaPath = raw"(delta\.`.+`)".r + // Matches 'tableName AS alias' (case insensitive). + val tableNameWithAsAlias = raw"(?i)(.+)(?: AS) (\S+)".r + // Matches 'tableName alias'. + val tableNameWithAlias = raw"(.+) (.+)".r + + table match { + case deltaPathWithAsAlias(tableName, alias) => tableName -> Some(alias) + case deltaPathWithAlias(tableName, alias) => tableName -> Some(alias) + case deltaPath(tableName) => tableName -> None + case tableNameWithAsAlias(tableName, alias) => tableName -> Some(alias) + case tableNameWithAlias(tableName, alias) => tableName -> Some(alias) + case tableName => tableName -> None + } + } + + /** + * Implements an ordering where `x < y` iff both reader and writer versions of + * `x` are strictly less than those of `y`. + * + * Can be used to conveniently check that this relationship holds in tests/assertions + * without having to write out the conjunction of the two subconditions every time. 
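+ * For example, `tryCompare(Protocol(1, 2), Protocol(2, 3))` is `Some(-1)` because both versions are + * strictly lower, while `tryCompare(Protocol(1, 3), Protocol(2, 2))` is `None` since only the reader + * version is.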
+ */ + case object StrictProtocolOrdering extends PartialOrdering[Protocol] { + override def tryCompare(x: Protocol, y: Protocol): Option[Int] = { + if (x.minReaderVersion == y.minReaderVersion && + x.minWriterVersion == y.minWriterVersion) { + Some(0) + } else if (x.minReaderVersion < y.minReaderVersion && + x.minWriterVersion < y.minWriterVersion) { + Some(-1) + } else if (x.minReaderVersion > y.minReaderVersion && + x.minWriterVersion > y.minWriterVersion) { + Some(1) + } else { + None + } + } + + override def lteq(x: Protocol, y: Protocol): Boolean = + x.minReaderVersion <= y.minReaderVersion && x.minWriterVersion <= y.minWriterVersion + + // Just a more readable version of `lteq`. + def fulfillsVersionRequirements(actual: Protocol, requirement: Protocol): Boolean = + lteq(requirement, actual) + } +} + +trait DeltaTestUtilsForTempViews + extends SharedSparkSession + with DeltaTestUtilsBase { + + def testWithTempView(testName: String)(testFun: Boolean => Any): Unit = { + Seq(true, false).foreach { isSQLTempView => + val tempViewUsed = if (isSQLTempView) "SQL TempView" else "Dataset TempView" + test(s"$testName - $tempViewUsed") { + withTempView("v") { + testFun(isSQLTempView) + } + } + } + } + + def testQuietlyWithTempView(testName: String)(testFun: Boolean => Any): Unit = { + Seq(true, false).foreach { isSQLTempView => + val tempViewUsed = if (isSQLTempView) "SQL TempView" else "Dataset TempView" + testQuietly(s"$testName - $tempViewUsed") { + withTempView("v") { + testFun(isSQLTempView) + } + } + } + } + + def createTempViewFromTable( + tableName: String, + isSQLTempView: Boolean, + format: Option[String] = None): Unit = { + if (isSQLTempView) { + sql(s"CREATE OR REPLACE TEMP VIEW v AS SELECT * from $tableName") + } else { + spark.read.format(format.getOrElse("delta")).table(tableName).createOrReplaceTempView("v") + } + } + + def createTempViewFromSelect(text: String, isSQLTempView: Boolean): Unit = { + if (isSQLTempView) { + sql(s"CREATE OR REPLACE TEMP VIEW v AS $text") + } else { + sql(text).createOrReplaceTempView("v") + } + } + + def testErrorMessageAndClass( + isSQLTempView: Boolean, + ex: AnalysisException, + expectedErrorMsgForSQLTempView: String = null, + expectedErrorMsgForDataSetTempView: String = null, + expectedErrorClassForSQLTempView: String = null, + expectedErrorClassForDataSetTempView: String = null): Unit = { + if (isSQLTempView) { + if (expectedErrorMsgForSQLTempView != null) { + errorContains(ex.getMessage, expectedErrorMsgForSQLTempView) + } + if (expectedErrorClassForSQLTempView != null) { + assert(ex.getErrorClass == expectedErrorClassForSQLTempView) + } + } else { + if (expectedErrorMsgForDataSetTempView != null) { + errorContains(ex.getMessage, expectedErrorMsgForDataSetTempView) + } + if (expectedErrorClassForDataSetTempView != null) { + assert(ex.getErrorClass == expectedErrorClassForDataSetTempView, ex.getMessage) + } + } + } +} + +/** + * Trait collecting helper methods for DML tests e.p. creating a test table for each test and + * cleaning it up after each test. + */ +trait DeltaDMLTestUtils + extends DeltaTestUtilsBase + with BeforeAndAfterEach { + self: SharedSparkSession => + + protected var tempDir: File = _ + + protected var deltaLog: DeltaLog = _ + + protected def tempPath: String = tempDir.getCanonicalPath + + override protected def beforeEach(): Unit = { + super.beforeEach() + // Using a space in path to provide coverage for special characters. 
+ tempDir = Utils.createTempDir(namePrefix = "spark test") + deltaLog = DeltaLog.forTable(spark, new Path(tempPath)) + } + + override protected def afterEach(): Unit = { + try { + Utils.deleteRecursively(tempDir) + DeltaLog.clearCache() + } finally { + super.afterEach() + } + } + + protected def append(df: DataFrame, partitionBy: Seq[String] = Nil): Unit = { + val dfw = df.write.format("delta").mode("append") + if (partitionBy.nonEmpty) { + dfw.partitionBy(partitionBy: _*) + } + dfw.save(tempPath) + } + + protected def withKeyValueData( + source: Seq[(Int, Int)], + target: Seq[(Int, Int)], + isKeyPartitioned: Boolean = false, + sourceKeyValueNames: (String, String) = ("key", "value"), + targetKeyValueNames: (String, String) = ("key", "value"))( + thunk: (String, String) => Unit = null): Unit = { + + import testImplicits._ + + append(target.toDF(targetKeyValueNames._1, targetKeyValueNames._2).coalesce(2), + if (isKeyPartitioned) Seq(targetKeyValueNames._1) else Nil) + withTempView("source") { + source.toDF(sourceKeyValueNames._1, sourceKeyValueNames._2).createOrReplaceTempView("source") + thunk("source", s"delta.`$tempPath`") + } + } + + protected def readDeltaTable(path: String): DataFrame = { + spark.read.format("delta").load(path) + } + + protected def getDeltaFileStmt(path: String): String = s"SELECT * FROM delta.`$path`" + + /** + * Finds the latest operation of the given type that ran on the test table and returns the + * dataframe with the changes of the corresponding table version. + * + * @param operation Delta operation name, see [[DeltaOperations]]. + */ + protected def getCDCForLatestOperation(deltaLog: DeltaLog, operation: String): DataFrame = { + val latestOperation = deltaLog.history + .getHistory(None) + .find(_.operation == operation) + assert(latestOperation.nonEmpty, s"Couldn't find a ${operation} operation to check CDF") + + val latestOperationVersion = latestOperation.get.version + assert(latestOperationVersion.nonEmpty, + s"Latest ${operation} operation doesn't have a version associated with it") + + CDCReader + .changesToBatchDF( + deltaLog, + latestOperationVersion.get, + latestOperationVersion.get, + spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP) + .drop(CDCReader.CDC_COMMIT_VERSION) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaThrowableSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaThrowableSuite.scala new file mode 100644 index 00000000000..f2c991704a2 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaThrowableSuite.scala @@ -0,0 +1,129 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.nio.charset.StandardCharsets +import java.nio.file.Files + +import scala.collection.immutable.SortedMap + +import org.apache.spark.sql.delta.DeltaThrowableHelper.{deltaErrorClassSource, sparkErrorClassSource} +import com.fasterxml.jackson.annotation.JsonInclude.Include +import com.fasterxml.jackson.core.JsonParser.Feature.STRICT_DUPLICATE_DETECTION +import com.fasterxml.jackson.core.`type`.TypeReference +import com.fasterxml.jackson.core.util.{DefaultIndenter, DefaultPrettyPrinter} +import com.fasterxml.jackson.databind.SerializationFeature +import com.fasterxml.jackson.databind.json.JsonMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import org.apache.commons.io.{FileUtils, IOUtils} + +import org.apache.spark.{ErrorClassesJsonReader, ErrorInfo, SparkFunSuite} + +/** Test suite for Delta Throwables. */ +class DeltaThrowableSuite extends SparkFunSuite { + + private lazy val sparkErrorClassesMap = { + new ErrorClassesJsonReader(Seq(sparkErrorClassSource)).errorInfoMap + } + + private lazy val deltaErrorClassToInfoMap = { + new ErrorClassesJsonReader(Seq(deltaErrorClassSource)).errorInfoMap + } + + /* Used to regenerate the error class file. Run: + {{{ + SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \ + "sql/testOnly *DeltaThrowableSuite -- -t \"Error classes are correctly formatted\"" + }}} + */ + + def checkIfUnique(ss: Seq[Any]): Unit = { + val duplicatedKeys = ss.groupBy(identity).mapValues(_.size).filter(_._2 > 1).keys.toSeq + assert(duplicatedKeys.isEmpty) + } + + def checkCondition(ss: Seq[String], fx: String => Boolean): Unit = { + ss.foreach { s => + assert(fx(s)) + } + } + + test("No duplicate error classes in Delta") { + // Enabling this feature incurs performance overhead (20-30%) + val mapper = JsonMapper.builder() + .addModule(DefaultScalaModule) + .enable(STRICT_DUPLICATE_DETECTION) + .build() + mapper.readValue(deltaErrorClassSource, new TypeReference[Map[String, ErrorInfo]]() {}) + } + + test("No error classes are shared by Delta and Spark") { + assert(deltaErrorClassToInfoMap.keySet.intersect(sparkErrorClassesMap.keySet).isEmpty) + } + + test("No word 'databricks' in OSS Delta errors") { + val errorClasses = deltaErrorClassToInfoMap.keys.toSeq + val errorMsgs = deltaErrorClassToInfoMap.values.toSeq.flatMap(_.message) + checkCondition(errorClasses ++ errorMsgs, s => !s.toLowerCase().contains("databricks")) + } + + test("Delta error classes are correctly formatted with keys in alphabetical order") { + lazy val ossDeltaErrorFile = new File(getWorkspaceFilePath( + "delta", "core", "src", "main", "resources", "error").toFile, + "delta-error-classes.json") + val errorClassFileContents = { + IOUtils.toString(deltaErrorClassSource.openStream()) + } + val mapper = JsonMapper.builder() + .addModule(DefaultScalaModule) + .enable(SerializationFeature.INDENT_OUTPUT) + .build() + val prettyPrinter = new DefaultPrettyPrinter() + .withArrayIndenter(DefaultIndenter.SYSTEM_LINEFEED_INSTANCE) + val rewrittenString = { + val writer = mapper.configure(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS, true) + .setSerializationInclusion(Include.NON_ABSENT) + .writer(prettyPrinter) + writer.writeValueAsString(deltaErrorClassToInfoMap) + } + + if (regenerateGoldenFiles) { + if (rewrittenString.trim != errorClassFileContents.trim) { + logInfo(s"Regenerating error class file $ossDeltaErrorFile") + Files.delete(ossDeltaErrorFile.toPath) + FileUtils.writeStringToFile(ossDeltaErrorFile, rewrittenString, 
StandardCharsets.UTF_8) + } + } else { + assert(rewrittenString.trim == errorClassFileContents.trim) + } + } + + test("Delta message format invariants") { + val messageFormats = deltaErrorClassToInfoMap.values.toSeq.flatMap { i => + i.subClass match { + // Has sub error class: the message template should be: base + sub + case Some(subs) => + subs.values.toSeq.map(sub => s"${i.messageTemplate} ${sub.messageTemplate}") + // Does not have any sub error class: the message template is itself + case None => Seq(i.messageTemplate) + } + } + checkCondition(messageFormats, s => s != null) + checkIfUnique(messageFormats) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTimeTravelSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTimeTravelSuite.scala new file mode 100644 index 00000000000..eb54bb54817 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTimeTravelSuite.scala @@ -0,0 +1,763 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.sql.Timestamp +import java.text.SimpleDateFormat +import java.util.{Calendar, Date, TimeZone} + +import scala.concurrent.duration._ +import scala.language.implicitConversions + +import org.apache.spark.sql.delta.DeltaHistoryManager.BufferingLogDeletionIterator +import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.sql.{functions, AnalysisException, QueryTest, Row} +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + +class DeltaTimeTravelSuite extends QueryTest + with SharedSparkSession + with SQLTestUtils + with DeltaSQLCommandTest { + + import testImplicits._ + + private val timeFormatter = new SimpleDateFormat("yyyyMMddHHmmssSSS") + + private implicit def durationToLong(duration: FiniteDuration): Long = { + duration.toMillis + } + + private implicit def longToTimestamp(ts: Long): Timestamp = new Timestamp(ts) + + private def modifyCommitTimestamp(deltaLog: DeltaLog, version: Long, ts: Long): Unit = { + val file = new File(FileNames.deltaFile(deltaLog.logPath, version).toUri) + file.setLastModified(ts) + val crc = new File(FileNames.checksumFile(deltaLog.logPath, version).toUri) + if (crc.exists()) { + crc.setLastModified(ts) + } + } + + private def modifyCheckpointTimestamp(deltaLog: DeltaLog, version: Long, ts: Long): Unit = { + val file = new File(FileNames.checkpointFileSingular(deltaLog.logPath, version).toUri) + file.setLastModified(ts) + } + + /** Generate commits with the given timestamp in millis. 
*/ + private def generateCommitsCheap(deltaLog: DeltaLog, commits: Long*): Unit = { + var startVersion = deltaLog.snapshot.version + 1 + commits.foreach { ts => + val action = createTestAddFile(path = startVersion.toString, modificationTime = startVersion) + deltaLog.startTransaction().commitManually(action) + modifyCommitTimestamp(deltaLog, startVersion, ts) + startVersion += 1 + } + } + + /** Generate commits with the given timestamp in millis. */ + private def generateCommits(location: String, commits: Long*): Unit = { + val deltaLog = DeltaLog.forTable(spark, location) + var startVersion = deltaLog.snapshot.version + 1 + commits.foreach { ts => + val rangeStart = startVersion * 10 + val rangeEnd = rangeStart + 10 + spark.range(rangeStart, rangeEnd).write.format("delta").mode("append").save(location) + val file = new File(FileNames.deltaFile(deltaLog.logPath, startVersion).toUri) + file.setLastModified(ts) + startVersion += 1 + } + } + + private def identifierWithTimestamp(identifier: String, ts: Long): String = { + s"$identifier@${timeFormatter.format(new Date(ts))}" + } + + private def identifierWithVersion(identifier: String, v: Long): String = { + s"$identifier@v$v" + } + + private implicit def longToTimestampExpr(value: Long): String = { + s"cast($value / 1000 as timestamp)" + } + + private def getSparkFormattedTimestamps(values: Long*): Seq[String] = { + // Simulates getting timestamps directly from Spark SQL + values.map(new Timestamp(_)).toDF("ts") + .select($"ts".cast("string")).as[String].collect() + .map(i => s"$i") + } + + private def historyTest(testName: String)(f: DeltaLog => Unit): Unit = { + testQuietly(testName) { + withTempDir { dir => f(DeltaLog.forTable(spark, dir)) } + } + } + + historyTest("getCommits should monotonize timestamps") { deltaLog => + val start = 1540415658000L + // Make the commits out of order + generateCommitsCheap(deltaLog, + start, + start - 5.seconds, // adjusts to start + 1 ms + start + 1.milli, // adjusts to start + 2 ms + start + 2.millis, // adjusts to start + 3 ms + start - 2.seconds, // adjusts to start + 4 ms + start + 10.seconds) + + val commits = DeltaHistoryManager.getCommits( + deltaLog.store, + deltaLog.logPath, + 0, + None, + deltaLog.newDeltaHadoopConf()) + assert(commits.map(_.timestamp) === Seq(start, + start + 1.millis, start + 2.millis, start + 3.millis, start + 4.millis, start + 10.seconds)) + } + + historyTest("describe history timestamps are adjusted according to file timestamp") { deltaLog => + // this is in '2018-10-24', so earlier than today. 
The recorded timestamps in commitInfo will + // be much after this + val start = 1540415658000L + // Make the commits out of order + generateCommitsCheap(deltaLog, start, + start - 5.seconds, // adjusts to start + 1 ms + start + 1.milli // adjusts to start + 2 ms + ) + + val history = new DeltaHistoryManager(deltaLog) + val commits = history.getHistory(None) + assert(commits.map(_.timestamp.getTime) === Seq(start + 2.millis, start + 1.milli, start)) + } + + historyTest("should filter only delta files when computing earliest version") { deltaLog => + val start = 1540415658000L + generateCommitsCheap(deltaLog, start, start + 10.seconds, start + 20.seconds) + + val history = new DeltaHistoryManager(deltaLog) + assert(history.getActiveCommitAtTime(start + 15.seconds, false).version === 1) + + val commits2 = history.getHistory(Some(10)) + assert(commits2.last.version === Some(0)) + + assert(new File(FileNames.deltaFile(deltaLog.logPath, 0L).toUri).delete()) + val e = intercept[AnalysisException] { + history.getActiveCommitAtTime(start + 15.seconds, false).version + } + assert(e.getMessage.contains("recreatable")) + } + + historyTest("resolving commits should return commit before timestamp") { deltaLog => + val start = 1540415658000L + // Make a commit every 20 minutes + val commits = Seq.tabulate(10)(i => start + (i * 20).minutes) + generateCommitsCheap(deltaLog, commits: _*) + // When maxKeys is 2, we will use the parallel search algorithm, when it is 1000, we will + // use the linear search method + Seq(1, 2, 1000).foreach { maxKeys => + val history = new DeltaHistoryManager(deltaLog, maxKeys) + + (0 until 10).foreach { i => + assert(history.getActiveCommitAtTime(start + (i * 20 + 10).minutes, true).version === i) + } + + val e = intercept[AnalysisException] { + // This is 20 minutes after the last commit + history.getActiveCommitAtTime(start + 200.minutes, false) + } + assert(e.getMessage.contains("after the latest commit timestamp")) + assert(history.getActiveCommitAtTime(start + 180.minutes, true).version === 9) + + val e2 = intercept[AnalysisException] { + history.getActiveCommitAtTime(start - 10.minutes, true) + } + assert(e2.getMessage.contains("before the earliest version")) + } + } + + /** + * Creates FileStatus objects, where the name is the version of a commit, and the modification + * timestamps come from the input. + */ + private def createFileStatuses(modTimes: Long*): Iterator[FileStatus] = { + modTimes.zipWithIndex.map { case (time, version) => + new FileStatus(10L, false, 1, 10L, time, new Path(version.toString)) + }.iterator + } + + /** + * Creates a log deletion iterator with a retention `maxTimestamp` and `maxVersion` (both + * inclusive). The input iterator takes the original file timestamps, and the deleted output will + * return the adjusted timestamps of files that would actually be consumed by the iterator. 
+ */ + private def testBufferingLogDeletionIterator( + maxTimestamp: Long, + maxVersion: Long)(inputTimestamps: Seq[Long], deleted: Seq[Long]): Unit = { + val i = new BufferingLogDeletionIterator( + createFileStatuses(inputTimestamps: _*), maxTimestamp, maxVersion, _.getName.toLong) + deleted.foreach { ts => + assert(i.hasNext, s"Was supposed to delete $ts, but iterator returned hasNext: false") + assert(i.next().getModificationTime === ts, "Returned files out of order!") + } + assert(!i.hasNext, "Iterator should be consumed") + } + + test("BufferingLogDeletionIterator: iterator behavior") { + val i1 = new BufferingLogDeletionIterator(Iterator.empty, 100, 100, _ => 1) + intercept[NoSuchElementException](i1.next()) + assert(!i1.hasNext) + + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 100)( + inputTimestamps = Seq(10), + deleted = Seq(10) + ) + + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 100)( + inputTimestamps = Seq(10, 15, 25), + deleted = Seq(10, 15, 25) + ) + } + + test("BufferingLogDeletionIterator: " + + "early exit while handling adjusted timestamps due to timestamp") { + // only should return 5 because 5 < 7 + testBufferingLogDeletionIterator(maxTimestamp = 7, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5) + ) + + // Should only return 5, because 10 is used to adjust the following 8 to 11 + testBufferingLogDeletionIterator(maxTimestamp = 10, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5) + ) + + // When it is 11, we can delete both 10 and 8 + testBufferingLogDeletionIterator(maxTimestamp = 11, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5, 10, 11) + ) + + // When it is 12, we can return all + testBufferingLogDeletionIterator(maxTimestamp = 12, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5, 10, 11, 12) + ) + + // Should only return 5, because 10 is used to adjust the following 8 to 11 + testBufferingLogDeletionIterator(maxTimestamp = 10, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8), + deleted = Seq(5) + ) + + // When it is 11, we can delete both 10 and 8 + testBufferingLogDeletionIterator(maxTimestamp = 11, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8), + deleted = Seq(5, 10, 11) + ) + } + + test("BufferingLogDeletionIterator: " + + "early exit while handling adjusted timestamps due to version") { + // only should return 5 because we can delete only up to version 0 + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 0)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5) + ) + + // Should only return 5, because 10 is used to adjust the following 8 to 11 + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 1)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5) + ) + + // When we can delete up to version 2, we can return up to version 2 + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 2)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5, 10, 11) + ) + + // When it is version 3, we can return all + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 3)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5, 10, 11, 12) + ) + + // Should only return 5, because 10 is used to adjust the following 8 to 11 + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 1)( + inputTimestamps = Seq(5, 10, 8), + deleted = Seq(5) + ) + + // When we can delete up to version 2, we can return up to version 2 + 
testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 2)( + inputTimestamps = Seq(5, 10, 8), + deleted = Seq(5, 10, 11) + ) + } + + test("BufferingLogDeletionIterator: multiple adjusted timestamps") { + Seq(9, 10, 11).foreach { retentionTimestamp => + // Files should be buffered but not deleted, because of the file 11, which has adjusted ts 12 + testBufferingLogDeletionIterator(maxTimestamp = retentionTimestamp, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 11, 14), + deleted = Seq(5) + ) + } + + // Safe to delete everything before (including) file: 11 which has adjusted timestamp 12 + testBufferingLogDeletionIterator(maxTimestamp = 12, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 11, 14), + deleted = Seq(5, 10, 11, 12) + ) + + Seq(0, 1, 2).foreach { retentionVersion => + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = retentionVersion)( + inputTimestamps = Seq(5, 10, 8, 11, 14), + deleted = Seq(5) + ) + } + + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 3)( + inputTimestamps = Seq(5, 10, 8, 11, 14), + deleted = Seq(5, 10, 11, 12) + ) + + // Test when the last element is adjusted with both timestamp and version + Seq(9, 10, 11).foreach { retentionTimestamp => + testBufferingLogDeletionIterator(maxTimestamp = retentionTimestamp, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 9), + deleted = Seq(5) + ) + } + + testBufferingLogDeletionIterator(maxTimestamp = 12, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 9), + deleted = Seq(5, 10, 11, 12) + ) + + Seq(0, 1, 2).foreach { retentionVersion => + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = retentionVersion)( + inputTimestamps = Seq(5, 10, 8, 9), + deleted = Seq(5) + ) + } + + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 3)( + inputTimestamps = Seq(5, 10, 8, 9), + deleted = Seq(5, 10, 11, 12) + ) + + Seq(9, 10, 11).foreach { retentionTimestamp => + testBufferingLogDeletionIterator(maxTimestamp = retentionTimestamp, maxVersion = 100)( + inputTimestamps = Seq(10, 8, 9), + deleted = Nil + ) + } + + // Test the first element causing cascading adjustments + testBufferingLogDeletionIterator(maxTimestamp = 12, maxVersion = 100)( + inputTimestamps = Seq(10, 8, 9), + deleted = Seq(10, 11, 12) + ) + + Seq(0, 1).foreach { retentionVersion => + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = retentionVersion)( + inputTimestamps = Seq(10, 8, 9), + deleted = Nil + ) + } + + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 2)( + inputTimestamps = Seq(10, 8, 9), + deleted = Seq(10, 11, 12) + ) + + // Test multiple batches of time adjustments + testBufferingLogDeletionIterator(maxTimestamp = 12, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 9, 12, 15, 14, 14), // 5, 10, 11, 12, 13, 15, 16, 17 + deleted = Seq(5) + ) + + Seq(13, 14, 15, 16).foreach { retentionTimestamp => + testBufferingLogDeletionIterator(maxTimestamp = retentionTimestamp, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 9, 12, 15, 14, 14), // 5, 10, 11, 12, 13, 15, 16, 17 + deleted = Seq(5, 10, 11, 12, 13) + ) + } + + testBufferingLogDeletionIterator(maxTimestamp = 17, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 9, 12, 15, 14, 14), // 5, 10, 11, 12, 13, 15, 16, 17 + deleted = Seq(5, 10, 11, 12, 13, 15, 16, 17) + ) + } + + test("[SPARK-45383] Time travel on a non-existing table should throw AnalysisException") { + intercept[AnalysisException] { + spark.sql("SELECT * FROM not_existing VERSION AS OF 0") + } + } + + test("as 
of timestamp in between commits should use commit before timestamp") { + withTempDir { dir => + val tblLoc = dir.getCanonicalPath + val start = 1540415658000L + generateCommits(tblLoc, start, start + 20.minutes, start + 40.minutes) + + val tablePathUri = identifierWithTimestamp(tblLoc, start + 10.minutes) + + val df1 = spark.read.format("delta").load(tablePathUri) + checkAnswer(df1.groupBy().count(), Row(10L)) + + // 2 minutes after start + val df2 = spark.read.format("delta").option("timestampAsOf", "2018-10-24 14:16:18") + .load(tblLoc) + + checkAnswer(df2.groupBy().count(), Row(10L)) + } + } + + test("as of timestamp on exact timestamp") { + withTempDir { dir => + val tblLoc = dir.getCanonicalPath + val start = 1540415658000L + generateCommits(tblLoc, start, start + 20.minutes) + + // Simulate getting the timestamp directly from Spark SQL + val ts = getSparkFormattedTimestamps(start, start + 20.minutes) + + checkAnswer( + spark.read.format("delta").option("timestampAsOf", ts.head).load(tblLoc).groupBy().count(), + Row(10L) + ) + + checkAnswer( + spark.read.format("delta").option("timestampAsOf", ts(1)).load(tblLoc).groupBy().count(), + Row(20L) + ) + + checkAnswer( + spark.read.format("delta").load(identifierWithTimestamp(tblLoc, start)).groupBy().count(), + Row(10L) + ) + + checkAnswer( + spark.read.format("delta").load(identifierWithTimestamp(tblLoc, start + 20.minutes)) + .groupBy().count(), + Row(20L) + ) + } + } + + test("as of timestamp on invalid timestamp") { + withTempDir { dir => + val tblLoc = dir.getCanonicalPath + val start = 1540415658000L + generateCommits(tblLoc, start, start + 20.minutes) + + val ex = intercept[AnalysisException] { + spark.read.format("delta").option("timestampAsOf", "i am not a timestamp") + .load(tblLoc).groupBy().count() + } + + assert(ex.getMessage.contains( + "The provided timestamp ('i am not a timestamp') cannot be converted to a valid timestamp")) + } + } + + test("as of exact timestamp after last commit should fail") { + withTempDir { dir => + val tblLoc = dir.getCanonicalPath + val start = 1540415658000L + generateCommits(tblLoc, start) + + // Simulate getting the timestamp directly from Spark SQL + val ts = getSparkFormattedTimestamps(start + 10.minutes) + + val e1 = intercept[AnalysisException] { + spark.read.format("delta").option("timestampAsOf", ts.head).load(tblLoc).collect() + } + assert(e1.getMessage.contains("VERSION AS OF 0")) + assert(e1.getMessage.contains("TIMESTAMP AS OF '2018-10-24 14:14:18'")) + + val e2 = intercept[AnalysisException] { + spark.read.format("delta").load(identifierWithTimestamp(tblLoc, start + 10.minutes)) + .collect() + } + assert(e2.getMessage.contains("VERSION AS OF 0")) + assert(e2.getMessage.contains("TIMESTAMP AS OF '2018-10-24 14:14:18'")) + + checkAnswer( + spark.read.format("delta").option("timestampAsOf", "2018-10-24 14:14:18") + .load(tblLoc).groupBy().count(), + Row(10) + ) + } + } + + test("as of with versions") { + withTempDir { dir => + val tblLoc = dir.getCanonicalPath + val start = 1540415658000L + generateCommits(tblLoc, start, start + 20.minutes, start + 40.minutes) + + val df = spark.read.format("delta").load(identifierWithVersion(tblLoc, 0)) + checkAnswer(df.groupBy().count(), Row(10L)) + + checkAnswer( + spark.read.format("delta").option("versionAsOf", "0").load(tblLoc).groupBy().count(), + Row(10) + ) + + checkAnswer( + spark.read.format("delta").option("versionAsOf", 1).load(tblLoc).groupBy().count(), + Row(20) + ) + + val e1 = intercept[AnalysisException] { + 
spark.read.format("delta").option("versionAsOf", 3).load(tblLoc).collect() + } + assert(e1.getMessage.contains("[0, 2]")) + + val deltaLog = DeltaLog.forTable(spark, tblLoc) + new File(FileNames.deltaFile(deltaLog.logPath, 0).toUri).delete() + val e2 = intercept[AnalysisException] { + spark.read.format("delta").option("versionAsOf", 0).load(tblLoc).collect() + } + assert(e2.getMessage.contains("recreatable")) + } + } + + test("time travelling with adjusted timestamps") { + withTempDir { dir => + val tblLoc = dir.getCanonicalPath + val start = 1540415658000L + generateCommits(tblLoc, start, start - 5.seconds, start + 3.minutes) + + val ts = getSparkFormattedTimestamps( + start, start + 1.milli, start + 119.seconds, start - 3.seconds) + + checkAnswer( + spark.read.option("timestampAsOf", ts.head).format("delta").load(tblLoc).groupBy().count(), + Row(10L) + ) + + checkAnswer( + spark.read.option("timestampAsOf", ts(1)).format("delta").load(tblLoc).groupBy().count(), + Row(20L) + ) + + checkAnswer( + spark.read.option("timestampAsOf", ts(2)).format("delta").load(tblLoc).groupBy().count(), + Row(20L) + ) + + val e = intercept[AnalysisException] { + spark.read.option("timestampAsOf", ts(3)).format("delta").load(tblLoc).collect() + } + assert(e.getMessage.contains("before the earliest version")) + } + } + + test("can't provide both version and timestamp in DataFrameReader") { + val e = intercept[IllegalArgumentException] { + spark.read.option("versionaSof", 1) + .option("timestampAsOF", "fake").format("delta").load("/some/fake") + } + assert(e.getMessage.contains("either provide 'timestampAsOf' or 'versionAsOf'")) + } + + test("don't time travel a valid delta path with @ syntax") { + withTempDir { dir => + val path = new File(dir, "base@v0").getCanonicalPath + spark.range(10).write.format("delta").mode("append").save(path) + spark.range(10).write.format("delta").mode("append").save(path) + + checkAnswer( + spark.read.format("delta").load(path), + spark.range(10).union(spark.range(10)).toDF() + ) + + checkAnswer( + spark.read.format("delta").load(path + "@v0"), + spark.range(10).toDF() + ) + } + } + + test("don't time travel a valid non-delta path with @ syntax") { + val format = "json" + withTempDir { dir => + val path = new File(dir, "base@v0").getCanonicalPath + spark.range(10).write.format(format).mode("append").save(path) + spark.range(10).write.format(format).mode("append").save(path) + + checkAnswer( + spark.read.format(format).load(path), + spark.range(10).union(spark.range(10)).toDF() + ) + + checkAnswer( + spark.table(s"$format.`$path`"), + spark.range(10).union(spark.range(10)).toDF() + ) + + intercept[AnalysisException] { + spark.read.format(format).load(path + "@v0").count() + } + + intercept[AnalysisException] { + spark.table(s"$format.`$path@v0`").count() + } + } + } + + test("scans on different versions of same table are executed correctly") { + withTempDir { dir => + val path = dir.getCanonicalPath + spark.range(5).selectExpr("id as key", "id * 10 as value").write.format("delta").save(path) + + spark.range(5, 10).selectExpr("id as key", "id * 10 as value") + .write.format("delta").mode("append").save(path) + + val df = spark.read.format("delta").option("versionAsOf", "0").load(path).as("a").join( + spark.read.format("delta").option("versionAsOf", "1").load(path).as("b"), + functions.expr("a.key == b.key"), + "fullOuter" + ).where("a.key IS NULL") // keys 5 to 9 should be null + assert(df.count() == 5) + } + } + + test("timestamp as of expression for table in database") { + 
withDatabase("testDb") { + sql("CREATE DATABASE testDb") + withTable("tbl") { + spark.range(10).write.format("delta").saveAsTable("testDb.tbl") + val ts = sql("DESCRIBE HISTORY testDb.tbl").select("timestamp").head().getTimestamp(0) + + sql(s"SELECT * FROM testDb.tbl TIMESTAMP AS OF " + + s"coalesce(CAST ('$ts' AS TIMESTAMP), current_date())") + } + } + } + + test("time travel with schema changes - should instantiate old schema") { + withTempDir { dir => + val tblLoc = dir.getCanonicalPath + spark.range(10).write.format("delta").mode("append").save(tblLoc) + spark.range(10, 20).withColumn("part", 'id) + .write.format("delta").mode("append").option("mergeSchema", true).save(tblLoc) + + checkAnswer( + spark.read.option("versionAsOf", 0).format("delta").load(tblLoc), + spark.range(10).toDF()) + + checkAnswer( + spark.read.format("delta").load(identifierWithVersion(tblLoc, 0)), + spark.range(10).toDF()) + } + } + + test("time travel with partition changes - should instantiate old schema") { + withTempDir { dir => + val tblLoc = dir.getCanonicalPath + val v0 = spark.range(10).withColumn("part5", 'id % 5) + + v0.write.format("delta").partitionBy("part5").mode("append").save(tblLoc) + spark.range(10, 20).withColumn("part2", 'id % 2) + .write + .format("delta") + .partitionBy("part2") + .mode("overwrite") + .option("overwriteSchema", true) + .save(tblLoc) + + checkAnswer( + spark.read.option("versionAsOf", 0).format("delta").load(tblLoc), + v0) + + checkAnswer( + spark.read.format("delta").load(identifierWithVersion(tblLoc, 0)), + v0) + } + } + + test("time travel support in SQL") { + withTempDir { dir => + val tblLoc = dir.getCanonicalPath + val start = 1540415658000L + generateCommits(tblLoc, start, start + 20.minutes) + val tableName = "testTable" + + withTable(tableName) { + spark.sql(s"create table $tableName(id long) using delta location '$tblLoc'") + + checkAnswer( + spark.sql(s"SELECT * from $tableName FOR VERSION AS OF 0"), + spark.read.option("versionAsOf", 0).format("delta").load(tblLoc)) + + checkAnswer( + spark.sql(s"SELECT * from $tableName VERSION AS OF 1"), + spark.read.option("versionAsOf", 1).format("delta").load(tblLoc)) + + val ex = intercept[VersionNotFoundException] { + spark.sql(s"SELECT * from $tableName FOR VERSION AS OF 2") + } + assert(ex.getMessage contains + "Cannot time travel Delta table to version 2. 
Available versions: [0, 1]") + + checkAnswer( + spark.sql(s"SELECT * from $tableName FOR TIMESTAMP AS OF '2018-10-24 14:14:18'"), + spark.read.option("versionAsOf", 0).format("delta").load(tblLoc)) + + checkAnswer( + spark.sql(s"SELECT * from $tableName TIMESTAMP AS OF '2018-10-24 14:34:18'"), + spark.read.option("versionAsOf", 1).format("delta").load(tblLoc)) + + val ex2 = intercept[DeltaErrors.TemporallyUnstableInputException] { + spark.sql(s"SELECT * from $tableName FOR TIMESTAMP AS OF '2018-10-24 20:14:18'") + } + assert(ex2.getMessage contains + "The provided timestamp: 2018-10-24 20:14:18.0 is after the " + + "latest commit timestamp of\n2018-10-24 14:34:18.0") + } + } + } + + + test("SPARK-41154: Correct relation caching for queries with time travel spec") { + val tblName = "tab" + withTable(tblName) { + sql(s"CREATE TABLE $tblName USING DELTA AS SELECT 1 as c") + sql(s"INSERT INTO $tblName SELECT 2 as c") + checkAnswer( + sql(s""" + |SELECT * FROM $tblName VERSION AS OF '0' + |UNION ALL + |SELECT * FROM $tblName VERSION AS OF '1' + |""".stripMargin), + Row(1) :: Row(1) :: Row(2) :: Nil) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTimestampNTZSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTimestampNTZSuite.scala new file mode 100644 index 00000000000..ab7a7493997 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaTimestampNTZSuite.scala @@ -0,0 +1,135 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.sql.Timestamp +import java.time.LocalDateTime + +import org.apache.spark.sql.delta.actions.Protocol +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.SparkThrowable +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType + +class DeltaTimestampNTZSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + + private def getProtocolForTable(table: String): Protocol = { + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(table)) + deltaLog.unsafeVolatileSnapshot.protocol + } + + test("create a new table with TIMESTAMP_NTZ, higher protocol and feature should be picked.") { + withTable("tbl") { + sql("CREATE TABLE tbl(c1 STRING, c2 TIMESTAMP, c3 TIMESTAMP_NTZ) USING DELTA") + sql( + """INSERT INTO tbl VALUES + |('foo','2022-01-02 03:04:05.123456','2022-01-02 03:04:05.123456')""".stripMargin) + assert(spark.table("tbl").head == Row( + "foo", + new Timestamp(2022 - 1900, 0, 2, 3, 4, 5, 123456000), + LocalDateTime.of(2022, 1, 2, 3, 4, 5, 123456000))) + assert(getProtocolForTable("tbl") == + TimestampNTZTableFeature.minProtocolVersion.withFeature(TimestampNTZTableFeature) + ) + } + } + + test("creating a table without TIMESTAMP_NTZ should use the usual minimum protocol") { + withTable("tbl") { + sql("CREATE TABLE tbl(c1 STRING, c2 TIMESTAMP, c3 TIMESTAMP) USING DELTA") + assert(getProtocolForTable("tbl") == Protocol(1, 2)) + + val deltaLog = DeltaLog.forTable(spark, TableIdentifier("tbl")) + assert( + !deltaLog.unsafeVolatileSnapshot.protocol.isFeatureSupported(TimestampNTZTableFeature), + s"Table tbl contains TimestampNTZFeature descriptor when its not supposed to" + ) + } + } + + test("add a new column using TIMESTAMP_NTZ should upgrade to the correct protocol versions") { + withTable("tbl") { + sql("CREATE TABLE tbl(c1 STRING, c2 TIMESTAMP) USING delta") + assert(getProtocolForTable("tbl") == Protocol(1, 2)) + + // Should throw error + val e = intercept[SparkThrowable] { + sql("ALTER TABLE tbl ADD COLUMN c3 TIMESTAMP_NTZ") + } + + // add table feature + sql(s"ALTER TABLE tbl " + + s"SET TBLPROPERTIES('delta.feature.timestampNtz' = 'supported')") + + sql("ALTER TABLE tbl ADD COLUMN c3 TIMESTAMP_NTZ") + + + sql( + """INSERT INTO tbl VALUES + |('foo','2022-01-02 03:04:05.123456','2022-01-02 03:04:05.123456')""".stripMargin) + assert(spark.table("tbl").head == Row( + "foo", + new Timestamp(2022 - 1900, 0, 2, 3, 4, 5, 123456000), + LocalDateTime.of(2022, 1, 2, 3, 4, 5, 123456000))) + + assert(getProtocolForTable("tbl") == + TimestampNTZTableFeature.minProtocolVersion + .withFeature(TimestampNTZTableFeature) + .withFeature(InvariantsTableFeature) + .withFeature(AppendOnlyTableFeature) + ) + } + } + + test("use TIMESTAMP_NTZ in a partition column") { + withTable("delta_test") { + sql( + """CREATE TABLE delta_test(c1 STRING, c2 TIMESTAMP, c3 TIMESTAMP_NTZ) + |USING delta + |PARTITIONED BY (c3)""".stripMargin) + sql( + """INSERT INTO delta_test VALUES + |('foo','2022-01-02 03:04:05.123456','2022-01-02 03:04:05.123456')""".stripMargin) + assert(spark.table("delta_test").head == Row( + "foo", + new Timestamp(2022 - 1900, 0, 2, 3, 4, 5, 123456000), + LocalDateTime.of(2022, 1, 2, 3, 4, 5, 123456000))) + assert(getProtocolForTable("delta_test") == + TimestampNTZTableFeature.minProtocolVersion.withFeature(TimestampNTZTableFeature) + ) + } + } + + 
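// Illustrative sketch (editorial addition, not part of the original change): the protocol assertions in the tests above can be wrapped in a small reusable probe. The helper name supportsTimestampNtz is hypothetical, but every call it uses already appears in this suite. + private def supportsTimestampNtz(table: String): Boolean = { + // Read the table's latest known snapshot and check whether the timestampNtz table feature is supported. + DeltaLog.forTable(spark, TableIdentifier(table)) + .unsafeVolatileSnapshot.protocol.isFeatureSupported(TimestampNTZTableFeature) + } + + 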
test("min/max stats collection should apply on TIMESTAMP_NTZ") { + withTable("delta_test") { + val schemaString = "c1 STRING, c2 TIMESTAMP, c3 TIMESTAMP_NTZ" + sql(s"CREATE TABLE delta_test($schemaString) USING delta") + val statsSchema = DeltaLog.forTable(spark, TableIdentifier("delta_test")) + .unsafeVolatileSnapshot.statsSchema + assert(statsSchema("minValues").dataType == StructType + .fromDDL(schemaString)) + assert(statsSchema("maxValues").dataType == StructType + .fromDDL(schemaString)) + } + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaUDFSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaUDFSuite.scala new file mode 100644 index 00000000000..dbecc3f4005 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaUDFSuite.scala @@ -0,0 +1,104 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.{Encoder, QueryTest, Row} +import org.apache.spark.sql.expressions.UserDefinedFunction +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SharedSparkSession + +class DeltaUDFSuite extends QueryTest with SharedSparkSession { + + import testImplicits._ + + private def testUDF( + name: String, + testResultFunc: => Unit): Unit = { + test(name) { + // Verify the returned UDF function is working correctly + testResultFunc + } + } + + private def testUDF( + name: String, + func: => UserDefinedFunction, + expected: Any): Unit = { + testUDF( + name, + checkAnswer(Seq("foo").toDF.select(func()), Row(expected)) + ) + } + + private def testUDF[T: Encoder]( + name: String, + func: => UserDefinedFunction, + input: T, + expected: Any): Unit = { + testUDF( + name, + checkAnswer(Seq(input).toDF.select(func(col("value"))), Row(expected)) + ) + } + + private def testUDF[T1: Encoder, T2: Encoder]( + name: String, + func: => UserDefinedFunction, + input1: T1, + input2: T2, + expected: Any): Unit = { + testUDF( + name, + { + val df = Seq(input1) + .toDF("value1") + .withColumn("value2", lit(input2).as[T2]) + .select(func(col("value1"), col("value2"))) + checkAnswer(df, Row(expected)) + } + ) + } + + testUDF( + name = "stringFromString", + func = DeltaUDF.stringFromString(x => x), + input = "foo", + expected = "foo") + testUDF( + name = "intFromString", + func = DeltaUDF.intFromString(x => x.toInt), + input = "100", + expected = 100) + testUDF( + name = "intFromStringBoolean", + func = DeltaUDF.intFromStringBoolean((x, y) => 1), + input1 = "foo", + input2 = true, + expected = 1) + testUDF(name = "boolean", func = DeltaUDF.boolean(() => true), expected = true) + testUDF( + name = "stringFromMap", + func = DeltaUDF.stringFromMap(x => x.toString), + input = Map("foo" -> "bar"), + expected = "Map(foo -> bar)") + testUDF( + name = "booleanFromMap", + func = DeltaUDF.booleanFromMap(x => x.isEmpty), + input = Map("foo" -> "bar"), + expected = false) +} diff 
--git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaUpdateCatalogSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaUpdateCatalogSuite.scala new file mode 100644 index 00000000000..c4f9ab969bf --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaUpdateCatalogSuite.scala @@ -0,0 +1,544 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File + +import scala.util.control.NonFatal + +import com.databricks.spark.util.Log4jUsageLogger +import org.apache.spark.sql.delta.hooks.UpdateCatalog +import org.apache.spark.sql.delta.hooks.UpdateCatalog.MAX_CATALOG_TYPE_DDL_LENGTH +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaHiveTest +import com.fasterxml.jackson.core.JsonParseException + +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.functions.{lit, struct} +import org.apache.spark.sql.hive.HiveExternalCatalog +import org.apache.spark.sql.types.{ArrayType, DoubleType, IntegerType, LongType, MapType, StringType, StructField, StructType} +import org.apache.spark.util.{ThreadUtils, Utils} + +class DeltaUpdateCatalogSuite + extends DeltaUpdateCatalogSuiteBase + with DeltaHiveTest { + + import testImplicits._ + + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set(DeltaSQLConf.DELTA_UPDATE_CATALOG_ENABLED.key, "true") + } + + override def beforeEach(): Unit = { + super.beforeEach() + cleanupDefaultTable() + } + + override def afterEach(): Unit = { + if (!UpdateCatalog.awaitCompletion(10000)) { + logWarning(s"There are active catalog update requests after 10 seconds") + } + cleanupDefaultTable() + super.afterEach() + } + + /** Remove Hive-specific table properties.
*/ + override protected def filterProperties(properties: Map[String, String]): Map[String, String] = { + properties.filterKeys(_ != "transient_lastDdlTime").toMap + } + + + test("streaming") { + withTable(tbl) { + implicit val _sqlContext = spark.sqlContext + val stream = MemoryStream[Long] + val df1 = stream.toDF().toDF("id") + + withTempDir { dir => + try { + val q = df1.writeStream + .option("checkpointLocation", dir.getCanonicalPath) + .format("delta") + .toTable(tbl) + + verifyTableMetadata(expectedSchema = df1.schema.asNullable) + + stream.addData(1, 2, 3) + q.processAllAvailable() + q.stop() + + val q2 = df1.withColumn("id2", 'id) + .writeStream + .format("delta") + .option("mergeSchema", "true") + .option("checkpointLocation", dir.getCanonicalPath) + .toTable(tbl) + + stream.addData(4, 5, 6) + q2.processAllAvailable() + + verifyTableMetadataAsync(expectedSchema = df1.schema.asNullable.add("id2", LongType)) + } finally { + spark.streams.active.foreach(_.stop()) + } + } + } + } + + test("streaming - external location") { + withTempDir { dir => + withTable(tbl) { + implicit val _sqlContext = spark.sqlContext + val stream = MemoryStream[Long] + val df1 = stream.toDF().toDF("id") + + val chk = new File(dir, "chkpoint").getCanonicalPath + val data = new File(dir, "data").getCanonicalPath + try { + val q = df1.writeStream + .option("checkpointLocation", chk) + .format("delta") + .option("path", data) + .toTable(tbl) + + verifyTableMetadata(expectedSchema = df1.schema.asNullable) + + stream.addData(1, 2, 3) + q.processAllAvailable() + q.stop() + + val q2 = df1.withColumn("id2", 'id) + .writeStream + .format("delta") + .option("mergeSchema", "true") + .option("checkpointLocation", chk) + .toTable(tbl) + + stream.addData(4, 5, 6) + q2.processAllAvailable() + + verifyTableMetadataAsync(expectedSchema = df1.schema.add("id2", LongType).asNullable) + } finally { + spark.streams.active.foreach(_.stop()) + } + } + } + } + + test("streaming - external table that already exists") { + withTable(tbl) { + implicit val _sqlContext = spark.sqlContext + val stream = MemoryStream[Long] + val df1 = stream.toDF().toDF("id") + + withTempDir { dir => + val chk = new File(dir, "chkpoint").getCanonicalPath + val data = new File(dir, "data").getCanonicalPath + + spark.range(10).write.format("delta").save(data) + try { + val q = df1.writeStream + .option("checkpointLocation", chk) + .format("delta") + .option("path", data) + .toTable(tbl) + + verifyTableMetadataAsync(expectedSchema = df1.schema.asNullable) + + stream.addData(1, 2, 3) + q.processAllAvailable() + q.stop() + + val q2 = df1.withColumn("id2", 'id) + .writeStream + .format("delta") + .option("mergeSchema", "true") + .option("checkpointLocation", chk) + .toTable(tbl) + + stream.addData(4, 5, 6) + q2.processAllAvailable() + + verifyTableMetadataAsync(expectedSchema = df1.schema.add("id2", LongType).asNullable) + } finally { + spark.streams.active.foreach(_.stop()) + } + } + } + } + + + test("convert to delta with partitioning change") { + withTable(tbl) { + val df = spark.range(10).withColumn("part", 'id / 2).withColumn("id2", 'id) + df.writeTo(tbl) + .partitionedBy('part) + .using("parquet") + .create() + + // Partitioning columns go to the end for parquet tables + val tableSchema = + new StructType().add("id", LongType).add("id2", LongType).add("part", DoubleType) + verifyTableMetadata( + expectedSchema = tableSchema, + expectedProperties = Map.empty, + partitioningCols = Seq("part") + ) + + sql(s"CONVERT TO DELTA $tbl PARTITIONED BY (part double)") + 
// Information is duplicated for now + verifyTableMetadata( + expectedSchema = tableSchema, + expectedProperties = Map.empty, + partitioningCols = Seq("part") + ) + + // Remove partitioning of table + df.writeTo(tbl).using("delta").replace() + + assert(snapshot.metadata.partitionColumns === Nil, "Table is unpartitioned") + + // Hive does not allow for the removal of the partition column once it has + // been added. Spark keeps the partition columns towards the end if it + // finds them in Hive. So, for converted tables with partitions, + // Hive schema != df.schema + val expectedSchema = tableSchema + + // Schema converts to Delta's format + verifyTableMetadata( + expectedSchema = expectedSchema, + expectedProperties = getBaseProperties(snapshot), + partitioningCols = Seq("part") // The partitioning information cannot be removed... + ) + + // table is still usable + checkAnswer(spark.table(tbl), df) + + val df2 = spark.range(10).withColumn("id2", 'id) + // Gets rid of partition column "part" from the schema + df2.writeTo(tbl).using("delta").replace() + + val expectedSchema2 = new StructType() + .add("id", LongType).add("id2", LongType).add("part", DoubleType) + verifyTableMetadataAsync( + expectedSchema = expectedSchema2, + expectedProperties = getBaseProperties(snapshot), + partitioningCols = Seq("part") // The partitioning information cannot be removed... + ) + + // table is still usable + checkAnswer(spark.table(tbl), df2) + } + } + + test("partitioned table + add column") { + withTable(tbl) { + val df = spark.range(10).withColumn("part", 'id / 2).withColumn("id2", 'id) + df.writeTo(tbl) + .partitionedBy('part) + .using("delta") + .create() + + val tableSchema = + new StructType().add("id", LongType).add("part", DoubleType).add("id2", LongType) + verifyTableMetadata( + expectedSchema = tableSchema, + expectedProperties = getBaseProperties(snapshot), + partitioningCols = Seq()) + + sql(s"ALTER TABLE $tbl ADD COLUMNS (id3 bigint)") + verifyTableMetadataAsync( + expectedSchema = tableSchema.add("id3", LongType), + expectedProperties = getBaseProperties(snapshot), + partitioningCols = Seq()) + } + } + + test("partitioned convert to delta with schema change") { + withTable(tbl) { + val df = spark.range(10).withColumn("part", 'id / 2).withColumn("id2", 'id) + df.writeTo(tbl) + .partitionedBy('part) + .using("parquet") + .create() + + // Partitioning columns go to the end + val tableSchema = + new StructType().add("id", LongType).add("id2", LongType).add("part", DoubleType) + verifyTableMetadata( + expectedSchema = tableSchema, + expectedProperties = Map.empty, + partitioningCols = Seq("part") + ) + + sql(s"CONVERT TO DELTA $tbl PARTITIONED BY (part double)") + // Information is duplicated for now + verifyTableMetadata( + expectedSchema = tableSchema, + expectedProperties = Map.empty, + partitioningCols = Seq("part") + ) + + sql(s"ALTER TABLE $tbl ADD COLUMNS (id3 bigint)") + + // Hive does not allow for the removal of the partition column once it has + // been added. Spark keeps the partition columns towards the end if it + // finds them in Hive. 
So, for converted tables with partitions, + // Hive schema != df.schema + val expectedSchema = new StructType() + .add("id", LongType) + .add("id2", LongType) + .add("id3", LongType) + .add("part", DoubleType) + + verifyTableMetadataAsync( + expectedSchema = expectedSchema, + partitioningCols = Seq("part") + ) + + // Table is still queryable + checkAnswer( + spark.table(tbl), + // Ordering of columns are different than df due to Hive semantics + spark.range(10).withColumn("id2", 'id) + .withColumn("part", 'id / 2) + .withColumn("id3", lit(null))) + } + } + + + import UpdateCatalog.MAX_CATALOG_TYPE_DDL_LENGTH + + test("Very long schemas can be stored in the catalog") { + withTable(tbl) { + val schema = StructType(Seq.tabulate(1000)(i => StructField(s"col$i", StringType))) + require(schema.toDDL.length >= MAX_CATALOG_TYPE_DDL_LENGTH, + s"The length of the schema should be over $MAX_CATALOG_TYPE_DDL_LENGTH " + + "characters for this test") + + sql(s"CREATE TABLE $tbl (${schema.toDDL}) USING delta") + verifyTableMetadata(expectedSchema = schema) + } + } + + test("Schemas that contain very long fields cannot be stored in the catalog") { + withTable(tbl) { + val schema = new StructType() + .add("i", StringType) + .add("struct", StructType(Seq.tabulate(1000)(i => StructField(s"col$i", StringType)))) + require(schema.toDDL.length >= MAX_CATALOG_TYPE_DDL_LENGTH, + s"The length of the schema should be over $MAX_CATALOG_TYPE_DDL_LENGTH " + + s"characters for this test") + + sql(s"CREATE TABLE $tbl (${schema.toDDL}) USING delta") + verifySchemaInCatalog() + } + } + + test("Schemas that contain very long fields cannot be stored in the catalog - array") { + withTable(tbl) { + val struct = StructType(Seq.tabulate(1000)(i => StructField(s"col$i", StringType))) + val schema = new StructType() + .add("i", StringType) + .add("array", ArrayType(struct)) + require(schema.toDDL.length >= MAX_CATALOG_TYPE_DDL_LENGTH, + s"The length of the schema should be over $MAX_CATALOG_TYPE_DDL_LENGTH " + + s"characters for this test") + + sql(s"CREATE TABLE $tbl (${schema.toDDL}) USING delta") + verifySchemaInCatalog() + } + } + + test("Schemas that contain very long fields cannot be stored in the catalog - map") { + withTable(tbl) { + val struct = StructType(Seq.tabulate(1000)(i => StructField(s"col$i", StringType))) + val schema = new StructType() + .add("i", StringType) + .add("map", MapType(StringType, struct)) + require(schema.toDDL.length >= MAX_CATALOG_TYPE_DDL_LENGTH, + s"The length of the schema should be over $MAX_CATALOG_TYPE_DDL_LENGTH " + + s"characters for this test") + + sql(s"CREATE TABLE $tbl (${schema.toDDL}) USING delta") + verifySchemaInCatalog() + } + } + + test("Very long schemas can be stored in the catalog - partitioned") { + withTable(tbl) { + val schema = StructType(Seq.tabulate(1000)(i => StructField(s"col$i", StringType))) + .add("part", StringType) + require(schema.toDDL.length >= MAX_CATALOG_TYPE_DDL_LENGTH, + "The length of the schema should be over 4000 characters for this test") + + sql(s"CREATE TABLE $tbl (${schema.toDDL}) USING delta PARTITIONED BY (part)") + verifyTableMetadata(expectedSchema = schema) + } + } + + test("Very long nested fields cannot be stored in the catalog - partitioned") { + withTable(tbl) { + val schema = new StructType() + .add("i", StringType) + .add("part", StringType) + .add("struct", StructType(Seq.tabulate(1000)(i => StructField(s"col$i", StringType)))) + require(schema.toDDL.length >= MAX_CATALOG_TYPE_DDL_LENGTH, + "The length of the schema should be over 4000 
characters for this test") + + sql(s"CREATE TABLE $tbl (${schema.toDDL}) USING delta PARTITIONED BY (part)") + verifySchemaInCatalog() + } + } + + // scalastyle:off nonascii + test("Schema containing non-latin characters cannot be stored - top-level") { + withTable(tbl) { + val schema = new StructType().add("今天", "string") + sql(s"CREATE TABLE $tbl (${schema.toDDL}) USING delta") + verifySchemaInCatalog() + } + } + + test("Schema containing non-latin characters cannot be stored - struct") { + withTable(tbl) { + val schema = new StructType().add("struct", new StructType().add("今天", "string")) + sql(s"CREATE TABLE $tbl (${schema.toDDL}) USING delta") + verifySchemaInCatalog() + } + } + + test("Schema containing non-latin characters cannot be stored - array") { + withTable(tbl) { + val schema = new StructType() + .add("i", StringType) + .add("array", ArrayType(new StructType().add("今天", "string"))) + + sql(s"CREATE TABLE $tbl (${schema.toDDL}) USING delta") + verifySchemaInCatalog() + } + } + + test("Schema containing non-latin characters cannot be stored - map") { + withTable(tbl) { + val schema = new StructType() + .add("i", StringType) + .add("map", MapType(StringType, new StructType().add("今天", "string"))) + + sql(s"CREATE TABLE $tbl (${schema.toDDL}) USING delta") + verifySchemaInCatalog() + } + } + // scalastyle:on nonascii + + /** + * Verifies that the schema stored in the catalog explicitly is empty, however the getTablesByName + * method still correctly returns the actual schema. + */ + private def verifySchemaInCatalog( + table: String = tbl, + catalogPartitionCols: Seq[String] = Nil): Unit = { + val cat = spark.sessionState.catalog.externalCatalog.getTable("default", table) + assert(cat.schema.isEmpty, s"Schema wasn't empty") + assert(cat.partitionColumnNames === catalogPartitionCols) + getBaseProperties(snapshot).foreach { case (k, v) => + assert(cat.properties.get(k) === Some(v), + s"Properties didn't match for table: $table. Expected: ${getBaseProperties(snapshot)}, " + + s"Got: ${cat.properties}") + } + assert(cat.properties(UpdateCatalog.ERROR_KEY) === UpdateCatalog.LONG_SCHEMA_ERROR) + + // Make sure table is readable + checkAnswer(spark.table(table), Nil) + } + + def testAddRemoveProperties(): Unit = { + withTable(tbl) { + val df = spark.range(10).toDF("id") + df.writeTo(tbl) + .using("delta") + .create() + + var initialProperties: Map[String, String] = Map.empty + val logs = Log4jUsageLogger.track { + sql(s"ALTER TABLE $tbl SET TBLPROPERTIES(some.key = 1, another.key = 2)") + + initialProperties = getBaseProperties(snapshot) + verifyTableMetadataAsync( + expectedSchema = df.schema.asNullable, + expectedProperties = Map("some.key" -> "1", "another.key" -> "2") ++ + initialProperties + ) + } + val updateLogged = logs.filter(_.metric == "tahoeEvent") + .filter(_.tags.get("opType").exists(_.startsWith("delta.catalog.update.properties"))) + assert(updateLogged.nonEmpty, "Ensure that the schema update in the MetaStore is logged") + + // The UpdateCatalog hook only checks if new properties have been + // added. If properties have been removed only, no metadata update will be triggered. 
+ val logs2 = Log4jUsageLogger.track { + sql(s"ALTER TABLE $tbl UNSET TBLPROPERTIES(another.key)") + verifyTableMetadataAsync( + expectedSchema = df.schema.asNullable, + expectedProperties = Map("some.key" -> "1", "another.key" -> "2") ++ + initialProperties + ) + } + val updateLogged2 = logs2.filter(_.metric == "tahoeEvent") + .filter(_.tags.get("opType").exists(_.startsWith("delta.catalog.update.properties"))) + assert(updateLogged2.size == 0, "Ensure that no MetaStore update is logged when properties are only removed") + + // Adding a new property will trigger an update + val logs3 = Log4jUsageLogger.track { + sql(s"ALTER TABLE $tbl SET TBLPROPERTIES(a.third.key = 3)") + verifyTableMetadataAsync( + expectedSchema = df.schema.asNullable, + expectedProperties = Map("some.key" -> "1", "a.third.key" -> "3") ++ + getBaseProperties(snapshot) + ) + } + val updateLogged3 = logs3.filter(_.metric == "tahoeEvent") + .filter(_.tags.get("opType").exists(_.startsWith("delta.catalog.update.properties"))) + assert(updateLogged3.nonEmpty, "Ensure that the schema update in the MetaStore is logged") + } + } + + test("add and remove properties") { + testAddRemoveProperties() + } + + test("alter table commands update the catalog") { + runAlterTableTests { (tableName, expectedSchema) => + verifyTableMetadataAsync( + expectedSchema = expectedSchema, + // The ALTER TABLE statements in runAlterTableTests create table version 7. + // However, version 7 is created by dropping a CHECK constraint, which currently + // *does not* trigger a catalog update. For Hive tables, only *adding* properties + // causes a catalog update, not *removing*. Hence, the metadata in the catalog should + // still be at version 6. + expectedProperties = getBaseProperties(snapshotAt(6)) ++ + Map("some" -> "thing", "delta.constraints.id_3" -> "id3 > 10"), + table = tableName + ) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaUpdateCatalogSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaUpdateCatalogSuiteBase.scala new file mode 100644 index 00000000000..4f9f25acef7 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaUpdateCatalogSuiteBase.scala @@ -0,0 +1,313 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.spark.sql.delta + +import java.io.File + +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils +import org.apache.spark.sql.delta.hooks.UpdateCatalog +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.scalatest.time.SpanSugar + +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.{lit, struct} +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types.{BooleanType, DoubleType, IntegerType, LongType, StringType, StructField, StructType} +import org.apache.spark.util.{ThreadUtils, Utils} + +abstract class DeltaUpdateCatalogSuiteBase + extends QueryTest + with SQLTestUtils + with SpanSugar { + + protected val tbl = "delta_table" + + import testImplicits._ + + protected def cleanupDefaultTable(): Unit = disableUpdates { + spark.sql(s"DROP TABLE IF EXISTS $tbl") + val path = spark.sessionState.catalog.defaultTablePath(TableIdentifier(tbl)) + try Utils.deleteRecursively(new File(path)) catch { + case NonFatal(e) => // do nothing + } + } + + /** Turns off the storing of metadata (schema + properties) in the catalog. */ + protected def disableUpdates(f: => Unit): Unit = { + withSQLConf(DeltaSQLConf.DELTA_UPDATE_CATALOG_ENABLED.key -> "false") { + f + } + } + + protected def deltaLog: DeltaLog = DeltaLog.forTable(spark, TableIdentifier(tbl)) + protected def snapshot: Snapshot = deltaLog.unsafeVolatileSnapshot + protected def snapshotAt(v: Long): Snapshot = deltaLog.getSnapshotAt(v) + + protected def getBaseProperties(snapshot: Snapshot): Map[String, String] = { + Map( + DeltaConfigs.METASTORE_LAST_UPDATE_VERSION -> snapshot.version.toString, + DeltaConfigs.METASTORE_LAST_COMMIT_TIMESTAMP -> snapshot.timestamp.toString, + DeltaConfigs.MIN_READER_VERSION.key -> snapshot.protocol.minReaderVersion.toString, + DeltaConfigs.MIN_WRITER_VERSION.key -> snapshot.protocol.minWriterVersion.toString) ++ + snapshot.protocol.readerAndWriterFeatureNames.map { name => + s"${TableFeatureProtocolUtils.FEATURE_PROP_PREFIX}$name" -> + TableFeatureProtocolUtils.FEATURE_PROP_SUPPORTED + } ++ snapshot.metadata.configuration.get("delta.enableDeletionVectors") + .map("delta.enableDeletionVectors" -> _).toMap + } + + /** + * Verifies that the table metadata in the catalog is eventually up-to-date. Updates to the + * catalog are generally asynchronous, except for explicit DDL operations, e.g. CREATE/REPLACE. + */ + protected def verifyTableMetadataAsync( + expectedSchema: StructType, + expectedProperties: Map[String, String] = getBaseProperties(snapshot), + table: String = tbl, + partitioningCols: Seq[String] = Nil): Unit = { + // We unfortunately need an eventually, because the updates can be async + eventually(timeout(10.seconds)) { + verifyTableMetadata(expectedSchema, expectedProperties, table, partitioningCols) + } + // Ensure that no other threads will later revert us back to the state we just checked + if (!UpdateCatalog.awaitCompletion(10000)) { + logWarning(s"There are active catalog update requests after 10 seconds") + } + } + + protected def filterProperties(properties: Map[String, String]): Map[String, String] + + /** Verifies that the table metadata in the catalog is up-to-date.
*/ + protected def verifyTableMetadata( + expectedSchema: StructType, + expectedProperties: Map[String, String] = getBaseProperties(snapshot), + table: String = tbl, + partitioningCols: Seq[String] = Nil): Unit = { + DeltaLog.clearCache() + val cat = spark.sessionState.catalog.externalCatalog.getTable("default", table) + assert(cat.schema === expectedSchema, s"Schema didn't match for table: $table") + assert(cat.partitionColumnNames === partitioningCols) + assert(filterProperties(cat.properties) === expectedProperties, + s"Properties didn't match for table: $table") + + val tables = spark.sessionState.catalog.getTablesByName(Seq(TableIdentifier(table))) + + assert(tables.head.schema === expectedSchema) + assert(tables.head.partitionColumnNames === partitioningCols) + assert(filterProperties(tables.head.properties) === expectedProperties) + } + + + test("mergeSchema") { + withTable(tbl) { + val df = spark.range(10).withColumn("part", 'id / 2) + df.writeTo(tbl).using("delta").create() + + verifyTableMetadata(expectedSchema = df.schema.asNullable) + + val df2 = spark.range(10).withColumn("part", 'id / 2).withColumn("id2", 'id) + df2.writeTo(tbl) + .option("mergeSchema", "true") + .append() + + verifyTableMetadataAsync(expectedSchema = df2.schema.asNullable) + } + } + + test("mergeSchema - nested data types") { + withTable(tbl) { + val df = spark.range(10).withColumn("part", 'id / 2) + .withColumn("str", struct('id.cast("int") as "int")) + df.writeTo(tbl).using("delta").create() + + verifyTableMetadata(expectedSchema = df.schema.asNullable) + + val df2 = spark.range(10).withColumn("part", 'id / 2) + .withColumn("str", struct('id as "id2", 'id.cast("int") as "int")) + df2.writeTo(tbl) + .option("mergeSchema", "true") + .append() + + val schema = new StructType() + .add("id", LongType) + .add("part", DoubleType) + .add("str", new StructType() + .add("int", IntegerType) + .add("id2", LongType)) // New columns go to the end + verifyTableMetadataAsync(expectedSchema = schema) + } + } + + + test("merge") { + val tmp = "tmpView" + withDeltaTable { df => + withTempView(tmp) { + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> "true") { + df.withColumn("id2", 'id).createOrReplaceTempView(tmp) + sql( + s"""MERGE INTO $tbl t + |USING $tmp s + |ON t.id = s.id + |WHEN NOT MATCHED THEN INSERT * + """.stripMargin) + + verifyTableMetadataAsync(df.withColumn("id2", 'id).schema.asNullable) + } + } + } + } + + test("creating and replacing a table puts the schema and table properties in the metastore") { + withTable(tbl) { + val df = spark.range(10).withColumn("part", 'id / 2).withColumn("id2", 'id) + df.writeTo(tbl) + .tableProperty("delta.checkpointInterval", "5") + .tableProperty("some", "thing") + .partitionedBy('part) + .using("delta") + .create() + + verifyTableMetadata( + expectedSchema = df.schema.asNullable, + expectedProperties = getBaseProperties(snapshot) ++ Map( + "delta.checkpointInterval" -> "5", + "some" -> "thing") + ) + + val df2 = spark.range(10).withColumn("part", 'id / 2) + df2.writeTo(tbl) + .tableProperty("other", "thing") + .using("delta") + .replace() + + verifyTableMetadata( + expectedSchema = df2.schema.asNullable, + expectedProperties = getBaseProperties(snapshot) ++ Map("other" -> "thing") + ) + } + } + + test("creating table in metastore over existing path") { + withTempDir { dir => + withTable(tbl) { + val df = spark.range(10).withColumn("part", 'id % 2).withColumn("id2", 'id) + df.write.format("delta").partitionBy("part").save(dir.getCanonicalPath) + + sql(s"CREATE TABLE 
$tbl USING delta LOCATION '${dir.getCanonicalPath}'") + verifyTableMetadata(df.schema.asNullable) + } + } + } + + test("replacing non-Delta table") { + withTable(tbl) { + val df = spark.range(10).withColumn("part", 'id / 2).withColumn("id2", 'id) + df.writeTo(tbl) + .tableProperty("delta.checkpointInterval", "5") + .tableProperty("some", "thing") + .partitionedBy('part) + .using("parquet") + .create() + + val e = intercept[AnalysisException] { + df.writeTo(tbl).using("delta").replace() + } + + assert(e.getMessage.contains("not a Delta table")) + } + } + + test("alter table add columns") { + withDeltaTable { df => + sql(s"ALTER TABLE $tbl ADD COLUMNS (id2 bigint)") + verifyTableMetadataAsync(df.withColumn("id2", 'id).schema.asNullable) + } + } + + protected def runAlterTableTests(f: (String, StructType) => Unit): Unit = { + // We set the default minWriterVersion to the version required to ADD/DROP CHECK constraints + // to prevent an automatic protocol upgrade (i.e. an implicit property change) when adding + // the CHECK constraint below. + withSQLConf( + "spark.databricks.delta.properties.defaults.minReaderVersion" -> "1", + "spark.databricks.delta.properties.defaults.minWriterVersion" -> "3") { + withDeltaTable { _ => + sql(s"ALTER TABLE $tbl SET TBLPROPERTIES ('some' = 'thing', 'other' = 'thing')") + sql(s"ALTER TABLE $tbl UNSET TBLPROPERTIES ('other')") + sql(s"ALTER TABLE $tbl ADD COLUMNS (id2 bigint, id3 bigint)") + sql(s"ALTER TABLE $tbl CHANGE COLUMN id2 id2 bigint FIRST") + sql(s"ALTER TABLE $tbl REPLACE COLUMNS (id3 bigint, id2 bigint, id bigint)") + sql(s"ALTER TABLE $tbl ADD CONSTRAINT id_3 CHECK (id3 > 10)") + sql(s"ALTER TABLE $tbl DROP CONSTRAINT id_3") + + val expectedSchema = StructType(Seq( + StructField("id3", LongType, true), + StructField("id2", LongType, true), + StructField("id", LongType, true)) + ) + + f(tbl, expectedSchema) + } + } + } + + /** + * Creates a table with the name `tbl` and executes a function that takes a representative + * DataFrame with the schema of the table. Performs cleanup of the table afterwards. + */ + protected def withDeltaTable(f: DataFrame => Unit): Unit = { + // Turn off async updates so that we don't update the catalog during table cleanup + disableUpdates { + withTable(tbl) { + withSQLConf(DeltaSQLConf.DELTA_UPDATE_CATALOG_ENABLED.key -> "true") { + sql(s"CREATE TABLE $tbl (id bigint) USING delta") + val df = spark.range(10) + verifyTableMetadata(df.schema.asNullable) + + f(df.toDF()) + } + } + } + } + + test("skip update when flag is not set") { + withDeltaTable(df => { + withSQLConf(DeltaSQLConf.DELTA_UPDATE_CATALOG_ENABLED.key -> "false") { + val propertiesAtV1 = getBaseProperties(snapshot) + sql(s"ALTER TABLE $tbl SET TBLPROPERTIES(some.key = 1)") + verifyTableMetadataAsync( + expectedSchema = df.schema.asNullable, + expectedProperties = propertiesAtV1) + } + }) + } + + + test(s"REORG TABLE does not perform catalog update") { + val tableName = "myTargetTable" + withDeltaTable { df => + sql(s"REORG TABLE $tbl APPLY (PURGE)") + verifyTableMetadataAsync(df.schema.asNullable) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVacuumSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVacuumSuite.scala new file mode 100644 index 00000000000..6a3d21c6e37 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVacuumSuite.scala @@ -0,0 +1,1130 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.util.Locale +import java.util.concurrent.TimeUnit + +import scala.collection.mutable.ArrayBuffer +import scala.language.implicitConversions + +import org.apache.spark.sql.delta.DeltaOperations.{Delete, Write} +import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile +import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile, Metadata, RemoveFile} +import org.apache.spark.sql.delta.commands.VacuumCommand +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.DeltaFileOperations +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import org.scalatest.GivenWhenThen + +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row, SaveMode, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.util.IntervalUtils +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.execution.metric.SQLMetrics.createMetric +import org.apache.spark.sql.functions.{col, expr, lit} +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.ManualClock + +trait DeltaVacuumSuiteBase extends QueryTest + with SharedSparkSession + with GivenWhenThen + with SQLTestUtils + with DeletionVectorsTestUtils + with DeltaTestUtilsForTempViews { + + protected def withEnvironment(f: (File, ManualClock) => Unit): Unit = { + withTempDir { file => + val clock = new ManualClock() + withSQLConf("spark.databricks.delta.retentionDurationCheck.enabled" -> "false") { + f(file, clock) + } + } + } + + protected def defaultTombstoneInterval: Long = { + DeltaConfigs.getMilliSeconds( + IntervalUtils.safeStringToInterval( + UTF8String.fromString(DeltaConfigs.TOMBSTONE_RETENTION.defaultValue))) + } + + /** Lists the data files in a given dir recursively. 
*/ + protected def listDataFiles(spark: SparkSession, tableDir: String): Seq[String] = { + val result = ArrayBuffer.empty[String] + // scalastyle:off deltahadoopconfiguration + val fs = FileSystem.get(spark.sessionState.newHadoopConf()) + // scalastyle:on deltahadoopconfiguration + val iterator = fs.listFiles(fs.makeQualified(new Path(tableDir)), true) + while (iterator.hasNext) { + val path = iterator.next().getPath.toUri.toString + if (path.endsWith(".parquet") && !path.contains(".checkpoint")) { + result += path + } + } + result.toSeq + } + + protected def assertNumFiles( + deltaLog: DeltaLog, + addFiles: Int, + addFilesWithDVs: Int, + dvFiles: Int, + dataFiles: Int): Unit = { + assert(deltaLog.update().allFiles.count() === addFiles) + assert(getFilesWithDeletionVectors(deltaLog).size === addFilesWithDVs) + assert(listDeletionVectors(deltaLog).size === dvFiles) + assert(listDataFiles(spark, deltaLog.dataPath.toString).size === dataFiles) + } + + implicit def fileToPathString(f: File): String = new Path(f.getAbsolutePath).toString + + trait Operation + /** + * Write a file to the given absolute or relative path. Could be inside or outside the Reservoir + * base path. The file can be committed to the action log to be tracked, or left out for deletion. + */ + case class CreateFile( + path: String, + commitToActionLog: Boolean, + partitionValues: Map[String, String] = Map.empty) extends Operation + /** Create a directory at the given path. */ + case class CreateDirectory(path: String) extends Operation + /** + * Logically deletes a file in the action log. Paths can be absolute or relative paths, and can + * point to files inside and outside a reservoir. + */ + case class LogicallyDeleteFile(path: String) extends Operation + /** Check that the given paths exist. */ + case class CheckFiles(paths: Seq[String], exist: Boolean = true) extends Operation + /** Garbage collect the reservoir. */ + case class GC( + dryRun: Boolean, + expectedDf: Seq[String], + retentionHours: Option[Double] = None) extends Operation + /** Garbage collect the reservoir. */ + case class ExecuteVacuumInScala( + deltaTable: io.delta.tables.DeltaTable, + expectedDf: Seq[String], + retentionHours: Option[Double] = None) extends Operation + /** Advance the time. */ + case class AdvanceClock(timeToAdd: Long) extends Operation + /** Execute SQL command */ + case class ExecuteVacuumInSQL( + identifier: String, + expectedDf: Seq[String], + retentionHours: Option[Long] = None, + dryRun: Boolean = false) extends Operation { + def sql: String = { + val retainStr = retentionHours.map { h => s"RETAIN $h HOURS"}.getOrElse("") + val dryRunStr = if (dryRun) "DRY RUN" else "" + s"VACUUM $identifier $retainStr $dryRunStr" + } + } + /** + * Expect a failure with the given exception type. Expect the given `msg` fragments as the error + * message. 
+ */ + case class ExpectFailure[T <: Throwable]( + action: Operation, + expectedError: Class[T], + msg: Seq[String]) extends Operation + + protected def createFile( + reservoirBase: String, + filePath: String, + file: File, + clock: ManualClock, + partitionValues: Map[String, String] = Map.empty): AddFile = { + FileUtils.write(file, "gibberish") + file.setLastModified(clock.getTimeMillis()) + createTestAddFile( + path = filePath, + partitionValues = partitionValues, + modificationTime = clock.getTimeMillis()) + } + + protected def gcTest(deltaLog: DeltaLog, clock: ManualClock)(actions: Operation*): Unit = { + import testImplicits._ + val basePath = deltaLog.dataPath.toString + val fs = new Path(basePath).getFileSystem(deltaLog.newDeltaHadoopConf()) + actions.foreach { + case CreateFile(path, commit, partitionValues) => + Given(s"*** Writing file to $path. Commit to log: $commit") + val sanitizedPath = new Path(path).toUri.toString + val file = new File( + fs.makeQualified(DeltaFileOperations.absolutePath(basePath, sanitizedPath)).toUri) + if (commit) { + if (!DeltaTableUtils.isDeltaTable(spark, new Path(basePath))) { + // initialize the table + deltaLog.startTransaction().commitManually() + } + val txn = deltaLog.startTransaction() + val action = createFile(basePath, sanitizedPath, file, clock, partitionValues) + txn.commit(Seq(action), Write(SaveMode.Append)) + } else { + createFile(basePath, path, file, clock) + } + case CreateDirectory(path) => + Given(s"*** Creating directory at $path") + val dir = new File(DeltaFileOperations.absolutePath(basePath, path).toUri) + assert(dir.mkdir(), s"Couldn't create directory at $path") + assert(dir.setLastModified(clock.getTimeMillis())) + case LogicallyDeleteFile(path) => + Given(s"*** Removing files") + val txn = deltaLog.startTransaction() + // scalastyle:off + val metrics = Map[String, SQLMetric]( + "numRemovedFiles" -> createMetric(sparkContext, "number of files removed."), + "numAddedFiles" -> createMetric(sparkContext, "number of files added."), + "numDeletedRows" -> createMetric(sparkContext, "number of rows deleted."), + "numCopiedRows" -> createMetric(sparkContext, "total number of rows.") + ) + txn.registerSQLMetrics(spark, metrics) + txn.commit(Seq(RemoveFile(path, Option(clock.getTimeMillis()))), + Delete(Seq(Literal.TrueLiteral))) + // scalastyle:on + case e: ExecuteVacuumInSQL => + Given(s"*** Executing SQL: ${e.sql}") + val qualified = e.expectedDf.map(p => fs.makeQualified(new Path(p)).toString) + val df = spark.sql(e.sql).as[String] + checkDatasetUnorderly(df, qualified: _*) + case CheckFiles(paths, exist) => + Given(s"*** Checking files exist=$exist") + paths.foreach { p => + val sp = new Path(p).toUri.toString + val f = new File(fs.makeQualified(DeltaFileOperations.absolutePath(basePath, sp)).toUri) + val res = if (exist) f.exists() else !f.exists() + assert(res, s"Expectation: exist=$exist, paths: $p") + } + case GC(dryRun, expectedDf, retention) => + Given("*** Garbage collecting Reservoir") + val result = VacuumCommand.gc(spark, deltaLog, dryRun, retention, clock = clock) + val qualified = expectedDf.map(p => fs.makeQualified(new Path(p)).toString) + checkDatasetUnorderly(result.as[String], qualified: _*) + case ExecuteVacuumInScala(deltaTable, expectedDf, retention) => + Given("*** Garbage collecting Reservoir using Scala") + val result = if (retention.isDefined) { + deltaTable.vacuum(retention.get) + } else { + deltaTable.vacuum() + } + if(expectedDf == Seq()) { + assert(result === spark.emptyDataFrame) + } else { + val 
qualified = expectedDf.map(p => fs.makeQualified(new Path(p)).toString) + checkDatasetUnorderly(result.as[String], qualified: _*) + } + case AdvanceClock(timeToAdd: Long) => + Given(s"*** Advancing clock by $timeToAdd millis") + clock.advance(timeToAdd) + case ExpectFailure(action, failure, msg) => + Given(s"*** Expecting failure of ${failure.getName} for action: $action") + val e = intercept[Exception](gcTest(deltaLog, clock)(action)) + assert(e.getClass === failure) + assert( + msg.forall(m => + e.getMessage.toLowerCase(Locale.ROOT).contains(m.toLowerCase(Locale.ROOT))), + e.getMessage + " didn't contain: " + msg.mkString("[", ", ", "]")) + } + } + + protected def vacuumSQLTest(tablePath: String, identifier: String): Unit = { + val deltaLog = DeltaLog.forTable(spark, tablePath) + val committedFile = "committedFile.txt" + val notCommittedFile = "notCommittedFile.txt" + + gcTest(deltaLog, new ManualClock())( + // Prepare the table with files with timestamp of epoch-time 0 (i.e. 01-01-1970 00:00) + CreateFile(committedFile, commitToActionLog = true), + CreateFile(notCommittedFile, commitToActionLog = false), + CheckFiles(Seq(committedFile, notCommittedFile)), + + // Dry run should return the not-committed file but not delete any files + ExecuteVacuumInSQL( + identifier, + expectedDf = Seq(new File(tablePath, notCommittedFile).toString), + dryRun = true), + CheckFiles(Seq(committedFile, notCommittedFile)), + + // Actual run should delete the not-committed file but keep the committed file + ExecuteVacuumInSQL(identifier, Seq(tablePath)), + CheckFiles(Seq(committedFile)), + CheckFiles(Seq(notCommittedFile), exist = false), // file ts older than default retention + + // Logically delete the file. + LogicallyDeleteFile(committedFile), + CheckFiles(Seq(committedFile)), + + // Vacuum with 0 retention should actually delete the file. + ExecuteVacuumInSQL(identifier, Seq(tablePath), Some(0)), + CheckFiles(Seq(committedFile), exist = false)) + } + + protected def vacuumScalaTest(deltaTable: io.delta.tables.DeltaTable, tablePath: String): Unit = { + val deltaLog = DeltaLog.forTable(spark, tablePath) + val committedFile = "committedFile.txt" + val notCommittedFile = "notCommittedFile.txt" + + gcTest(deltaLog, new ManualClock())( + // Prepare the table with files with timestamp of epoch-time 0 (i.e. 01-01-1970 00:00) + CreateFile(committedFile, commitToActionLog = true), + CreateFile(notCommittedFile, commitToActionLog = false), + CheckFiles(Seq(committedFile, notCommittedFile)), + + // Actual run should delete the not-committed file but keep the committed file + ExecuteVacuumInScala(deltaTable, Seq()), + CheckFiles(Seq(committedFile)), + CheckFiles(Seq(notCommittedFile), exist = false), // file ts older than default retention + + // Logically delete the file. + LogicallyDeleteFile(committedFile), + CheckFiles(Seq(committedFile)), + + // Vacuum with 0 retention should actually delete the file. + ExecuteVacuumInScala(deltaTable, Seq(), Some(0)), + CheckFiles(Seq(committedFile), exist = false)) + } + + /** + * Helper method to tell us if the given filePath exists. Thus, it can be used to detect if a + * file has been deleted.
+ */ + protected def pathExists(deltaLog: DeltaLog, filePath: String): Boolean = { + val fs = deltaLog.logPath.getFileSystem(deltaLog.newDeltaHadoopConf()) + fs.exists(DeltaFileOperations.absolutePath(deltaLog.dataPath.toString, filePath)) + } + + /** + * Helper method to get all of the [[AddCDCFile]]s that exist in the delta table + */ + protected def getCDCFiles(deltaLog: DeltaLog): Seq[AddCDCFile] = { + val changes = deltaLog.getChanges(startVersion = 0, failOnDataLoss = true) + changes.flatMap(_._2).collect { case a: AddCDCFile => a }.toList + } + + protected def testCDCVacuumForUpdateMerge(): Unit = { + withSQLConf( + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true", + DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED.key -> "false" + ) { + withTempDir { dir => + // create table - version 0 + spark.range(10) + .repartition(1) + .write + .format("delta") + .save(dir.getAbsolutePath) + val deltaTable = io.delta.tables.DeltaTable.forPath(dir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, dir.getAbsolutePath) + + // update table - version 1 + deltaTable.update(expr("id == 0"), Map("id" -> lit("11"))) + + // merge table - version 2 + deltaTable.as("target") + .merge( + spark.range(0, 12).toDF().as("src"), + "src.id = target.id") + .whenMatched() + .updateAll() + .whenNotMatched() + .insertAll() + .execute() + + val df1 = sql(s"SELECT * FROM delta.`${dir.getAbsolutePath}`").collect() + + val changes = getCDCFiles(deltaLog) + var numExpectedChangeFiles = 2 + + assert(changes.size === numExpectedChangeFiles) + + // vacuum will not delete the cdc files if they are within retention + sql(s"VACUUM '${dir.getAbsolutePath}' RETAIN 100 HOURS") + changes.foreach { change => + assert(pathExists(deltaLog, change.path)) // cdc file exists + } + + // vacuum will delete the cdc files if they are outside retention + sql(s"VACUUM '${dir.getAbsolutePath}' RETAIN 0 HOURS") + changes.foreach { change => + assert(!pathExists(deltaLog, change.path)) // cdc file has been removed + } + + // try reading the table + checkAnswer(sql(s"SELECT * FROM delta.`${dir.getAbsolutePath}`"), df1) + + // try reading cdc data + val e = intercept[SparkException] { + spark.read + .format("delta") + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", 1) + .option("endingVersion", 2) + .load(dir.getAbsolutePath) + .count() + } + // QueryExecutionErrors.readCurrentFileNotFoundError + var expectedErrorMessage = "It is possible the underlying files have been updated." 
+ assert(e.getMessage.contains(expectedErrorMessage)) + } + } + } + + protected def testCDCVacuumForTombstones(): Unit = { + withSQLConf( + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true", + DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED.key -> "false" + ) { + withTempDir { dir => + // create table - version 0 + spark.range(0, 10, 1, 1) + .withColumn("part", col("id") % 2) + .write + .format("delta") + .partitionBy("part") + .save(dir.getAbsolutePath) + val deltaTable = io.delta.tables.DeltaTable.forPath(dir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, dir.getAbsolutePath) + + // create version 1 - delete single row should generate one cdc file + deltaTable.delete(col("id") === lit(9)) + val changes = getCDCFiles(deltaLog) + assert(changes.size === 1) + val cdcPath = changes.head.path + assert(pathExists(deltaLog, cdcPath)) + val df1 = sql(s"SELECT * FROM delta.`${dir.getAbsolutePath}`").collect() + + // vacuum will not delete the cdc files if they are within retention + sql(s"VACUUM '${dir.getAbsolutePath}' RETAIN 100 HOURS") + assert(pathExists(deltaLog, cdcPath)) // cdc path exists + + // vacuum will delete the cdc files when they are outside retention + // one cdc file and one RemoveFile should be deleted by vacuum + sql(s"VACUUM '${dir.getAbsolutePath}' RETAIN 0 HOURS") + assert(!pathExists(deltaLog, cdcPath)) // cdc file is removed + + // try reading the table + checkAnswer(sql(s"SELECT * FROM delta.`${dir.getAbsolutePath}`"), df1) + + // create version 2 - partition delete - does not create new cdc files + deltaTable.delete(col("part") === lit(0)) + + assert(getCDCFiles(deltaLog).size === 1) // still just the one cdc file from before. + + // try reading cdc data + val e = intercept[SparkException] { + spark.read + .format("delta") + .option(DeltaOptions.CDC_READ_OPTION, "true") + .option("startingVersion", 1) + .option("endingVersion", 2) + .load(dir.getAbsolutePath) + .count() + } + // QueryExecutionErrors.readCurrentFileNotFoundError + var expectedErrorMessage = "It is possible the underlying files have been updated." 
+ assert(e.getMessage.contains(expectedErrorMessage)) + } + } + } +} + +class DeltaVacuumSuite + extends DeltaVacuumSuiteBase with DeltaSQLCommandTest { + import testImplicits._ + + override def sparkConf: SparkConf = { + super.sparkConf.set("spark.sql.sources.parallelPartitionDiscovery.parallelism", "2") + } + + testQuietly("basic case - SQL command on path-based tables with direct 'path'") { + withEnvironment { (tempDir, _) => + vacuumSQLTest(tablePath = tempDir.getAbsolutePath, identifier = s"'$tempDir'") + } + } + + testQuietly("basic case - SQL command on path-based table with delta.`path`") { + withEnvironment { (tempDir, _) => + vacuumSQLTest(tablePath = tempDir.getAbsolutePath, identifier = s"delta.`$tempDir`") + } + } + + testQuietly("basic case - SQL command on name-based table") { + val tableName = "deltaTable" + withEnvironment { (_, _) => + withTable(tableName) { + import testImplicits._ + spark.emptyDataset[Int].write.format("delta").saveAsTable(tableName) + val tablePath = + new File(spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).location) + vacuumSQLTest(tablePath, tableName) + } + } + } + + testQuietlyWithTempView("basic case - SQL command on temp view not supported") { isSQLTempView => + val tableName = "deltaTable" + val viewName = "v" + withEnvironment { (_, _) => + withTable(tableName) { + import testImplicits._ + spark.emptyDataset[Int].write.format("delta").saveAsTable(tableName) + createTempViewFromTable(tableName, isSQLTempView) + val tablePath = new File( + spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).location) + val e = intercept[AnalysisException] { + vacuumSQLTest(tablePath, viewName) + } + assert( + e.getMessage.contains("v is a temp view. 'VACUUM' expects a table.")) + } + } + } + + test("basic case - Scala on path-based table") { + withEnvironment { (tempDir, _) => + import testImplicits._ + spark.emptyDataset[Int].write.format("delta").save(tempDir.getAbsolutePath) + val deltaTable = io.delta.tables.DeltaTable.forPath(tempDir.getAbsolutePath) + vacuumScalaTest(deltaTable, tempDir.getAbsolutePath) + } + } + + test("basic case - Scala on name-based table") { + val tableName = "deltaTable" + withEnvironment { (tempDir, _) => + withTable(tableName) { + // Initialize the table so that we can create the DeltaTable object + import testImplicits._ + spark.emptyDataset[Int].write.format("delta").saveAsTable(tableName) + val deltaTable = io.delta.tables.DeltaTable.forName(tableName) + val tablePath = + new File(spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).location) + vacuumScalaTest(deltaTable, tablePath) + } + } + } + + test("don't delete data in a non-reservoir") { + withEnvironment { (tempDir, clock) => + val deltaLog = DeltaLog.forTable(spark, tempDir, clock) + gcTest(deltaLog, clock)( + CreateFile("file1.txt", commitToActionLog = false), + CreateDirectory("abc"), + ExpectFailure( + GC(dryRun = false, Nil), classOf[IllegalArgumentException], Seq("no state defined")) + ) + } + } + + test("invisible files and dirs") { + withEnvironment { (tempDir, clock) => + val deltaLog = DeltaLog.forTable(spark, tempDir, clock) + gcTest(deltaLog, clock)( + CreateFile("file1.txt", commitToActionLog = true), + CreateFile("_hidden_dir/000001.text", commitToActionLog = false), + CreateFile(".hidden.txt", commitToActionLog = false), + AdvanceClock(defaultTombstoneInterval + 1000), + GC(dryRun = false, Seq(tempDir)), + CheckFiles(Seq( + "file1.txt", "_delta_log", "_hidden_dir", "_hidden_dir/000001.text", 
".hidden.txt")) + ) + } + } + + test("partition column name starting with underscore") { + // We should be able to see inside partition directories to GC them, even if they'd normally + // be considered invisible because of their name. + withEnvironment { (tempDir, clock) => + val deltaLog = DeltaLog.forTable(spark, tempDir, clock) + val txn = deltaLog.startTransaction() + val schema = new StructType().add("_underscore_col_", IntegerType).add("n", IntegerType) + val metadata = + Metadata(schemaString = schema.json, partitionColumns = Seq("_underscore_col_")) + txn.commit(metadata :: Nil, DeltaOperations.CreateTable(metadata, isManaged = true)) + gcTest(deltaLog, clock)( + CreateFile("file1.txt", commitToActionLog = true, Map("_underscore_col_" -> "10")), + CreateFile("_underscore_col_=10/test.txt", true, Map("_underscore_col_" -> "10")), + CheckFiles(Seq("file1.txt", "_underscore_col_=10")), + LogicallyDeleteFile("_underscore_col_=10/test.txt"), + AdvanceClock(defaultTombstoneInterval + 1000), + GC(dryRun = false, Seq(tempDir)), + CheckFiles(Seq("file1.txt")), + CheckFiles(Seq("_underscore_col_=10/test.txt"), exist = false) + ) + } + } + + test("multiple levels of empty directory deletion") { + withEnvironment { (tempDir, clock) => + val deltaLog = DeltaLog.forTable(spark, tempDir, clock) + gcTest(deltaLog, clock)( + CreateFile("file1.txt", commitToActionLog = true), + CreateFile("abc/def/file2.txt", commitToActionLog = false), + AdvanceClock(defaultTombstoneInterval + 1000), + GC(dryRun = false, Seq(tempDir)), + CheckFiles(Seq("file1.txt", "abc", "abc/def")), + CheckFiles(Seq("abc/def/file2.txt"), exist = false), + GC(dryRun = false, Seq(tempDir)), + // we need two GCs to guarantee the deletion of the directories + GC(dryRun = false, Seq(tempDir)), + CheckFiles(Seq("file1.txt")), + CheckFiles(Seq("abc", "abc/def"), exist = false) + ) + } + } + + test("gc doesn't delete base path") { + withEnvironment { (tempDir, clock) => + val deltaLog = DeltaLog.forTable(spark, tempDir, clock) + gcTest(deltaLog, clock)( + CreateFile("file1.txt", commitToActionLog = true), + AdvanceClock(100), + LogicallyDeleteFile("file1.txt"), + AdvanceClock(defaultTombstoneInterval + 1000), + GC(dryRun = false, Seq(tempDir.toString)), + CheckFiles(Seq("file1.txt"), exist = false), + GC(dryRun = false, Seq(tempDir.toString)) // shouldn't throw an error + ) + } + } + + testQuietly("correctness test") { + withEnvironment { (tempDir, clock) => + + val reservoirDir = new File(tempDir.getAbsolutePath, "reservoir") + assert(reservoirDir.mkdirs()) + val externalDir = new File(tempDir.getAbsolutePath, "external") + assert(externalDir.mkdirs()) + val deltaLog = DeltaLog.forTable(spark, reservoirDir, clock) + + val externalFile = new File(externalDir, "file4.txt").getAbsolutePath + + gcTest(deltaLog, clock)( + // Create initial state + CreateFile("file1.txt", commitToActionLog = true), + CreateDirectory("abc"), + CreateFile("abc/file2.txt", commitToActionLog = true), + CheckFiles(Seq("file1.txt", "abc", "abc/file2.txt")), + + // Nothing should be deleted here, since we didn't logically delete any file + AdvanceClock(defaultTombstoneInterval + 1000), + GC(dryRun = false, Seq(reservoirDir.toString)), + CheckFiles(Seq("file1.txt", "abc", "abc/file2.txt")), + + // Create an untracked file + CreateFile("file3.txt", commitToActionLog = false), + CheckFiles(Seq("file3.txt")), + GC(dryRun = false, Seq(reservoirDir.toString)), + CheckFiles(Seq("file3.txt")), + AdvanceClock(defaultTombstoneInterval - 1000), // file is still new + 
GC(dryRun = false, Seq(reservoirDir.toString)), + CheckFiles(Seq("file3.txt")), + AdvanceClock(2000), + GC(dryRun = true, Seq(new File(reservoirDir, "file3.txt").toString)), + // nothing should be deleted + CheckFiles(Seq("file1.txt", "abc", "abc/file2.txt", "file3.txt")), + GC(dryRun = false, Seq(reservoirDir.toString)), // file3.txt should be deleted + CheckFiles(Seq("file1.txt", "abc", "abc/file2.txt")), + CheckFiles(Seq("file3.txt"), exist = false), + + // Verify tombstones + LogicallyDeleteFile("abc/file2.txt"), + GC(dryRun = false, Seq(reservoirDir.toString)), + CheckFiles(Seq("file1.txt", "abc", "abc/file2.txt")), + AdvanceClock(defaultTombstoneInterval - 1000), + GC(dryRun = false, Seq(reservoirDir.toString)), + CheckFiles(Seq("file1.txt", "abc", "abc/file2.txt")), + AdvanceClock(2000), // tombstone should expire + GC(dryRun = false, Seq(reservoirDir.toString)), + CheckFiles(Seq("file1.txt", "abc")), + CheckFiles(Seq("abc/file2.txt"), exist = false), + GC(dryRun = false, Seq(reservoirDir.toString)), // Second gc should clear empty directory + CheckFiles(Seq("file1.txt")), + CheckFiles(Seq("abc"), exist = false), + + // Make sure that files outside the reservoir are not affected + CreateFile(externalFile, commitToActionLog = true), + AdvanceClock(100), + CheckFiles(Seq("file1.txt", externalFile)), + LogicallyDeleteFile(externalFile), + AdvanceClock(defaultTombstoneInterval * 2), + CheckFiles(Seq("file1.txt", externalFile)) + ) + } + } + + test("parallel file delete") { + withEnvironment { (tempDir, clock) => + val deltaLog = DeltaLog.forTable(spark, tempDir, clock) + withSQLConf("spark.databricks.delta.vacuum.parallelDelete.enabled" -> "true") { + gcTest(deltaLog, clock)( + CreateFile("file1.txt", commitToActionLog = true), + CreateFile("file2.txt", commitToActionLog = true), + LogicallyDeleteFile("file1.txt"), + CheckFiles(Seq("file1.txt", "file2.txt")), + AdvanceClock(defaultTombstoneInterval + 1000), + GC(dryRun = false, Seq(tempDir)), + CheckFiles(Seq("file1.txt"), exist = false), + CheckFiles(Seq("file2.txt")), + GC(dryRun = false, Seq(tempDir)), // shouldn't throw an error with no files to delete + CheckFiles(Seq("file2.txt")) + ) + } + } + } + + test("retention duration must be greater than 0") { + withEnvironment { (tempDir, clock) => + val deltaLog = DeltaLog.forTable(spark, tempDir, clock) + gcTest(deltaLog, clock)( + CreateFile("file1.txt", commitToActionLog = true), + CheckFiles(Seq("file1.txt")), + ExpectFailure( + GC(false, Seq(tempDir), Some(-2)), + classOf[IllegalArgumentException], + Seq("Retention", "less than", "0")) + ) + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tempDir.getAbsolutePath) + gcTest(deltaLog, clock)( + CreateFile("file2.txt", commitToActionLog = true), + CheckFiles(Seq("file2.txt")), + ExpectFailure( + ExecuteVacuumInScala(deltaTable, Seq(), Some(-2)), + classOf[IllegalArgumentException], + Seq("Retention", "less than", "0")) + ) + } + } + + test("deleting directories") { + withEnvironment { (tempDir, clock) => + val deltaLog = DeltaLog.forTable(spark, tempDir, clock) + gcTest(deltaLog, clock)( + CreateFile("abc/def/file1.txt", commitToActionLog = true), + CreateFile("abc/def/file2.txt", commitToActionLog = true), + CreateDirectory("ghi"), + CheckFiles(Seq("abc", "abc/def", "ghi")), + GC(dryRun = true, Seq(new File(tempDir, "ghi"))), + GC(dryRun = false, Seq(tempDir)), + CheckFiles(Seq("abc", "abc/def")), + CheckFiles(Seq("ghi"), exist = false) + ) + } + } + + test("deleting files with special characters in path") { + 
withEnvironment { (tempDir, clock) => + val deltaLog = DeltaLog.forTable(spark, tempDir, clock) + gcTest(deltaLog, clock)( + CreateFile("abc def/#1/file1.txt", commitToActionLog = true), + CreateFile("abc def/#1/file2.txt", commitToActionLog = false), + CheckFiles(Seq("abc def", "abc def/#1")), + AdvanceClock(defaultTombstoneInterval + 1000), + GC(dryRun = true, Seq(new File(tempDir, "abc def/#1/file2.txt"))), + GC(dryRun = false, Seq(tempDir)), + CheckFiles(Seq("abc def/#1", "abc def/#1/file1.txt")), + CheckFiles(Seq("abc def/#1/file2.txt"), exist = false) + ) + } + } + + testQuietly("additional retention duration check with vacuum command") { + withEnvironment { (tempDir, clock) => + val deltaLog = DeltaLog.forTable(spark, tempDir, clock) + withSQLConf("spark.databricks.delta.retentionDurationCheck.enabled" -> "true") { + gcTest(deltaLog, clock)( + CreateFile("file1.txt", commitToActionLog = true), + CheckFiles(Seq("file1.txt")), + ExpectFailure( + GC(false, Nil, Some(0)), + classOf[IllegalArgumentException], + Seq("spark.databricks.delta.retentionDurationCheck.enabled = false", "168 hours")) + ) + } + + gcTest(deltaLog, clock)( + CreateFile("file2.txt", commitToActionLog = true), + CheckFiles(Seq("file2.txt")), + GC(false, Seq(tempDir.toString), Some(0)) + ) + } + } + + test("vacuum for a partition path") { + withEnvironment { (tempDir, _) => + import testImplicits._ + val path = tempDir.getCanonicalPath + Seq((1, "a"), (2, "b")).toDF("v1", "v2") + .write + .format("delta") + .partitionBy("v2") + .save(path) + + val ex = intercept[AnalysisException] { + sql(s"vacuum '$path/v2=a' retain 0 hours") + } + assert( + ex.getMessage.contains( + s"`$path/v2=a` is not a Delta table. VACUUM is only supported for Delta tables.")) + } + } + + test(s"vacuum table with DVs and zero retention policy throws exception by default") { + val targetDF = spark.range(0, 100, 1, 2) + .withColumn("value", col("id")) + + withTempDeltaTable(targetDF, enableDVs = true) { (targetTable, targetLog) => + // Add some DVs. + targetTable().delete("id < 10") + val e = intercept[IllegalArgumentException] { + spark.sql(s"VACUUM delta.`${targetLog.dataPath}` RETAIN 0 HOURS") + } + assert(e.getMessage.contains( + "Are you sure you would like to vacuum files with such a low retention period?")) + } + } + + test(s"vacuum after purge with zero retention policy") { + val tableName = "testTable" + withDeletionVectorsEnabled() { + withSQLConf( + DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED.key -> "false") { + withTable(tableName) { + // Create a Delta Table with 5 files of 10 rows, and delete half rows from first 4 files. 
+ spark.range(0, 50, step = 1, numPartitions = 5) + .write.format("delta").saveAsTable(tableName) + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tableName)) + spark.sql(s"DELETE from $tableName WHERE ID % 2 = 0 and ID < 40") + assertNumFiles(deltaLog, addFiles = 5, addFilesWithDVs = 4, dvFiles = 1, dataFiles = 5) + + purgeDVs(tableName) + + assertNumFiles(deltaLog, addFiles = 5, addFilesWithDVs = 0, dvFiles = 1, dataFiles = 9) + spark.sql(s"VACUUM $tableName RETAIN 0 HOURS") + assertNumFiles(deltaLog, addFiles = 5, addFilesWithDVs = 0, dvFiles = 0, dataFiles = 5) + + checkAnswer( + spark.read.table(tableName), + Seq.range(0, 50).filterNot(x => x < 40 && x % 2 == 0).toDF) + } + } + } + } + + test("hidden metadata dir") { + withEnvironment { (tempDir, clock) => + spark.emptyDataset[Int].write.format("delta").save(tempDir) + val deltaLog = DeltaLog.forTable(spark, tempDir, clock) + gcTest(deltaLog, clock)( + CreateDirectory("metadata"), + CreateFile("metadata/file1.json", false), + + AdvanceClock(defaultTombstoneInterval + 1000), + GC(dryRun = false, Seq(tempDir)), + CheckFiles(Seq("metadata", "metadata/file1.json")) + ) + } + } + + // Helper method to remove the DVs in Delta table and rewrite the data files + def purgeDVs(tableName: String): Unit = { + withSQLConf( + // Set the max file size to low so that we always rewrite the single file without DVs + // and not combining with other data files. + DeltaSQLConf.DELTA_OPTIMIZE_MAX_FILE_SIZE.key -> "2") { + spark.sql(s"REORG TABLE $tableName APPLY (PURGE)") + } + } + + test(s"vacuum after purging deletion vectors") { + val tableName = "testTable" + val clock = new ManualClock() + withDeletionVectorsEnabled() { + withSQLConf( + DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED.key -> "false") { + withTable(tableName) { + // Create Delta table with 5 files of 10 rows. + spark.range(0, 50, step = 1, numPartitions = 5) + .write.format("delta").saveAsTable(tableName) + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tableName)) + assertNumFiles(deltaLog, addFiles = 5, addFilesWithDVs = 0, dvFiles = 0, dataFiles = 5) + + // Delete 1 row from each file. DVs will be packed to one DV file. + val deletedRows1 = Seq(0, 10, 20, 30, 40) + val deletedRowsStr1 = deletedRows1.mkString("(", ",", ")") + spark.sql(s"DELETE FROM $tableName WHERE id IN $deletedRowsStr1") + val timestampV1 = deltaLog.update().timestamp + assertNumFiles(deltaLog, addFiles = 5, addFilesWithDVs = 5, dvFiles = 1, dataFiles = 5) + + // Delete all rows from the first file. An ephemeral DV will still be created. + Thread.sleep(1000) // Ensure it's been at least 1000 ms since V1 + spark.sql(s"DELETE FROM $tableName WHERE id < 10") + val timestampV2 = deltaLog.update().timestamp + assertNumFiles(deltaLog, addFiles = 4, addFilesWithDVs = 4, dvFiles = 2, dataFiles = 5) + val expectedAnswerV2 = Seq.range(0, 50).filterNot(deletedRows1.contains).filterNot(_ < 10) + + // Delete 1 more row from each file. + Thread.sleep(1000) // Ensure it's been at least 1000 ms since V2 + val deletedRows2 = Seq(11, 21, 31, 41) + val deletedRowsStr2 = deletedRows2.mkString("(", ",", ")") + spark.sql(s"DELETE FROM $tableName WHERE id IN $deletedRowsStr2") + val timestampV3 = deltaLog.update().timestamp + assertNumFiles(deltaLog, addFiles = 4, addFilesWithDVs = 4, dvFiles = 3, dataFiles = 5) + val expectedAnswerV3 = expectedAnswerV2.filterNot(deletedRows2.contains) + + // Delete DVs by rewriting the data files with DVs. 
+ Thread.sleep(1000) // Ensure it's been at least 1000 ms since V3 + purgeDVs(tableName) + + val numFilesAfterPurge = 4 + val timestampV4 = deltaLog.update().timestamp + assertNumFiles(deltaLog, addFiles = numFilesAfterPurge, addFilesWithDVs = 0, dvFiles = 3, + dataFiles = 9) + + // Run VACUUM with nothing expired. It should not delete anything. + clock.setTime(System.currentTimeMillis()) + VacuumCommand.gc(spark, deltaLog, retentionHours = Some(1), clock = clock, dryRun = false) + assertNumFiles(deltaLog, addFiles = numFilesAfterPurge, addFilesWithDVs = 0, dvFiles = 3, + dataFiles = 9) + + // Run VACUUM @ V1. + // We need to add 1000 ms for local filesystems that only write modificationTimes to the s + clock.setTime(timestampV1 + TimeUnit.HOURS.toMillis(1) + 1000) + VacuumCommand.gc(spark, deltaLog, retentionHours = Some(1), clock = clock, dryRun = false) + assertNumFiles(deltaLog, addFiles = numFilesAfterPurge, addFilesWithDVs = 0, dvFiles = 3, + dataFiles = 9) + + // Run VACUUM @ V2. It should delete the ephemeral DV and the removed Parquet file. + clock.setTime(timestampV2 + TimeUnit.HOURS.toMillis(1) + 1000) + VacuumCommand.gc(spark, deltaLog, retentionHours = Some(1), clock = clock, dryRun = false) + assertNumFiles(deltaLog, addFiles = numFilesAfterPurge, addFilesWithDVs = 0, dvFiles = 2, + dataFiles = 8) + checkAnswer( + spark.sql(s"SELECT * FROM $tableName VERSION AS OF 2"), expectedAnswerV2.toDF) + + // Run VACUUM @ V3. It should delete the persistent DVs from V1. + clock.setTime(timestampV3 + TimeUnit.HOURS.toMillis(1) + 1000) + VacuumCommand.gc(spark, deltaLog, retentionHours = Some(1), clock = clock, dryRun = false) + assertNumFiles(deltaLog, addFiles = numFilesAfterPurge, addFilesWithDVs = 0, dvFiles = 1, + dataFiles = 8) + checkAnswer( + spark.sql(s"SELECT * FROM $tableName VERSION AS OF 3"), expectedAnswerV3.toDF) + + // Run VACUUM @ V4. It should delete the Parquet files and DVs of V3. + clock.setTime(timestampV4 + TimeUnit.HOURS.toMillis(1) + 1000) + VacuumCommand.gc(spark, deltaLog, retentionHours = Some(1), clock = clock, dryRun = false) + assertNumFiles(deltaLog, addFiles = numFilesAfterPurge, addFilesWithDVs = 0, dvFiles = 0, + dataFiles = 4) + checkAnswer( + spark.sql(s"SELECT * FROM $tableName VERSION AS OF 4"), expectedAnswerV3.toDF) + + // Run VACUUM with zero retention period. It should not delete anything. + clock.setTime(timestampV4 + TimeUnit.HOURS.toMillis(1) + 1000) + VacuumCommand.gc(spark, deltaLog, retentionHours = Some(0), clock = clock, dryRun = false) + assertNumFiles(deltaLog, addFiles = numFilesAfterPurge, addFilesWithDVs = 0, dvFiles = 0, + dataFiles = 4) + + // Last version should still be readable. + checkAnswer(spark.sql(s"SELECT * FROM $tableName"), expectedAnswerV3.toDF) + } + } + } + } + + for (partitioned <- DeltaTestUtils.BOOLEAN_DOMAIN) { + test(s"delete persistent deletion vectors - partitioned = $partitioned") { + val targetDF = spark.range(0, 100, 1, 10).toDF + .withColumn("v", col("id")) + .withColumn("partCol", lit(0)) + val partitionBy = if (partitioned) Seq("partCol") else Seq.empty + withSQLConf( + DeltaSQLConf.DELETION_VECTOR_PACKING_TARGET_SIZE.key -> "0", + DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED.key -> "false") { + withDeletionVectorsEnabled() { + withTempDeltaTable( + targetDF, + partitionBy = partitionBy) { (targetTable, targetLog) => + val targetDir = targetLog.dataPath + + // Add a DV to all files and check that DVs are not deleted. 
+ targetTable().delete("id % 2 == 0") + + assert(listDeletionVectors(targetLog).size == 10) + targetTable().vacuum(0) + assert(listDeletionVectors(targetLog).size == 10) + checkAnswer(sql(s"select count(*) from delta.`$targetDir`"), Row(50)) + + // Update the DV of the first file by deleting two rows and check that previous DV is + // deleted. + targetTable().delete("id < 10 AND id % 3 == 0") + + assert(listDeletionVectors(targetLog).size == 11) + targetTable().vacuum(0) + assert(listDeletionVectors(targetLog).size == 10) + checkAnswer(sql(s"select count(*) from delta.`$targetDir`"), Row(48)) + + // Delete all rows in first 5 files and check that DVs are not deleted due to + // the retention period, but deleted after that. + targetTable().delete("id < 50") + + assert(listDeletionVectors(targetLog).size == 15) + targetTable().vacuum(10) + assert(listDeletionVectors(targetLog).size == 15) + targetTable().vacuum(0) + assert(listDeletionVectors(targetLog).size == 5) + checkAnswer(sql(s"select count(*) from delta.`$targetDir`"), Row(25)) + } + } + } + } + } + + test("vacuum a non-existent path and a non Delta table") { + def assertNotADeltaTableException(path: String): Unit = { + for (table <- Seq(s"'$path'", s"delta.`$path`")) { + val e = intercept[AnalysisException] { + sql(s"vacuum $table") + } + assert(e.getMessage.contains("is not a Delta table.")) + } + } + withTempPath { tempDir => + assert(!tempDir.exists()) + assertNotADeltaTableException(tempDir.getCanonicalPath) + } + withTempPath { tempDir => + spark.range(1, 10).write.parquet(tempDir.getCanonicalPath) + assertNotADeltaTableException(tempDir.getCanonicalPath) + } + } + + test("vacuum for cdc - update/merge") { + testCDCVacuumForUpdateMerge() + } + + test("vacuum for cdc - delete tombstones") { + testCDCVacuumForTombstones() + } + + private def getFromHistory(history: DataFrame, key: String, pos: Integer): Map[String, String] = { + val op = history.select(key).take(pos + 1) + if (pos == 0) { + op.head.getMap(0).asInstanceOf[Map[String, String]] + } else { + op.tail.head.getMap(0).asInstanceOf[Map[String, String]] + } + } + + private def testEventLogging( + isDryRun: Boolean, + loggingEnabled: Boolean, + retentionHours: Long, + timeGapHours: Long): Unit = { + + test(s"vacuum event logging dryRun=$isDryRun loggingEnabled=$loggingEnabled" + + s" retentionHours=$retentionHours timeGap=$timeGapHours") { + withSQLConf(DeltaSQLConf.DELTA_VACUUM_LOGGING_ENABLED.key -> loggingEnabled.toString) { + + withEnvironment { (dir, clock) => + spark.range(2).write.format("delta").save(dir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, dir, clock) + val expectedReturn = if (isDryRun) { + // dry run returns files that will be deleted + Seq(new Path(dir.getAbsolutePath, "file1.txt").toString) + } else { + Seq(dir.getAbsolutePath) + } + + gcTest(deltaLog, clock)( + CreateFile("file1.txt", commitToActionLog = true), + CreateFile("file2.txt", commitToActionLog = true), + LogicallyDeleteFile("file1.txt"), + AdvanceClock(timeGapHours * 1000 * 60 * 60), + GC(dryRun = isDryRun, expectedReturn, Some(retentionHours)) + ) + val deltaTable = io.delta.tables.DeltaTable.forPath(deltaLog.dataPath.toString) + val history = deltaTable.history() + if (isDryRun || !loggingEnabled) { + // We do not record stats when logging is disabled or dryRun + assert(history.select("operation").head() == Row("DELETE")) + } else { + assert(history.select("operation").head() == Row("VACUUM END")) + assert(history.select("operation").collect()(1) == Row("VACUUM START")) + + 
val operationParamsBegin = getFromHistory(history, "operationParameters", 1) + val operationParamsEnd = getFromHistory(history, "operationParameters", 0) + val operationMetricsBegin = getFromHistory(history, "operationMetrics", 1) + val operationMetricsEnd = getFromHistory(history, "operationMetrics", 0) + + val filesDeleted = if (retentionHours > timeGapHours) { 0 } else { 1 } + assert(operationParamsBegin("retentionCheckEnabled") === "false") + assert(operationMetricsBegin("numFilesToDelete") === filesDeleted.toString) + assert(operationMetricsBegin("sizeOfDataToDelete") === (filesDeleted * 9).toString) + assert( + operationParamsBegin("specifiedRetentionMillis") === + (retentionHours * 60 * 60 * 1000).toString) + assert( + operationParamsBegin("defaultRetentionMillis") === + DeltaLog.tombstoneRetentionMillis(deltaLog.snapshot.metadata).toString) + + assert(operationParamsEnd === Map("status" -> "COMPLETED")) + assert(operationMetricsEnd === Map("numDeletedFiles" -> filesDeleted.toString, + "numVacuumedDirectories" -> "1")) + } + } + } + } + } + + testEventLogging( + isDryRun = false, + loggingEnabled = true, + retentionHours = 5, + timeGapHours = 10 + ) + + testEventLogging( + isDryRun = true, // dry run will not record the vacuum + loggingEnabled = true, + retentionHours = 5, + timeGapHours = 10 + ) + + testEventLogging( + isDryRun = false, + loggingEnabled = false, + retentionHours = 5, + timeGapHours = 0 + ) + + testEventLogging( + isDryRun = false, + loggingEnabled = true, + retentionHours = 20, // vacuum will not delete any files + timeGapHours = 10 + ) +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaWithNewTransactionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaWithNewTransactionSuite.scala new file mode 100644 index 00000000000..85ae9fafdbf --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaWithNewTransactionSuite.scala @@ -0,0 +1,349 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.{DataFrame, Dataset, QueryTest} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.{ThreadUtils, Utils} + +trait DeltaWithNewTransactionSuiteBase extends QueryTest + with SharedSparkSession + with DeltaColumnMappingTestUtils + with DeltaSQLCommandTest { + + /** + * Test whether `withNewTransaction` captures all delta read made within it and correctly + * detects conflicts in transaction table and provides snapshot isolation for other table reads. + * + * The order in which the given thunks are executed is as follows. + * - Txn started using `withNewTransaction`. The following are executed while the txn is active. + * - currentThreadReadOp - Read operations performed in current thread. 
+ * - concurrentUpdateOp - Update operations performed in different thread to + * simulate concurrent modification. This is synchronously completed + * before moving on. + * - currentThreadCommitOperation - Attempt to commit changes in the txn. + */ + protected def testWithNewTransaction( + name: String, + partitionedTableKeys: Seq[Int], + preTxnSetup: DeltaLog => Unit = null, + currentThreadReadOp: DataFrame => Unit, + concurrentUpdateOp: String => Unit, + currentThreadCommitOperation: OptimisticTransaction => Unit, + shouldFail: Boolean, + confs: Map[String, String] = Map.empty, + partitionTablePath: String = Utils.createTempDir().getAbsolutePath): Unit = { + + val tableName = "NewTransactionTest" + require(currentThreadCommitOperation != null) + + import testImplicits._ + + test(s"withNewTransaction - $name") { + withSQLConf(confs.toSeq: _*) { withTable(tableName) { + sql(s"CREATE TABLE NewTransactionTest(key int, value int) " + + s"USING delta partitioned by (key) LOCATION '$partitionTablePath'") + partitionedTableKeys.toDS.select('value as "key", 'value) + .write.mode("append").partitionBy("key").format("delta").saveAsTable(tableName) + + val log = DeltaLog.forTable(spark, partitionTablePath) + assert(OptimisticTransaction.getActive().isEmpty, "active txn already set") + + if (preTxnSetup != null) preTxnSetup(log) + + log.withNewTransaction { txn => + assert(OptimisticTransaction.getActive().nonEmpty, "active txn not set") + + currentThreadReadOp(spark.table(tableName)) + + ThreadUtils.runInNewThread(s"withNewTransaction test - $name") { + concurrentUpdateOp(tableName) + } + + if (shouldFail) { + intercept[DeltaConcurrentModificationException] { currentThreadCommitOperation(txn) } + } else { + currentThreadCommitOperation(txn) + } + } + assert(OptimisticTransaction.getActive().isEmpty, "active txn not cleared") + }} + } + } + + testWithNewTransaction( + name = "capture reads on txn table with no filters (i.e. full scan)", + partitionedTableKeys = Seq(1, 2, 3), + currentThreadReadOp = txnTable => { + txnTable.count() + }, + concurrentUpdateOp = txnTableName => { + sql(s"DELETE FROM $txnTableName WHERE key = 1") + }, + currentThreadCommitOperation = txn => { + txn.commit(Seq.empty, DeltaOperations.ManualUpdate) + }, + shouldFail = true) + + testWithNewTransaction( + name = "capture reads on txn table with partition filter + conflicting concurrent updates", + partitionedTableKeys = Seq(1, 2, 3), + currentThreadReadOp = txnTable => { + txnTable.filter("key == 1").count() + }, + concurrentUpdateOp = txnTableName => { + sql(s"DELETE FROM $txnTableName WHERE key = 1") + }, + currentThreadCommitOperation = txn => { + // Concurrent delete op touches the same partition as those read in the active txn. 
+ txn.commit(Seq.empty, DeltaOperations.ManualUpdate) + }, + shouldFail = true) + + testWithNewTransaction( + name = "snapshot isolation for query that can leverage metadata query optimization", + partitionedTableKeys = Seq(1, 2, 3), + currentThreadReadOp = txnTable => { + txnTable.count() + }, + concurrentUpdateOp = txnTableName => { + sql(s"DELETE FROM $txnTableName WHERE key = 1") + }, + currentThreadCommitOperation = txn => { + txn.commit(Seq.empty, DeltaOperations.ManualUpdate) + }, + shouldFail = true) + + testWithNewTransaction( + name = "snapshot isolation for query that can leverage metadata query optimization " + + "with partition filter + conflicting concurrent updates", + partitionedTableKeys = Seq(1, 2, 3), + currentThreadReadOp = txnTable => { + txnTable.filter("key == 1").count() + }, + concurrentUpdateOp = txnTableName => { + sql(s"DELETE FROM $txnTableName WHERE key = 1") + }, + currentThreadCommitOperation = txn => { + // Concurrent delete op touches the same partition as those read in the active txn. + txn.commit(Seq.empty, DeltaOperations.ManualUpdate) + }, + shouldFail = true) + + testWithNewTransaction( + name = "capture reads on txn table with data filter + conflicting concurrent updates", + partitionedTableKeys = Seq(1, 2, 3), // will generate (key, value) = (1, 1), (2, 2), (3, 3) + currentThreadReadOp = txnTable => { + txnTable.filter("value == 1").count() // pure data filter that touches one file + }, + concurrentUpdateOp = txnTableName => { + sql(s"DELETE FROM $txnTableName WHERE key = 1") // deletes the one file read above + }, + currentThreadCommitOperation = txn => { + txn.commit(Seq.empty, DeltaOperations.ManualUpdate) + }, + shouldFail = true) + + testWithNewTransaction( + name = "capture reads on txn table with partition filter + non-conflicting concurrent updates", + partitionedTableKeys = Seq(1, 2, 3), + currentThreadReadOp = txnTable => { + txnTable.filter("key == 1").count() + }, + concurrentUpdateOp = txnTableName => { + sql(s"DELETE FROM $txnTableName WHERE key = 2") + sql(s"INSERT INTO $txnTableName SELECT 4, 4") + }, + currentThreadCommitOperation = txn => { + // Concurrent delete op touches the different files as those read in the active txn. + txn.commit(Seq.empty, DeltaOperations.ManualUpdate) + }, + shouldFail = false) + + testWithNewTransaction( + name = "snapshot isolation for metadata optimizable query with partition filter +" + + " non-conflicting concurrent updates", + partitionedTableKeys = Seq(1, 2, 3), + currentThreadReadOp = txnTable => { + txnTable.filter("key == 1").count() + }, + concurrentUpdateOp = txnTableName => { + sql(s"DELETE FROM $txnTableName WHERE key = 2") + sql(s"INSERT INTO $txnTableName SELECT 4, 4") + }, + currentThreadCommitOperation = txn => { + // Concurrent delete op touches the different files as those read in the active txn. + txn.commit(Seq.empty, DeltaOperations.ManualUpdate) + }, + shouldFail = false) + + testWithNewTransaction( + name = "capture reads on txn table with filter+limit and conflicting concurrent updates", + partitionedTableKeys = Seq(1, 2, 3), + currentThreadReadOp = txnTable => { + txnTable.filter("key == 1").limit(1).collect() + }, + concurrentUpdateOp = txnTableName => { + sql(s"DELETE FROM $txnTableName WHERE key = 1") + }, + currentThreadCommitOperation = txn => { + // Concurrent delete op touches the same files as those read in the active txn. 
+ txn.commit(Seq.empty, DeltaOperations.ManualUpdate) + }, + shouldFail = true) + + testWithNewTransaction( + name = "capture reads on txn table with filter+limit and non-conflicting concurrent updates", + partitionedTableKeys = Seq(1, 2, 3), + currentThreadReadOp = txnTable => { + txnTable.filter("key == 1").limit(1).collect() + }, + concurrentUpdateOp = txnTableName => { + sql(s"DELETE FROM $txnTableName WHERE key = 2") + sql(s"INSERT INTO $txnTableName SELECT 4, 4") + }, + currentThreadCommitOperation = txn => { + // Concurrent delete op touches the different files as those read in the active txn. + txn.commit(Seq.empty, DeltaOperations.ManualUpdate) + }, + shouldFail = false) + + testWithNewTransaction( + name = "capture reads on txn table with limit + conflicting concurrent updates", + partitionedTableKeys = Seq(1, 2, 3), + currentThreadReadOp = txnTable => { + txnTable.limit(1).collect() + }, + concurrentUpdateOp = txnTableName => { + sql(s"DELETE FROM $txnTableName WHERE true") + }, + currentThreadCommitOperation = txn => { + txn.commit(Seq.empty, DeltaOperations.ManualUpdate) + }, + shouldFail = true) + + testWithNewTransaction( + name = "capture reads on txn table even when limit pushdown is disabled", + confs = Map(DeltaSQLConf.DELTA_LIMIT_PUSHDOWN_ENABLED.key -> "false"), + partitionedTableKeys = Seq(1, 2, 3), + currentThreadReadOp = txnTable => { + txnTable.limit(1).collect() + }, + concurrentUpdateOp = txnTableName => { + sql(s"UPDATE $txnTableName SET key = 2 WHERE key = 3") + }, + currentThreadCommitOperation = txn => { + // Any concurrent change (even if its seemingly non-conflicting) should fail the filter as + // the whole table will be scanned by the filter when data skipping is disabled + txn.commit(Seq.empty, DeltaOperations.ManualUpdate) + }, + shouldFail = true) + + test("withNewTransaction - nesting withNewTransaction is not supported") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir.getCanonicalPath) + log.withNewTransaction { txn => + + require(OptimisticTransaction.getActive().nonEmpty) + intercept[IllegalStateException] { + OptimisticTransaction.setActive(txn) + } + + intercept[IllegalStateException] { + log.withNewTransaction { txn2 => } + } + } + } + } + + testWithNewTransaction( + name = "capture reads on txn table even when data skipping is disabled", + confs = Map(DeltaSQLConf.DELTA_STATS_SKIPPING.key -> "false"), + partitionedTableKeys = Seq(1, 2, 3), + currentThreadReadOp = txnTable => { + txnTable.filter("key == 1").count() + }, + concurrentUpdateOp = txnTableName => { + sql(s"UPDATE $txnTableName SET key = 2 WHERE key = 3") + }, + currentThreadCommitOperation = txn => { + // use physical name + val key = getPhysicalName("key", txn.metadata.schema) + // Any concurrent change (even if its seemingly non-conflicting) should fail the filter as + // the whole table will be scanned by the filter when data skipping is disabled. + // Note: Adding a file to avoid snapshot isolation level for the commit. 
+ txn.commit(Seq(AddFile("a", Map(key -> "2"), 1, 1, true)), DeltaOperations.ManualUpdate) + }, + shouldFail = true) + + def testSnapshotIsolation(): Unit = { + val txnTablePath = Utils.createTempDir().getCanonicalPath + val nonTxnTablePath = Utils.createTempDir().getCanonicalPath + + def txnTable: DataFrame = spark.read.format("delta").load(txnTablePath) + def nonTxnTable: DataFrame = spark.read.format("delta").load(nonTxnTablePath) + + def writeToNonTxnTable(ds: Dataset[java.lang.Long]): Unit = { + import testImplicits._ + ds.toDF("key").select('key, 'key as "value") + .write.format("delta").mode("append").partitionBy("key").save(nonTxnTablePath) + DeltaLog.forTable(spark, nonTxnTablePath).update(stalenessAcceptable = false) + } + + testWithNewTransaction( + name = s"snapshot isolation uses first-access snapshots when enabled", + partitionTablePath = txnTablePath, + partitionedTableKeys = Seq(1, 2, 3, 4, 5), // Prepare txn-table + preTxnSetup = _ => { + writeToNonTxnTable(spark.range(3)) // Prepare non-txn table + }, + currentThreadReadOp = txnTable => { + // First read on tables + require(txnTable.count() == 5) + require(nonTxnTable.count() === 3) + }, + concurrentUpdateOp = txnTableName => { + // Update tables in a different thread and make sure the DeltaLog gets updated + sql(s"INSERT INTO $txnTableName SELECT 6, 6") + DeltaLog.forTable(spark, txnTablePath).update(stalenessAcceptable = false) + require(txnTable.count() == 6) + + writeToNonTxnTable(spark.range(3, 10)) + require(nonTxnTable.count() == 10) + }, + currentThreadCommitOperation = _ => { + // Second read on concurrently updated tables should read old snapshots + assert(txnTable.count() == 5, "snapshot isolation failed on txn table") + assert(nonTxnTable.count() == 3, "snapshot isolation failed on non-txn table") + }, + shouldFail = false) + } + + testSnapshotIsolation() +} + +class DeltaWithNewTransactionSuite extends DeltaWithNewTransactionSuiteBase + +class DeltaWithNewTransactionIdColumnMappingSuite extends DeltaWithNewTransactionSuite + with DeltaColumnMappingEnableIdMode + +class DeltaWithNewTransactionNameColumnMappingSuite extends DeltaWithNewTransactionSuite + with DeltaColumnMappingEnableNameMode diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaWriteConfigsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaWriteConfigsSuite.scala new file mode 100644 index 00000000000..f73b6991088 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaWriteConfigsSuite.scala @@ -0,0 +1,606 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.util.Locale + +import scala.collection.mutable.ListBuffer + +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StringType + +/** + * This test suite tests all (or nearly-all) combinations of ways to write configs to a delta table. + * + * At a high level, it tests the following matrix of conditions: + * + * - DataFrameWriter or DataStreamWriter or DataFrameWriterV2 or DeltaTableBuilder or SQL API + * X + * - option is / is not prefixed with 'delta' + * X + * - using table name or table path + * X + * - CREATE or REPLACE or CREATE OR REPLACE (table already exists) OR CREATE OR REPLACE (table + * doesn't already exist) + * + * At the end of the test suite, it prints out summary tables all of the cases above. + */ +class DeltaWriteConfigsSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + + val config_no_prefix = "dataSkippingNumIndexedCols" + val config_no_prefix_value = "33" + + val config_prefix = "delta.deletedFileRetentionDuration" + val config_prefix_value = "interval 2 weeks" + + val config_no_prefix_2 = "logRetentionDuration" + val config_no_prefix_2_value = "interval 60 days" + + val config_prefix_2 = "delta.checkpointInterval" + val config_prefix_2_value = "20" + + override def afterAll(): Unit = { + import testImplicits._ + // scalastyle:off println + + println("DataFrameWriter Test Output") + dfw_output.toSeq + .toDF("Output Location", "Output Mode", s"Contains No-Prefix Option", + "Contains Prefix-Option", "Config") + .show(100, false) + + println("DataStreamWriter Test Output") + dsw_output.toSeq + .toDF("Output Location", "Output Mode", s"Contains No-Prefix Option", + "Contains Prefix-Option", "Config") + .show(100, false) + + println("DataFrameWriterV2 Test Output") + dfw_v2_output.toSeq + .toDF("Output Location", "Output Mode", s"Contains No-Prefix Option", + "Contains Prefix-Option", "Config") + .show(100, false) + + println("DeltaTableBuilder Test Output") + dtb_output.toSeq + .toDF("Output Location", "Output Mode", s"Contains No-Prefix Option (lowercase)", + s"Contains No-Prefix Option", "Contains Prefix-Option", "ERROR", "Config") + .show(100, false) + + println("SQL Test Output") + sql_output.toSeq + .toDF("Output Location", "Config Input", s"SQL Operation", "AS SELECT", + "Contains OPTION no-prefix", "Contains OPTION prefix", "Contains TBLPROPERTIES no-prefix", + "Contains TBLPROPERTIES prefix", "Config") + .show(100, false) + + // scalastyle:on println + super.afterAll() + } + + + private val dfw_output = new ListBuffer[DeltaFrameStreamAPITestOutput] + private val dsw_output = new ListBuffer[DeltaFrameStreamAPITestOutput] + private val dfw_v2_output = new ListBuffer[DeltaFrameStreamAPITestOutput] + private val dtb_output = new ListBuffer[DeltaTableBuilderAPITestOutput] + private val sql_output = new ListBuffer[SQLAPIOutput] + + // scalastyle:off line.size.limit + /* + DataFrameWriter Test Output + +---------------+-----------+-------------------------+----------------------+------------------------------------------------------+ + |Output Location|Output Mode|Contains No-Prefix Option|Contains Prefix-Option|Config | + 
+---------------+-----------+-------------------------+----------------------+------------------------------------------------------+ + |path |create |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |path |overwrite |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |path |append |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |table |create |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |table |overwrite |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |table |append |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + +---------------+-----------+-------------------------+----------------------+------------------------------------------------------+ + */ + // scalastyle:on line.size.limit + Seq("path", "table").foreach { outputLoc => + Seq("create", "overwrite", "append").foreach { outputMode => + val testName = s"DataFrameWriter - outputLoc=$outputLoc & mode=$outputMode" + test(testName) { + withTempDir { dir => + withTable("tbl") { + var data = spark.range(10).write.format("delta") + .option(config_no_prefix, config_no_prefix_value) + .option(config_prefix, config_prefix_value) + + if (outputMode != "create") { + data = data.mode(outputMode) + } + + val log = outputLoc match { + case "path" => + data.save(dir.getCanonicalPath) + DeltaLog.forTable(spark, dir) + case "table" => + data.saveAsTable("tbl") + DeltaLog.forTable(spark, TableIdentifier("tbl")) + } + + val config = log.snapshot.metadata.configuration + val answer_no_prefix = config.contains(config_no_prefix) + val answer_prefix = config.contains(config_prefix) + + assert(!answer_no_prefix) + assert(answer_prefix) + assert(config.size == 1) + + dfw_output += DeltaFrameStreamAPITestOutput( + outputLocation = outputLoc, + outputMode = outputMode, + containsNoPrefixOption = answer_no_prefix, + containsPrefixOption = answer_prefix, + config = config.mkString(",") + ) + + } + } + } + } + } + + // scalastyle:off line.size.limit + /* + DataStreamWriter Test Output + +---------------+-----------+-------------------------+----------------------+------+ + |Output Location|Output Mode|Contains No-Prefix Option|Contains Prefix-Option|Config| + +---------------+-----------+-------------------------+----------------------+------+ + |path |create |false |false | | + |path |append |false |false | | + |path |complete |false |false | | + |table |create |false |false | | + |table |append |false |false | | + |table |complete |false |false | | + +---------------+-----------+-------------------------+----------------------+------+ + */ + // scalastyle:on line.size.limit + // Data source DeltaDataSource does not support Update output mode + Seq("path", "table").foreach { outputLoc => + Seq("create", "append", "complete").foreach { outputMode => + val testName = s"DataStreamWriter - outputLoc=$outputLoc & outputMode=$outputMode" + test(testName) { + withTempDir { dir => + withTempDir { checkpointDir => + withTable("src", "tbl") { + spark.range(10).write.format("delta").saveAsTable("src") + + var data = spark.readStream.format("delta").table("src") + + // Needed to resolve error: Complete output mode not supported when there are no + // streaming aggregations on streaming DataFrames/Datasets + if (outputMode == "complete") { + data = data.groupBy().count() + } + + var stream = data.writeStream + .format("delta") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .option(config_no_prefix, 
config_no_prefix_value) + .option(config_prefix, config_prefix_value) + + if (outputMode != "create") { + stream = stream.outputMode(outputMode) + } + + val log = outputLoc match { + case "path" => + stream.start(dir.getCanonicalPath).stop() + DeltaLog.forTable(spark, dir) + case "table" => + stream.toTable("tbl").stop() + DeltaLog.forTable(spark, TableIdentifier("tbl")) + } + + val config = log.snapshot.metadata.configuration + val answer_no_prefix = config.contains(config_no_prefix) + val answer_prefix = config.contains(config_prefix) + + assert(config.isEmpty) + assert(!answer_no_prefix) + assert(!answer_prefix) + + dsw_output += DeltaFrameStreamAPITestOutput( + outputLocation = outputLoc, + outputMode = outputMode, + containsNoPrefixOption = answer_no_prefix, + containsPrefixOption = answer_prefix, + config = config.mkString(",") + ) + + } + } + } + } + } + } + + // scalastyle:off line.size.limit + /* + DataFrameWriterV2 Test Output + +---------------+--------------+-------------------------+----------------------+------------------------------------------------------+ + |Output Location|Output Mode |Contains No-Prefix Option|Contains Prefix-Option|Config | + +---------------+--------------+-------------------------+----------------------+------------------------------------------------------+ + |path |create |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |path |replace |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |path |c_or_r_create |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |path |c_or_r_replace|false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |table |create |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |table |replace |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |table |c_or_r_create |false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + |table |c_or_r_replace|false |true |delta.deletedFileRetentionDuration -> interval 2 weeks| + +---------------+--------------+-------------------------+----------------------+------------------------------------------------------+ + */ + // scalastyle:on line.size.limit + Seq("path", "table").foreach { outputLoc => + Seq("create", "replace", "c_or_r_create", "c_or_r_replace").foreach { outputMode => + val testName = s"DataFrameWriterV2 - outputLoc=$outputLoc & outputMode=$outputMode" + test(testName) { + withTempDir { dir => + withTable("tbl") { + val table = outputLoc match { + case "path" => s"delta.`${dir.getCanonicalPath}`" + case "table" => "tbl" + } + + val data = spark.range(10).writeTo(table).using("delta") + .option(config_no_prefix, config_no_prefix_value) + .option(config_prefix, config_prefix_value) + + if (outputMode.contains("replace")) { + spark.range(100).writeTo(table).using("delta").create() + } + + outputMode match { + case "create" => data.create() + case "replace" => data.replace() + case "c_or_r_create" | "c_or_r_replace" => data.createOrReplace() + } + + val log = outputLoc match { + case "path" => DeltaLog.forTable(spark, dir) + case "table" => DeltaLog.forTable(spark, TableIdentifier("tbl")) + } + + val config = log.snapshot.metadata.configuration + + val answer_no_prefix = config.contains(config_no_prefix) + val answer_prefix = config.contains(config_prefix) + + assert(!answer_no_prefix) + assert(answer_prefix) + assert(config.size == 1) + + dfw_v2_output += DeltaFrameStreamAPITestOutput( + outputLocation = outputLoc, + outputMode = outputMode, + 
containsNoPrefixOption = answer_no_prefix, + containsPrefixOption = answer_prefix, + config = config.mkString(",") + ) + + } + } + } + } + } + + // scalastyle:off line.size.limit + /* + DeltaTableBuilder Test Output + +---------------+--------------+-------------------------------------+-------------------------+----------------------+-----+---------------------------------------------------------------------------------------+ + |Output Location|Output Mode |Contains No-Prefix Option (lowercase)|Contains No-Prefix Option|Contains Prefix-Option|ERROR|Config | + +---------------+--------------+-------------------------------------+-------------------------+----------------------+-----+---------------------------------------------------------------------------------------+ + |path |create |true |false |true |false|delta.deletedFileRetentionDuration -> interval 2 weeks,dataSkippingNumIndexedCols -> 33| + |path |replace |true |false |true |false|delta.deletedFileRetentionDuration -> interval 2 weeks,dataSkippingNumIndexedCols -> 33| + |path |c_or_r_create |true |false |true |false|delta.deletedFileRetentionDuration -> interval 2 weeks,dataSkippingNumIndexedCols -> 33| + |path |c_or_r_replace|false |false |false |true | | + |table |create |true |false |true |false|dataSkippingNumIndexedCols -> 33,delta.deletedFileRetentionDuration -> interval 2 weeks| + |table |replace |true |false |true |false|delta.deletedFileRetentionDuration -> interval 2 weeks,dataSkippingNumIndexedCols -> 33| + |table |c_or_r_create |true |false |true |false|delta.deletedFileRetentionDuration -> interval 2 weeks,dataSkippingNumIndexedCols -> 33| + |table |c_or_r_replace|true |false |true |false|delta.deletedFileRetentionDuration -> interval 2 weeks,dataSkippingNumIndexedCols -> 33| + +---------------+--------------+-------------------------------------+-------------------------+----------------------+-----+---------------------------------------------------------------------------------------+ + */ + // scalastyle:on line.size.limit + Seq("path", "table").foreach { outputLoc => + Seq("create", "replace", "c_or_r_create", "c_or_r_replace").foreach { outputMode => + val testName = s"DeltaTableBuilder - outputLoc=$outputLoc & outputMode=$outputMode" + test(testName) { + withTempDir { dir => + withTable("tbl") { + + if (outputMode.contains("replace")) { + outputLoc match { + case "path" => + io.delta.tables.DeltaTable.create() + .addColumn("bar", StringType).location(dir.getCanonicalPath).execute() + case "table" => + io.delta.tables.DeltaTable.create() + .addColumn("bar", StringType).tableName("tbl").execute() + } + } + + var tblBuilder = outputMode match { + case "create" => + io.delta.tables.DeltaTable.create() + case "replace" => + io.delta.tables.DeltaTable.replace() + case "c_or_r_create" | "c_or_r_replace" => + io.delta.tables.DeltaTable.createOrReplace() + } + + tblBuilder.addColumn("foo", StringType) + tblBuilder = tblBuilder.property(config_no_prefix, config_no_prefix_value) + tblBuilder = tblBuilder.property(config_prefix, config_prefix_value) + + val log = (outputLoc, outputMode) match { + case ("path", "c_or_r_replace") => + intercept[DeltaAnalysisException] { + tblBuilder.location(dir.getCanonicalPath).execute() + } + null + case ("path", _) => + tblBuilder.location(dir.getCanonicalPath).execute() + DeltaLog.forTable(spark, dir) + case ("table", _) => + tblBuilder.tableName("tbl").execute() + DeltaLog.forTable(spark, TableIdentifier("tbl")) + } + + log match { + case null => + // CREATE OR REPLACE seems broken 
when using path and the table already exists + // with a different schema. + // DeltaAnalysisException: The specified schema does not match the existing schema + // ... + // Specified schema is missing field(s): bar + // Specified schema has additional field(s): foo + assert(outputLoc == "path" && outputMode == "c_or_r_replace") + dtb_output += DeltaTableBuilderAPITestOutput( + outputLocation = outputLoc, + outputMode = outputMode, + containsNoPrefixOptionLowerCase = false, + containsNoPrefixOption = false, + containsPrefixOption = false, + error = true, + config = "" + ) + case _ => + val config = log.snapshot.metadata.configuration + + val answer_no_prefix_lowercase = + config.contains(config_no_prefix.toLowerCase(Locale.ROOT)) + val answer_no_prefix = config.contains(config_no_prefix) + val answer_prefix = config.contains(config_prefix) + + assert(!answer_no_prefix_lowercase) + assert(answer_no_prefix) + assert(answer_prefix) + assert(config.size == 2) + + dtb_output += DeltaTableBuilderAPITestOutput( + outputLocation = outputLoc, + outputMode = outputMode, + containsNoPrefixOptionLowerCase = answer_no_prefix_lowercase, + containsNoPrefixOption = answer_no_prefix, + containsPrefixOption = answer_prefix, + error = false, + config = config.mkString(",") + ) + } + } + } + } + } + } + + // scalastyle:off line.size.limit + /* + SQL Test Output + +---------------+-------------------------+--------------+---------+-------------------------+----------------------+--------------------------------+-----------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + |Output Location|Config Input |SQL Operation |AS SELECT|Contains OPTION no-prefix|Contains OPTION prefix|Contains TBLPROPERTIES no-prefix|Contains TBLPROPERTIES prefix|Config | + +---------------+-------------------------+--------------+---------+-------------------------+----------------------+--------------------------------+-----------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + |path |options |create |true |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |path |options |create |false |true |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks,dataSkippingNumIndexedCols -> 33,option.delta.deletedFileRetentionDuration -> interval 2 weeks,option.dataSkippingNumIndexedCols -> 33 | + |path |options |replace |true |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |path |options |replace |false |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |path |options |c_or_r_create |true |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |path |options |c_or_r_create |false |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |path |options |c_or_r_replace|true |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |path |options |c_or_r_replace|false |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |path |tblproperties |create |true |N/A |N/A |true 
|true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |tblproperties |create |false |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |tblproperties |replace |true |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |tblproperties |replace |false |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |tblproperties |c_or_r_create |true |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |tblproperties |c_or_r_create |false |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |tblproperties |c_or_r_replace|true |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |tblproperties |c_or_r_replace|false |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |options_and_tblproperties|create |true |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |options_and_tblproperties|create |false |true |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,dataSkippingNumIndexedCols -> 33,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20,option.delta.deletedFileRetentionDuration -> interval 2 weeks,option.dataSkippingNumIndexedCols -> 33| + |path |options_and_tblproperties|replace |true |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |options_and_tblproperties|replace |false |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |options_and_tblproperties|c_or_r_create |true |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |options_and_tblproperties|c_or_r_create |false |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |options_and_tblproperties|c_or_r_replace|true |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |path |options_and_tblproperties|c_or_r_replace|false |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |options |create |true |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |table |options |create |false |true |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks,dataSkippingNumIndexedCols -> 33,option.delta.deletedFileRetentionDuration -> interval 2 weeks,option.dataSkippingNumIndexedCols -> 33 | + |table |options |replace |true |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |table |options |replace |false |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |table |options |c_or_r_create |true |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | 
+ |table |options |c_or_r_create |false |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |table |options |c_or_r_replace|true |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |table |options |c_or_r_replace|false |false |true |N/A |N/A |delta.deletedFileRetentionDuration -> interval 2 weeks | + |table |tblproperties |create |true |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |tblproperties |create |false |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |tblproperties |replace |true |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |tblproperties |replace |false |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |tblproperties |c_or_r_create |true |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |tblproperties |c_or_r_create |false |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |tblproperties |c_or_r_replace|true |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |tblproperties |c_or_r_replace|false |N/A |N/A |true |true |logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |options_and_tblproperties|create |true |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |options_and_tblproperties|create |false |true |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,dataSkippingNumIndexedCols -> 33,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20,option.delta.deletedFileRetentionDuration -> interval 2 weeks,option.dataSkippingNumIndexedCols -> 33| + |table |options_and_tblproperties|replace |true |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |options_and_tblproperties|replace |false |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |options_and_tblproperties|c_or_r_create |true |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |options_and_tblproperties|c_or_r_create |false |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |options_and_tblproperties|c_or_r_replace|true |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + |table |options_and_tblproperties|c_or_r_replace|false |false |true |true |true |delta.deletedFileRetentionDuration -> interval 2 weeks,logRetentionDuration -> interval 60 days,delta.checkpointInterval -> 20 | + 
+---------------+-------------------------+--------------+---------+-------------------------+----------------------+--------------------------------+-----------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + */ + // scalastyle:on line.size.limit + Seq("path", "table").foreach { outputLoc => + Seq("options", "tblproperties", "options_and_tblproperties").foreach { configInput => + Seq("create", "replace", "c_or_r_create", "c_or_r_replace").foreach { sqlOp => + Seq(true, false).foreach { useAsSelectStmt => + val testName = s"SQL - outputLoc=$outputLoc & configInput=$configInput & sqlOp=$sqlOp" + + s" & useAsSelectStmt=$useAsSelectStmt" + + test(testName) { + withTempDir { dir => + withTable("tbl", "other") { + if (sqlOp.contains("replace")) { + var stmt = "CREATE TABLE tbl (ID INT) USING DELTA" + if (outputLoc == "path") { + stmt = stmt + s" LOCATION '${dir.getCanonicalPath}'" + } + sql(stmt) + } + + val sqlOpStr = sqlOp match { + case "c_or_r_create" | "c_or_r_replace" => "CREATE OR REPLACE" + case _ => sqlOp.toUpperCase(Locale.ROOT) + } + + val schemaStr = if (useAsSelectStmt) "" else "(id INT) " + var stmt = sqlOpStr + " TABLE tbl " + schemaStr + "USING DELTA\n" + + if (configInput.contains("options")) { + stmt = stmt + s"OPTIONS(" + + s"'$config_no_prefix'=$config_no_prefix_value," + + s"'$config_prefix'='$config_prefix_value')\n" + } + if (outputLoc == "path") { + stmt = stmt + s"LOCATION '${dir.getCanonicalPath}'\n" + } + if (configInput.contains("tblproperties")) { + stmt = stmt + s"TBLPROPERTIES(" + + s"'$config_no_prefix_2'='$config_no_prefix_2_value'," + + s"'$config_prefix_2'=$config_prefix_2_value)\n" + } + if (useAsSelectStmt) { + sql("CREATE TABLE other (id INT) USING DELTA") + stmt = stmt + "AS SELECT * FROM other\n" + } + + // scalastyle:off println + println(stmt) + // scalastyle:on println + + sql(stmt) + + val log = DeltaLog.forTable(spark, TableIdentifier("tbl")) + val config = log.snapshot.metadata.configuration + + val option_was_set = configInput.contains("options") + val tblproperties_was_set = configInput.contains("tblproperties") + + val option_no_prefix = config.contains(config_no_prefix) + val option_prefix = config.contains(config_prefix) + val tblproperties_no_prefix = config.contains(config_no_prefix_2) + val tblproperties_prefix = config.contains(config_prefix_2) + + var expectedSize = 0 + if (option_was_set) { + assert(option_prefix) + expectedSize += 1 + if (sqlOp == "create" && !useAsSelectStmt) { + assert(option_no_prefix) + assert(config.contains(s"option.$config_prefix")) + assert(config.contains(s"option.$config_no_prefix")) + expectedSize += 3 + } + } + if (tblproperties_was_set) { + assert(tblproperties_prefix) + assert(tblproperties_no_prefix) + expectedSize += 2 + } + + assert(config.size == expectedSize) + + sql_output += SQLAPIOutput( + outputLoc, + configInput, + sqlOp, + useAsSelectStmt, + if (option_was_set) option_no_prefix.toString else "N/A", + if (option_was_set) option_prefix.toString else "N/A", + if (tblproperties_was_set) tblproperties_no_prefix.toString else "N/A", + if (tblproperties_was_set) tblproperties_prefix.toString else "N/A", + config.mkString(",") + ) + } + } + } + } + } + } + } +} + +// Need to be outside to be stable references for Spark to generate the case classes +case class 
DeltaFrameStreamAPITestOutput( + outputLocation: String, + outputMode: String, + containsNoPrefixOption: Boolean, + containsPrefixOption: Boolean, + config: String) + +case class DeltaTableBuilderAPITestOutput( + outputLocation: String, + outputMode: String, + containsNoPrefixOptionLowerCase: Boolean, + containsNoPrefixOption: Boolean, + containsPrefixOption: Boolean, + error: Boolean, + config: String) + +case class SQLAPIOutput( + outputLocation: String, + configInput: String, + sqlOperation: String, + asSelect: Boolean, + containsOptionNoPrefix: String, + containsOptionPrefix: String, + containsTblPropertiesNoPrefix: String, + containsTblPropertiesPrefix: String, + config: String) diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaDetailSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaDetailSuite.scala new file mode 100644 index 00000000000..be5849f18fa --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaDetailSuite.scala @@ -0,0 +1,291 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.io.FileNotFoundException + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils.{TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +trait DescribeDeltaDetailSuiteBase extends QueryTest + with SharedSparkSession + with DeltaTestUtilsForTempViews { + + import testImplicits._ + + val catalogAndSchema = { + s"$SESSION_CATALOG_NAME.default." 
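+ // e.g. "spark_catalog.default.": the qualified prefix expected in the `name` column of + // DESCRIBE DETAIL output for tables created by name.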
+ } + + protected def checkResult( + result: DataFrame, + expected: Seq[Any], + columns: Seq[String]): Unit = { + checkAnswer( + result.select(columns.head, columns.tail: _*), + Seq(Row(expected: _*)) + ) + } + + def describeDeltaDetailTest(f: File => String): Unit = { + val tempDir = Utils.createTempDir() + Seq(1 -> 1).toDF("column1", "column2") + .write + .format("delta") + .partitionBy("column1") + .save(tempDir.toString()) + + // Check SQL details + checkResult( + sql(s"DESCRIBE DETAIL ${f(tempDir)}"), + Seq("delta", Array("column1"), 1), + Seq("format", "partitionColumns", "numFiles")) + + // Check Scala details + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tempDir.toString) + checkResult( + deltaTable.detail(), + Seq("delta", Array("column1"), 1), + Seq("format", "partitionColumns", "numFiles")) + } + + test("delta table: Scala details using table name") { + withTable("delta_test") { + Seq(1, 2, 3).toDF().write.format("delta").saveAsTable("delta_test") + + val deltaTable = io.delta.tables.DeltaTable.forName(spark, "delta_test") + checkAnswer( + deltaTable.detail().select("format"), + Seq(Row("delta")) + ) + } + } + + test("delta table: path") { + describeDeltaDetailTest(f => s"'${f.toString()}'") + } + + test("delta table: delta table identifier") { + describeDeltaDetailTest(f => s"delta.`${f.toString()}`") + } + + test("non-delta table: SQL details using table name") { + withTable("describe_detail") { + sql( + """ + |CREATE TABLE describe_detail(column1 INT, column2 INT) + |USING parquet + |PARTITIONED BY (column1) + |COMMENT "this is a table comment" + """.stripMargin) + sql( + """ + |INSERT INTO describe_detail VALUES(1, 1) + """.stripMargin + ) + checkResult( + sql("DESCRIBE DETAIL describe_detail"), + Seq("parquet", Array("column1")), + Seq("format", "partitionColumns")) + } + } + + test("non-delta table: SQL details using table path") { + val tempDir = Utils.createTempDir().toString + Seq(1 -> 1).toDF("column1", "column2") + .write + .format("parquet") + .partitionBy("column1") + .mode("overwrite") + .save(tempDir) + checkResult( + sql(s"DESCRIBE DETAIL '$tempDir'"), + Seq(tempDir), + Seq("location")) + } + + test("non-delta table: SQL details when table path doesn't exist") { + val tempDir = Utils.createTempDir() + tempDir.delete() + val e = intercept[FileNotFoundException] { + sql(s"DESCRIBE DETAIL '$tempDir'") + } + assert(e.getMessage.contains(tempDir.toString)) + } + + test("delta table: SQL details using table name") { + withTable("describe_detail") { + sql( + """ + |CREATE TABLE describe_detail(column1 INT, column2 INT) + |USING delta + |PARTITIONED BY (column1) + |COMMENT "describe a non delta table" + """.stripMargin) + sql( + """ + |INSERT INTO describe_detail VALUES(1, 1) + """.stripMargin + ) + checkResult( + sql("DESCRIBE DETAIL describe_detail"), + Seq("delta", Array("column1"), 1), + Seq("format", "partitionColumns", "numFiles")) + } + } + + test("delta table: create table on an existing delta log") { + val tempDir = Utils.createTempDir().toString + Seq(1 -> 1).toDF("column1", "column2") + .write + .format("delta") + .partitionBy("column1") + .mode("overwrite") + .save(tempDir) + val tblName1 = "tbl_name1" + val tblName2 = "tbl_name2" + withTable(tblName1, tblName2) { + sql(s"CREATE TABLE $tblName1 USING DELTA LOCATION '$tempDir'") + sql(s"CREATE TABLE $tblName2 USING DELTA LOCATION '$tempDir'") + checkResult( + sql(s"DESCRIBE DETAIL $tblName1"), + Seq(s"$catalogAndSchema$tblName1"), + Seq("name")) + checkResult( + sql(s"DESCRIBE DETAIL 
$tblName2"), + Seq(s"$catalogAndSchema$tblName2"), + Seq("name")) + checkResult( + sql(s"DESCRIBE DETAIL delta.`$tempDir`"), + Seq(null), + Seq("name")) + checkResult( + sql(s"DESCRIBE DETAIL '$tempDir'"), + Seq(null), + Seq("name")) + } + } + + testWithTempView(s"SC-37296: describe detail on temp view") { isSQLTempView => + withTable("t1") { + Seq(1, 2, 3).toDF().write.format("delta").saveAsTable("t1") + val viewName = "v" + createTempViewFromTable("t1", isSQLTempView) + val e = intercept[AnalysisException] { + sql(s"DESCRIBE DETAIL $viewName") + } + assert(e.getMessage.contains("'DESCRIBE DETAIL' expects a table")) + } + } + + test("SC-37296: describe detail on permanent view") { + val view = "detailTestView" + withView(view) { + sql(s"CREATE VIEW $view AS SELECT 1") + val e = intercept[AnalysisException] { sql(s"DESCRIBE DETAIL $view") } + assert(e.getMessage.contains("'DESCRIBE DETAIL' expects a table")) + } + } + + test("delta table: describe detail always run on the latest snapshot") { + val tableName = "tbl_name_on_latest_snapshot" + withTable(tableName) { + val tempDir = Utils.createTempDir().toString + sql(s"CREATE TABLE $tableName USING DELTA LOCATION '$tempDir'") + + val deltaLog = DeltaLog.forTable(spark, tempDir) + DeltaLog.clearCache() + + // Cache a new DeltaLog + sql(s"DESCRIBE DETAIL $tableName") + + val txn = deltaLog.startTransaction() + val metadata = txn.snapshot.metadata + val newMetadata = metadata.copy(configuration = + metadata.configuration ++ Map("foo" -> "bar") + ) + txn.commit(newMetadata :: Nil, DeltaOperations.ManualUpdate) + checkResult(sql(s"DESCRIBE DETAIL $tableName"), + Seq(Map("foo" -> "bar")), + Seq("properties") + ) + } + } + + test("delta table: describe detail shows table features") { + withTable("t1") { + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "2" + ) { + Seq(1, 2, 3).toDF().write.format("delta").saveAsTable("t1") + } + val p = DeltaLog.forTable(spark, TableIdentifier("t1")).snapshot.protocol + + checkResult( + sql(s"DESCRIBE DETAIL t1"), + Seq( + p.minReaderVersion, + p.minWriterVersion, + p.implicitlySupportedFeatures.map(_.name).toArray.sorted), + Seq("minReaderVersion", "minWriterVersion", "tableFeatures")) + + val features = p.readerAndWriterFeatureNames ++ p.implicitlySupportedFeatures.map(_.name) + sql(s"""ALTER TABLE t1 SET TBLPROPERTIES ( + | delta.minReaderVersion = $TABLE_FEATURES_MIN_READER_VERSION, + | delta.minWriterVersion = $TABLE_FEATURES_MIN_WRITER_VERSION, + | delta.feature.${TestReaderWriterFeature.name} = 'enabled' + |)""".stripMargin) + + checkResult( + sql(s"DESCRIBE DETAIL t1"), + Seq( + TABLE_FEATURES_MIN_READER_VERSION, + TABLE_FEATURES_MIN_WRITER_VERSION, + (features + TestReaderWriterFeature.name).toArray.sorted), + Seq("minReaderVersion", "minWriterVersion", "tableFeatures")) + } + } + + test("describe detail contains table name") { + val tblName = "test_table" + withTable(tblName) { + spark.sql(s"CREATE TABLE $tblName(id INT) USING delta") + val deltaTable = io.delta.tables.DeltaTable.forName(tblName) + checkResult( + deltaTable.detail(), + Seq(s"$catalogAndSchema$tblName"), + Seq("name") + ) + } + } + + // TODO: run it with OSS Delta after it's supported +} + +class DescribeDeltaDetailSuite + extends DescribeDeltaDetailSuiteBase with DeltaSQLCommandTest diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaHistorySuite.scala 
b/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaHistorySuite.scala new file mode 100644 index 00000000000..26deaae11eb --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaHistorySuite.scala @@ -0,0 +1,1475 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.File + +import org.apache.spark.sql.delta.actions.{Action, AddCDCFile, AddFile, Metadata, Protocol, RemoveFile} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.sql.{AnalysisException, Column, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{BooleanType, LongType, MapType, StringType, StructField, StructType, TimestampType} +import org.apache.spark.util.Utils + +trait DescribeDeltaHistorySuiteBase + extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest + with DeltaTestUtilsForTempViews + with MergeIntoMetricsBase { + + import testImplicits._ + + protected val evolvabilityResource = { + new File("src/test/resources/delta/history/delta-0.2.0").getAbsolutePath() + } + + protected val evolvabilityLastOp = Seq("STREAMING UPDATE", null, null) + + protected def deleteMetricsSchema(partitioned: Boolean) = + if (partitioned) DeltaOperationMetrics.DELETE_PARTITIONS else DeltaOperationMetrics.DELETE + + protected val updateMetricsSchema = DeltaOperationMetrics.UPDATE + protected val mergeMetricsSchema = DeltaOperationMetrics.MERGE + protected val replaceWhereMetricsSchema = DeltaOperationMetrics.WRITE_REPLACE_WHERE + + protected def testWithFlag(name: String, tags: Tag*)(f: => Unit): Unit = { + test(name, tags: _*) { + f + } + } + + protected def checkLastOperation( + basePath: String, + expected: Seq[String], + columns: Seq[Column] = Seq($"operation", $"operationParameters.mode"), + removeExpressionId: Boolean = false): Unit = { + var df = io.delta.tables.DeltaTable.forPath(spark, basePath).history(1) + df = df.select(columns: _*) + if (removeExpressionId) { + // As the expression ID is written as part of the column predicate (in the form of col#expId) + // but it is non-deterministic, we remove it here so that any comparison can just go against + // the column name + df = df.withColumn("predicate", regexp_replace(col("predicate"), "#[0-9]+", "")) + } + checkAnswer(df, Seq(Row(expected: _*))) + df = spark.sql(s"DESCRIBE HISTORY delta.`$basePath` LIMIT 1") + df = df.select(columns: _*) + if 
(removeExpressionId) { + df = df.withColumn("predicate", regexp_replace(col("predicate"), "#[0-9]+", "")) + } + checkAnswer(df, Seq(Row(expected: _*))) + } + + protected def checkOperationMetrics( + expectedMetrics: Map[String, String], + operationMetrics: Map[String, String], + metricsSchema: Set[String]): Unit = { + if (metricsSchema != operationMetrics.keySet) { + fail( + s"""The collected metrics do not match the defined schema for the metrics. + | Expected : $metricsSchema + | Actual : ${operationMetrics.keySet} + """.stripMargin) + } + expectedMetrics.keys.foreach { key => + if (!operationMetrics.contains(key)) { + fail(s"The recorded operation metrics do not contain key: $key") + } + if (expectedMetrics(key) != operationMetrics(key)) { + fail( + s"""The recorded metric for $key does not equal the expected value. + | expected = ${expectedMetrics(key)} , + | But actual = ${operationMetrics(key)} + """.stripMargin + ) + } + } + } + + /** + * Check that all expected metrics exist and that execution time (if expected to exist) is the + * largest time metric. + */ + protected def checkOperationTimeMetricsInvariant( + expectedMetrics: Set[String], + operationMetrics: Map[String, String]): Unit = { + expectedMetrics.foreach { + m => assert(operationMetrics.contains(m)) + } + if (expectedMetrics.contains("executionTimeMs")) { + val executionTimeMs = operationMetrics("executionTimeMs").toLong + val maxTimeMs = operationMetrics.filterKeys(expectedMetrics.contains(_)) + .mapValues(v => v.toLong).valuesIterator.max + assert(executionTimeMs == maxTimeMs) + } + } + + protected def getOperationMetrics(history: DataFrame): Map[String, String] = { + history.select("operationMetrics") + .take(1) + .head + .getMap(0) + .asInstanceOf[Map[String, String]] + } + + testWithFlag("basic case - Scala history with path-based table") { + val tempDir = Utils.createTempDir().toString + Seq(1, 2, 3).toDF().write.format("delta").save(tempDir) + Seq(4, 5, 6).toDF().write.format("delta").mode("overwrite").save(tempDir) + + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tempDir) + // Full History + checkAnswer( + deltaTable.history().select("operation", "operationParameters.mode"), + Seq(Row("WRITE", "Overwrite"), Row("WRITE", "ErrorIfExists"))) + + // History with limit + checkAnswer( + deltaTable.history(1).select("operation", "operationParameters.mode"), + Seq(Row("WRITE", "Overwrite"))) + } + + test("basic case - Scala history with name-based table") { + withTable("delta_test") { + Seq(1, 2, 3).toDF().write.format("delta").saveAsTable("delta_test") + Seq(4, 5, 6).toDF().write.format("delta").mode("overwrite").saveAsTable("delta_test") + + val deltaTable = io.delta.tables.DeltaTable.forName(spark, "delta_test") + // Full History + checkAnswer( + deltaTable.history().select("operation"), + Seq(Row("CREATE OR REPLACE TABLE AS SELECT"), Row("CREATE TABLE AS SELECT"))) + + // History with limit + checkAnswer( + deltaTable.history(1).select("operation"), + Seq(Row("CREATE OR REPLACE TABLE AS SELECT"))) + } + } + + testWithFlag("basic case - SQL describe history with path-based table") { + val tempDir = Utils.createTempDir().toString + Seq(1, 2, 3).toDF().write.format("delta").save(tempDir) + Seq(4, 5, 6).toDF().write.format("delta").mode("overwrite").save(tempDir) + + // With delta.`path` format + checkAnswer( + sql(s"DESCRIBE HISTORY delta.`$tempDir`").select("operation", "operationParameters.mode"), + Seq(Row("WRITE", "Overwrite"), Row("WRITE", "ErrorIfExists"))) + + checkAnswer( + sql(s"DESCRIBE HISTORY 
delta.`$tempDir` LIMIT 1") + .select("operation", "operationParameters.mode"), + Seq(Row("WRITE", "Overwrite"))) + + // With direct path format + checkAnswer( + sql(s"DESCRIBE HISTORY '$tempDir'").select("operation", "operationParameters.mode"), + Seq(Row("WRITE", "Overwrite"), Row("WRITE", "ErrorIfExists"))) + + checkAnswer( + sql(s"DESCRIBE HISTORY '$tempDir' LIMIT 1") + .select("operation", "operationParameters.mode"), + Seq(Row("WRITE", "Overwrite"))) + } + + testWithFlag("basic case - SQL describe history with name-based table") { + withTable("delta_test") { + Seq(1, 2, 3).toDF().write.format("delta").saveAsTable("delta_test") + Seq(4, 5, 6).toDF().write.format("delta").mode("overwrite").saveAsTable("delta_test") + + checkAnswer( + sql(s"DESCRIBE HISTORY delta_test").select("operation"), + Seq(Row("CREATE OR REPLACE TABLE AS SELECT"), Row("CREATE TABLE AS SELECT"))) + + checkAnswer( + sql(s"DESCRIBE HISTORY delta_test LIMIT 1").select("operation"), + Seq(Row("CREATE OR REPLACE TABLE AS SELECT"))) + } + } + + testWithFlag("describe history fails on views") { + val tempDir = Utils.createTempDir().toString + Seq(1, 2, 3).toDF().write.format("delta").save(tempDir) + val viewName = "delta_view" + withView(viewName) { + sql(s"create view $viewName as select * from delta.`$tempDir`") + + val e = intercept[AnalysisException] { + sql(s"DESCRIBE HISTORY $viewName").collect() + } + assert(e.getMessage.contains("spark_catalog.default.delta_view is a view. " + + "'DESCRIBE HISTORY' expects a table")) + } + } + + testWithTempView("describe history fails on temp views") { isSQLTempView => + withTable("t1") { + Seq(1, 2, 3).toDF().write.format("delta").saveAsTable("t1") + val viewName = "v" + createTempViewFromTable("t1", isSQLTempView) + + val e = intercept[AnalysisException] { + sql(s"DESCRIBE HISTORY $viewName").collect() + } + assert(e.getMessage.contains("v is a temp view. 
'DESCRIBE HISTORY' expects a table")) + } + } + + testWithFlag("operations - create table") { + withTable("delta_test") { + sql( + s"""create table delta_test ( + | a int, + | b string + |) + |using delta + |partitioned by (b) + |comment 'this is my table' + |tblproperties (delta.appendOnly=true) + """.stripMargin) + checkLastOperation( + spark.sessionState.catalog.getTableMetadata(TableIdentifier("delta_test")).location.getPath, + Seq( + "CREATE TABLE", + "true", + """["b"]""", + """{"delta.appendOnly":"true"}""", + "this is my table"), + Seq( + $"operation", $"operationParameters.isManaged", $"operationParameters.partitionBy", + $"operationParameters.properties", $"operationParameters.description")) + } + } + + testWithFlag("operations - ctas (saveAsTable)") { + val tempDir = Utils.createTempDir().toString + withTable("delta_test") { + Seq((1, "a"), (2, "3")).toDF("id", "data").write.format("delta") + .option("path", tempDir).saveAsTable("delta_test") + checkLastOperation( + tempDir, + Seq("CREATE TABLE AS SELECT", "false", """[]""", "{}", null), + Seq($"operation", $"operationParameters.isManaged", $"operationParameters.partitionBy", + $"operationParameters.properties", $"operationParameters.description")) + } + } + + testWithFlag("operations - ctas (sql)") { + val tempDir = Utils.createTempDir().toString + withTable("delta_test") { + sql( + s"""create table delta_test + |using delta + |location '$tempDir' + |tblproperties (delta.appendOnly=true) + |partitioned by (b) + |as select 1 as a, 'x' as b + """.stripMargin) + checkLastOperation( + tempDir, + Seq("CREATE TABLE AS SELECT", + "false", + """["b"]""", + """{"delta.appendOnly":"true"}""", null), + Seq($"operation", $"operationParameters.isManaged", $"operationParameters.partitionBy", + $"operationParameters.properties", $"operationParameters.description")) + } + val tempDir2 = Utils.createTempDir().toString + withTable("delta_test") { + sql( + s"""create table delta_test + |using delta + |location '$tempDir2' + |comment 'this is my table' + |as select 1 as a, 'x' as b + """.stripMargin) + // TODO(burak): Fix comments for CTAS + checkLastOperation( + tempDir2, + Seq("CREATE TABLE AS SELECT", + "false", """[]""", """{}""", "this is my table"), + Seq($"operation", $"operationParameters.isManaged", $"operationParameters.partitionBy", + $"operationParameters.properties", $"operationParameters.description")) + } + } + + + testWithFlag("operations - [un]set tbproperties") { + withTable("delta_test") { + sql("CREATE TABLE delta_test (v1 int, v2 string) USING delta") + + sql(""" + |ALTER TABLE delta_test + |SET TBLPROPERTIES ( + | 'delta.checkpointInterval' = '20', + | 'key' = 'value' + |)""".stripMargin) + checkLastOperation( + spark.sessionState.catalog.getTableMetadata(TableIdentifier("delta_test")).location.getPath, + Seq("SET TBLPROPERTIES", """{"delta.checkpointInterval":"20","key":"value"}"""), + Seq($"operation", $"operationParameters.properties")) + + sql("ALTER TABLE delta_test UNSET TBLPROPERTIES ('key')") + checkLastOperation( + spark.sessionState.catalog.getTableMetadata(TableIdentifier("delta_test")).location.getPath, + Seq("UNSET TBLPROPERTIES", """["key"]""", "true"), + Seq($"operation", $"operationParameters.properties", $"operationParameters.ifExists")) + } + } + + testWithFlag("operations - add columns") { + withTable("delta_test") { + sql("CREATE TABLE delta_test (v1 int, v2 string) USING delta") + + sql("ALTER TABLE delta_test ADD COLUMNS (v3 long, v4 int AFTER v1)") + val column3 = 
"""{"name":"v3","type":"long","nullable":true,"metadata":{}}""" + val column4 = """{"name":"v4","type":"integer","nullable":true,"metadata":{}}""" + checkLastOperation( + spark.sessionState.catalog.getTableMetadata(TableIdentifier("delta_test")).location.getPath, + Seq("ADD COLUMNS", + s"""[{"column":$column3},{"column":$column4,"position":"AFTER v1"}]"""), + Seq($"operation", $"operationParameters.columns")) + } + } + + testWithFlag("operations - change column") { + withTable("delta_test") { + sql("CREATE TABLE delta_test (v1 int, v2 string) USING delta") + + sql("ALTER TABLE delta_test CHANGE COLUMN v1 v1 integer AFTER v2") + checkLastOperation( + spark.sessionState.catalog.getTableMetadata(TableIdentifier("delta_test")).location.getPath, + Seq("CHANGE COLUMN", + s"""{"name":"v1","type":"integer","nullable":true,"metadata":{}}""", + "AFTER v2"), + Seq($"operation", $"operationParameters.column", $"operationParameters.position")) + } + } + + test("operations - upgrade protocol") { + val readerVersion = Action.supportedProtocolVersion().minReaderVersion + val writerVersion = Action.supportedProtocolVersion().minWriterVersion + withTempDir { path => + val log = DeltaLog.forTable(spark, path) + log.ensureLogDirectoryExist() + log.store.write( + FileNames.deltaFile(log.logPath, 0), + Iterator( + Metadata(schemaString = spark.range(1).schema.asNullable.json).json, + Protocol(1, 1).json), + overwrite = false, + log.newDeltaHadoopConf()) + log.update() + log.upgradeProtocol( + Action.supportedProtocolVersion(withAllFeatures = false) + .withFeature(TestLegacyReaderWriterFeature)) + // scalastyle:off line.size.limit + checkLastOperation( + path.toString, + Seq("UPGRADE PROTOCOL", + s"""{"minReaderVersion":$readerVersion,""" + + s""""minWriterVersion":$writerVersion,""" + + s""""readerFeatures":["${TestLegacyReaderWriterFeature.name}"],""" + + s""""writerFeatures":["${TestLegacyReaderWriterFeature.name}"]}"""), + Seq($"operation", $"operationParameters.newProtocol")) + // scalastyle:on line.size.limit + } + } + + testWithFlag("operations - insert append with partition columns") { + val tempDir = Utils.createTempDir().toString + Seq((1, "a"), (2, "3")).toDF("id", "data") + .write + .format("delta") + .mode("append") + .partitionBy("id") + .save(tempDir) + + checkLastOperation( + tempDir, + Seq("WRITE", "Append", """["id"]"""), + Seq($"operation", $"operationParameters.mode", $"operationParameters.partitionBy")) + } + + testWithFlag("operations - insert append without partition columns") { + val tempDir = Utils.createTempDir().toString + Seq((1, "a"), (2, "3")).toDF("id", "data").write.format("delta").save(tempDir) + checkLastOperation( + tempDir, + Seq("WRITE", "ErrorIfExists", """[]"""), + Seq($"operation", $"operationParameters.mode", $"operationParameters.partitionBy")) + } + + testWithFlag("operations - insert error if exists with partitions") { + val tempDir = Utils.createTempDir().toString + Seq((1, "a"), (2, "3")).toDF("id", "data") + .write + .format("delta") + .partitionBy("id") + .mode("errorIfExists") + .save(tempDir) + checkLastOperation( + tempDir, + Seq("WRITE", "ErrorIfExists", """["id"]"""), + Seq($"operation", $"operationParameters.mode", $"operationParameters.partitionBy")) + } + + testWithFlag("operations - insert error if exists without partitions") { + val tempDir = Utils.createTempDir().toString + Seq((1, "a"), (2, "3")).toDF("id", "data") + .write + .format("delta") + .mode("errorIfExists") + .save(tempDir) + checkLastOperation( + tempDir, + Seq("WRITE", "ErrorIfExists", 
"""[]"""), + Seq($"operation", $"operationParameters.mode", $"operationParameters.partitionBy")) + } + + test("operations - streaming append with transaction ids") { + + val tempDir = Utils.createTempDir().toString + val checkpoint = Utils.createTempDir().toString + + val data = MemoryStream[Int] + data.addData(1, 2, 3) + val stream = data.toDF() + .writeStream + .format("delta") + .option("checkpointLocation", checkpoint) + .start(tempDir) + stream.processAllAvailable() + stream.stop() + + checkLastOperation( + tempDir, + Seq("STREAMING UPDATE", "Append", "0"), + Seq($"operation", $"operationParameters.outputMode", $"operationParameters.epochId")) + } + + testWithFlag("operations - insert overwrite with predicate") { + val tempDir = Utils.createTempDir().toString + Seq((1, "a"), (2, "3")).toDF("id", "data").write.format("delta").partitionBy("id").save(tempDir) + + Seq((1, "b")).toDF("id", "data").write + .format("delta") + .mode("overwrite") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "id = 1") + .save(tempDir) + + checkLastOperation( + tempDir, + Seq("WRITE", "Overwrite", """id = 1"""), + Seq($"operation", $"operationParameters.mode", $"operationParameters.predicate")) + } + + testWithFlag("operations - delete with predicate") { + val tempDir = Utils.createTempDir().toString + Seq((1, "a"), (2, "3")).toDF("id", "data").write.format("delta").partitionBy("id").save(tempDir) + val deltaLog = DeltaLog.forTable(spark, tempDir) + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, deltaLog.dataPath.toString) + deltaTable.delete("id = 1") + + checkLastOperation( + tempDir, + Seq("DELETE", """["(id = 1)"]"""), + Seq($"operation", $"operationParameters.predicate"), removeExpressionId = true) + } + + testWithFlag("old and new writers") { + val tempDir = Utils.createTempDir().toString + Seq(1, 2, 3).toDF().write.format("delta").save(tempDir.toString) + + checkLastOperation(tempDir, Seq("WRITE", "ErrorIfExists")) + Seq(1, 2, 3).toDF().write.format("delta").mode("append").save(tempDir.toString) + + assert(spark.sql(s"DESCRIBE HISTORY delta.`$tempDir`").count() === 2) + checkLastOperation(tempDir, Seq("WRITE", "Append")) + } + + testWithFlag("order history by version") { + val tempDir = Utils.createTempDir().toString + + Seq(0).toDF().write.format("delta").save(tempDir) + Seq(1).toDF().write.format("delta").mode("overwrite").save(tempDir) + + Seq(2).toDF().write.format("delta").mode("append").save(tempDir) + Seq(3).toDF().write.format("delta").mode("overwrite").save(tempDir) + + Seq(4).toDF().write.format("delta").mode("overwrite").save(tempDir) + + + val ans = io.delta.tables.DeltaTable.forPath(spark, tempDir) + .history().as[DeltaHistory].collect() + assert(ans.map(_.version) === Seq(Some(4), Some(3), Some(2), Some(1), Some(0))) + + val ans2 = sql(s"DESCRIBE HISTORY delta.`$tempDir`").as[DeltaHistory].collect() + assert(ans2.map(_.version) === Seq(Some(4), Some(3), Some(2), Some(1), Some(0))) + } + + test("read version") { + val tempDir = Utils.createTempDir().toString + + Seq(0).toDF().write.format("delta").save(tempDir) // readVersion = None as first commit + Seq(1).toDF().write.format("delta").mode("overwrite").save(tempDir) // readVersion = Some(0) + + val log = DeltaLog.forTable(spark, tempDir) + val txn = log.startTransaction() // should read snapshot version 1 + + + Seq(2).toDF().write.format("delta").mode("append").save(tempDir) // readVersion = Some(1) + Seq(3).toDF().write.format("delta").mode("append").save(tempDir) // readVersion = Some(2) + + + txn.commit(Seq.empty, 
DeltaOperations.Truncate()) // readVersion = Some(1) + + Seq(5).toDF().write.format("delta").mode("append").save(tempDir) // readVersion = Some(4) + val ans = sql(s"DESCRIBE HISTORY delta.`$tempDir`").as[DeltaHistory].collect() + assert(ans.map(x => x.version.get -> x.readVersion) === + Seq(5 -> Some(4), 4 -> Some(1), 3 -> Some(2), 2 -> Some(1), 1 -> Some(0), 0 -> None)) + } + + testWithFlag("evolvability test") { + checkLastOperation( + evolvabilityResource, + evolvabilityLastOp, + Seq($"operation", $"operationParameters.mode", $"operationParameters.partitionBy")) + } + + test("using on non delta") { + withTempDir { basePath => + val e = intercept[AnalysisException] { + sql(s"describe history '$basePath'").collect() + } + assert(Seq("supported", "Delta").forall(e.getMessage.contains)) + } + } + + test("describe history a non-existent path and a non Delta table") { + def assertNotADeltaTableException(path: String): Unit = { + for (table <- Seq(s"'$path'", s"delta.`$path`")) { + val e = intercept[AnalysisException] { + sql(s"describe history $table").show() + } + Seq("is not a Delta table").foreach { msg => + assert(e.getMessage.contains(msg)) + } + } + } + withTempPath { tempDir => + assert(!tempDir.exists()) + assertNotADeltaTableException(tempDir.getCanonicalPath) + } + withTempPath { tempDir => + spark.range(1, 10).write.parquet(tempDir.getCanonicalPath) + assertNotADeltaTableException(tempDir.getCanonicalPath) + } + } + + test("operation metrics - write metrics") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + withTempDir { tempDir => + // create table + spark.range(100).repartition(5).write.format("delta").save(tempDir.getAbsolutePath) + val deltaTable = io.delta.tables.DeltaTable.forPath(tempDir.getAbsolutePath) + + // get last command history + val operationMetrics = getOperationMetrics(deltaTable.history(1)) + val expectedMetrics = Map( + "numFiles" -> "5", + "numOutputRows" -> "100" + ) + + // Check if operation metrics from history are accurate + checkOperationMetrics(expectedMetrics, operationMetrics, DeltaOperationMetrics.WRITE) + assert(operationMetrics("numOutputBytes").toLong > 0) + } + } + } + + test("operation metrics - merge") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + withTempDir { tempDir => + // create target + spark.range(100).write.format("delta").save(tempDir.getAbsolutePath) + val deltaTable = io.delta.tables.DeltaTable.forPath(tempDir.getAbsolutePath) + + // run merge + deltaTable.as("t") + .merge(spark.range(50, 150).toDF().as("s"), "s.id = t.id") + .whenMatched() + .updateAll() + .whenNotMatched() + .insertAll() + .execute() + + // Get operation metrics + val operationMetrics: Map[String, String] = getOperationMetrics(deltaTable.history(1)) + + val expectedMetrics = Map( + "numTargetRowsInserted" -> "50", + "numTargetRowsUpdated" -> "50", + "numTargetRowsDeleted" -> "0", + "numOutputRows" -> "100", + "numSourceRows" -> "100" + ) + val copiedRows = operationMetrics("numTargetRowsCopied").toInt + assert(0 <= copiedRows && copiedRows <= 50) + checkOperationMetrics( + expectedMetrics, + operationMetrics, + mergeMetricsSchema) + val expectedTimeMetrics = Set("executionTimeMs", "scanTimeMs", "rewriteTimeMs") + checkOperationTimeMetricsInvariant(expectedTimeMetrics, operationMetrics) + } + } + } + + test("operation metrics - streaming update") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + withTempDir { tempDir => + val memoryStream = MemoryStream[Long] + val df = 
memoryStream.toDF() + + val tbl = tempDir.getAbsolutePath + "tbl1" + + spark.range(10).write.format("delta").save(tbl) + // ensure that you are writing out a single file per batch + val q = df.coalesce(1) + .withColumnRenamed("value", "id") + .writeStream + .format("delta") + .option("checkpointLocation", tempDir + "checkpoint") + .start(tbl) + memoryStream.addData(1) + q.processAllAvailable() + val deltaTable = io.delta.tables.DeltaTable.forPath(tbl) + var operationMetrics: Map[String, String] = getOperationMetrics(deltaTable.history(1)) + val expectedMetrics = Map( + "numAddedFiles" -> "1", + "numRemovedFiles" -> "0", + "numOutputRows" -> "1" + ) + checkOperationMetrics( + expectedMetrics, operationMetrics, DeltaOperationMetrics.STREAMING_UPDATE) + + // check if second batch also returns correct metrics. + memoryStream.addData(1, 2, 3) + q.processAllAvailable() + operationMetrics = getOperationMetrics(deltaTable.history(1)) + val expectedMetrics2 = Map( + "numAddedFiles" -> "1", + "numRemovedFiles" -> "0", + "numOutputRows" -> "3" + ) + checkOperationMetrics( + expectedMetrics2, operationMetrics, DeltaOperationMetrics.STREAMING_UPDATE) + assert(operationMetrics("numOutputBytes").toLong > 0) + q.stop() + } + } + } + + test("operation metrics - streaming update - complete mode") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + withTempDir { tempDir => + val memoryStream = MemoryStream[Long] + val df = memoryStream.toDF() + + val tbl = tempDir.getAbsolutePath + "tbl1" + + Seq(1L -> 1L, 2L -> 2L).toDF("value", "count") + .coalesce(1) + .write + .format("delta") + .save(tbl) + + // ensure that you are writing out a single file per batch + val q = df.groupBy("value").count().coalesce(1) + .writeStream + .format("delta") + .outputMode("complete") + .option("checkpointLocation", tempDir + "checkpoint") + .start(tbl) + memoryStream.addData(1) + q.processAllAvailable() + + val deltaTable = io.delta.tables.DeltaTable.forPath(tbl) + val operationMetrics = getOperationMetrics(deltaTable.history(1)) + val expectedMetrics = Map( + "numAddedFiles" -> "1", + "numRemovedFiles" -> "1", + "numOutputRows" -> "1" + ) + checkOperationMetrics( + expectedMetrics, operationMetrics, DeltaOperationMetrics.STREAMING_UPDATE) + } + } + } + + def getLastCommitNumAddedAndRemovedBytes(deltaLog: DeltaLog): (Long, Long) = { + val changes = deltaLog.getChanges(deltaLog.update().version).flatMap(_._2).toSeq + val addedBytes = changes.collect { case a: AddFile => a.size }.sum + val removedBytes = changes.collect { case r: RemoveFile => r.getFileSize }.sum + + (addedBytes, removedBytes) + } + + def metricsUpdateTest : Unit = withTempDir { tempDir => + // Create the initial table as a single file + Seq(1, 2, 5, 11, 21, 3, 4, 6, 9, 7, 8, 0).toDF("key") + .withColumn("value", 'key % 2) + .write + .format("delta") + .save(tempDir.getAbsolutePath) + + // append additional data with the same number range to the table. + // This data is saved as a separate file as well + Seq(15, 16, 17).toDF("key") + .withColumn("value", 'key % 2) + .repartition(1) + .write + .format("delta") + .mode("append") + .save(tempDir.getAbsolutePath) + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + deltaLog.snapshot.numOfFiles + + // update the table + deltaTable.update(col("key") === lit("16"), Map("value" -> lit("1"))) + // The file from the append gets updated but the file from the initial table gets scanned + // as well. 
We want to make sure numCopied rows is calculated from written files and not + // scanned files[SC-33980] + + // get operation metrics + val operationMetrics = getOperationMetrics(deltaTable.history(1)) + val (addedBytes, removedBytes) = getLastCommitNumAddedAndRemovedBytes(deltaLog) + val expectedMetrics = Map( + "numAddedFiles" -> "1", + "numRemovedFiles" -> "1", + "numUpdatedRows" -> "1", + "numCopiedRows" -> "2", // There should be only three rows in total(updated + copied) + "numAddedBytes" -> addedBytes.toString, + "numRemovedBytes" -> removedBytes.toString + ) + checkOperationMetrics( + expectedMetrics, + operationMetrics, + updateMetricsSchema) + val expectedTimeMetrics = Set("executionTimeMs", "scanTimeMs", "rewriteTimeMs") + checkOperationTimeMetricsInvariant(expectedTimeMetrics, operationMetrics) + } + + test("operation metrics - update") { + withSQLConf((DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true")) { + metricsUpdateTest + } + } + + def metricsUpdatePartitionedColumnTest : Unit = { + val numRows = 100 + val numPartitions = 5 + withTempDir { tempDir => + spark.range(numRows) + .withColumn("c1", 'id + 1) + .withColumn("c2", 'id % numPartitions) + .write + .partitionBy("c2") + .format("delta") + .save(tempDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + val numFilesBeforeUpdate = deltaLog.snapshot.numOfFiles + deltaTable.update(col("c2") < 1, Map("c2" -> lit("1"))) + val numFilesAfterUpdate = deltaLog.snapshot.numOfFiles + + val operationMetrics = getOperationMetrics(deltaTable.history(1)) + val newFiles = numFilesAfterUpdate - numFilesBeforeUpdate + val oldFiles = numFilesBeforeUpdate / numPartitions + val addedFiles = newFiles + oldFiles + val (addedBytes, removedBytes) = getLastCommitNumAddedAndRemovedBytes(deltaLog) + val expectedMetrics = Map( + "numUpdatedRows" -> (numRows / numPartitions).toString, + "numCopiedRows" -> "0", + "numAddedFiles" -> addedFiles.toString, + "numRemovedFiles" -> (numFilesBeforeUpdate / numPartitions).toString, + "numAddedBytes" -> addedBytes.toString, + "numRemovedBytes" -> removedBytes.toString + ) + checkOperationMetrics( + expectedMetrics, + operationMetrics, + updateMetricsSchema) + } + } + + test("operation metrics - update - partitioned column") { + withSQLConf((DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true")) { + metricsUpdatePartitionedColumnTest + } + } + + test("operation metrics - delete") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + withTempDir { tempDir => + // Create the initial table as a single file + Seq(1, 2, 5, 11, 21, 3, 4, 6, 9, 7, 8, 0).toDF("key") + .withColumn("value", 'key % 2) + .repartition(1) + .write + .format("delta") + .save(tempDir.getAbsolutePath) + + // Append to the initial table additional data in the same numerical range + Seq(15, 16, 17).toDF("key") + .withColumn("value", 'key % 2) + .repartition(1) + .write + .format("delta") + .mode("append") + .save(tempDir.getAbsolutePath) + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + deltaLog.snapshot.numOfFiles + + // delete the table + deltaTable.delete(col("key") === lit("16")) + // The file from the append gets deleted but the file from the initial table gets scanned + // as well. 
We want to make sure numCopied rows is calculated from the written files instead + // of the scanned files.[SC-33980] + + // get operation metrics + val operationMetrics = getOperationMetrics(deltaTable.history(1)) + + // get expected byte level metrics + val (numAddedBytesExpected, numRemovedBytesExpected) = + getLastCommitNumAddedAndRemovedBytes(deltaLog) + val expectedMetrics = Map( + "numAddedFiles" -> "1", + "numAddedBytes" -> numAddedBytesExpected.toString, + "numRemovedFiles" -> "1", + "numRemovedBytes" -> numRemovedBytesExpected.toString, + "numDeletedRows" -> "1", + "numCopiedRows" -> "2" // There should be only three rows in total(deleted + copied) + ) + checkOperationMetrics( + expectedMetrics, + operationMetrics, + deleteMetricsSchema(partitioned = false)) + val expectedTimeMetrics = Set("executionTimeMs", "scanTimeMs", "rewriteTimeMs") + checkOperationTimeMetricsInvariant(expectedTimeMetrics, operationMetrics) + } + } + } + + test("operation metrics - delete - partition column") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + val numRows = 100 + val numPartitions = 5 + withTempDir { tempDir => + spark.range(numRows) + .withColumn("c1", 'id % numPartitions) + .write + .format("delta") + .partitionBy("c1") + .save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + val numFilesBeforeDelete = deltaLog.snapshot.numOfFiles + val deltaTable = io.delta.tables.DeltaTable.forPath(tempDir.getAbsolutePath) + + deltaTable.delete("c1 = 1") + val operationMetrics = getOperationMetrics(deltaTable.history(1)) + // get expected byte level metrics + val (numAddedBytesExpected, numRemovedBytesExpected) = + getLastCommitNumAddedAndRemovedBytes(deltaLog) + val expectedMetrics = Map[String, String]( + "numRemovedFiles" -> (numFilesBeforeDelete / numPartitions).toString, + "numAddedBytes" -> numAddedBytesExpected.toString, + "numRemovedBytes" -> numRemovedBytesExpected.toString + ) + // row level metrics are not collected for deletes with parition columns + checkOperationMetrics( + expectedMetrics, + operationMetrics, + deleteMetricsSchema(partitioned = true)) + val expectedTimeMetrics = Set("executionTimeMs", "scanTimeMs", "rewriteTimeMs") + checkOperationTimeMetricsInvariant(expectedTimeMetrics, operationMetrics) + } + } + } + + test("operation metrics - delete - full") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + val numRows = 100 + val numPartitions = 5 + withTempDir { tempDir => + spark.range(numRows) + .withColumn("c1", 'id % numPartitions) + .write + .format("delta") + .partitionBy("c1") + .save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + val numFilesBeforeDelete = deltaLog.snapshot.numOfFiles + val deltaTable = io.delta.tables.DeltaTable.forPath(tempDir.getAbsolutePath) + + deltaTable.delete() + + // get expected byte level metrics + val (numAddedBytesExpected, numRemovedBytesExpected) = + getLastCommitNumAddedAndRemovedBytes(deltaLog) + val operationMetrics = getOperationMetrics(deltaTable.history(1)) + val expectedMetrics = Map[String, String]( + "numRemovedFiles" -> numFilesBeforeDelete.toString, + "numAddedBytes" -> numAddedBytesExpected.toString, + "numRemovedBytes" -> numRemovedBytesExpected.toString + ) + checkOperationMetrics( + expectedMetrics, + operationMetrics, + deleteMetricsSchema(partitioned = true)) + val expectedTimeMetrics = Set("executionTimeMs", "scanTimeMs", "rewriteTimeMs") + 
checkOperationTimeMetricsInvariant(expectedTimeMetrics, operationMetrics) + } + } + } + + test("operation metrics - convert to delta") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + val numPartitions = 5 + withTempDir { tempDir => + // Create a parquet table + val dir = tempDir.getAbsolutePath() + spark.range(10) + .withColumn("col2", 'id % numPartitions) + .write + .format("parquet") + .mode("overwrite") + .partitionBy("col2") + .save(dir) + + // convert to delta + val deltaTable = io.delta.tables.DeltaTable.convertToDelta(spark, s"parquet.`$dir`", + "col2 long") + val deltaLog = DeltaLog.forTable(spark, dir) + val expectedMetrics = Map( + "numConvertedFiles" -> deltaLog.snapshot.numOfFiles.toString + ) + val operationMetrics = getOperationMetrics(deltaTable.history(1)) + checkOperationMetrics(expectedMetrics, operationMetrics, DeltaOperationMetrics.CONVERT) + } + } + } + + test("sort and collect the DESCRIBE HISTORY result") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + Seq(1, 2, 3).toDF().write.format("delta").save(path) + val rows = sql(s"DESCRIBE HISTORY delta.`$path`") + .orderBy("version") + .collect() + assert(rows.map(_.getAs[Long]("version")).toList == 0L :: Nil) + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { + val rows = sql(s"DESCRIBE HISTORY delta.`$path`") + .filter("version >= 0") + .orderBy("version") + .collect() + assert(rows.map(_.getAs[Long]("version")).toList == 0L :: Nil) + } + } + } + + test("operation metrics - create table") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + val tblName = "tblName" + val numRows = 10 + withTable(tblName) { + sql(s"CREATE TABLE $tblName USING DELTA SELECT * from range($numRows)") + val deltaTable = io.delta.tables.DeltaTable.forName(tblName) + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tblName)) + val numFiles = deltaLog.snapshot.numOfFiles + val expectedMetrics = Map( + "numFiles" -> numFiles.toString, + "numOutputRows" -> numRows.toString + ) + val operationMetrics = getOperationMetrics(deltaTable.history(1)) + assert(operationMetrics("numOutputBytes").toLong > 0) + checkOperationMetrics(expectedMetrics, operationMetrics, DeltaOperationMetrics.WRITE) + } + } + } + + test("operation metrics - create table - without data") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + val tblName = "tbl" + withTable(tblName) { + sql(s"CREATE TABLE $tblName(id bigint) USING DELTA") + val deltaTable = io.delta.tables.DeltaTable.forName(tblName) + val operationMetrics = getOperationMetrics(deltaTable.history(1)) + assert(operationMetrics === Map.empty) + } + } + } + + def testReplaceWhere(testName: String)(f: (Boolean, Boolean) => Unit): Unit = { + Seq(true, false).foreach { enableCDF => + Seq(true, false).foreach { enableStats => + test(testName + s"enableCDF=${enableCDF} - enableStats ${enableStats}") { + withSQLConf( + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> enableCDF.toString, + DeltaSQLConf.DELTA_COLLECT_STATS.key ->enableStats.toString, + DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + f(enableCDF, enableStats) + } + } + } + } + } + + testReplaceWhere("replaceWhere on data column") { (enableCDF, enableStats) => + withTable("tbl") { + // create a table with one row + spark.range(10) + .repartition(1) // 1 file table + .withColumn("b", lit(1)) + .write + .format("delta") + .saveAsTable("tbl") + val deltaTable = io.delta.tables.DeltaTable.forName("tbl") + + val deltaLog = 
DeltaLog.forTable(spark, TableIdentifier("tbl")) + + // replace where + spark.range(20) + .withColumn("b", lit(1)) + .repartition(1) // write 1 file + .write + .format("delta") + .option("replaceWhere", "b = 1") + .mode("overwrite") + .saveAsTable("tbl") + + val numWrittenFiles = deltaLog.getChanges(1).flatMap { + case (a, v) => v + }.filter(_.isInstanceOf[AddFile]) + .toSeq + .size + + val numAddedChangeFiles = if (enableCDF) { + deltaLog.getChanges(1).flatMap { + case (a, v) => v + }.filter(_.isInstanceOf[AddCDCFile]) + .toSeq + .size + } else { + 0 + } + + // get expected byte level metrics + val (numAddedBytesExpected, numRemovedBytesExpected) = + getLastCommitNumAddedAndRemovedBytes(deltaLog) + + if (enableStats) { + checkOperationMetrics( + Map( + "numFiles" -> (numWrittenFiles).toString, + "numOutputRows" -> "20", + "numCopiedRows" -> "0", + "numOutputBytes" -> numAddedBytesExpected.toString, + "numRemovedBytes" -> numRemovedBytesExpected.toString, + "numAddedChangeFiles" -> numAddedChangeFiles.toString, + "numDeletedRows" -> "10", + "numRemovedFiles" -> "1" + ), + getOperationMetrics(deltaTable.history(1)), + replaceWhereMetricsSchema + ) + } else { + checkOperationMetrics( + Map( + "numFiles" -> (numWrittenFiles).toString, + "numOutputBytes" -> numAddedBytesExpected.toString, + "numRemovedBytes" -> numRemovedBytesExpected.toString, + "numAddedChangeFiles" -> numAddedChangeFiles.toString, + "numRemovedFiles" -> "1" + ), + getOperationMetrics(deltaTable.history(1)), + replaceWhereMetricsSchema.filter(!_.contains("Rows")) + ) + } + } + } + + testReplaceWhere(s"replaceWhere on data column - partial rewrite") { (enableCDF, enableStats) => + // Whats different from the above test + // replace where has a append + delete. + // make the delete also write new files + withTable("tbl") { + // create a table with one row + spark.range(10) + .repartition(1) // 1 file table + .withColumn("b", 'id % 2) // 1 file contains 2 values + .write + .format("delta") + .saveAsTable("tbl") + val deltaTable = io.delta.tables.DeltaTable.forName("tbl") + + // replace where + spark.range(20) + .withColumn("b", lit(1L)) + .repartition(3) // write 3 files + .write + .format("delta") + .option("replaceWhere", "b = 1") // partial match + .mode("overwrite") + .saveAsTable("tbl") + + val deltaLog = DeltaLog.forTable(spark, TableIdentifier("tbl")) + val numAddedChangeFiles = if (enableCDF) { + deltaLog.getChanges(1).flatMap { + case (a, v) => v + }.filter(_.isInstanceOf[AddCDCFile]) + .toSeq + .size + } else { + 0 + } + + // get expected byte level metrics + val (numAddedBytesExpected, numRemovedBytesExpected) = + getLastCommitNumAddedAndRemovedBytes(deltaLog) + + if (enableStats) { + checkOperationMetrics( + Map( + "numFiles" -> "4", // 3(append) + 1(delete) + "numOutputRows" -> "25", // 20 + 5 + "numCopiedRows" -> "5", + "numAddedChangeFiles" -> numAddedChangeFiles.toString, + "numDeletedRows" -> "5", + "numOutputBytes" -> numAddedBytesExpected.toString, + "numRemovedBytes" -> numRemovedBytesExpected.toString, + "numRemovedFiles" -> "1" + ), + getOperationMetrics(deltaTable.history(1)), + replaceWhereMetricsSchema + ) + } else { + checkOperationMetrics( + Map( + "numFiles" -> "4", // 3(append) + 1(delete) + "numAddedChangeFiles" -> numAddedChangeFiles.toString, + "numOutputBytes" -> numAddedBytesExpected.toString, + "numRemovedBytes" -> numRemovedBytesExpected.toString, + "numRemovedFiles" -> "1" + ), + getOperationMetrics(deltaTable.history(1)), + replaceWhereMetricsSchema.filter(!_.contains("Rows")) + ) + + } + } 
+ } + + Seq("true", "false").foreach { enableArbitraryRW => + testReplaceWhere(s"replaceWhere on partition column " + + s"- arbitraryReplaceWhere=${enableArbitraryRW}") { (enableCDF, enableStats) => + withSQLConf(DeltaSQLConf.REPLACEWHERE_DATACOLUMNS_ENABLED.key -> enableArbitraryRW) { + withTable("tbl") { + // create a table with one row + spark.range(10) + .repartition(1) // 1 file table + .withColumn("b", lit(1)) + .write + .format("delta") + .partitionBy("b") + .saveAsTable("tbl") + val deltaTable = io.delta.tables.DeltaTable.forName("tbl") + + // replace where + spark.range(20) + .repartition(2) // write 2 files + .withColumn("b", lit(1)) + .write + .format("delta") + .option("replaceWhere", "b = 1") // partial match + .mode("overwrite") + .saveAsTable("tbl") + + val deltaLog = DeltaLog.forTable(spark, TableIdentifier("tbl")) + // get expected byte level metrics + val (numAddedBytesExpected, numRemovedBytesExpected) = + getLastCommitNumAddedAndRemovedBytes(deltaLog) + + // metrics are a subset here as it would involve a partition delete + if (enableArbitraryRW.toBoolean) { + if (enableStats) { + checkOperationMetrics( + Map( + "numFiles" -> "2", + "numOutputRows" -> "20", + "numAddedChangeFiles" -> "0", + "numRemovedFiles" -> "1", + "numCopiedRows" -> "0", + "numOutputBytes" -> numAddedBytesExpected.toString, + "numRemovedBytes" -> numRemovedBytesExpected.toString, + "numDeletedRows" -> "10" + ), + getOperationMetrics(deltaTable.history(1)), + replaceWhereMetricsSchema + ) + } else { + checkOperationMetrics( + Map( + "numFiles" -> "2", + "numAddedChangeFiles" -> "0", + "numOutputBytes" -> numAddedBytesExpected.toString, + "numRemovedBytes" -> numRemovedBytesExpected.toString, + "numRemovedFiles" -> "1" + ), + getOperationMetrics(deltaTable.history(1)), + replaceWhereMetricsSchema.filter(!_.contains("Rows")) + ) + + } + } else { + // legacy replace where mentioned output rows regardless of stats or not. 
+ checkOperationMetrics( + Map( + "numFiles" -> "2", + "numOutputRows" -> "20", + "numOutputBytes" -> numAddedBytesExpected.toString + ), + getOperationMetrics(deltaTable.history(1)), + DeltaOperationMetrics.WRITE + ) + } + } + } + } + } + + test("replaceWhere metrics turned off - reverts to old behavior") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true", + DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false", + DeltaSQLConf.REPLACEWHERE_METRICS_ENABLED.key -> "false") { + withTable("tbl") { + // create a table with one row + spark.range(10) + .repartition(1) // 1 file table + .withColumn("b", lit(1)) + .write + .format("delta") + .partitionBy("b") + .saveAsTable("tbl") + val deltaTable = io.delta.tables.DeltaTable.forName("tbl") + + // replace where + spark.range(20) + .repartition(2) // write 2 files + .withColumn("b", lit(1)) + .write + .format("delta") + .option("replaceWhere", "b = 1") // partial match + .mode("overwrite") + .saveAsTable("tbl") + + checkOperationMetrics( + Map( + "numFiles" -> "2", + "numOutputRows" -> "20" + ), + getOperationMetrics(deltaTable.history(1)), + DeltaOperationMetrics.WRITE + ) + } + } + } + + test("operation metrics - create table - v2") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + val tblName = "tblName" + withTable(tblName) { + // Create + spark.range(100).writeTo(tblName).using("delta").create() + val deltaTable = io.delta.tables.DeltaTable.forName(spark, tblName) + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tblName)) + var operationMetrics = getOperationMetrics(deltaTable.history(1)) + var expectedMetrics = Map( + "numFiles" -> deltaLog.snapshot.numOfFiles.toString, + "numOutputRows" -> "100" + ) + assert(operationMetrics("numOutputBytes").toLong > 0) + checkOperationMetrics(expectedMetrics, operationMetrics, DeltaOperationMetrics.WRITE) + + // replace + spark.range(50).writeTo(tblName).using("delta").replace() + deltaLog.update() + expectedMetrics = Map( + "numFiles" -> deltaLog.snapshot.numOfFiles.toString, + "numOutputRows" -> "50" + ) + operationMetrics = getOperationMetrics(deltaTable.history(1)) + assert(operationMetrics("numOutputBytes").toLong > 0) + checkOperationMetrics(expectedMetrics, operationMetrics, DeltaOperationMetrics.WRITE) + + // create or replace + spark.range(70).writeTo(tblName).using("delta").createOrReplace() + deltaLog.update() + expectedMetrics = Map( + "numFiles" -> deltaLog.snapshot.numOfFiles.toString, + "numOutputRows" -> "70" + ) + operationMetrics = getOperationMetrics(deltaTable.history(1)) + assert(operationMetrics("numOutputBytes").toLong > 0) + checkOperationMetrics(expectedMetrics, operationMetrics, DeltaOperationMetrics.WRITE) + } + } + } + + test("operation metrics for RESTORE") { + withTempDir { dir => + // version 0 + spark.range(5).write.format("delta").save(dir.getCanonicalPath) + + val deltaLog = DeltaLog.forTable(spark, dir.getAbsolutePath) + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, dir.getAbsolutePath) + val numFilesV0 = deltaLog.snapshot.numOfFiles + val sizeBytesV0 = deltaLog.snapshot.sizeInBytes + + // version 1 + spark.range(10, 12).write.format("delta").mode("append").save(dir.getCanonicalPath) + + val numFilesV1 = deltaLog.snapshot.numOfFiles + val sizeBytesV1 = deltaLog.snapshot.sizeInBytes + + // version 2 - RESTORE table to version 0 + sql(s"RESTORE TABLE delta.`${dir.getAbsolutePath}` VERSION AS OF 0") + + val expectedMetrics = Map( + "tableSizeAfterRestore" -> sizeBytesV0, + "numOfFilesAfterRestore" -> 
numFilesV0, + "numRemovedFiles" -> (numFilesV1 - numFilesV0), + "numRestoredFiles" -> 0, + "removedFilesSize" -> (sizeBytesV1 - sizeBytesV0), + "restoredFilesSize" -> 0).mapValues(_.toString).toMap + + val operationMetrics = getOperationMetrics(deltaTable.history(1)) + + checkOperationMetrics( + expectedMetrics, operationMetrics, DeltaOperationMetrics.RESTORE) + + // check operation parameters + checkLastOperation( + dir.getAbsolutePath, + Seq("RESTORE", "0"), + Seq($"operation", $"operationParameters.version")) + + // we can check metrics for a case where we restore files as well. + // version 3 + spark.range(10, 12).write.format("delta").mode("append").save(dir.getCanonicalPath) + + // version 4 - delete all rows + sql(s"DELETE FROM delta.`${dir.getAbsolutePath}`") + + val numFilesV4 = deltaLog.update().numOfFiles + val sizeBytesV4 = deltaLog.update().sizeInBytes + + // version 5 - RESTORE table to version 3 + sql(s"RESTORE TABLE delta.`${dir.getAbsolutePath}` VERSION AS OF 3") + + val numFilesV5 = deltaLog.update().numOfFiles + val sizeBytesV5 = deltaLog.update().sizeInBytes + + val expectedMetrics2 = Map( + "tableSizeAfterRestore" -> sizeBytesV5, + "numOfFilesAfterRestore" -> numFilesV5, + "numRemovedFiles" -> 0, + "numRestoredFiles" -> (numFilesV5 - numFilesV4), + "removedFilesSize" -> 0, + "restoredFilesSize" -> (sizeBytesV5 - sizeBytesV4)).mapValues(_.toString).toMap + + val operationMetrics2 = getOperationMetrics(deltaTable.history(1)) + + checkOperationMetrics( + expectedMetrics2, operationMetrics2, DeltaOperationMetrics.RESTORE) + } + } + + + test("test output schema of describe delta history command") { + val tblName = "tbl" + withTable(tblName) { + sql(s"CREATE TABLE $tblName(id bigint) USING DELTA") + val deltaTable = io.delta.tables.DeltaTable.forName(tblName) + val expectedSchema = StructType(Seq( + StructField("version", LongType, nullable = true), + StructField("timestamp", TimestampType, nullable = true), + StructField("userId", StringType, nullable = true), + StructField("userName", StringType, nullable = true), + StructField("operation", StringType, nullable = true), + StructField("operationParameters", + MapType(StringType, StringType, valueContainsNull = true), nullable = true), + StructField("job", + StructType(Seq( + StructField("jobId", StringType, nullable = true), + StructField("jobName", StringType, nullable = true), + StructField("jobRunId", StringType, nullable = true), + StructField("runId", StringType, nullable = true), + StructField("jobOwnerId", StringType, nullable = true), + StructField("triggerType", StringType, nullable = true))), + nullable = true), + StructField("notebook", + StructType(Seq(StructField("notebookId", StringType, nullable = true))), nullable = true), + StructField("clusterId", StringType, nullable = true), + StructField("readVersion", LongType, nullable = true), + StructField("isolationLevel", StringType, nullable = true), + StructField("isBlindAppend", BooleanType, nullable = true), + StructField("operationMetrics", + MapType(StringType, StringType, valueContainsNull = true), nullable = true), + StructField("userMetadata", StringType, nullable = true), + StructField("engineInfo", StringType, nullable = true))) + + // Test schema from [[io.delta.tables.DeltaTable.history]] api + val df1 = deltaTable.history(1) + assert(df1.schema == expectedSchema) + + // Test schema from SQL api + val df2 = spark.sql(s"DESCRIBE HISTORY $tblName LIMIT 1") + assert(df2.schema == expectedSchema) + } + } +} + +class DescribeDeltaHistorySuite + extends 
DescribeDeltaHistorySuiteBase with DeltaSQLCommandTest diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DomainMetadataSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DomainMetadataSuite.scala new file mode 100644 index 00000000000..e431e080812 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DomainMetadataSuite.scala @@ -0,0 +1,247 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.nio.charset.StandardCharsets.UTF_8 +import java.util.concurrent.ExecutionException + +import scala.util.{Failure, Success, Try} + +import org.apache.spark.sql.delta.DeltaOperations.{ManualUpdate, Truncate} +import org.apache.spark.sql.delta.actions.{DomainMetadata, TableFeatureProtocolUtils} +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} +import org.junit.Assert._ + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.execution.streaming.CheckpointFileManager +import org.apache.spark.sql.test.SharedSparkSession + +class DomainMetadataSuite + extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + import testImplicits._ + + private def sortByDomain(domainMetadata: Seq[DomainMetadata]): Seq[DomainMetadata] = + domainMetadata.sortBy(_.domain) + + /** + * A helper to validate the [[DomainMetadata]] actions can be retained during the delta state + * reconstruction. + * + * @param doCheckpoint: Explicitly create a delta log checkpoint if marked as true. + * @param doChecksum: Disable writting checksum file if marked as false. 
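+ * Note: the helper commits two [[DomainMetadata]] actions and then verifies they survive
+ * snapshot reconstruction, optionally after writing a checkpoint.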
+ */ + private def validateStateReconstructionHelper( + doCheckpoint: Boolean, + doChecksum: Boolean): Unit = { + val table = "testTable" + withTable(table) { + withSQLConf( + DeltaSQLConf.DELTA_WRITE_CHECKSUM_ENABLED.key -> doChecksum.toString) { + sql( + s""" + | CREATE TABLE $table(id int) USING delta + | tblproperties + | ('${TableFeatureProtocolUtils.propertyKey(DomainMetadataTableFeature)}' = 'enabled') + |""".stripMargin) + (1 to 100).toDF("id").write.format("delta").mode("append").saveAsTable(table) + + var deltaTable = DeltaTableV2(spark, TableIdentifier(table)) + def deltaLog = deltaTable.deltaLog + assert(deltaTable.snapshot.domainMetadata.isEmpty) + + val domainMetadata = DomainMetadata("testDomain1", "", false) :: + DomainMetadata("testDomain2", "{\"key1\":\"value1\"", false) :: Nil + deltaTable.startTransactionWithInitialSnapshot().commit(domainMetadata, Truncate()) + assertEquals(sortByDomain(domainMetadata), sortByDomain(deltaLog.update().domainMetadata)) + assert(deltaLog.update().logSegment.checkpointProvider.version === -1) + + if (doCheckpoint) { + deltaLog.checkpoint(deltaLog.unsafeVolatileSnapshot) + // Clear the DeltaLog cache to force creating a new DeltaLog instance which will build + // the Snapshot from the checkpoint file. + DeltaLog.clearCache() + deltaTable = DeltaTableV2(spark, TableIdentifier(table)) + assert(!deltaTable.snapshot.logSegment.checkpointProvider.isEmpty) + + assertEquals( + sortByDomain(domainMetadata), + sortByDomain(deltaTable.snapshot.domainMetadata)) + } + + } + } + } + + // A helper to validate [[DomainMetadata]] actions can be deleted. + private def validateDeletionHelper(doCheckpoint: Boolean, doChecksum: Boolean): Unit = { + val table = "testTable" + withTable(table) { + withSQLConf( + DeltaSQLConf.DELTA_WRITE_CHECKSUM_ENABLED.key -> doChecksum.toString + ) { + sql( + s""" + | CREATE TABLE $table(id int) USING delta + | tblproperties + | ('${TableFeatureProtocolUtils.propertyKey(DomainMetadataTableFeature)}' = 'enabled') + |""".stripMargin) + (1 to 100).toDF("id").write.format("delta").mode("append").saveAsTable(table) + + DeltaLog.clearCache() + val deltaTable = DeltaTableV2(spark, TableIdentifier(table)) + val deltaLog = deltaTable.deltaLog + assert(deltaTable.snapshot.domainMetadata.isEmpty) + + val domainMetadata = DomainMetadata("testDomain1", "", false) :: + DomainMetadata("testDomain2", "{\"key1\":\"value1\"}", false) :: Nil + + deltaTable.startTransactionWithInitialSnapshot().commit(domainMetadata, Truncate()) + assertEquals(sortByDomain(domainMetadata), sortByDomain(deltaLog.update().domainMetadata)) + assert(deltaLog.update().logSegment.checkpointProvider.version === -1) + + // Delete testDomain1. + deltaTable.startTransaction().commit( + DomainMetadata("testDomain1", "", true) :: Nil, Truncate()) + val domainMetadatasAfterDeletion = DomainMetadata( + "testDomain2", + "{\"key1\":\"value1\"}", false) :: Nil + assertEquals( + sortByDomain(domainMetadatasAfterDeletion), + sortByDomain(deltaLog.update().domainMetadata)) + + // Create a new commit and validate the incrementally built snapshot state respects the + // DomainMetadata deletion. 
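+ // An empty ManualUpdate commit triggers an incremental snapshot update; the surviving
+ // domain metadata should be carried over unchanged.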
+ deltaTable.startTransaction().commit(Nil, ManualUpdate) + var snapshot = deltaLog.update() + assertEquals(sortByDomain(domainMetadatasAfterDeletion), snapshot.domainMetadata) + if (doCheckpoint) { + deltaLog.checkpoint(snapshot) + assertEquals( + sortByDomain(domainMetadatasAfterDeletion), + deltaLog.update().domainMetadata) + } + + // force state reconstruction and validate it respects the DomainMetadata retention. + DeltaLog.clearCache() + snapshot = DeltaLog.forTableWithSnapshot(spark, TableIdentifier(table))._2 + assertEquals(sortByDomain(domainMetadatasAfterDeletion), snapshot.domainMetadata) + } + } + } + + + test("DomainMetadata action survives state reconstruction [w/o checkpoint, w/o checksum]") { + validateStateReconstructionHelper(doCheckpoint = false, doChecksum = false) + } + + test("DomainMetadata action survives state reconstruction [w/ checkpoint, w/ checksum]") { + validateStateReconstructionHelper(doCheckpoint = true, doChecksum = true) + } + + test("DomainMetadata action survives state reconstruction [w/ checkpoint, w/o checksum]") { + validateStateReconstructionHelper(doCheckpoint = true, doChecksum = false) + } + + test("DomainMetadata action survives state reconstruction [w/o checkpoint, w/ checksum]") { + validateStateReconstructionHelper(doCheckpoint = false, doChecksum = true) + } + + test("DomainMetadata deletion [w/o checkpoint, w/o checksum]") { + validateDeletionHelper(doCheckpoint = false, doChecksum = false) + } + + test("DomainMetadata deletion [w/ checkpoint, w/o checksum]") { + validateDeletionHelper(doCheckpoint = true, doChecksum = false) + } + + test("DomainMetadata deletion [w/o checkpoint, w/ checksum]") { + validateDeletionHelper(doCheckpoint = false, doChecksum = true) + } + + test("DomainMetadata deletion [w/ checkpoint, w/ checksum]") { + validateDeletionHelper(doCheckpoint = true, doChecksum = true) + } + + test("Multiple DomainMetadatas with the same domain should fail in single transaction") { + val table = "testTable" + withTable(table) { + sql( + s""" + | CREATE TABLE $table(id int) USING delta + | tblproperties + | ('${TableFeatureProtocolUtils.propertyKey(DomainMetadataTableFeature)}' = 'enabled') + |""".stripMargin) + (1 to 100).toDF("id").write.format("delta").mode("append").saveAsTable(table) + val deltaTable = DeltaTableV2(spark, TableIdentifier(table)) + val domainMetadata = + DomainMetadata("testDomain1", "", false) :: + DomainMetadata("testDomain1", "", false) :: Nil + val e = intercept[DeltaIllegalArgumentException] { + deltaTable.startTransactionWithInitialSnapshot().commit(domainMetadata, Truncate()) + } + assertEquals(e.getMessage, + "[DELTA_DUPLICATE_DOMAIN_METADATA_INTERNAL_ERROR] " + + "Internal error: two DomainMetadata actions within the same transaction have " + + "the same domain testDomain1") + } + } + + test("Validate the failure when table feature is not enabled") { + withTempDir { dir => + (1 to 100).toDF().write.format("delta").save(dir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, dir) + val domainMetadata = DomainMetadata("testDomain1", "", false) :: Nil + val e = intercept[DeltaIllegalArgumentException] { + deltaLog.startTransaction().commit(domainMetadata, Truncate()) + } + assertEquals(e.getMessage, + "[DELTA_DOMAIN_METADATA_NOT_SUPPORTED] " + + "Detected DomainMetadata action(s) for domains [testDomain1], " + + "but DomainMetadataTableFeature is not enabled.") + } + } + + test("Validate the lifespan of metadata domains for the REPLACE TABLE operation") { + val existingDomainMetadatas = + 
DomainMetadata("testDomain1", "", false) :: + DomainMetadata("testDomain2", "", false) :: + Nil + val newDomainMetadatas = + DomainMetadata("testDomain2", "key=val", false) :: + DomainMetadata("testDomain3", "", false) :: + Nil + + val result = DomainMetadataUtils.handleDomainMetadataForReplaceTable( + existingDomainMetadatas, newDomainMetadatas) + + // testDomain1: survives by default (not in the final list since it already + // exists in the snapshot). + // testDomain2: overwritten by new domain metadata + // testDomain3: added to the final list since it only appears in the new set. + assert(result === + DomainMetadata("testDomain2", "key=val", false) :: // Overwritten + DomainMetadata("testDomain3", "", false) :: // New metadata domain + Nil) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DuplicatingListLogStoreSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DuplicatingListLogStoreSuite.scala new file mode 100644 index 00000000000..21f25a1d65f --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DuplicatingListLogStoreSuite.scala @@ -0,0 +1,87 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.{File} +import java.nio.charset.StandardCharsets + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.storage.{HDFSLogStore} +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.util.DeltaFileOperations +import com.google.common.io.Files +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.SparkConf +import org.apache.spark.sql.test.SharedSparkSession + +class DuplicatingListLogStore(sparkConf: SparkConf, defaultHadoopConf: Configuration) + extends HDFSLogStore(sparkConf, defaultHadoopConf) { + + override def listFrom(path: Path, hadoopConf: Configuration): Iterator[FileStatus] = { + val list = super.listFrom(path, hadoopConf).toSeq + // The first listing if directory will be listed twice to mimic the WASBS Log Store + if (!list.isEmpty && list.head.isDirectory) { + (Seq(list.head) ++ list).toIterator + } else { + list.toIterator + } + } +} + +class DuplicatingListLogStoreSuite extends SharedSparkSession with DeltaSQLCommandTest { + + override def sparkConf: SparkConf = { + super.sparkConf.set("spark.databricks.tahoe.logStore.class", + classOf[DuplicatingListLogStore].getName) + } + + def pathExists(deltaLog: DeltaLog, filePath: String): Boolean = { + val fs = deltaLog.logPath.getFileSystem(deltaLog.newDeltaHadoopConf()) + fs.exists(DeltaFileOperations.absolutePath(deltaLog.dataPath.toString, filePath)) + } + + test("vacuum should handle duplicate listing") { + withTempDir { dir => + // create cdc file (lexicographically < _delta_log) + spark.range(10).write.format("delta").save(dir.getAbsolutePath) + val deltaTable = io.delta.tables.DeltaTable.forPath(dir.getAbsolutePath) + val 
deltaLog = DeltaLog.forTable(spark, dir.getAbsolutePath) + + val cdcDir = new File(new Path(dir.getAbsolutePath, "_change_data").toString) + cdcDir.mkdir() + val cdcPath = new File( + new Path(cdcDir.getAbsolutePath, "dupFile").toString) + Files.write("test", cdcPath, StandardCharsets.UTF_8) + + require(pathExists(deltaLog, cdcPath.toString)) + require(pathExists(deltaLog, cdcDir.toString)) + + withSQLConf(DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED.key -> "false") { + deltaTable.vacuum(0) + + // check if path doesn't exists + assert(!pathExists(deltaLog, cdcPath.toString)) + + // to delete directories + deltaTable.vacuum(0) + assert(!pathExists(deltaLog, cdcDir.toString)) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/EvolvabilitySuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/EvolvabilitySuite.scala new file mode 100644 index 00000000000..e8cf5dd95a3 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/EvolvabilitySuite.scala @@ -0,0 +1,253 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} +import org.apache.hadoop.fs.Path + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.Trigger +import org.apache.spark.sql.types.StringType +import org.apache.spark.util.Utils + +class EvolvabilitySuite extends EvolvabilitySuiteBase with DeltaSQLCommandTest { + + import testImplicits._ + + test("delta 0.1.0") { + testEvolvability("src/test/resources/delta/delta-0.1.0") + } + + test("delta 0.1.0 - case sensitivity enabled") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + testEvolvability("src/test/resources/delta/delta-0.1.0") + } + } + + test("serialized partition values must contain null values") { + val tempDir = Utils.createTempDir().toString + val df1 = spark.range(5).withColumn("part", new Column(Literal(null, StringType))) + val df2 = spark.range(5).withColumn("part", new Column(Literal("1"))) + df1.union(df2).coalesce(1).write.partitionBy("part").format("delta").save(tempDir) + + // Clear the cache + DeltaLog.clearCache() + val deltaLog = DeltaLog.forTable(spark, tempDir) + + val dataThere = deltaLog.snapshot.allFiles.collect().forall { addFile => + if (!addFile.partitionValues.contains("part")) { + fail(s"The partition values: ${addFile.partitionValues} didn't contain the column 'part'.") + } + val value = addFile.partitionValues("part") + value === null || value === "1" + } + + assert(dataThere, "Partition values didn't match with null or '1'") + + // Check serialized JSON as well + val contents = 
deltaLog.store.read( + FileNames.deltaFile(deltaLog.logPath, 0L), + deltaLog.newDeltaHadoopConf()) + assert(contents.exists(_.contains(""""part":null""")), "null value should be written in json") + } + + testQuietly("parse old version LastCheckpointInfo") { + assert(JsonUtils.mapper.readValue[LastCheckpointInfo]("""{"version":1,"size":1}""") + === LastCheckpointInfo(1, 1, None, None, None, None)) + } + + test("parse partial version LastCheckpointInfo") { + assert(JsonUtils.mapper.readValue[LastCheckpointInfo]( + """{"version":1,"size":1,"parts":100}""") === + LastCheckpointInfo(1, 1, Some(100), None, None, None)) + } + + // Following tests verify that operations on Delta table won't fail when there is an + // unknown column in Delta files and checkpoints. + // The modified Delta files and checkpoints with an extra column is generated by + // `EvolvabilitySuiteBase.generateTransactionLogWithExtraColumn()` + + test("transaction log schema evolvability - batch change data read") { + withTempDir { dir => + withSQLConf(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true") { + EvolvabilitySuiteBase.generateTransactionLogWithExtraColumn(spark, dir.getAbsolutePath) + spark.sql(s"UPDATE delta.`${dir.getAbsolutePath}` SET value = 10") + spark.read.format("delta").option("readChangeFeed", "true") + .option("startingVersion", 0).load(dir.getAbsolutePath).collect() + + val expectedPreimage = (1 until 10).flatMap(x => Seq(x, x)).toSeq + val expectedPostimage = Seq.fill(18)(10) + testCdfUpdate(dir.getAbsolutePath, 6, expectedPreimage, expectedPostimage) + } + } + } + + test("transaction log schema evolvability - streaming change data read") { + withTempDir { dir => + withSQLConf(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true") { + EvolvabilitySuiteBase.generateTransactionLogWithExtraColumn(spark, dir.getAbsolutePath) + spark.sql(s"UPDATE delta.`${dir.getAbsolutePath}` SET value = 10") + val query = spark.readStream.format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", 0) + .load(dir.getAbsolutePath) + .writeStream.format("noop").start() + try { + query.processAllAvailable() + } finally { + query.stop() + } + + val expectedPreimage = (1 until 10).flatMap(x => Seq(x, x)).toSeq + val expectedPostimage = Seq.fill(18)(10) + testCdfUpdate(dir.getAbsolutePath, 6, expectedPreimage, expectedPostimage, true) + } + } + } + + test("transaction log schema evolvability - batch read") { + testLogSchemaEvolvability( + (path: String) => { spark.read.format("delta").load(path).collect() } + ) + } + + test("transaction log schema evolvability - batch write") { + testLogSchemaEvolvability( + (path: String) => { + (10 until 20).map(num => (num, num)).toDF("key", "value") + .write.format("delta").mode("append").save(path) + spark.read.format("delta").load(path).collect() + } + ) + } + + test("transaction log schema evolvability - streaming read") { + testLogSchemaEvolvability( + (path: String) => { + val query = spark.readStream.format("delta").load(path).writeStream.format("noop").start() + try { + query.processAllAvailable() + } finally { + query.stop() + } + } + ) + } + + test("transaction log schema evolvability - streaming write") { + testLogSchemaEvolvability( + (path: String) => { + withTempDir { tempDir => + val memStream = MemoryStream[(Int, Int)] + memStream.addData((11, 11), (12, 12)) + val stream = memStream.toDS().toDF("key", "value") + .coalesce(1).writeStream + .format("delta") + .trigger(Trigger.Once) + .outputMode("append") + .option("checkpointLocation", 
tempDir.getCanonicalPath + "/cp") + .start(path) + try { + stream.processAllAvailable() + } finally { + stream.stop() + } + } + } + ) + } + + test("transaction log schema evolvability - describe commands") { + testLogSchemaEvolvability( + (path: String) => { + spark.sql(s"DESCRIBE delta.`$path`") + spark.sql(s"DESCRIBE HISTORY delta.`$path`") + spark.sql(s"DESCRIBE DETAIL delta.`$path`") + } + ) + } + + test("transaction log schema evolvability - vacuum") { + testLogSchemaEvolvability( + (path: String) => { + sql(s"VACUUM delta.`$path`") + } + ) + } + + test("transaction log schema evolvability - alter table") { + testLogSchemaEvolvability( + (path: String) => { + sql(s"ALTER TABLE delta.`$path` ADD COLUMNS (col int)") + } + ) + } + + test("transaction log schema evolvability - delete") { + testLogSchemaEvolvability( + (path: String) => { sql(s"DELETE FROM delta.`$path` WHERE key = 1") } + ) + } + + test("transaction log schema evolvability - update") { + testLogSchemaEvolvability( + (path: String) => { sql(s"UPDATE delta.`$path` set value = 100 WHERE key = 1") } + ) + } + + test("transaction log schema evolvability - merge") { + testLogSchemaEvolvability( + (path: String) => { + withTable("source") { + Seq((1, 5), (11, 12)) + .toDF("key", "value") + .write + .mode("overwrite") + .format("delta") + .saveAsTable("source") + sql( + s""" + |MERGE INTO delta.`$path` tgrt + |USING source src + |ON src.key = tgrt.key + |WHEN MATCHED THEN + | UPDATE SET key = 20 + src.key, value = 20 + src.value + |WHEN NOT MATCHED THEN + | INSERT (key, value) VALUES (src.key + 5, src.value + 10) + """.stripMargin + ) + } + } + ) + } + + test("Delta Lake issue 1229: able to read a checkpoint containing `numRecords`") { + // table created using Delta 1.2.1 which has additional field `numRecords` in + // checkpoint schema. It is removed in version after 1.2.1. + // Make sure we are able to read the Delta table in the latest version. + val tablePath = "src/test/resources/delta/delta-1.2.1" + assert( + spark.read.format("delta") + .load(tablePath).where("col1 = 8").count() === 9L) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/EvolvabilitySuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/EvolvabilitySuiteBase.scala new file mode 100644 index 00000000000..d6103a762af --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/EvolvabilitySuiteBase.scala @@ -0,0 +1,351 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File + +import org.apache.spark.sql.delta.actions.{Action, AddFile, FileAction, SingleAction} +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} +import org.apache.spark.sql.delta.util.JsonUtils +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{QueryTest, Row, SparkSession} +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.Utils + +abstract class EvolvabilitySuiteBase extends QueryTest with SharedSparkSession + with SQLTestUtils { + import testImplicits._ + + protected def testEvolvability(tablePath: String): Unit = { + // Check we can load everything from a log checkpoint + val deltaLog = DeltaLog.forTable(spark, new Path(tablePath)) + val path = deltaLog.dataPath.toString + checkDatasetUnorderly( + spark.read.format("delta").load(path).select("id", "value").as[(Int, String)], + 4 -> "d", 5 -> "e", 6 -> "f") + assert(deltaLog.snapshot.metadata.schema === StructType.fromDDL("id INT, value STRING")) + assert(deltaLog.snapshot.metadata.partitionSchema === StructType.fromDDL("id INT")) + + // Check we can load LastCheckpointInfo + val lastCheckpointOpt = deltaLog.readLastCheckpointFile() + assert(lastCheckpointOpt.get.version === 3) + assert(lastCheckpointOpt.get.size === 6L) + assert(lastCheckpointOpt.get.checkpointSchema.isEmpty) + + // Check we can parse all `Action`s in delta files. It doesn't check correctness. + deltaLog.getChanges(0L).toList.map(_._2.toList) + } + + /** + * This tests the evolution of the schema at delta file and checkpoint file. + * Operations on the Delta table shouldn't fail when there is an unknown column + * in delta file and checkpoint file. + * + * Table Schema: StructType(StructField("key", StringType), StructField("value", StringType)) + * Overwritten Delta file: {"some_new_feature":{"a":1}} + * Overwritten checkpoint file with a new column called `unknown` with boolean type. + * + * The delta file and checkpoint file with an unknown column are generated by + * `EvolvabilitySuiteBase.generateTransactionLogWithExtraColumn()`. + */ + protected def testLogSchemaEvolvability(operation: String => Unit): Unit = { + withTempDir { tempDir => + // copy the existing dir to the temp data dir. + FileUtils.copyDirectory( + new File("src/test/resources/delta/transaction_log_schema_evolvability"), tempDir) + makeWritable(tempDir) + DeltaLog.clearCache() + operation(tempDir.getAbsolutePath) + } + } + + /** + * Recursively make all files in a directory writable. 
+ */ + private def makeWritable(directory: File): Unit = { + if (!directory.isDirectory) return + directory.listFiles().foreach { file => + if (file.isDirectory) { + makeWritable(file) + } else { + file.setWritable(true) + } + } + } + + /** + * Read from a table's CDF and check for the expected preimage/postimage after applying an update + */ + protected def testCdfUpdate( + tablePath: String, + commitVersion: Long, + expectedPreimage: Seq[Int], + expectedPostimage: Seq[Int], + streaming: Boolean = false): Unit = { + + val df = if (streaming) { + val q = spark.readStream.format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", commitVersion) + .option("endingVersion", commitVersion) + .load(tablePath) + .writeStream + .option("checkpointLocation", tablePath + "-checkpoint") + .toTable("streaming"); + try { + q.processAllAvailable() + } finally { + q.stop() + } + spark.read.table("streaming") + } else { + spark.read.format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", commitVersion) + .option("endingVersion", commitVersion) + .load(tablePath) + } + + val preimage = df.where("_change_type = 'update_preimage'").select("value") + val postimage = df.where("_change_type = 'update_postimage'").select("value") + + checkAnswer(preimage, expectedPreimage.map(Row(_))) + checkAnswer(postimage, expectedPostimage.map(Row(_))) + } +} + + +// scalastyle:off +/*** + * A tool to generate data and transaction log for evolvability tests. + * + * Here are the steps to generate data. + * + * 1. Update `EvolvabilitySuite.generateData` if there are new [[Action]] types. + * 2. Change the following command with the right path and run it. Note: the working directory is "[delta_project_root]". + * + * scalastyle:off + * ``` + * build/sbt "core/test:runMain org.apache.spark.sql.delta.EvolvabilitySuite src/test/resources/delta/delta-0.1.0 generateData" + * ``` + * + * You can also use this tool to generate DeltaLog that contains a checkpoint a json log with a new column. 
+ * + * scalastyle:off + * ``` + * build/sbt "core/test:runMain org.apache.spark.sql.delta.EvolvabilitySuite /path/src/test/resources/delta/transaction_log_schema_evolvability generateTransactionLogWithExtraColumn" + * ``` + */ +// scalastyle:on +object EvolvabilitySuiteBase { + + def generateData( + spark: SparkSession, + path: String, + tblProps: Map[DeltaConfig[_], String] = Map.empty): Unit = { + import org.apache.spark.sql.delta.implicits._ + implicit val s = spark.sqlContext + + Seq(1, 2, 3).toDF(spark).write.format("delta").save(path) + if (tblProps.nonEmpty) { + val tblPropsStr = tblProps.map { case (k, v) => s"'${k.key}' = '$v'" }.mkString(", ") + spark.sql(s"CREATE TABLE test USING DELTA LOCATION '$path'") + spark.sql(s"ALTER TABLE test SET TBLPROPERTIES($tblPropsStr)") + } + Seq(1, 2, 3).toDF(spark).write.format("delta").mode("append").save(path) + Seq(1, 2, 3).toDF(spark).write.format("delta").mode("overwrite").save(path) + + val checkpoint = Utils.createTempDir().toString + val data = MemoryStream[Int] + data.addData(1, 2, 3) + val stream = data.toDF() + .writeStream + .format("delta") + .option("checkpointLocation", checkpoint) + .start(path) + stream.processAllAvailable() + stream.stop() + + DeltaLog.forTable(spark, path).checkpoint() + } + + /** Validate the generated data contains all [[Action]] types */ + def validateData(spark: SparkSession, path: String): Unit = { + import org.apache.spark.sql.delta.util.FileNames._ + import scala.reflect.runtime.{universe => ru} + import org.apache.spark.sql.delta.implicits._ + + val mirror = ru.runtimeMirror(this.getClass.getClassLoader) + + val tpe = ru.typeOf[Action] + val clazz = tpe.typeSymbol.asClass + assert(clazz.isSealed, s"${classOf[Action]} must be sealed") + + val deltaLog = DeltaLog.forTable(spark, new Path(path)) + val deltas = 0L to deltaLog.snapshot.version + val deltaFiles = deltas.map(deltaFile(deltaLog.logPath, _)).map(_.toString) + val actionsTypesInLog = + spark.read.schema(Action.logSchema).json(deltaFiles: _*) + .as[SingleAction] + .collect() + .map(_.unwrap.getClass.asInstanceOf[Class[_]]) + .toSet + + val allActionTypes = + clazz.knownDirectSubclasses + .flatMap { + case t if t == ru.typeOf[FileAction].typeSymbol => t.asClass.knownDirectSubclasses + case t => Set(t) + } + .map(t => mirror.runtimeClass(t.asClass)) + + val missingTypes = allActionTypes -- actionsTypesInLog + val unknownTypes = actionsTypesInLog -- allActionTypes + assert( + missingTypes.isEmpty, + s"missing types: $missingTypes. " + + "Please update EvolveabilitySuite.generateData to include them in the log.") + assert( + unknownTypes.isEmpty, + s"unknown types: $unknownTypes. " + + s"Please make sure they inherit ${classOf[Action]} or ${classOf[FileAction]} directly.") + } + + /** Generate the transaction log with extra column in checkpoint and json. 
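+ * The resulting log contains a checkpoint rewritten with an extra boolean column named `unknown`, a commit that removes the previous files and appends an unrecognized `some_new_action` entry, and a commit that re-adds those files with unknown fields both inside and alongside the `add` action.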
*/ + def generateTransactionLogWithExtraColumn(spark: SparkSession, path: String): Unit = { + // scalastyle:off sparkimplicits + import spark.implicits._ + // scalastyle:on sparkimplicits + implicit val s = spark.sqlContext + + val absPath = new File(path).getAbsolutePath + + (1 until 10).map(num => (num, num)).toDF("key", "value").write.format("delta").save(path) + + // Enable struct-only stats + spark.sql(s"ALTER TABLE delta.`$absPath` " + + s"SET TBLPROPERTIES (delta.checkpoint.writeStatsAsStruct = true, " + + "delta.checkpoint.writeStatsAsJson = false)") + + (1 until 10).map(num => (num, num)).toDF("key", "value").write + .format("delta").mode("overwrite").save(path) + + val deltaLog = DeltaLog.forTable(spark, new Path(path)) + + deltaLog.checkpoint() + + // Create an incomplete checkpoint without the action and overwrite the + // original checkpoint + val checkpointPath = FileNames.checkpointFileSingular(deltaLog.logPath, + deltaLog.snapshot.version) + val tmpCheckpoint = Utils.createTempDir() + val checkpointDataWithNewCol = spark.read.parquet(checkpointPath.toString) + .withColumn("unknown", lit(true)) + + // Keep the add files and also filter by the additional condition + checkpointDataWithNewCol.coalesce(1).write + .mode("overwrite").parquet(tmpCheckpoint.toString) + val writtenCheckpoint = + tmpCheckpoint.listFiles().toSeq.filter(_.getName.startsWith("part")).head + val checkpointFile = new File(checkpointPath.toUri) + new File(deltaLog.logPath.toUri).listFiles().toSeq.foreach { file => + if (file.getName.startsWith(".0")) { + // we need to delete checksum files, + // otherwise trying to replace our incomplete + // checkpoint file fails due to the LocalFileSystem's checksum checks. + require(file.delete(), "Failed to delete checksum file") + } + } + require(checkpointFile.delete(), "Failed to delete old checkpoint") + require(writtenCheckpoint.renameTo(checkpointFile), + "Failed to rename corrupt checkpoint") + + (1 until 10).map(num => (num, num)).toDF("key", "value").write + .format("delta").mode("append").save(path) + + // Shouldn't fail here + deltaLog.update() + + val version = deltaLog.snapshot.version + // We want to have a delta log with a new column after a checkpoint, to test out operations + // against both checkpoint with unknown column and delta log with unkown column. + + // manually remove AddFile in the previous commit and append a new column. + val records = deltaLog.store.read( + FileNames.deltaFile(deltaLog.logPath, version), + deltaLog.newDeltaHadoopConf()) + val actions = records.map(Action.fromJson).filter(action => action.isInstanceOf[AddFile]) + .map { action => action.asInstanceOf[AddFile].remove} + .toIterator + val recordsWithNewAction = actions.map(_.json) ++ Iterator("""{"some_new_action":{"a":1}}""") + deltaLog.store.write( + FileNames.deltaFile(deltaLog.logPath, version + 1), + recordsWithNewAction, + overwrite = false, + deltaLog.newDeltaHadoopConf()) + + // manually add those files back and add a unknown field to it. + val newRecords = records.map{ record => + val recordMap = JsonUtils.fromJson[Map[String, Any]](record) + val newRecordMap = if (recordMap.contains("add")) { + // add a unknown column inside action fields. + val actionFields = recordMap("add").asInstanceOf[Map[String, Any]] + + ("some_new_column_in_add_action" -> 1) + recordMap + ("add" -> actionFields) + } else recordMap + // add a unknown column outside action fields. 
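+ // ("some_new_action_alongside_add_action" ends up as a sibling of the "add" key within the same JSON record.)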
+ JsonUtils.toJson(newRecordMap + ("some_new_action_alongside_add_action" -> ("a" -> "1"))) + }.toIterator + deltaLog.store.write( + FileNames.deltaFile(deltaLog.logPath, version + 2), + newRecords, + overwrite = false, + deltaLog.newDeltaHadoopConf()) + + // Shouldn't fail here + deltaLog.update() + + DeltaLog.clearCache() + } + + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder().master("local[2]").getOrCreate() + val path = new File(args(0)) + if (path.exists()) { + // Don't delete automatically in case the user types a wrong path. + // scalastyle:off throwerror + throw new AssertionError(s"${path.getCanonicalPath} exists. Please delete it and retry.") + // scalastyle:on throwerror + } + args(1) match { + case "generateData" => + generateData(spark, path.toString) + validateData(spark, path.toString) + case "generateTransactionLogWithExtraColumn" => + generateTransactionLogWithExtraColumn(spark, path.toString) + case _ => + throw new RuntimeException("Unrecognized (or omitted) argument. " + + "Please try again (no data generated).") + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/FakeFileSystem.scala b/spark/src/test/scala/org/apache/spark/sql/delta/FakeFileSystem.scala new file mode 100644 index 00000000000..9c3d7c6e2bc --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/FakeFileSystem.scala @@ -0,0 +1,33 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.net.URI + +import org.apache.hadoop.fs.RawLocalFileSystem + +/** A fake file system to test whether session Hadoop configuration will be picked up. */ +class FakeFileSystem extends RawLocalFileSystem { + override def getScheme: String = FakeFileSystem.scheme + override def getUri: URI = FakeFileSystem.uri +} + +object FakeFileSystem { + val scheme = "fake" + val uri = URI.create(s"$scheme:///") +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/FileNamesSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/FileNamesSuite.scala new file mode 100644 index 00000000000..4c6e0e395f4 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/FileNamesSuite.scala @@ -0,0 +1,73 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkFunSuite + +class FileNamesSuite extends SparkFunSuite { + + import org.apache.spark.sql.delta.util.FileNames._ + + test("isDeltaFile") { + assert(isDeltaFile(new Path("/a/123.json"))) + assert(!isDeltaFile(new Path("/a/123ajson"))) + assert(!isDeltaFile(new Path("/a/123.jso"))) + assert(!isDeltaFile(new Path("/a/123a.json"))) + assert(!isDeltaFile(new Path("/a/a123.json"))) + } + + test("isCheckpointFile") { + assert(isCheckpointFile(new Path("/a/123.checkpoint.parquet"))) + assert(isCheckpointFile(new Path("/a/123.checkpoint.0000000001.0000000087.parquet"))) + assert(!isCheckpointFile(new Path("/a/123.json"))) + } + + test("checkpointVersion") { + assert(checkpointVersion(new Path("/a/123.checkpoint.parquet")) == 123) + assert(checkpointVersion(new Path("/a/0.checkpoint.parquet")) == 0) + assert(checkpointVersion(new Path("/a/00000000000000000151.checkpoint.parquet")) == 151) + assert(checkpointVersion(new Path("/a/999.checkpoint.0000000090.0000000099.parquet")) == 999) + } + + test("listingPrefix") { + assert(listingPrefix(new Path("/a"), 1234) == new Path("/a/00000000000000001234.")) + } + + test("checkpointFileWithParts") { + assert(checkpointFileWithParts(new Path("/a"), 1, 1) == Seq( + new Path("/a/00000000000000000001.checkpoint.0000000001.0000000001.parquet"))) + assert(checkpointFileWithParts(new Path("/a"), 1, 2) == Seq( + new Path("/a/00000000000000000001.checkpoint.0000000001.0000000002.parquet"), + new Path("/a/00000000000000000001.checkpoint.0000000002.0000000002.parquet"))) + assert(checkpointFileWithParts(new Path("/a"), 1, 5) == Seq( + new Path("/a/00000000000000000001.checkpoint.0000000001.0000000005.parquet"), + new Path("/a/00000000000000000001.checkpoint.0000000002.0000000005.parquet"), + new Path("/a/00000000000000000001.checkpoint.0000000003.0000000005.parquet"), + new Path("/a/00000000000000000001.checkpoint.0000000004.0000000005.parquet"), + new Path("/a/00000000000000000001.checkpoint.0000000005.0000000005.parquet"))) + } + + test("numCheckpointParts") { + assert(numCheckpointParts(new Path("/a/00000000000000000099.checkpoint.parquet")).isEmpty) + assert( + numCheckpointParts( + new Path("/a/00000000000000000099.checkpoint.0000000078.0000000092.parquet")) + .contains(92)) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnCompatibilitySuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnCompatibilitySuite.scala new file mode 100644 index 00000000000..f850a30d13b --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnCompatibilitySuite.scala @@ -0,0 +1,162 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File + +import org.apache.spark.sql.delta.actions.Protocol +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.commons.io.FileUtils + +import org.apache.spark.sql.AnalysisException + +/** + * We store the generation expressions in column's metadata. As Spark will propagate column metadata + * to downstream operations when reading a table, old versions may create tables that have + * generation expressions with an old writer version. For such tables, this test suite will verify + * it behaves as a normal table. In other words, the generation expressions should be ignored in + * new versions that understand generated columns so that all versions will have the same behaviors. + */ +class GeneratedColumnCompatibilitySuite extends GeneratedColumnTest { + import GeneratedColumn._ + import testImplicits._ + + /** + * This test uses a special table generated by the following steps: + * + * 1. Run the following command using DBR 8.1 to generate a generated column table. + * + * ``` + * spark.sql("""CREATE TABLE generated_columns_table( + * |c1 INT, + * |c2 INT GENERATED ALWAYS AS ( c1 + 1 ) + * |) USING DELTA + * |LOCATION 'sql/core/src/test/resources/delta/dbr_8_1_generated_columns' + * |""".stripMargin) + * ``` + * + * 2. Run the following command using DBR 8.0 to read the above table and create a new one. + * + * ``` + * spark.sql("""CREATE TABLE delta_non_generated_columns + * |USING DELTA + * |LOCATION 'sql/core/src/test/resources/delta/dbr_8_0_non_generated_columns' + * |AS SELECT * FROM + * |delta.`sql/core/src/test/resources/delta/dbr_8_1_generated_columns` + * |""".stripMargin) + * ``` + * + * Now the schema of `dbr_8_0_non_generated_columns` will contain generation expressions but it + * has an old writer version. This test will verify this test is treated as a non generated column + * table, which means new versions will have the exact behaviors as the old versions when reading + * or writing this table. + */ + def withDBR8_0Table(func: String => Unit): Unit = { + val resourcePath = "src/test/resources/delta/dbr_8_0_non_generated_columns" + withTempDir { tempDir => + // Prepare a table that has the old writer version and generation expressions + FileUtils.copyDirectory(new File(resourcePath), tempDir) + val path = tempDir.getCanonicalPath + val deltaLog = DeltaLog.forTable(spark, path) + // Verify the test table has the old writer version and generation expressions + assert(hasGeneratedColumns(deltaLog.snapshot.metadata.schema)) + assert(!enforcesGeneratedColumns(deltaLog.snapshot.protocol, deltaLog.snapshot.metadata)) + func(path) + } + } + + test("dbr 8_0") { + withDBR8_0Table { path => + withTempDir { normalTableDir => + // Prepare a normal table + val normalTablePath = normalTableDir.getCanonicalPath + spark.sql( + s"""CREATE TABLE generated_columns_table( + |c1 INT, + |c2 INT + |) USING DELTA + |LOCATION '$normalTablePath' + |""".stripMargin) + + // Now we are going to verify commands on `path` and `normalTablePath` should be the same. 
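+ // Since the writer version predates generated column enforcement, writes that do not satisfy `c2 = c1 + 1` must succeed on `path` exactly as they do on the normal table.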
+ + // Update `path` and `normalTablePath` using the same func and verify they have the + // same result + def updateTableAndCheckAnswer(func: String => Unit): Unit = { + func(path) + func(normalTablePath) + checkAnswer( + spark.read.format("delta").load(path), + spark.read.format("delta").load(normalTablePath) + ) + } + + + // Insert values that violate the generation expression should be okay because the table + // should not be treated as a generated column table. + updateTableAndCheckAnswer { tablePath => + sql(s"INSERT INTO delta.`$tablePath`VALUES(1, 10)") + } + updateTableAndCheckAnswer { tablePath => + sql(s"INSERT INTO delta.`$tablePath`(c2, c1) VALUES(11, 1)") + } + updateTableAndCheckAnswer { tablePath => + sql(s"INSERT OVERWRITE delta.`$tablePath`VALUES(1, 13)") + } + updateTableAndCheckAnswer { tablePath => + sql(s"INSERT OVERWRITE delta.`$tablePath`(c2, c1) VALUES(14, 1)") + } + updateTableAndCheckAnswer { tablePath => + // Append (1, null) to the table + Seq(1).toDF("c1").write.format("delta").mode("append").save(tablePath) + } + updateTableAndCheckAnswer { tablePath => + Seq(1 -> 15).toDF("c1", "c2").write.format("delta").mode("append").save(tablePath) + } + updateTableAndCheckAnswer { tablePath => + // Overwrite the table with (2, null) + Seq(2).toDF("c1").write.format("delta").mode("overwrite").save(tablePath) + } + } + } + } + + test("adding a new column should not enable generated columns") { + withDBR8_0Table { path => + val deltaLog = DeltaLog.forTable(spark, path) + val protocolBeforeUpdate = deltaLog.snapshot.protocol + sql(s"ALTER TABLE delta.`$path` ADD COLUMNS (c3 INT)") + deltaLog.update() + // The generation expressions should be dropped + assert(!hasGeneratedColumns(deltaLog.snapshot.metadata.schema)) + assert(deltaLog.snapshot.protocol == protocolBeforeUpdate) + assert(!enforcesGeneratedColumns(deltaLog.snapshot.protocol, deltaLog.snapshot.metadata)) + } + } + + test("specifying a min writer version should not enable generated column") { + withDBR8_0Table { path => + val deltaLog = DeltaLog.forTable(spark, path) + sql(s"ALTER TABLE delta.`$path` SET TBLPROPERTIES ('delta.minWriterVersion'='4')") + deltaLog.update() + // The generation expressions should be dropped + assert(!hasGeneratedColumns(deltaLog.snapshot.metadata.schema)) + assert(deltaLog.snapshot.protocol == Protocol(1, 4)) + assert(!enforcesGeneratedColumns(deltaLog.snapshot.protocol, deltaLog.snapshot.metadata)) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnSuite.scala new file mode 100644 index 00000000000..8b701d301d4 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnSuite.scala @@ -0,0 +1,1808 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.PrintWriter + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.schema.{DeltaInvariantViolationException, InvariantViolationException, SchemaUtils} +import org.apache.spark.sql.delta.sources.DeltaSourceUtils.GENERATION_EXPRESSION_METADATA_KEY +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import io.delta.tables.DeltaTableBuilder + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, stringToDate, stringToTimestamp, toJavaDate, toJavaTimestamp} +import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.functions.{current_timestamp, lit} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.{StreamingQueryException, Trigger} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{ArrayType, DateType, IntegerType, MetadataBuilder, StringType, StructField, StructType, TimestampType} +import org.apache.spark.unsafe.types.UTF8String + +trait GeneratedColumnSuiteBase extends GeneratedColumnTest { + + import GeneratedColumn._ + import testImplicits._ + + protected def replaceTable( + tableName: String, + path: Option[String], + schemaString: String, + generatedColumns: Map[String, String], + partitionColumns: Seq[String], + notNullColumns: Set[String] = Set.empty, + comments: Map[String, String] = Map.empty, + properties: Map[String, String] = Map.empty, + orCreate: Option[Boolean] = None): Unit = { + var tableBuilder = if (orCreate.getOrElse(false)) { + io.delta.tables.DeltaTable.createOrReplace(spark) + } else { + io.delta.tables.DeltaTable.replace(spark) + } + buildTable(tableBuilder, tableName, path, schemaString, + generatedColumns, partitionColumns, notNullColumns, comments, properties).execute() + } + + // Define the information for a default test table used by many tests. + protected val defaultTestTableSchema = + "c1 bigint, c2_g bigint, c3_p string, c4_g_p date, c5 timestamp, c6 int, c7_g_p int, c8 date" + protected val defaultTestTableGeneratedColumns = Map( + "c2_g" -> "c1 + 10", + "c4_g_p" -> "cast(c5 as date)", + "c7_g_p" -> "c6 * 10" + ) + protected val defaultTestTablePartitionColumns = "c3_p, c4_g_p, c7_g_p".split(", ").toList + + protected def createDefaultTestTable(tableName: String, path: Option[String] = None): Unit = { + createTable( + tableName, + path, + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns + ) + } + + /** + * @param updateFunc A function that's called with the table information (tableName, path). It + * should execute update operations, and return the expected data after + * updating. 
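+ * The returned rows are compared against `SELECT * FROM <table>` after the update completes.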
+ */ + protected def testTableUpdate( + testName: String, + isStreaming: Boolean = false)(updateFunc: (String, String) => Seq[Row]): Unit = { + def testBody(): Unit = { + val table = testName + withTempDir { path => + withTable(table) { + createDefaultTestTable(tableName = table, path = Some(path.getCanonicalPath)) + val expected = updateFunc(testName, path.getCanonicalPath) + checkAnswer(sql(s"select * from $table"), expected) + } + } + } + + if (isStreaming) { + test(testName) { + testBody() + } + } else { + test(testName) { + testBody() + } + } + } + + private def errorContains(errMsg: String, str: String): Unit = { + assert(errMsg.contains(str)) + } + + protected def testTableUpdateDPO( + testName: String)(updateFunc: (String, String) => Seq[Row]): Unit = { + withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> + SQLConf.PartitionOverwriteMode.DYNAMIC.toString) { + testTableUpdate("dpo_" + testName)(updateFunc) + } + } + + testTableUpdate("append_data") { (table, path) => + Seq( + Tuple5(1L, "foo", "2020-10-11 12:30:30", 100, "2020-11-12") + ).toDF("c1", "c3_p", "c5", "c6", "c8") + .withColumn("c5", $"c5".cast(TimestampType)) + .withColumn("c8", $"c8".cast(DateType)) + .write + .format("delta") + .mode("append") + .save(path) + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("append_data_in_different_column_order") { (table, path) => + Seq( + Tuple5("2020-10-11 12:30:30", 100, "2020-11-12", 1L, "foo") + ).toDF("c5", "c6", "c8", "c1", "c3_p") + .withColumn("c5", $"c5".cast(TimestampType)) + .withColumn("c8", $"c8".cast(DateType)) + .write + .format("delta") + .mode("append") + .save(path) + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("append_data_v2") { (table, _) => + Seq( + Tuple5(1L, "foo", "2020-10-11 12:30:30", 100, "2020-11-12") + ).toDF("c1", "c3_p", "c5", "c6", "c8") + .withColumn("c5", $"c5".cast(TimestampType)) + .withColumn("c8", $"c8".cast(DateType)) + .writeTo(table) + .append() + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("append_data_in_different_column_order_v2") { (table, _) => + Seq( + Tuple5("2020-10-11 12:30:30", 100, "2020-11-12", 1L, "foo") + ).toDF("c5", "c6", "c8", "c1", "c3_p") + .withColumn("c5", $"c5".cast(TimestampType)) + .withColumn("c8", $"c8".cast(DateType)) + .writeTo(table) + .append() + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + + testTableUpdate("insert_into_values_provide_all_columns") { (table, path) => + sql(s"INSERT INTO $table VALUES" + + s"(1, 11, 'foo', '2020-10-11', '2020-10-11 12:30:30', 100, 1000, '2020-11-12')") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("insert_into_by_name_provide_all_columns") { (table, _) => + sql(s"INSERT INTO $table (c5, c6, c7_g_p, c8, c1, c2_g, c3_p, c4_g_p) VALUES" + + s"('2020-10-11 12:30:30', 100, 1000, '2020-11-12', 1, 11, 'foo', '2020-10-11')") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("insert_into_by_name_not_provide_generated_columns") { (table, _) => + sql(s"INSERT INTO $table (c6, c8, c1, c3_p, c5) VALUES" + + 
s"(100, '2020-11-12', 1L, 'foo', '2020-10-11 12:30:30')") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("insert_into_by_name_with_some_generated_columns") { (table, _) => + sql(s"INSERT INTO $table (c5, c6, c8, c1, c3_p, c4_g_p) VALUES" + + s"('2020-10-11 12:30:30', 100, '2020-11-12', 1L, 'foo', '2020-10-11')") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("insert_into_select_provide_all_columns") { (table, path) => + sql(s"INSERT INTO $table SELECT " + + s"1, 11, 'foo', '2020-10-11', '2020-10-11 12:30:30', 100, 1000, '2020-11-12'") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("insert_into_by_name_not_provide_normal_columns") { (table, _) => + val e = intercept[AnalysisException] { + withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> "false") { + sql(s"INSERT INTO $table (c6, c8, c1, c3_p) VALUES" + + s"(100, '2020-11-12', 1L, 'foo')") + } + } + errorContains(e.getMessage, "Column c5 is not specified in INSERT") + Nil + } + + testTableUpdate("insert_overwrite_values_provide_all_columns") { (table, path) => + sql(s"INSERT OVERWRITE TABLE $table VALUES" + + s"(1, 11, 'foo', '2020-10-11', '2020-10-11 12:30:30', 100, 1000, '2020-11-12')") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("insert_overwrite_select_provide_all_columns") { (table, path) => + sql(s"INSERT OVERWRITE TABLE $table SELECT " + + s"1, 11, 'foo', '2020-10-11', '2020-10-11 12:30:30', 100, 1000, '2020-11-12'") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("insert_overwrite_by_name_provide_all_columns") { (table, _) => + sql(s"INSERT OVERWRITE $table (c5, c6, c7_g_p, c8, c1, c2_g, c3_p, c4_g_p) VALUES" + + s"('2020-10-11 12:30:30', 100, 1000, '2020-11-12', 1, 11, 'foo', '2020-10-11')") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("insert_overwrite_by_name_not_provide_generated_columns") { (table, _) => + sql(s"INSERT OVERWRITE $table (c6, c8, c1, c3_p, c5) VALUES" + + s"(100, '2020-11-12', 1L, 'foo', '2020-10-11 12:30:30')") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("insert_overwrite_by_name_with_some_generated_columns") { (table, _) => + sql(s"INSERT OVERWRITE $table (c5, c6, c8, c1, c3_p, c4_g_p) VALUES" + + s"('2020-10-11 12:30:30', 100, '2020-11-12', 1L, 'foo', '2020-10-11')") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("insert_overwrite_by_name_not_provide_normal_columns") { (table, _) => + val e = intercept[AnalysisException] { + withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> "false") { + sql(s"INSERT OVERWRITE $table (c6, c8, c1, c3_p) VALUES" + + s"(100, '2020-11-12', 1L, 'foo')") + } + } + errorContains(e.getMessage, "Column c5 is not specified in INSERT") + Nil + } + + testTableUpdateDPO("insert_overwrite_values_provide_all_columns") { (table, path) => + 
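+ // Same as insert_overwrite_values_provide_all_columns above, but under dynamic partition overwrite mode.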
sql(s"INSERT OVERWRITE TABLE $table VALUES" + + s"(1, 11, 'foo', '2020-10-11', '2020-10-11 12:30:30', 100, 1000, '2020-11-12')") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdateDPO("insert_overwrite_select_provide_all_columns") { (table, path) => + sql(s"INSERT OVERWRITE TABLE $table SELECT " + + s"1, 11, 'foo', '2020-10-11', '2020-10-11 12:30:30', 100, 1000, '2020-11-12'") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdateDPO("insert_overwrite_by_name_values_provide_all_columns") { (table, _) => + sql(s"INSERT OVERWRITE $table (c5, c6, c7_g_p, c8, c1, c2_g, c3_p, c4_g_p) VALUES" + + s"(CAST('2020-10-11 12:30:30' AS TIMESTAMP), 100, 1000, CAST('2020-11-12' AS DATE), " + + s"1L, 11L, 'foo', CAST('2020-10-11' AS DATE))") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdateDPO( + "insert_overwrite_by_name_not_provide_generated_columns") { (table, _) => + sql(s"INSERT OVERWRITE $table (c6, c8, c1, c3_p, c5) VALUES" + + s"(100, CAST('2020-11-12' AS DATE), 1L, 'foo', CAST('2020-10-11 12:30:30' AS TIMESTAMP))") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdateDPO("insert_overwrite_by_name_with_some_generated_columns") { (table, _) => + sql(s"INSERT OVERWRITE $table (c5, c6, c8, c1, c3_p, c4_g_p) VALUES" + + s"(CAST('2020-10-11 12:30:30' AS TIMESTAMP), 100, CAST('2020-11-12' AS DATE), 1L, " + + s"'foo', CAST('2020-10-11' AS DATE))") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdateDPO("insert_overwrite_by_name_not_provide_normal_columns") { (table, _) => + val e = intercept[AnalysisException] { + sql(s"INSERT OVERWRITE $table (c6, c8, c1, c3_p) VALUES" + + s"(100, '2020-11-12', 1L, 'foo')") + } + assert(e.getMessage.contains("with name `c5` cannot be resolved") || + e.getMessage.contains("Column c5 is not specified in INSERT")) + Nil + } + + testTableUpdate("delete") { (table, path) => + Seq( + Tuple5(1L, "foo", "2020-10-11 12:30:30", 100, "2020-11-12"), + Tuple5(2L, "foo", "2020-10-11 13:30:30", 100, "2020-12-12") + ).toDF("c1", "c3_p", "c5", "c6", "c8") + .withColumn("c5", $"c5".cast(TimestampType)) + .withColumn("c8", $"c8".cast(DateType)) + .coalesce(1) + .write + .format("delta") + .mode("append") + .save(path) + // Make sure we create only one file so that we will trigger file rewriting. 
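+ // The DELETE below then has to rewrite that single file, keeping the surviving row and its generated column values intact.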
+ assert(DeltaLog.forTable(spark, path).snapshot.allFiles.count == 1) + sql(s"DELETE FROM $table WHERE c1 = 2") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("update_generated_column_with_correct_value") { (table, path) => + sql(s"INSERT INTO $table SELECT " + + s"1, 11, 'foo', '2020-10-11', '2020-10-11 12:30:30', 100, 1000, '2020-11-12'") + sql(s"UPDATE $table SET c2_g = 11 WHERE c1 = 1") + Row(1, 11, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("update_generated_column_with_incorrect_value") { (table, path) => + sql(s"INSERT INTO $table SELECT " + + s"1, 11, 'foo', '2020-10-11', '2020-10-11 12:30:30', 100, 1000, '2020-11-12'") + val e = intercept[InvariantViolationException] { + quietly { + sql(s"UPDATE $table SET c2_g = 12 WHERE c1 = 1") + } + } + errorContains(e.getMessage, + "CHECK constraint Generated Column (c2_g <=> (c1 + 10)) violated by row with values") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("update_source_column_used_by_generated_column") { (table, _) => + sql(s"INSERT INTO $table SELECT " + + s"1, 11, 'foo', '2020-10-11', '2020-10-11 12:30:30', 100, 1000, '2020-11-12'") + sql(s"UPDATE $table SET c1 = 2 WHERE c1 = 1") + Row(2, 12, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("update_source_and_generated_columns_with_correct_value") { (table, _) => + sql(s"INSERT INTO $table SELECT " + + s"1, 11, 'foo', '2020-10-11', '2020-10-11 12:30:30', 100, 1000, '2020-11-12'") + sql(s"UPDATE $table SET c2_g = 12, c1 = 2 WHERE c1 = 1") + Row(2, 12, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("update_source_and_generated_columns_with_incorrect_value") { (table, _) => + sql(s"INSERT INTO $table SELECT " + + s"1, 11, 'foo', '2020-10-11', '2020-10-11 12:30:30', 100, 1000, '2020-11-12'") + val e = intercept[InvariantViolationException] { + quietly { + sql(s"UPDATE $table SET c2_g = 12, c1 = 3 WHERE c1 = 1") + } + } + errorContains(e.getMessage, + "CHECK constraint Generated Column (c2_g <=> (c1 + 10)) violated by row with values") + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + test("various update commands") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + withTableName("update_commands") { table => + createTable(table, Some(path), "c INT, g INT", Map("g" -> "c + 10"), Nil) + sql(s"INSERT INTO $table VALUES(10, 20)") + sql(s"UPDATE $table SET c = 20") + checkAnswer(spark.table(table), Row(20, 30) :: Nil) + sql(s"UPDATE delta.`$path` SET c = 30") + checkAnswer(spark.table(table), Row(30, 40) :: Nil) + io.delta.tables.DeltaTable.forName(table).updateExpr(Map("c" -> "40")) + checkAnswer(spark.table(table), Row(40, 50) :: Nil) + io.delta.tables.DeltaTable.forPath(path).updateExpr(Map("c" -> "50")) + checkAnswer(spark.table(table), Row(50, 60) :: Nil) + } + } + } + + test("update with various column references") { + withTableName("update_with_various_references") { table => + createTable(table, None, "c1 INT, c2 INT, g INT", Map("g" -> "c1 + 10"), Nil) + sql(s"INSERT INTO $table VALUES(10, 50, 20)") + sql(s"UPDATE $table SET 
c1 = 20") + checkAnswer(spark.table(table), Row(20, 50, 30) :: Nil) + sql(s"UPDATE $table SET c1 = c2 + 100, c2 = 1000") + checkAnswer(spark.table(table), Row(150, 1000, 160) :: Nil) + sql(s"UPDATE $table SET c1 = c2 + g") + checkAnswer(spark.table(table), Row(1160, 1000, 1170) :: Nil) + sql(s"UPDATE $table SET c1 = g") + checkAnswer(spark.table(table), Row(1170, 1000, 1180) :: Nil) + } + } + + test("update a struct source column") { + withTableName("update_struct_column") { table => + createTable(table, + None, + "s STRUCT, g INT", + Map("g" -> "s.s1 + 10"), + Nil) + sql(s"INSERT INTO $table VALUES(struct(10, 'foo'), 20)") + sql(s"UPDATE $table SET s.s1 = 20 WHERE s.s1 = 10") + checkAnswer(spark.table(table), Row(Row(20, "foo"), 30) :: Nil) + } + } + + test("updating a temp view is not supported") { + withTableName("update_temp_view") { table => + createTable(table, None, "c1 INT, c2 INT", Map("c2" -> "c1 + 10"), Nil) + withTempView("test_view") { + sql(s"CREATE TEMP VIEW test_view AS SELECT * FROM $table") + val e = intercept[AnalysisException] { + sql(s"UPDATE test_view SET c1 = 2 WHERE c1 = 1") + } + assert(e.getMessage.contains("a temp view")) + } + } + } + + testTableUpdate("streaming_write", isStreaming = true) { (table, path) => + withTempDir { checkpointDir => + val stream = MemoryStream[Int] + val q = stream.toDF + .map(_ => Tuple5(1L, "foo", "2020-10-11 12:30:30", 100, "2020-11-12")) + .toDF("c1", "c3_p", "c5", "c6", "c8") + .withColumn("c5", $"c5".cast(TimestampType)) + .withColumn("c8", $"c8".cast(DateType)) + .writeStream + .format("delta") + .outputMode("append") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .start(path) + stream.addData(1) + q.processAllAvailable() + q.stop() + } + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("streaming_write_with_different_case", isStreaming = true) { (table, path) => + withTempDir { checkpointDir => + val stream = MemoryStream[Int] + val q = stream.toDF + .map(_ => Tuple5(1L, "foo", "2020-10-11 12:30:30", 100, "2020-11-12")) + .toDF("C1", "c3_p", "c5", "c6", "c8") // C1 is using upper case + .withColumn("c5", $"c5".cast(TimestampType)) + .withColumn("c8", $"c8".cast(DateType)) + .writeStream + .format("delta") + .outputMode("append") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .start(path) + stream.addData(1) + q.processAllAvailable() + q.stop() + } + Row(1L, 11L, "foo", sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:30:30"), + 100, 1000, sqlDate("2020-11-12")) :: Nil + } + + testTableUpdate("streaming_write_incorrect_value", isStreaming = true) { (table, path) => + withTempDir { checkpointDir => + quietly { + val stream = MemoryStream[Int] + val q = stream.toDF + // 2L is an incorrect value. 
The correct value should be 11L + .map(_ => Tuple6(1L, 2L, "foo", "2020-10-11 12:30:30", 100, "2020-11-12")) + .toDF("c1", "c2_g", "c3_p", "c5", "c6", "c8") + .withColumn("c5", $"c5".cast(TimestampType)) + .withColumn("c8", $"c8".cast(DateType)) + .writeStream + .format("delta") + .outputMode("append") + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .start(path) + stream.addData(1) + val e = intercept[StreamingQueryException] { + q.processAllAvailable() + } + errorContains(e.getMessage, + "CHECK constraint Generated Column (c2_g <=> (c1 + 10)) violated by row with values") + q.stop() + } + } + Nil + } + + testQuietly("write to a generated column with an incorrect value") { + withTableName("write_incorrect_value") { table => + createTable(table, None, "id INT, id2 INT", Map("id2" -> "id + 10"), partitionColumns = Nil) + val e = intercept[InvariantViolationException] { + sql(s"INSERT INTO $table VALUES(1, 12)") + } + errorContains(e.getMessage, + "CHECK constraint Generated Column (id2 <=> (id + 10)) violated by row with values") + } + } + + test("dot in the column name") { + withTableName("dot_in_column_name") { table => + createTable(table, None, "`a.b` INT, `x.y` INT", Map("x.y" -> "`a.b` + 10"), Nil) + sql(s"INSERT INTO $table VALUES(1, 11)") + sql(s"INSERT INTO $table VALUES(2, 12)") + checkAnswer(sql(s"SELECT * FROM $table"), Row(1, 11) :: Row(2, 12) :: Nil) + } + } + + test("validateGeneratedColumns: generated columns should not refer to non-existent columns") { + val f1 = StructField("c1", IntegerType) + val f2 = withGenerationExpression(StructField("c2", IntegerType), "c10 + 10") + val schema = StructType(f1 :: f2 :: Nil) + val e = intercept[DeltaAnalysisException](validateGeneratedColumns(spark, schema)) + errorContains(e.getMessage, + "A generated column cannot use a non-existent column or another generated column") + } + + test("validateGeneratedColumns: no generated columns") { + val f1 = StructField("c1", IntegerType) + val f2 = StructField("c2", IntegerType) + val schema = StructType(f1 :: f2 :: Nil) + validateGeneratedColumns(spark, schema) + } + + test("validateGeneratedColumns: all generated columns") { + val f1 = withGenerationExpression(StructField("c1", IntegerType), "1 + 2") + val f2 = withGenerationExpression(StructField("c1", IntegerType), "3 + 4") + val schema = StructType(f1 :: f2 :: Nil) + validateGeneratedColumns(spark, schema) + } + + test("validateGeneratedColumns: generated columns should not refer to other generated columns") { + val f1 = StructField("c1", IntegerType) + val f2 = withGenerationExpression(StructField("c2", IntegerType), "c1 + 10") + val f3 = withGenerationExpression(StructField("c3", IntegerType), "c2 + 10") + val schema = StructType(f1 :: f2 :: f3 :: Nil) + val e = intercept[DeltaAnalysisException](validateGeneratedColumns(spark, schema)) + errorContains(e.getMessage, + "A generated column cannot use a non-existent column or another generated column") + } + + test("validateGeneratedColumns: supported expressions") { + for (exprString <- Seq( + // Generated column should support timestamp to date + "to_date(foo, \"yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS'Z'\")")) { + val f1 = StructField("foo", TimestampType) + val f2 = withGenerationExpression(StructField("bar", DateType), exprString) + val schema = StructType(Seq(f1, f2)) + validateGeneratedColumns(spark, schema) + } + } + + test("validateGeneratedColumns: unsupported expressions") { + spark.udf.register("myudf", (s: Array[Int]) => s) + for ((exprString, error) <- Seq( + "myudf(foo)" 
-> "Found myudf(foo). A generated column cannot use a user-defined function", + "rand()" -> + "Found rand(). A generated column cannot use a non deterministic expression", + "max(foo)" -> "Found max(foo). A generated column cannot use an aggregate expression", + "explode(foo)" -> "explode(foo) cannot be used in a generated column", + "current_timestamp" -> "current_timestamp() cannot be used in a generated column" + )) { + val f1 = StructField("foo", ArrayType(IntegerType, true)) + val f2 = withGenerationExpression(StructField("bar", IntegerType), exprString) + val schema = StructType(f1 :: f2 :: Nil) + val e = intercept[AnalysisException](validateGeneratedColumns(spark, schema)) + errorContains(e.getMessage, error) + } + } + + test("validateGeneratedColumns: column type doesn't match expression type") { + val f1 = StructField("foo", IntegerType) + val f2 = withGenerationExpression(StructField("bar", IntegerType), "CAST(foo AS string)") + val schema = StructType(f1 :: f2 :: Nil) + val e = intercept[AnalysisException](validateGeneratedColumns(spark, schema)) + errorContains(e.getMessage, "The expression type of the generated column bar is STRING, " + + "but the column type is INT") + } + + test("test partition transform expressions end to end") { + withTableName("partition_transform_expressions") { table => + createTable(table, None, + "time TIMESTAMP, year DATE, month DATE, day DATE, hour TIMESTAMP", + Map( + "year" -> "make_date(year(time), 1, 1)", + "month" -> "make_date(year(time), month(time), 1)", + "day" -> "make_date(year(time), month(time), day(time))", + "hour" -> "make_timestamp(year(time), month(time), day(time), hour(time), 0, 0)" + ), + partitionColumns = Nil) + Seq("2020-10-11 12:30:30") + .toDF("time") + .withColumn("time", $"time".cast(TimestampType)) + .write + .format("delta") + .mode("append"). 
+ saveAsTable(table) + checkAnswer( + sql(s"SELECT * from $table"), + Row(sqlTimestamp("2020-10-11 12:30:30"), sqlDate("2020-01-01"), sqlDate("2020-10-01"), + sqlDate("2020-10-11"), sqlTimestamp("2020-10-11 12:00:00")) + ) + } + } + + test("the generation expression constraint should support null values") { + withTableName("null") { table => + createTable(table, None, "c1 STRING, c2 STRING", Map("c2" -> "CONCAT(c1, 'y')"), Nil) + sql(s"INSERT INTO $table VALUES('x', 'xy')") + sql(s"INSERT INTO $table VALUES(null, null)") + checkAnswer( + sql(s"SELECT * from $table"), + Row("x", "xy") :: Row(null, null) :: Nil + ) + quietly { + val e = + intercept[InvariantViolationException](sql(s"INSERT INTO $table VALUES('foo', null)")) + errorContains(e.getMessage, + "CHECK constraint Generated Column (c2 <=> CONCAT(c1, 'y')) " + + "violated by row with values") + } + quietly { + val e = + intercept[InvariantViolationException](sql(s"INSERT INTO $table VALUES(null, 'foo')")) + errorContains(e.getMessage, + "CHECK constraint Generated Column (c2 <=> CONCAT(c1, 'y')) " + + "violated by row with values") + } + } + } + + test("complex type extractors") { + withTableName("struct_field") { table => + createTable( + table, + None, + "`a.b` STRING, a STRUCT, array ARRAY, " + + "c1 STRING, c2 INT, c3 INT", + Map("c1" -> "CONCAT(`a.b`, 'b')", "c2" -> "a.b + 100", "c3" -> "array[1]"), + Nil) + sql(s"INSERT INTO $table VALUES(" + + s"'a', struct(100, 'foo'), array(1000, 1001), " + + s"'ab', 200, 1001)") + checkAnswer( + spark.table(table), + Row("a", Row(100, "foo"), Array(1000, 1001), "ab", 200, 1001) :: Nil) + } + } + + test("getGeneratedColumnsAndColumnsUsedByGeneratedColumns") { + def testSchema(schema: Seq[StructField], expected: Set[String]): Unit = { + assert(getGeneratedColumnsAndColumnsUsedByGeneratedColumns(StructType(schema)) == expected) + } + + val f1 = StructField("c1", IntegerType) + val f2 = withGenerationExpression(StructField("c2", IntegerType), "c1 + 10") + val f3 = StructField("c3", IntegerType) + val f4 = withGenerationExpression(StructField("c4", IntegerType), "hash(c3 + 10)") + val f5 = withGenerationExpression(StructField("c5", IntegerType), "hash(C1 + 10)") + val f6 = StructField("c6", StructType(StructField("x", IntegerType) :: Nil)) + val f6x = StructField("c6.x", IntegerType) + val f7x = withGenerationExpression(StructField("c7.x", IntegerType), "`c6.x` + 10") + val f8 = withGenerationExpression(StructField("c8", IntegerType), "c6.x + 10") + testSchema(Seq(f1, f2), Set("c1", "c2")) + testSchema(Seq(f1, f2, f3), Set("c1", "c2")) + testSchema(Seq(f1, f2, f3, f4), Set("c1", "c2", "c3", "c4")) + testSchema(Seq(f1, f2, f5), Set("c1", "c2", "c5")) + testSchema(Seq(f6x, f7x), Set("c6.x", "c7.x")) + testSchema(Seq(f6, f6x, f7x), Set("c6.x", "c7.x")) + testSchema(Seq(f6, f6x, f8), Set("c6", "c8")) + } + + test("disallow column type evolution") { + withTableName("disallow_column_type_evolution") { table => + // "CAST(HASH(c1 + 32767s) AS SMALLINT)" is a special expression that returns different + // results for SMALLINT and INT. For example, "CAST(hash(32767 + 32767s) AS SMALLINT)" returns + // 9876, but "SELECT CAST(hash(32767s + 32767s) AS SMALLINT)" returns 31349. Hence we should + // not allow updating column type from SMALLINT to INT. 
+ createTable(table, None, "c1 SMALLINT, c2 SMALLINT", + Map("c2" -> "CAST(HASH(c1 + 32767s) AS SMALLINT)"), Nil) + val tableSchema = spark.table(table).schema + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + Seq(32767.toShort).toDF("c1").write.format("delta").mode("append").saveAsTable(table) + } + assert(tableSchema == spark.table(table).schema) + // Insert an INT to `c1` should fail rather than changing the `c1` type to INT + val e = intercept[AnalysisException] { + Seq(32767).toDF("c1").write.format("delta").mode("append") + .option("mergeSchema", "true") + .saveAsTable(table) + }.getMessage + assert(e.contains("Column c1") && + e.contains("The data type is SMALLINT. It doesn't accept data type INT")) + checkAnswer(spark.table(table), Row(32767, 31349) :: Nil) + } + } + + + test("reading from a Delta table should not see generation expressions") { + def verifyNoGenerationExpression(df: Dataset[_]): Unit = { + assert(!hasGeneratedColumns(df.schema)) + } + + withTableName("test_source") { table => + createTable(table, None, "c1 INT, c2 INT", Map("c1" -> "c2 + 1"), Nil) + sql(s"INSERT INTO $table VALUES(2, 1)") + val path = DeltaLog.forTable(spark, TableIdentifier(table)).dataPath.toString + + verifyNoGenerationExpression(spark.table(table)) + verifyNoGenerationExpression(spark.sql(s"select * from $table")) + verifyNoGenerationExpression(spark.sql(s"select * from delta.`$path`")) + verifyNoGenerationExpression(spark.read.format("delta").load(path)) + verifyNoGenerationExpression(spark.read.format("delta").table(table)) + verifyNoGenerationExpression(spark.readStream.format("delta").load(path)) + verifyNoGenerationExpression(spark.readStream.format("delta").table(table)) + withTempDir { checkpointDir => + val q = spark.readStream.format("delta").table(table).writeStream + .trigger(Trigger.Once) + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .foreachBatch { (ds: DataFrame, _: Long) => + verifyNoGenerationExpression(ds) + }.start() + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + withTempDir { outputDir => + withTempDir { checkpointDir => + val q = spark.readStream.format("delta").table(table).writeStream + .trigger(Trigger.Once) + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("delta") + .start(outputDir.getCanonicalPath) + try { + q.processAllAvailable() + } finally { + q.stop() + } + val deltaLog = DeltaLog.forTable(spark, outputDir) + assert(deltaLog.snapshot.version >= 0) + assert(!hasGeneratedColumns(deltaLog.snapshot.schema)) + } + } + } + } + + /** + * Verify if the table metadata matches the default test table. We use this to verify DDLs + * write correct table metadata into the transaction logs. 
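+ * Both the schema (including the generation expression metadata on each field) and the partition columns are checked against the default test table definition.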
+ */ + protected def verifyDefaultTestTableMetadata(table: String): Unit = { + val (deltaLog, snapshot) = if (table.startsWith("delta.")) { + DeltaLog.forTableWithSnapshot(spark, table.stripPrefix("delta.`").stripSuffix("`")) + } else { + DeltaLog.forTableWithSnapshot(spark, TableIdentifier(table)) + } + val schema = StructType.fromDDL(defaultTestTableSchema) + val expectedSchema = StructType(schema.map { field => + defaultTestTableGeneratedColumns.get(field.name).map { expr => + withGenerationExpression(field, expr) + }.getOrElse(field) + }) + val partitionColumns = defaultTestTablePartitionColumns + val metadata = snapshot.metadata + assert(metadata.schema == expectedSchema) + assert(metadata.partitionColumns == partitionColumns) + } + + protected def testCreateTable(testName: String)(createFunc: String => Unit): Unit = { + test(testName) { + withTable(testName) { + createFunc(testName) + verifyDefaultTestTableMetadata(testName) + } + } + } + + protected def testCreateTableWithLocation( + testName: String)(createFunc: (String, String) => Unit): Unit = { + test(testName + ": external") { + withTempPath { path => + withTable(testName) { + createFunc(testName, path.getCanonicalPath) + verifyDefaultTestTableMetadata(testName) + verifyDefaultTestTableMetadata(s"delta.`${path.getCanonicalPath}`") + } + } + } + } + + testCreateTable("create_table") { table => + createTable( + table, + None, + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns + ) + } + + testCreateTable("replace_table") { table => + createTable(table, None, "id bigint", Map.empty, Seq.empty) + replaceTable( + table, + None, + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns + ) + } + + testCreateTable("create_or_replace_table_non_exist") { table => + replaceTable( + table, + None, + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns, + orCreate = Some(true) + ) + } + + testCreateTable("create_or_replace_table_exist") { table => + createTable(table, None, "id bigint", Map.empty, Seq.empty) + replaceTable( + table, + None, + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns, + orCreate = Some(true) + ) + } + + testCreateTableWithLocation("create_table") { (table, path) => + createTable( + table, + Some(path), + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns + ) + } + + testCreateTableWithLocation("replace_table") { (table, path) => + createTable( + table, + Some(path), + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns + ) + replaceTable( + table, + Some(path), + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns + ) + } + + testCreateTableWithLocation("create_or_replace_table_non_exist") { (table, path) => + replaceTable( + table, + Some(path), + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns, + orCreate = Some(true) + ) + } + + testCreateTableWithLocation("create_or_replace_table_exist") { (table, path) => + createTable( + table, + Some(path), + "id bigint", + Map.empty, + Seq.empty + ) + replaceTable( + table, + Some(path), + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns, + orCreate = Some(true) + ) + } + + test("using generated columns should upgrade the protocol") { + withTableName("upgrade_protocol") { table => + def 
protocolVersions: (Int, Int) = { + sql(s"DESC DETAIL $table") + .select("minReaderVersion", "minWriterVersion") + .as[(Int, Int)] + .head() + } + + // Use the default protocol versions when not using computed partitions + createTable(table, None, "i INT", Map.empty, Seq.empty) + assert(protocolVersions == (1, 2)) + assert(DeltaLog.forTable(spark, TableIdentifier(tableName = table)).snapshot.version == 0) + + // Protocol versions should be upgraded when using computed partitions + replaceTable( + table, + None, + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns) + assert(protocolVersions == (1, 4)) + // Make sure we did overwrite the table rather than deleting and re-creating. + assert(DeltaLog.forTable(spark, TableIdentifier(tableName = table)).snapshot.version == 1) + } + } + + test("creating a table with a different schema should fail") { + withTempPath { path => + // Currently SQL is the only way to define a table using generated columns. So we create a + // temp table and drop it to get a path for such table. + withTableName("temp_generated_column_table") { table => + createTable( + null, + Some(path.toString), + defaultTestTableSchema, + defaultTestTableGeneratedColumns, + defaultTestTablePartitionColumns + ) + } + withTableName("table_with_no_schema") { table => + createTable( + table, + Some(path.toString), + "", + Map.empty, + Seq.empty + ) + verifyDefaultTestTableMetadata(table) + } + withTableName("table_with_different_expr") { table => + val e = intercept[AnalysisException]( + createTable( + table, + Some(path.toString), + defaultTestTableSchema, + Map( + "c2_g" -> "c1 + 11", // use a different generated expr + "c4_g_p" -> "CAST(c5 AS date)", + "c7_g_p" -> "c6 * 10" + ), + defaultTestTablePartitionColumns + ) + ) + assert(e.getMessage.contains( + "Specified generation expression for field c2_g is different from existing schema")) + assert(e.getMessage.contains("Specified: c1 + 11")) + assert(e.getMessage.contains("Existing: c1 + 10")) + } + withTableName("table_add_new_expr") { table => + val e = intercept[AnalysisException]( + createTable( + table, + Some(path.toString), + defaultTestTableSchema, + Map( + "c2_g" -> "c1 + 10", + "c3_p" -> "CAST(c1 AS string)", // add a generated expr + "c4_g_p" -> "CAST(c5 AS date)", + "c7_g_p" -> "c6 * 10" + ), + defaultTestTablePartitionColumns + ) + ) + assert(e.getMessage.contains( + "Specified generation expression for field c3_p is different from existing schema")) + assert(e.getMessage.contains("CAST(c1 AS string)")) + assert(e.getMessage.contains("Existing: \n")) + } + } + } + + test("use the generation expression, column comment and NOT NULL at the same time") { + withTableName("generation_expression_comment") { table => + createTable( + table, + None, + "c1 INT, c2 INT, c3 INT", + Map("c2" -> "c1 + 10", "c3" -> "c1 + 10"), + Seq.empty, + Set("c3"), + Map("c2" -> "foo", "c3" -> "foo") + ) + // Verify schema + val f1 = StructField("c1", IntegerType, nullable = true) + val fieldMetadata = new MetadataBuilder() + .putString(GENERATION_EXPRESSION_METADATA_KEY, "c1 + 10") + .putString("comment", "foo") + .build() + val f2 = StructField("c2", IntegerType, nullable = true, metadata = fieldMetadata) + val f3 = StructField("c3", IntegerType, nullable = false, metadata = fieldMetadata) + val expectedSchema = StructType(f1 :: f2 :: f3 :: Nil) + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, TableIdentifier(table)) + assert(snapshot.metadata.schema == expectedSchema) + // Verify column comment 
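+ // (Only c2 is checked via DESC here; c3 carries the same 'foo' comment, which the schema assertion above already covers.)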
+ val comments = sql(s"DESC $table") + .where("col_name = 'c2'") + .select("comment") + .as[String] + .collect() + .toSeq + assert("foo" :: Nil == comments) + } + } + + test("MERGE UPDATE basic") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN MATCHED THEN UPDATE SET ${tgt}.c2 = ${src}.c2 + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 3, 4)) + ) + } + } + } + + test("MERGE UPDATE set both generated column and its input") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN MATCHED THEN UPDATE SET ${tgt}.c2 = ${src}.c2, ${tgt}.c3 = ${src}.c3 + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 3, 4)) + ) + } + } + } + + test("MERGE UPDATE set star") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 4, 5);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN MATCHED THEN UPDATE SET * + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 4, 5)) + ) + } + } + } + + test("MERGE UPDATE set star add column") { + withSQLConf(("spark.databricks.delta.schema.autoMerge.enabled", "true")) { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c4 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 20, 40);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql( + s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN MATCHED THEN UPDATE SET * + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 20, 21, 40)) + ) + } + } + } + } + + test("MERGE UPDATE using value from target") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN MATCHED THEN UPDATE SET ${tgt}.c2 = ${tgt}.c3 + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 3, 4)) + ) + } + } + } + + test("MERGE UPDATE using value from both target and source") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 
INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN MATCHED THEN UPDATE SET ${tgt}.c2 = ${tgt}.c3 + ${src}.c3 + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 7, 8)) + ) + } + } + } + + test("MERGE UPDATE set to null") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN MATCHED THEN UPDATE SET ${tgt}.c2 = null + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, null, null)) + ) + } + } + } + + test("MERGE UPDATE multiple columns") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN MATCHED THEN UPDATE + | SET ${tgt}.c2 = ${src}.c1 * 10, ${tgt}.c1 = ${tgt}.c1 * 100 + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(100, 10, 11)) + ) + } + } + } + + test("MERGE UPDATE source is a query") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING (SELECT c1, max(c3) + min(c2) AS m FROM ${src} GROUP BY c1) source + |on ${tgt}.c1 = source.c1 + |WHEN MATCHED THEN UPDATE SET ${tgt}.c2 = source.m + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 7, 8)) + ) + } + } + } + + test("MERGE UPDATE temp view is not supported") { + withTableName("source") { src => + withTableName("target") { tgt => + withTempView("test_temp_view") { + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s"CREATE TEMP VIEW test_temp_view AS SELECT c1 as c2, c2 as c1, c3 FROM ${tgt}") + val e = intercept[AnalysisException] { + sql(s""" + |MERGE INTO test_temp_view + |USING ${src} + |on test_temp_view.c2 = ${src}.c1 + |WHEN MATCHED THEN UPDATE SET test_temp_view.c1 = ${src}.c2 + |""".stripMargin) + } + assert(e.getMessage.contains("a temp view")) + } + } + } + } + + test("MERGE INSERT star satisfies constraint") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM 
${tgt}"), + Seq(Row(1, 2, 3), Row(2, 3, 4)) + ) + } + } + } + + test("MERGE INSERT star violates constraint") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 5);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + val e = intercept[InvariantViolationException]( + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + ) + assert(e.getMessage.contains("CHECK constraint Generated Column")) + } + } + } + + test("MERGE INSERT star add column") { + withSQLConf(("spark.databricks.delta.schema.autoMerge.enabled", "true")) { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c4 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 5);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 2, 3, null), Row(2, 3, 4, 5)) + ) + } + } + } + } + + test("MERGE INSERT star add column violates constraint") { + withSQLConf(("spark.databricks.delta.schema.autoMerge.enabled", "true")) { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c3 INT, c4 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 5);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + val e = intercept[InvariantViolationException]( + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + ) + assert(e.getMessage.contains("CHECK constraint Generated Column")) + } + } + } + } + + test("MERGE INSERT star add column unrelated to generated columns") { + withSQLConf(("spark.databricks.delta.schema.autoMerge.enabled", "true")) { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c4 INT, c5 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 5);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 2, 3, null, null), Row(2, null, null, 3, 5)) + ) + } + } + } + } + + test("MERGE INSERT unrelated columns") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT (c1) VALUES (${src}.c1) + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 2, 3), Row(2, null, null)) + ) + } + } + } + + test("MERGE INSERT unrelated columns with const") { + withTableName("source") { src => + 
withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT (c1) VALUES (3) + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 2, 3), Row(3, null, null)) + ) + } + } + } + + test("MERGE INSERT referenced column only") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT (c2) VALUES (10) + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 2, 3), Row(null, 10, 11)) + ) + } + } + } + + test("MERGE INSERT referenced column with null") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT (c2) VALUES (null) + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 2, 3), Row(null, null, null)) + ) + } + } + } + + test("MERGE INSERT not all referenced column inserted") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + c1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT (c2) VALUES (5) + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 2, 3), Row(null, 5, null)) + ) + } + } + } + + test("MERGE INSERT generated column only") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + val e = intercept[InvariantViolationException]( + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT (c3) VALUES (10) + |""".stripMargin) + ) + assert(e.getMessage.contains("CHECK constraint Generated Column")) + } + } + } + + test("MERGE INSERT referenced and generated columns satisfies constraint") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 4);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT 
MATCHED THEN INSERT (c2, c3) VALUES (${src}.c2, ${src}.c3) + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 2, 3), Row(null, 3, 4)) + ) + } + } + } + + test("MERGE INSERT referenced and generated columns violates constraint") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (2, 3, 5);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3);") + val e = intercept[InvariantViolationException]( + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT (c2, c3) VALUES (${src}.c2, ${src}.c3) + |""".stripMargin) + ) + assert(e.getMessage.contains("CHECK constraint Generated Column")) + } + } + } + + test("MERGE INSERT and UPDATE") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c3 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 11, 12), (2, 3, 4), (3, 20, 21), (4, 5, 6), (5, 6, 7);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3), (2, 100, 101);") + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN MATCHED AND ${src}.c1 = 1 THEN UPDATE SET ${tgt}.c2 = 100 + |WHEN MATCHED THEN UPDATE SET * + |WHEN NOT MATCHED AND ${src}.c1 = 4 THEN INSERT (c1, c2) values (${src}.c1, 22) + |WHEN NOT MATCHED AND ${src}.c1 = 5 THEN INSERT (c1, c2) values (5, ${src}.c3) + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 100, 101), Row(2, 3, 4), Row(3, 20, 21), Row(4, 22, 23), Row(5, 7, 8)) + ) + } + } + } + + test("MERGE INSERT and UPDATE schema evolution") { + withSQLConf(("spark.databricks.delta.schema.autoMerge.enabled", "true")) { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c4 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 11, 12), (2, 3, 4), (3, 20, 21), " + + "(4, 5, 6), (5, 6, 7);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", Map("c3" -> "c2 + 1"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 3), (2, 100, 101);") + sql( + s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN MATCHED AND ${src}.c1 = 1 THEN UPDATE SET ${tgt}.c2 = 100 + |WHEN MATCHED THEN UPDATE SET * + |WHEN NOT MATCHED AND ${src}.c1 = 4 THEN INSERT (c1, c2) values (${src}.c1, 22) + |WHEN NOT MATCHED AND ${src}.c1 = 5 THEN INSERT (c1, c2) values (5, ${src}.c4) + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq( + Row(1, 100, 101, null), + Row(2, 3, 4, 4), + Row(3, 20, 21, 21), + Row(4, 22, 23, null), + Row(5, 7, 8, null) + ) + ) + } + } + } + } + + test("MERGE INSERT and UPDATE schema evolution multiple referenced columns") { + withSQLConf(("spark.databricks.delta.schema.autoMerge.enabled", "true")) { + withTableName("source") { src => + withTableName("target") { tgt => + createTable(src, None, "c1 INT, c2 INT, c4 INT", Map.empty, Seq.empty) + sql(s"INSERT INTO ${src} values (1, 11, 12), (2, null, 4), (3, 20, 21), " + + "(4, 5, 6), (5, 6, 7);") + createTable(tgt, None, "c1 INT, c2 INT, c3 INT", + Map("c3" -> "c1 + CAST(ISNULL(c2) AS INT)"), Seq.empty) + sql(s"INSERT INTO ${tgt} values (1, 2, 1), (2, 100, 2);") + sql( + s""" + 
|MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN MATCHED AND ${src}.c1 = 1 THEN UPDATE SET ${tgt}.c2 = 100 + |WHEN MATCHED THEN UPDATE SET * + |WHEN NOT MATCHED AND ${src}.c1 = 4 THEN INSERT (c1, c2) values (${src}.c1, 22) + |WHEN NOT MATCHED AND ${src}.c1 = 5 THEN INSERT (c1) values (5) + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq( + Row(1, 100, 1, null), + Row(2, null, 3, 4), + Row(3, 20, 3, 21), + Row(4, 22, 4, null), + Row(5, null, 6, null) + ) + ) + } + } + } + } + + test("MERGE INSERT with schema evolution on different name case") { + withTableName("source") { src => + withTableName("target") { tgt => + createTable( + tableName = src, + path = None, + schemaString = "c1 INT, c2 INT", + generatedColumns = Map.empty, + partitionColumns = Seq.empty + ) + sql(s"INSERT INTO ${src} values (2, 4);") + createTable( + tableName = tgt, + path = None, + schemaString = "c1 INT, c3 INT", + generatedColumns = Map("c3" -> "c1 + 1"), + partitionColumns = Seq.empty + ) + sql(s"INSERT INTO ${tgt} values (1, 2);") + + withSQLConf(("spark.databricks.delta.schema.autoMerge.enabled", "true")) { + sql(s""" + |MERGE INTO ${tgt} + |USING ${src} + |on ${tgt}.c1 = ${src}.c1 + |WHEN NOT MATCHED THEN INSERT (c1, C2) VALUES (${src}.c1, ${src}.c2) + |""".stripMargin) + } + checkAnswer( + sql(s"SELECT * FROM ${tgt}"), + Seq(Row(1, 2, null), Row(2, 3, 4)) + ) + } + } + } + + test("generated columns with cdf") { + val tableName1 = "gcEnabledCDCOn" + val tableName2 = "gcEnabledCDCOff" + withTable(tableName1, tableName2) { + + createTable( + tableName1, + None, + schemaString = "id LONG, timeCol TIMESTAMP, dateCol DATE", + generatedColumns = Map( + "dateCol" -> "CAST(timeCol AS DATE)" + ), + partitionColumns = Seq("dateCol"), + properties = Map( + "delta.enableChangeDataFeed" -> "true" + ) + ) + + spark.range(100).repartition(10) + .withColumn("timeCol", current_timestamp()) + .write + .format("delta") + .mode("append") + .saveAsTable(tableName1) + + spark.sql(s"DELETE FROM ${tableName1} WHERE id < 3") + + val changeData = spark.read.format("delta").option("readChangeData", "true") + .option("startingVersion", "2") + .table(tableName1) + .select("id", CDCReader.CDC_TYPE_COLUMN_NAME, CDCReader.CDC_COMMIT_VERSION) + + val expected = spark.range(0, 3) + .withColumn(CDCReader.CDC_TYPE_COLUMN_NAME, lit("delete")) + .withColumn(CDCReader.CDC_COMMIT_VERSION, lit(2)) + checkAnswer(changeData, expected) + + // Now write out the data frame of cdc to another table that has generated columns but not + // cdc enabled. 
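+ // The CDC read below does not select dateCol; the second table recomputes it from timeCol via its generation expression when the rows are appended.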
+ createTable( + tableName2, + None, + schemaString = "id LONG, _change_type STRING, timeCol TIMESTAMP, dateCol DATE", + generatedColumns = Map( + "dateCol" -> "CAST(timeCol AS DATE)" + ), + partitionColumns = Seq("dateCol"), + properties = Map( + "delta.enableChangeDataFeed" -> "false" + ) + ) + + val cdcRead = spark.read.format("delta").option("readChangeData", "true") + .option("startingVersion", "2") + .table(tableName1) + .select("id", CDCReader.CDC_TYPE_COLUMN_NAME, "timeCol") + + cdcRead + .write + .format("delta") + .mode("append") + .saveAsTable(tableName2) + + checkAnswer( + cdcRead, + spark.table(tableName2).drop("dateCol") + ) + } + } + + test("not null should be enforced with generated columns") { + withTableName("tbl") { tbl => + createTable(tbl, + None, "c1 INT, c2 STRING, c3 INT", Map("c3" -> "c1 + 1"), Seq.empty, Set("c1", "c2", "c3")) + + // try to write data without c2 in the DF + val schemaWithoutColumnC2 = StructType( + Seq(StructField("c1", IntegerType, true))) + val data1 = List(Row(3)) + val df1 = spark.createDataFrame(data1.asJava, schemaWithoutColumnC2) + + val e1 = intercept[DeltaInvariantViolationException] { + df1.write.format("delta").mode("append").saveAsTable("tbl") + } + assert(e1.getMessage.contains("Column c2, which has a NOT NULL constraint," + + " is missing from the data being written into the table.")) + } + } + + Seq(true, false).foreach { allowNullInsert => + test("nullable column should work with generated columns - " + + "allowNullInsert enabled=" + allowNullInsert) { + withTableName("tbl") { tbl => + withSQLConf(DeltaSQLConf.GENERATED_COLUMN_ALLOW_NULLABLE.key -> allowNullInsert.toString) { + createTable( + tbl, None, "c1 INT, c2 STRING, c3 INT", Map("c3" -> "c1 + 1"), Seq.empty) + + // create data frame that matches the table's schema + val data1 = List(Row(1, "a1"), Row(2, "a2")) + val schema = StructType( + Seq(StructField("c1", IntegerType, true), StructField("c2", StringType, true))) + val df1 = spark.createDataFrame(data1.asJava, schema) + df1.write.format("delta").mode("append").saveAsTable("tbl") + + // create a data frame that does not have c2 + val schemaWithoutOptionalColumnC2 = StructType( + Seq(StructField("c1", IntegerType, true))) + + val data2 = List(Row(3)) + val df2 = spark.createDataFrame(data2.asJava, schemaWithoutOptionalColumnC2) + + if (allowNullInsert) { + df2.write.format("delta").mode("append").saveAsTable("tbl") + // check correctness + val expectedDF = df1 + .union(df2.withColumn("c2", lit(null).cast(StringType))) + .withColumn("c3", 'c1 + 1) + checkAnswer(spark.read.table(tbl), expectedDF) + } else { + // when allow null insert is not enabled. + val e = intercept[AnalysisException] { + df2.write.format("delta").mode("append").saveAsTable("tbl") + } + e.getMessage.contains( + "A column, variable, or function parameter with name `c2` cannot be resolved") + } + } + } + } + } +} + +class GeneratedColumnSuite extends GeneratedColumnSuiteBase + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnTest.scala b/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnTest.scala new file mode 100644 index 00000000000..ff0a79f8f38 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnTest.scala @@ -0,0 +1,134 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.io.PrintWriter + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.schema.{DeltaInvariantViolationException, InvariantViolationException, SchemaUtils} +import org.apache.spark.sql.delta.sources.DeltaSourceUtils.GENERATION_EXPRESSION_METADATA_KEY +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import io.delta.tables.DeltaTableBuilder + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, stringToDate, stringToTimestamp, toJavaDate, toJavaTimestamp} +import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.functions.{current_timestamp, lit} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.{StreamingQueryException, Trigger} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{ArrayType, DateType, IntegerType, MetadataBuilder, StringType, StructField, StructType, TimestampType} +import org.apache.spark.unsafe.types.UTF8String + +trait GeneratedColumnTest extends QueryTest with SharedSparkSession with DeltaSQLCommandTest { + + + protected def sqlDate(date: String): java.sql.Date = { + toJavaDate(stringToDate(UTF8String.fromString(date)).get) + } + + protected def sqlTimestamp(timestamp: String): java.sql.Timestamp = { + toJavaTimestamp(stringToTimestamp( + UTF8String.fromString(timestamp), + getZoneId(SQLConf.get.sessionLocalTimeZone)).get) + } + + protected def withTableName[T](tableName: String)(func: String => T): Unit = { + withTable(tableName) { + func(tableName) + } + } + + /** Create a new field with the given generation expression. 
*/ + def withGenerationExpression(field: StructField, expr: String): StructField = { + val newMetadata = new MetadataBuilder() + .withMetadata(field.metadata) + .putString(GENERATION_EXPRESSION_METADATA_KEY, expr) + .build() + field.copy(metadata = newMetadata) + } + + protected def buildTable( + builder: DeltaTableBuilder, + tableName: String, + path: Option[String], + schemaString: String, + generatedColumns: Map[String, String], + partitionColumns: Seq[String], + notNullColumns: Set[String], + comments: Map[String, String], + properties: Map[String, String]): DeltaTableBuilder = { + val schema = if (schemaString.nonEmpty) { + StructType.fromDDL(schemaString) + } else { + new StructType() + } + val cols = schema.map(field => (field.name, field.dataType)) + if (tableName != null) { + builder.tableName(tableName) + } + cols.foreach(col => { + val (colName, dataType) = col + val nullable = !notNullColumns.contains(colName) + var columnBuilder = io.delta.tables.DeltaTable.columnBuilder(spark, colName) + columnBuilder.dataType(dataType.sql) + columnBuilder.nullable(nullable) + if (generatedColumns.contains(colName)) { + columnBuilder.generatedAlwaysAs(generatedColumns(colName)) + } + if (comments.contains(colName)) { + columnBuilder.comment(comments(colName)) + } + builder.addColumn(columnBuilder.build()) + }) + if (partitionColumns.nonEmpty) { + builder.partitionedBy(partitionColumns: _*) + } + if (path.nonEmpty) { + builder.location(path.get) + } + properties.foreach { case (key, value) => + builder.property(key, value) + } + builder + } + + protected def createTable( + tableName: String, + path: Option[String], + schemaString: String, + generatedColumns: Map[String, String], + partitionColumns: Seq[String], + notNullColumns: Set[String] = Set.empty, + comments: Map[String, String] = Map.empty, + properties: Map[String, String] = Map.empty): Unit = { + var tableBuilder = io.delta.tables.DeltaTable.create(spark) + buildTable(tableBuilder, tableName, path, schemaString, + generatedColumns, partitionColumns, notNullColumns, comments, properties) + .execute() + } +} + + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/HiveConvertToDeltaSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/HiveConvertToDeltaSuite.scala new file mode 100644 index 00000000000..15348739e7b --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/HiveConvertToDeltaSuite.scala @@ -0,0 +1,185 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaHiveTest + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.{col, from_json} +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.test.SQLTestUtils + +abstract class HiveConvertToDeltaSuiteBase + extends ConvertToDeltaHiveTableTests + with SQLTestUtils { + + override protected def convertToDelta( + identifier: String, + partitionSchema: Option[String] = None, collectStats: Boolean = true): Unit = { + if (partitionSchema.isEmpty) { + sql(s"convert to delta $identifier ${collectStatisticsStringOption(collectStats)} ") + } else { + val stringSchema = partitionSchema.get + sql(s"convert to delta $identifier ${collectStatisticsStringOption(collectStats)}" + + s" partitioned by ($stringSchema) ") + } + } + + override protected def verifyExternalCatalogMetadata(tableName: String): Unit = { + val catalogTable = spark.sessionState.catalog.externalCatalog.getTable("default", tableName) + // Hive automatically adds some properties + val cleanProps = catalogTable.properties.filterKeys(_ != "transient_lastDdlTime") + // We can't alter the schema in the catalog at the moment :( + assert(cleanProps.isEmpty, + s"Table properties weren't empty for table $tableName: $cleanProps") + } + + test("convert with statistics") { + val tbl = "hive_parquet" + withTable(tbl) { + sql( + s""" + |CREATE TABLE $tbl (id int, str string) + |PARTITIONED BY (part string) + |STORED AS PARQUET + """.stripMargin) + + sql(s"insert into $tbl VALUES (1, 'a', 1)") + + val catalogTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl)) + convertToDelta(tbl, Some("part string"), collectStats = true) + val deltaLog = DeltaLog.forTable(spark, catalogTable) + val statsDf = deltaLog.unsafeVolatileSnapshot.allFiles + .select( + from_json(col("stats"), deltaLog.unsafeVolatileSnapshot.statsSchema).as("stats")) + .select("stats.*") + assert(statsDf.filter(col("numRecords").isNull).count == 0) + val history = io.delta.tables.DeltaTable.forPath(catalogTable.location.getPath).history() + assert(history.count == 1) + } + } + + test("convert without statistics") { + val tbl = "hive_parquet" + withTable(tbl) { + sql( + s""" + |CREATE TABLE $tbl (id int, str string) + |PARTITIONED BY (part string) + |STORED AS PARQUET + """.stripMargin) + + sql(s"insert into $tbl VALUES (1, 'a', 1)") + + val catalogTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl)) + convertToDelta(tbl, Some("part string"), collectStats = false) + val deltaLog = DeltaLog.forTable(spark, catalogTable) + val statsDf = deltaLog.unsafeVolatileSnapshot.allFiles + .select(from_json(col("stats"), deltaLog.unsafeVolatileSnapshot.statsSchema).as("stats")) + .select("stats.*") + assert(statsDf.filter(col("numRecords").isNotNull).count == 0) + val history = io.delta.tables.DeltaTable.forPath(catalogTable.location.getPath).history() + assert(history.count == 1) + + } + } + + test("convert a Hive based parquet table") { + val tbl = "hive_parquet" + withTable(tbl) { + sql( + s""" + |CREATE TABLE $tbl (id int, str string) + |PARTITIONED BY (part string) + |STORED AS PARQUET + """.stripMargin) + + sql(s"insert into $tbl VALUES (1, 'a', 1)") + + val catalogTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl)) + assert(catalogTable.provider === Some("hive")) + 
assert(catalogTable.storage.serde.exists(_.contains("parquet"))) + + convertToDelta(tbl, Some("part string")) + + checkAnswer( + sql(s"select * from delta.`${getPathForTableName(tbl)}`"), + Row(1, "a", "1")) + + verifyExternalCatalogMetadata(tbl) + val updatedTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl)) + assert(updatedTable.provider === Some("delta")) + } + } + + test("convert a Hive based external parquet table") { + val tbl = "hive_parquet" + withTempDir { dir => + withTable(tbl) { + sql( + s""" + |CREATE EXTERNAL TABLE $tbl (id int, str string) + |PARTITIONED BY (part string) + |STORED AS PARQUET + |LOCATION '${dir.getCanonicalPath}' + """.stripMargin) + sql(s"insert into $tbl VALUES (1, 'a', 1)") + + val catalogTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl)) + assert(catalogTable.provider === Some("hive")) + assert(catalogTable.storage.serde.exists(_.contains("parquet"))) + + convertToDelta(tbl, Some("part string")) + + checkAnswer( + sql(s"select * from delta.`${dir.getCanonicalPath}`"), + Row(1, "a", "1")) + + verifyExternalCatalogMetadata(tbl) + val updatedTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl)) + assert(updatedTable.provider === Some("delta")) + } + } + } + + test("negative case: convert empty partitioned parquet table") { + val tbl = "hive_parquet" + withTempDir { dir => + withTable(tbl) { + sql( + s""" + |CREATE EXTERNAL TABLE $tbl (id int, str string) + |PARTITIONED BY (part string) + |STORED AS PARQUET + |LOCATION '${dir.getCanonicalPath}' + """.stripMargin) + + val ae = intercept[AnalysisException] { + convertToDelta(tbl, Some("part string")) + } + + assert(ae.getErrorClass == "DELTA_CONVERSION_NO_PARTITION_FOUND") + assert(ae.getSqlState == "42KD6") + assert(ae.getMessage.contains(tbl)) + } + } + } +} + +class HiveConvertToDeltaSuite extends HiveConvertToDeltaSuiteBase with DeltaHiveTest diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/HiveDeltaDDLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/HiveDeltaDDLSuite.scala new file mode 100644 index 00000000000..4f8a31f5026 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/HiveDeltaDDLSuite.scala @@ -0,0 +1,35 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaHiveTest + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.hive.test.TestHiveSingleton + +abstract class HiveDeltaDDLSuiteBase + extends DeltaDDLTestBase { + import testImplicits._ + + override protected def verifyNullabilityFailure(exception: AnalysisException): Unit = { + exception.getMessage.contains("not supported for changing column") + } + +} + +class HiveDeltaDDLSuite extends HiveDeltaDDLSuiteBase with DeltaHiveTest diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/HiveDeltaNotSupportedDDLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/HiveDeltaNotSupportedDDLSuite.scala new file mode 100644 index 00000000000..7c9ffeae1ee --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/HiveDeltaNotSupportedDDLSuite.scala @@ -0,0 +1,23 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.test.DeltaHiveTest + +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class HiveDeltaNotSupportedDDLSuite extends DeltaNotSupportedDDLBase with DeltaHiveTest diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/ImplicitDMLCastingSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/ImplicitDMLCastingSuite.scala new file mode 100644 index 00000000000..b0f16b2c817 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/ImplicitDMLCastingSuite.scala @@ -0,0 +1,342 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import scala.annotation.tailrec +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.{SparkConf, SparkException, SparkThrowable} +import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.internal.SQLConf + +/** + * Tests for casts that are implicitly added in DML commands modifying Delta tables. + * These casts are added to convert values to the schema of a table. + * INSERT operations are excluded as they are covered by InsertSuite and InsertSuiteEdge. 
+ */ +class ImplicitDMLCastingSuite extends QueryTest + with DeltaSQLCommandTest { + + private case class TestConfiguration( + sourceType: String, + sourceTypeInErrorMessage: String, + targetType: String, + targetTypeInErrorMessage: String, + validValue: String, + overflowValue: String, + // String because SparkArithmeticException is private and cannot be used for matching. + exceptionAnsiCast: String + ) { + override def toString: String = s"sourceType: $sourceType, targetType: $targetType" + } + + private case class SqlConfiguration( + followAnsiEnabled: Boolean, + ansiEnabled: Boolean, + storeAssignmentPolicy: SQLConf.StoreAssignmentPolicy.Value) { + + def withSqlSettings(f: => Unit): Unit = + withSQLConf( + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key + -> followAnsiEnabled.toString, + SQLConf.STORE_ASSIGNMENT_POLICY.key -> storeAssignmentPolicy.toString, + SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString)(f) + + override def toString: String = + s"followAnsiEnabled: $followAnsiEnabled, ansiEnabled: $ansiEnabled," + + s" storeAssignmentPolicy: $storeAssignmentPolicy" + } + + private def expectLegacyCastingBehaviour(sqlConfig: SqlConfiguration): Boolean = { + (sqlConfig.followAnsiEnabled && !sqlConfig.ansiEnabled) || + (!sqlConfig.followAnsiEnabled && + sqlConfig.storeAssignmentPolicy == SQLConf.StoreAssignmentPolicy.LEGACY) + } + + // Note that DATE to TIMESTAMP casts are not in this list as they always throw an error on + // overflow no matter if ANSI is enabled or not. + private val testConfigurations = Seq( + TestConfiguration(sourceType = "INT", sourceTypeInErrorMessage = "INT", + targetType = "TINYINT", targetTypeInErrorMessage = "TINYINT", + validValue = "1", overflowValue = Int.MaxValue.toString, + exceptionAnsiCast = "SparkArithmeticException"), + TestConfiguration(sourceType = "INT", sourceTypeInErrorMessage = "INT", + targetType = "SMALLINT", targetTypeInErrorMessage = "SMALLINT", + validValue = "1", overflowValue = Int.MaxValue.toString, + exceptionAnsiCast = "SparkArithmeticException"), + TestConfiguration(sourceType = "BIGINT", sourceTypeInErrorMessage = "BIGINT", + targetType = "INT", targetTypeInErrorMessage = "INT", + validValue = "1", overflowValue = Long.MaxValue.toString, + exceptionAnsiCast = "SparkArithmeticException"), + TestConfiguration(sourceType = "DOUBLE", sourceTypeInErrorMessage = "DOUBLE", + targetType = "BIGINT", targetTypeInErrorMessage = "BIGINT", + validValue = "1", overflowValue = "12345678901234567890D", + exceptionAnsiCast = "SparkArithmeticException"), + TestConfiguration(sourceType = "BIGINT", sourceTypeInErrorMessage = "BIGINT", + targetType = "DECIMAL(7,2)", targetTypeInErrorMessage = "DECIMAL(7,2)", + validValue = "1", overflowValue = Long.MaxValue.toString, + exceptionAnsiCast = "SparkArithmeticException"), + TestConfiguration(sourceType = "Struct", sourceTypeInErrorMessage = "BIGINT", + targetType = "Struct", targetTypeInErrorMessage = "INT", + validValue = "named_struct('value', 1)", + overflowValue = s"named_struct('value', ${Long.MaxValue.toString})", + exceptionAnsiCast = "SparkArithmeticException"), + TestConfiguration(sourceType = "ARRAY", sourceTypeInErrorMessage = "ARRAY", + targetType = "ARRAY", targetTypeInErrorMessage = "ARRAY", + validValue = "ARRAY(1)", overflowValue = s"ARRAY(${Long.MaxValue.toString})", + exceptionAnsiCast = "SparkArithmeticException"), + TestConfiguration(sourceType = "STRING", sourceTypeInErrorMessage = "STRING", + targetType = "INT", targetTypeInErrorMessage = "INT", + validValue = 
"'1'", overflowValue = s"'${Long.MaxValue.toString}'", + exceptionAnsiCast = "SparkNumberFormatException"), + TestConfiguration(sourceType = "MAP", + sourceTypeInErrorMessage = "MAP", targetType = "MAP", + targetTypeInErrorMessage = "MAP", validValue = "map('abc', 1)", + overflowValue = s"map('abc', ${Long.MaxValue.toString})", + exceptionAnsiCast = "SparkArithmeticException") + ) + + /** Returns cast failure exception if present in the cause chain. None otherwise. */ + @tailrec + private def castFailureCause(exception: Throwable): Option[Throwable] = { + exception match { + case arithmeticException: ArithmeticException => Some(arithmeticException) + case numberFormatException: NumberFormatException => Some(numberFormatException) + case _ if exception.getCause != null => castFailureCause(exception.getCause) + case _ => None + } + } + + /** + * Validate that a custom error is throws in case ansi.enabled is false, or a different + * overflow error is case ansi.enabled is true. + */ + private def validateException( + exception: Throwable, sqlConfig: SqlConfiguration, testConfig: TestConfiguration): Unit = { + // Validate that the type of error matches the expected error type. + castFailureCause(exception) match { + case Some(failureCause) if sqlConfig.followAnsiEnabled => + assert(sqlConfig.ansiEnabled) + assert(failureCause.toString.contains(testConfig.exceptionAnsiCast)) + + val sparkThrowable = failureCause.asInstanceOf[SparkThrowable] + assert(Seq("CAST_OVERFLOW", "NUMERIC_VALUE_OUT_OF_RANGE", "CAST_INVALID_INPUT") + .contains(sparkThrowable.getErrorClass)) + case Some(failureCause) if !sqlConfig.followAnsiEnabled => + assert(sqlConfig.storeAssignmentPolicy === SQLConf.StoreAssignmentPolicy.ANSI) + + val sparkThrowable = failureCause.asInstanceOf[SparkThrowable] + // Only arithmetic exceptions get a custom error message. + if (testConfig.exceptionAnsiCast == "SparkArithmeticException") { + assert(sparkThrowable.getErrorClass == "DELTA_CAST_OVERFLOW_IN_TABLE_WRITE") + assert(sparkThrowable.getMessageParameters == + Map("sourceType" -> ("\"" + testConfig.sourceTypeInErrorMessage + "\""), + "targetType" -> ("\"" + testConfig.targetTypeInErrorMessage + "\""), + "columnName" -> "`value`", + "storeAssignmentPolicyFlag" -> SQLConf.STORE_ASSIGNMENT_POLICY.key, + "updateAndMergeCastingFollowsAnsiEnabledFlag" -> + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key, + "ansiEnabledFlag" -> SQLConf.ANSI_ENABLED.key).asJava) + } else { + assert(sparkThrowable.getErrorClass == "CAST_INVALID_INPUT") + assert(sparkThrowable.getMessageParameters.get("sourceType") == "\"STRING\"") + } + case None => assert(false, s"No arithmetic exception thrown: $exception") + } + } + + Seq(true, false).foreach { followAnsiEnabled => + Seq(true, false).foreach { ansiEnabled => + Seq(SQLConf.StoreAssignmentPolicy.LEGACY, SQLConf.StoreAssignmentPolicy.ANSI) + .foreach { storeAssignmentPolicy => + val sqlConfiguration = + SqlConfiguration(followAnsiEnabled, ansiEnabled, storeAssignmentPolicy) + testConfigurations.foreach { testConfiguration => + updateTest(sqlConfiguration, testConfiguration) + mergeTests(sqlConfiguration, testConfiguration) + streamingMergeTest(sqlConfiguration, testConfiguration) + } + } + } + } + + /** Test an UPDATE that requires to cast the update value that is part of the SET clause. 
*/ + private def updateTest( + sqlConfig: SqlConfiguration, testConfig: TestConfiguration): Unit = { + val testName = s"UPDATE overflow $testConfig $sqlConfig" + test(testName) { + sqlConfig.withSqlSettings { + val tableName = "overflowTable" + withTable(tableName) { + sql(s"""CREATE TABLE $tableName USING DELTA + |AS SELECT cast(${testConfig.validValue} AS ${testConfig.targetType}) AS value + |""".stripMargin) + val updateCommand = s"UPDATE $tableName SET value = ${testConfig.overflowValue}" + + if (expectLegacyCastingBehaviour(sqlConfig)) { + sql(updateCommand) + } else { + val exception = intercept[Throwable] { + sql(updateCommand) + } + + validateException(exception, sqlConfig, testConfig) + } + } + } + } + } + + + /** Tests for MERGE with overflows caused by the different conditions. */ + private def mergeTests( + sqlConfig: SqlConfiguration, testConfig: TestConfiguration): Unit = { + mergeTest(matchedCondition = s"WHEN MATCHED THEN UPDATE SET t.value = s.value", + sqlConfig, testConfig) + + mergeTest(matchedCondition = s"WHEN NOT MATCHED THEN INSERT *", sqlConfig, testConfig) + + mergeTest(matchedCondition = + s"WHEN NOT MATCHED BY SOURCE THEN UPDATE SET t.value = ${testConfig.overflowValue}", + sqlConfig, testConfig) + } + + private def mergeTest( + matchedCondition: String, + sqlConfig: SqlConfiguration, + testConfig: TestConfiguration + ): Unit = { + val testName = s"MERGE overflow in $matchedCondition $testConfig $sqlConfig" + test(testName) { + sqlConfig.withSqlSettings { + val targetTableName = "target_table" + val sourceViewName = "source_view" + withTable(targetTableName) { + withTempView(sourceViewName) { + val numRows = 10 + sql(s"""CREATE TABLE $targetTableName USING DELTA + |AS SELECT col as key, + | cast(${testConfig.validValue} AS ${testConfig.targetType}) AS value + |FROM explode(sequence(0, $numRows))""".stripMargin) + // The view maps the key space such that we get matched, not matched by source, and + // not matched by target rows. + sql(s"""CREATE TEMPORARY VIEW $sourceViewName + |AS SELECT key + ($numRows / 2) AS key, + | cast(${testConfig.overflowValue} AS ${testConfig.sourceType}) AS value + |FROM $targetTableName""".stripMargin) + val mergeCommand = s"""MERGE INTO $targetTableName t + |USING $sourceViewName s + |ON s.key = t.key + |$matchedCondition + |""".stripMargin + + if (expectLegacyCastingBehaviour(sqlConfig)) { + sql(mergeCommand) + } else { + val exception = intercept[Throwable] { + sql(mergeCommand) + } + + validateException(exception, sqlConfig, testConfig) + } + } + } + } + } + } + + /** A merge that is executed for each batch of a stream and has to cast values before inserting them.
*/ + private def streamingMergeTest( + sqlConfig: SqlConfiguration, testConfig: TestConfiguration): Unit = { + val testName = s"Streaming MERGE overflow $testConfig $sqlConfig" + test(testName) { + sqlConfig.withSqlSettings { + val targetTableName = "target_table" + val sourceTableName = "source_table" + withTable(sourceTableName, targetTableName) { + sql(s"CREATE TABLE $targetTableName (key INT, value ${testConfig.targetType})" + + " USING DELTA") + sql(s"CREATE TABLE $sourceTableName (key INT, value ${testConfig.sourceType})" + + " USING DELTA") + + def upsertToDelta(microBatchOutputDF: DataFrame, batchId: Long): Unit = { + microBatchOutputDF.createOrReplaceTempView("micro_batch_output") + + microBatchOutputDF.sparkSession.sql(s"""MERGE INTO $targetTableName t + |USING micro_batch_output s + |ON s.key = t.key + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + } + + val sourceStream = spark.readStream.table(sourceTableName) + val streamWriter = + sourceStream + .writeStream + .format("delta") + .foreachBatch(upsertToDelta _) + .outputMode("update") + .start() + + sql(s"INSERT INTO $sourceTableName(key, value) VALUES(0, ${testConfig.overflowValue})") + + if (expectLegacyCastingBehaviour(sqlConfig)) { + streamWriter.processAllAvailable() + } else { + val exception = intercept[Throwable] { + streamWriter.processAllAvailable() + } + + validateException(exception, sqlConfig, testConfig) + } + } + } + } + } + + test("Details are part of the error message") { + val sourceTableName = "source_table_name" + val sourceValueType = "INT" + val targetTableName = "target_table_name" + val targetValueType = "LONG" + val valueColumnName = "value" + + withTable(sourceTableName, targetTableName) { + sql(s"CREATE OR REPLACE TABLE $targetTableName(id LONG, $valueColumnName $sourceValueType) " + + "USING DELTA") + sql(s"CREATE OR REPLACE TABLE $sourceTableName(id LONG, $valueColumnName $targetValueType) " + + "USING DELTA") + sql(s"INSERT INTO $sourceTableName VALUES(0, 9223372036854775807)") + + val userFacingError = intercept[SparkException] { + sql(s"""MERGE INTO $targetTableName t + |USING $sourceTableName s + |ON s.id = t.id + |WHEN NOT MATCHED THEN INSERT *""".stripMargin) + } + val expectedDetails = + Seq("DELTA_CAST_OVERFLOW_IN_TABLE_WRITE", sourceValueType, valueColumnName) + for (detail <- expectedDetails) { + assert(userFacingError.toString.contains(detail)) + } + } + } +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/LastCheckpointInfoSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/LastCheckpointInfoSuite.scala new file mode 100644 index 00000000000..9fb20fa012a --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/LastCheckpointInfoSuite.scala @@ -0,0 +1,309 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.JsonUtils +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.exc.MismatchedInputException +import com.google.common.io.{ByteStreams, Closeables} +import org.apache.commons.codec.digest.DigestUtils +import org.apache.commons.io.IOUtils + +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, StructType} + +class LastCheckpointInfoSuite extends SharedSparkSession + with DeltaSQLCommandTest { + + // same checkpoint schema for tests + private val checkpointSchema = Some(new StructType().add("c1", IntegerType, nullable = false)) + + private def jsonStringToChecksum(jsonStr: String): String = { + val rootNode = JsonUtils.mapper.readValue(jsonStr, classOf[JsonNode]) + LastCheckpointInfo.treeNodeToChecksum(rootNode) + } + + test("test json to checksum conversion with maps") { + // test with different ordering and spaces, with different value data types + val s1 = """{"k1":"v1","k4":"v4","k3":23.45,"k2":123}""" + val s2 = """{"k1":"v1","k3":23.45,"k2":123, "k4":"v4"}""" + assert(jsonStringToChecksum(s1) === jsonStringToChecksum(s2)) + + // test json with nested maps + val s3 = + """{"k1":"v1","k4":{"k41":"v41","k40":{"k401":401,"k402":"402"}},"k3":23.45,"k2":123}""" + val s4 = + """{"k1":"v1","k4":{"k40":{"k401":401,"k402":"402"}, "k41":"v41"},"k3":23.45,"k2":123}""" + assert(jsonStringToChecksum(s3) === jsonStringToChecksum(s4)) + + // test empty json + val s5 = """{ }""" + val s6 = """{}""" + assert(jsonStringToChecksum(s5) === jsonStringToChecksum(s6)) + + // negative test: value for a specific key k4 is not same. + val s7 = """{"k1":"v1","k4":"v4","k3":23.45,"k2":123}""" + val s8 = """{"k1":"v1","k4":"v1","k3":23.45,"k2":123}""" + assert(jsonStringToChecksum(s7) != jsonStringToChecksum(s8)) + } + + test("test json to checksum conversion with array") { + // has top level array and array values are json objects + val s1 = """[{"id":"j1","stuff":"things"},{"stuff":"t2","id":"j2"}]""" + val s2 = """[{"id" : "j1", "stuff" : "things"}, {"id" : "j2", "stuff" : "t2"}]""" + assert(jsonStringToChecksum(s1) === jsonStringToChecksum(s2)) + + // array as part of value for a json key and array value has single json object + val s3 = """{"id":"j1","stuff":[{"hello": "world", "hello1": "world1"}]}""" + val s4 = """{"id": "j1","stuff":[{"hello1": "world1", "hello": "world"}]}""" + assert(jsonStringToChecksum(s3) === jsonStringToChecksum(s4)) + + // array as part of value for a json key and array values are multiple json objects + val s5 = """{"id":"j1","stuff":[{"hello": "world"}, {"hello1": "world1"}]}""" + val s6 = """{"id": "j1","stuff":[{"hello":"world"},{"hello1":"world1"}]}""" + assert(jsonStringToChecksum(s5) === jsonStringToChecksum(s6)) + + // Negative case: array as part of value for a json key and array values are multiple json + // objects with different order. 
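+ // Unlike object keys, array elements are position-sensitive, so reordering them changes the checksum.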
+ val s7 = """{"id":"j1","stuff":[{"hello1": "world1"}, {"hello": "world"}]}""" + val s8 = """{"id": "j1","stuff":[{"hello":"world"},{"hello1":"world1"}]}""" + assert(jsonStringToChecksum(s7) != jsonStringToChecksum(s8)) + + // array has scalar string values + val s9 = """{"id":"j1","stuff":["a", "b"]}""" + val s10 = """{"stuff":["a","b"], "id": "j1"}""" + assert(jsonStringToChecksum(s9) === jsonStringToChecksum(s10)) + + // array has scalar int values + val s11 = """{"id":"j1","stuff":[1, 2]}""" + val s12 = """{"stuff":[1,2], "id": "j1"}""" + assert(jsonStringToChecksum(s11) === jsonStringToChecksum(s12)) + + // Negative case: array has scalar values in different order + val s13 = """{"id":"j1","stuff":["a", "b", "c"]}""" + val s14 = """{"id":"j1","stuff":["c", "a", "b"]}""" + assert(jsonStringToChecksum(s13) != jsonStringToChecksum(s14)) + } + + // scalastyle:off line.size.limit + test("test json normalization") { + // test with different data types + val s1 = """{"k1":"v1","k4":"v4","k3":23.45,"k2":123,"k6":null,"k5":true}""" + val normalizedS1 = """"k1"="v1","k2"=123,"k3"=23.45,"k4"="v4","k5"=true,"k6"=null""" + assert(jsonStringToChecksum(s1) === DigestUtils.md5Hex(normalizedS1)) + + // test json with nested maps + val s2 = + """{"k1":"v1","k4":{"k41":"v41","k40":{"k401":401,"k402":"402"}},"k3":23.45,"k2":123}""" + val normalizedS2 = """"k1"="v1","k2"=123,"k3"=23.45,"k4"+"k40"+"k401"=401,"k4"+"k40"+"k402"="402","k4"+"k41"="v41"""" + assert(jsonStringToChecksum(s2) === DigestUtils.md5Hex(normalizedS2)) + + // test with arrays + val s3 = """{"stuff":[{"hx": "wx","h1":"w1"}, {"h2": "w2"}],"id":1}""" + val normalizedS3 = """"id"=1,"stuff"+0+"h1"="w1","stuff"+0+"hx"="wx","stuff"+1+"h2"="w2"""" + assert(jsonStringToChecksum(s3) === DigestUtils.md5Hex(normalizedS3)) + + // test top level `checksum` key is ignored in canonicalization + val s4 = """{"k1":"v1","checksum":"daswefdssfd","k3":23.45,"k2":123}""" + val normalizedS4 = """"k1"="v1","k2"=123,"k3"=23.45""" + assert(jsonStringToChecksum(s4) === DigestUtils.md5Hex(normalizedS4)) + + // test empty json + val s5 = """{ }""" + val normalizedS5 = """""" + assert(jsonStringToChecksum(s5) === DigestUtils.md5Hex(normalizedS5)) + + // test with complex strings + val s6 = """{"k0":"normal","k1":"'v1'","k4":"'v4","k3":":hello","k2":"\"double quote str\""}""" + val normalizedS6 = """"k0"="normal","k1"="%27v1%27","k2"="%22double%20quote%20str%22","k3"="%3Ahello","k4"="%27v4"""" + assert(jsonStringToChecksum(s6) === DigestUtils.md5Hex(normalizedS6)) + + // test covering different ASCII characters + val s7 = """{"k0":"normal","k1":"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789%'`~!@#$%^&*()_+-={[}]|\\;:'\"\/?.>,<"}""" + val normalizedS7 = """"k0"="normal","k1"="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789%25%27%60~%21%40%23%24%25%5E%26%2A%28%29_%2B-%3D%7B%5B%7D%5D%7C%5C%3B%3A%27%22%2F%3F.%3E%2C%3C"""" + assert(jsonStringToChecksum(s7) === DigestUtils.md5Hex(normalizedS7)) + + // test with nested maps and arrays + // This example is also part of Delta's PROTOCOL.md. We should keep these two in sync. 
+ val s8 = """{"k0":"'v 0'", "checksum": "adsaskfljadfkjadfkj", "k1":{"k2": 2, "k3": ["v3", [1, 2], {"k4": "v4", "k5": ["v5", "v6", "v7"]}]}}""" + val normalizedS8 = """"k0"="%27v%200%27","k1"+"k2"=2,"k1"+"k3"+0="v3","k1"+"k3"+1+0=1,"k1"+"k3"+1+1=2,"k1"+"k3"+2+"k4"="v4","k1"+"k3"+2+"k5"+0="v5","k1"+"k3"+2+"k5"+1="v6","k1"+"k3"+2+"k5"+2="v7"""" + assert(jsonStringToChecksum(s8) === DigestUtils.md5Hex(normalizedS8)) + assert(jsonStringToChecksum(s8) === "6a92d155a59bf2eecbd4b4ec7fd1f875") + + // test non-ASCII character + // scalastyle:off nonascii + val s9 = s"""{"k0":"normal","k1":"a€+"}""" + val normalizedS9 = """"k0"="normal","k1"="a%E2%82%AC%2B"""" + assert(jsonStringToChecksum(s9) === DigestUtils.md5Hex(normalizedS9)) + // scalastyle:on nonascii + } + // scalastyle:on line.size.limit + + test("test LastCheckpointInfo checksum") { + val ci1 = LastCheckpointInfo(version = 1, size = 2, parts = Some(3), + sizeInBytes = Some(20L), numOfAddFiles = Some(2L), checkpointSchema = checkpointSchema) + val (stored1, actual1) = + LastCheckpointInfo.getChecksums(LastCheckpointInfo.serializeToJson(ci1, addChecksum = true)) + assert(stored1 === Some(actual1)) + + // checksum mismatch when version changes. + val ci2 = LastCheckpointInfo(version = 2, size = 2, parts = Some(3), + sizeInBytes = Some(20L), numOfAddFiles = Some(2L), + checkpointSchema = checkpointSchema) + val (stored2, actual2) = + LastCheckpointInfo.getChecksums(LastCheckpointInfo.serializeToJson(ci2, addChecksum = true)) + assert(stored2 === Some(actual2)) + assert(stored2 != stored1) + + // `checksum` doesn't participate in `actualChecksum` calculation. + val ci3 = LastCheckpointInfo(version = 1, size = 2, parts = Some(3), + checksum = Some("XYZ"), sizeInBytes = Some(20L), numOfAddFiles = Some(2L), + checkpointSchema = checkpointSchema) + val (stored3, actual3) = + LastCheckpointInfo.getChecksums(LastCheckpointInfo.serializeToJson(ci3, addChecksum = true)) + assert(stored3 === Some(actual3)) + assert(stored3 === stored1) + + // checksum doesn't depend on spaces and order of field + val json1 = """{"version":1,"size":2,"parts":3}""" + val json2 = """{"version":1 ,"parts":3,"size":2}""" + assert(jsonStringToChecksum(json1) === jsonStringToChecksum(json2)) + // `checksum` is ignored while calculating json + val json3 = """{"version":1 ,"parts":3,"size":2,"checksum":"xyz"}""" + assert(jsonStringToChecksum(json1) === jsonStringToChecksum(json3)) + // Change in any value changes the checksum + val json4 = """{"version":4,"size":2,"parts":3}""" + assert(jsonStringToChecksum(json1) != jsonStringToChecksum(json4)) + + } + + test("test backward compatibility - json without checksum is deserialized properly") { + val jsonStr = """{"version":1,"size":2,"parts":3,"sizeInBytes":20,"numOfAddFiles":2,""" + + """"checkpointSchema":{"type":"struct","fields":[{"name":"c1","type":"integer"""" + + ""","nullable":false,"metadata":{}}]}}""" + val expectedLastCheckpointInfo = LastCheckpointInfo( + version = 1, size = 2, parts = Some(3), sizeInBytes = Some(20), numOfAddFiles = Some(2), + checkpointSchema = Some(new StructType().add("c1", IntegerType, nullable = false))) + assert(LastCheckpointInfo.deserializeFromJson(jsonStr, validate = true) === + expectedLastCheckpointInfo) + } + + test("LastCheckpointInfo - serialize/deserialize") { + val ci1 = LastCheckpointInfo(version = 1, size = 2, parts = Some(3), + checksum = Some("XYZ"), sizeInBytes = Some(20L), numOfAddFiles = Some(2L), + checkpointSchema = checkpointSchema) + val ci2 = LastCheckpointInfo(version = 1, 
size = 2, parts = Some(3), checksum = None, + sizeInBytes = Some(20L), numOfAddFiles = Some(2L), + checkpointSchema = checkpointSchema) + + val actualChecksum = LastCheckpointInfo.getChecksums( + LastCheckpointInfo.serializeToJson(ci1, addChecksum = true))._2 + val ciWithCorrectChecksum = ci1.copy(checksum = Some(actualChecksum)) + + for(ci <- Seq(ci1, ci2)) { + val json = LastCheckpointInfo.serializeToJson(ci, addChecksum = true) + assert(LastCheckpointInfo.deserializeFromJson(json, validate = true) + === ciWithCorrectChecksum) + // The below assertion also validates that fields version/size/parts are in the beginning of + // the json. + assert(LastCheckpointInfo.serializeToJson(ci, addChecksum = true) === + """{"version":1,"size":2,"parts":3,"sizeInBytes":20,"numOfAddFiles":2,""" + + s""""checkpointSchema":${JsonUtils.toJson(checkpointSchema)},""" + + """"checksum":"524d4e2226f3c3f923df4ee42dae347e"}""") + } + + assert(LastCheckpointInfo.serializeToJson(ci1, addChecksum = true) + === LastCheckpointInfo.serializeToJson(ci2, addChecksum = true)) + } + + test("LastCheckpointInfo - json with duplicate keys should fail") { + val jsonString = + """{"version":1,"size":3,"parts":3,"checksum":"d84a0aa11c93304d57feca6acaceb7fb","size":2}""" + intercept[MismatchedInputException] { + LastCheckpointInfo.deserializeFromJson(jsonString, validate = true) + } + // Deserialization shouldn't fail when validate is false and the last `size` overrides the + // previous size. + assert(LastCheckpointInfo.deserializeFromJson(jsonString, validate = false).size === 2) + } + + test("LastCheckpointInfo - test checksum is written only when config is enabled") { + withTempDir { dir => + spark.range(10).write.format("delta").save(dir.getAbsolutePath) + val log = DeltaLog.forTable(spark, dir) + + def readLastCheckpointFile(): String = { + val fs = log.LAST_CHECKPOINT.getFileSystem(log.newDeltaHadoopConf()) + val is = fs.open(log.LAST_CHECKPOINT) + try { + IOUtils.toString(is, "UTF-8") + } finally { + is.close() + } + } + + withSQLConf(DeltaSQLConf.LAST_CHECKPOINT_CHECKSUM_ENABLED.key -> "true") { + DeltaLog.forTable(spark, dir).checkpoint() + assert(readLastCheckpointFile().contains("checksum")) + } + + spark.range(10).write.mode("append").format("delta").save(dir.getAbsolutePath) + withSQLConf(DeltaSQLConf.LAST_CHECKPOINT_CHECKSUM_ENABLED.key -> "false") { + DeltaLog.forTable(spark, dir).checkpoint() + assert(!readLastCheckpointFile().contains("checksum")) + } + } + } + + test("Suppress optional fields in _last_checkpoint") { + val expectedStr = """{"version":1,"size":2,"parts":3}""" + val info = LastCheckpointInfo( + version = 1, size = 2, parts = Some(3), sizeInBytes = Some(20), numOfAddFiles = Some(2), + checkpointSchema = Some(new StructType().add("c1", IntegerType, nullable = false))) + val serializedJson = LastCheckpointInfo.serializeToJson( + info, addChecksum = true, suppressOptionalFields = true) + assert(serializedJson === expectedStr) + + val expectedStrNoPart = """{"version":1,"size":2}""" + val serializedJsonNoPart = LastCheckpointInfo.serializeToJson( + info.copy(parts = None), addChecksum = true, suppressOptionalFields = true) + assert(serializedJsonNoPart === expectedStrNoPart) + } + + test("read and write _last_checkpoint with optional fields suppressed") { + withTempDir { dir => + withSQLConf(DeltaSQLConf.SUPPRESS_OPTIONAL_LAST_CHECKPOINT_FIELDS.key -> "true") { + // Create a Delta table with a checkpoint. 
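+ // With the conf enabled, the _last_checkpoint written below should contain only the + // leading fields (version, size, and parts when present); the trailing optional fields + // are expected to be empty, as asserted afterwards.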
+ spark.range(10).write.format("delta").save(dir.getAbsolutePath) + DeltaLog.forTable(spark, dir).checkpoint() + DeltaLog.clearCache() + + val log = DeltaLog.forTable(spark, dir) + val metadata = log.readLastCheckpointFile().get + val trimmed = metadata.productIterator.drop(3).forall { + case o: Option[_] => o.isEmpty + } + assert(trimmed, s"Unexpected fields in _last_checkpoint: $metadata") + } + } + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/LogStoreProviderSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/LogStoreProviderSuite.scala new file mode 100644 index 00000000000..45360d08efc --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/LogStoreProviderSuite.scala @@ -0,0 +1,177 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.storage.{DelegatingLogStore, LogStore, LogStoreAdaptor} + +import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.LocalSparkSession._ + +class LogStoreProviderSuite extends SparkFunSuite { + + + private val customLogStoreClassName = classOf[CustomPublicLogStore].getName + private def fakeSchemeWithNoDefault = "fake" + private def withoutSparkPrefix(key: String) = key.stripPrefix("spark.") + + private def constructSparkConf(confs: Seq[(String, String)]): SparkConf = { + val sparkConf = new SparkConf(loadDefaults = false).setMaster("local") + confs.foreach { case (key, value) => sparkConf.set(key, value) } + sparkConf + } + + /** + * Test with class conf set and scheme conf unset using `scheme`. Test using class conf key both + * with and without 'spark.' prefix. + */ + private def testLogStoreClassConfNoSchemeConf(scheme: String) { + for (classKeys <- Seq( + // set only prefixed key + Seq(LogStore.logStoreClassConfKey), + // set only non-prefixed key + Seq(withoutSparkPrefix(LogStore.logStoreClassConfKey)), + // set both spark-prefixed key and non-spark prefixed key + Seq(LogStore.logStoreClassConfKey, withoutSparkPrefix(LogStore.logStoreClassConfKey)) + )) { + val sparkConf = constructSparkConf(classKeys.map((_, customLogStoreClassName))) + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + assert(LogStore(spark).isInstanceOf[LogStoreAdaptor]) + assert(LogStore(spark).asInstanceOf[LogStoreAdaptor] + .logStoreImpl.getClass.getName == customLogStoreClassName) + } + } + } + + /** + * Test with class conf set and scheme conf set using `scheme`. This tests + * checkLogStoreConfConflicts. Test conf keys both with and without 'spark.' prefix. 
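+ * Setting both at once is expected to fail with an AnalysisException that names the two + * conflicting keys.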
+ */ + private def testLogStoreClassConfAndSchemeConf(scheme: String, classConf: String, + schemeConf: String) { + val schemeKey = LogStore.logStoreSchemeConfKey(scheme) + // we test with both the spark-prefixed and non-prefixed keys + val schemeConfKeys = Seq(schemeKey, withoutSparkPrefix(schemeKey)) + val classConfKeys = Seq(LogStore.logStoreClassConfKey, + withoutSparkPrefix(LogStore.logStoreClassConfKey)) + + schemeConfKeys.foreach { schemeKey => + classConfKeys.foreach { classKey => + val sparkConf = constructSparkConf(Seq((schemeKey, schemeConf), (classKey, classConf))) + val e = intercept[AnalysisException]( + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + LogStore(spark) + } + ) + assert(e.getMessage.contains( + s"(`$classKey`) and (`$schemeKey`) cannot be set at the same time")) + } + } + } + + test("class-conf = set, scheme has no default, scheme-conf = not set") { + testLogStoreClassConfNoSchemeConf(fakeSchemeWithNoDefault) + } + + test("class-conf = set, scheme has no default, scheme-conf = set") { + testLogStoreClassConfAndSchemeConf(fakeSchemeWithNoDefault, customLogStoreClassName, + DelegatingLogStore.defaultAzureLogStoreClassName) + } + + test("class-conf = set, scheme has default, scheme-conf = not set") { + testLogStoreClassConfNoSchemeConf("s3a") + } + + test("class-conf = set, scheme has default, scheme-conf = set") { + testLogStoreClassConfAndSchemeConf("s3a", customLogStoreClassName, + DelegatingLogStore.defaultAzureLogStoreClassName) + } + + test("verifyLogStoreConfs - scheme conf keys ") { + Seq( + fakeSchemeWithNoDefault, // scheme with no default + "s3a" // scheme with default + ).foreach { scheme => + val schemeConfKey = LogStore.logStoreSchemeConfKey(scheme) + for (confs <- Seq( + // set only non-prefixed key + Seq((withoutSparkPrefix(schemeConfKey), customLogStoreClassName)), + // set only prefixed key + Seq((schemeConfKey, customLogStoreClassName)), + // set both spark-prefixed key and non-spark prefixed key to same value + Seq((withoutSparkPrefix(schemeConfKey), customLogStoreClassName), + (schemeConfKey, customLogStoreClassName)) + )) { + val sparkConf = constructSparkConf(confs) + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + // no error is thrown + LogStore(spark) + } + } + + // set both spark-prefixed key and non-spark-prefixed key to inconsistent values + val sparkConf = constructSparkConf( + Seq((withoutSparkPrefix(schemeConfKey), customLogStoreClassName), + (schemeConfKey, DelegatingLogStore.defaultAzureLogStoreClassName))) + val e = intercept[IllegalArgumentException]( + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + LogStore(spark) + } + ) + assert(e.getMessage.contains( + s"(${withoutSparkPrefix(schemeConfKey)} = $customLogStoreClassName, " + + s"$schemeConfKey = ${DelegatingLogStore.defaultAzureLogStoreClassName}) cannot be set " + + s"to different values. Please only set one of them, or set them to the same value." 
+ )) + } + } + + test("verifyLogStoreConfs - class conf keys") { + val classConfKey = LogStore.logStoreClassConfKey + for (confs <- Seq( + // set only non-prefixed key + Seq((withoutSparkPrefix(classConfKey), customLogStoreClassName)), + // set only prefixed key + Seq((classConfKey, customLogStoreClassName)), + // set both spark-prefixed key and non-spark prefixed key to same value + Seq((withoutSparkPrefix(classConfKey), customLogStoreClassName), + (classConfKey, customLogStoreClassName)) + )) { + val sparkConf = constructSparkConf(confs) + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + // no error is thrown + LogStore(spark) + } + } + + // set both spark-prefixed key and non-spark-prefixed key to inconsistent values + val sparkConf = constructSparkConf( + Seq((withoutSparkPrefix(classConfKey), customLogStoreClassName), + (classConfKey, DelegatingLogStore.defaultAzureLogStoreClassName))) + val e = intercept[IllegalArgumentException]( + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + LogStore(spark) + } + ) + assert(e.getMessage.contains( + s"(${withoutSparkPrefix(classConfKey)} = $customLogStoreClassName, " + + s"$classConfKey = ${DelegatingLogStore.defaultAzureLogStoreClassName})" + + s" cannot be set to different values. Please only set one of them, or set them to the " + + s"same value." + )) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/LogStoreSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/LogStoreSuite.scala new file mode 100644 index 00000000000..feed74098e2 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/LogStoreSuite.scala @@ -0,0 +1,359 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.{File, IOException} +import java.net.URI +import java.util.concurrent.atomic.AtomicInteger + +import scala.collection.mutable.ArrayBuffer + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.storage._ +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, FSDataOutputStream, Path, RawLocalFileSystem} + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.sql.{LocalSparkSession, QueryTest, SparkSession} +import org.apache.spark.sql.LocalSparkSession.withSparkSession +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +/////////////////////////// +// Child-specific traits // +/////////////////////////// + +trait AzureLogStoreSuiteBase extends LogStoreSuiteBase { + + testHadoopConf( + expectedErrMsg = ".*No FileSystem for scheme.*fake.*", + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") + + protected def shouldUseRenameToWriteCheckpoint: Boolean = true +} + +trait HDFSLogStoreSuiteBase extends LogStoreSuiteBase { + + // HDFSLogStore is based on FileContext APIs and hence requires AbstractFileSystem-based + // implementations. + testHadoopConf( + expectedErrMsg = ".*No FileSystem for scheme.*fake.*", + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") + + import testImplicits._ + + test("writes on systems without AbstractFileSystem implemented") { + withSQLConf("fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") { + val tempDir = Utils.createTempDir() + // scalastyle:off pathfromuri + val path = new Path(new URI(s"fake://${tempDir.toURI.getRawPath}/1.json")) + // scalastyle:on pathfromuri + val e = intercept[IOException] { + createLogStore(spark) + .write(path, Iterator("zero", "none"), overwrite = false, sessionHadoopConf) + } + assert(e.getMessage + .contains("The error typically occurs when the default LogStore implementation")) + } + } + + test("reads should work on systems without AbstractFileSystem implemented") { + withTempDir { tempDir => + val writtenFile = new File(tempDir, "1") + val store = createLogStore(spark) + store.write( + new Path(writtenFile.getCanonicalPath), + Iterator("zero", "none"), + overwrite = false, + sessionHadoopConf) + withSQLConf("fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") { + val read = createLogStore(spark) + .read(new Path("fake://" + writtenFile.getCanonicalPath), sessionHadoopConf) + assert(read === ArrayBuffer("zero", "none")) + } + } + } + + test( + "No AbstractFileSystem - end to end test using data frame") { + // Writes to the fake file system will fail + withTempDir { tempDir => + val fakeFSLocation = s"fake://${tempDir.getCanonicalFile}" + withSQLConf("fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") { + val e = intercept[IOException] { + Seq(1, 2, 4).toDF().write.format("delta").save(fakeFSLocation) + } + assert(e.getMessage + .contains("The error typically occurs when the default LogStore implementation")) + } + } + // Reading files written by 
other systems will work. + withTempDir { tempDir => + Seq(1, 2, 4).toDF().write.format("delta").save(tempDir.getAbsolutePath) + withSQLConf("fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") { + val fakeFSLocation = s"fake://${tempDir.getCanonicalFile}" + checkAnswer(spark.read.format("delta").load(fakeFSLocation), Seq(1, 2, 4).toDF()) + } + } + } + + test("if fc.rename() fails, it should throw java.nio.file.FileAlreadyExistsException") { + withTempDir { tempDir => + withSQLConf( + "fs.AbstractFileSystem.fake.impl" -> classOf[FailingRenameAbstractFileSystem].getName, + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") { + val store = createLogStore(spark) + val commit0 = new Path(s"fake://${tempDir.getCanonicalPath}/00000.json") + + intercept[java.nio.file.FileAlreadyExistsException] { + store.write(commit0, Iterator("zero"), overwrite = false, sessionHadoopConf) + } + } + } + } + + test("Read after write consistency with msync") { + withTempDir { tempDir => + val tsFSLocation = s"ts://${tempDir.getCanonicalFile}" + // Use the file scheme so that it uses a different FileSystem cached object + withSQLConf( + ("fs.ts.impl", classOf[TimestampLocalFileSystem].getCanonicalName), + ("fs.AbstractFileSystem.ts.impl", + classOf[TimestampAbstractFileSystem].getCanonicalName)) { + val store = createLogStore(spark) + val path = new Path(tsFSLocation, "1.json") + + // Initialize the TimestampLocalFileSystem object which will be reused later due to the + // FileSystem cache + assert(store.listFrom(path, sessionHadoopConf).length == 0) + + store.write(path, Iterator("zero", "none"), overwrite = false, sessionHadoopConf) + // Verify `msync` is called by checking whether `listFrom` returns the latest result. + // Without the `msync` call, the TimestampLocalFileSystem would not see this file. + assert(store.listFrom(path, sessionHadoopConf).length == 1) + } + } + } + + protected def shouldUseRenameToWriteCheckpoint: Boolean = true +} + +trait LocalLogStoreSuiteBase extends LogStoreSuiteBase { + testHadoopConf( + expectedErrMsg = ".*No FileSystem for scheme.*fake.*", + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") + + protected def shouldUseRenameToWriteCheckpoint: Boolean = true +} + +trait GCSLogStoreSuiteBase extends LogStoreSuiteBase { + + testHadoopConf( + expectedErrMsg = ".*No FileSystem for scheme.*fake.*", + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") + + protected def shouldUseRenameToWriteCheckpoint: Boolean = false + + test("gcs write should happen in a new thread") { + withTempDir { tempDir => + // Use `FakeGCSFileSystem` to verify we write in the correct thread. 
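+ // The expectation (per the test name) is that the GCS log store performs the write on a + // separate thread rather than the caller's, which `FakeGCSFileSystem` should be able to + // observe.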
+ withSQLConf( + "fs.gs.impl" -> classOf[FakeGCSFileSystem].getName, + "fs.gs.impl.disable.cache" -> "true") { + val store = createLogStore(spark) + store.write( + new Path(s"gs://${tempDir.getCanonicalPath}", "1.json"), + Iterator("foo"), + overwrite = false, + sessionHadoopConf) + } + } + } + + test("handles precondition failure") { + withTempDir { tempDir => + withSQLConf( + "fs.gs.impl" -> classOf[FailingGCSFileSystem].getName, + "fs.gs.impl.disable.cache" -> "true") { + val store = createLogStore(spark) + + assertThrows[java.nio.file.FileAlreadyExistsException] { + store.write( + new Path(s"gs://${tempDir.getCanonicalPath}", "1.json"), + Iterator("foo"), + overwrite = false, + sessionHadoopConf) + } + + store.write( + new Path(s"gs://${tempDir.getCanonicalPath}", "1.json"), + Iterator("foo"), + overwrite = true, + sessionHadoopConf) + } + } + } +} + +//////////////////////////////// +// Concrete child test suites // +//////////////////////////////// + +class HDFSLogStoreSuite extends HDFSLogStoreSuiteBase { + override val logStoreClassName: String = classOf[HDFSLogStore].getName +} + +class AzureLogStoreSuite extends AzureLogStoreSuiteBase { + override val logStoreClassName: String = classOf[AzureLogStore].getName +} + +class LocalLogStoreSuite extends LocalLogStoreSuiteBase { + override val logStoreClassName: String = classOf[LocalLogStore].getName +} + +//////////////////////////////// +// File System Helper Classes // +//////////////////////////////// + +/** A fake file system to test whether GCSLogStore properly handles precondition failures. */ +class FailingGCSFileSystem extends RawLocalFileSystem { + override def getScheme: String = "gs" + override def getUri: URI = URI.create("gs:/") + + override def create(f: Path, overwrite: Boolean): FSDataOutputStream = { + throw new IOException("412 Precondition Failed"); + } +} + +/** + * A fake AbstractFileSystem to test whether session Hadoop configuration will be picked up. + * This is a wrapper around [[FakeFileSystem]]. + */ +class FakeAbstractFileSystem(uri: URI, conf: org.apache.hadoop.conf.Configuration) + extends org.apache.hadoop.fs.DelegateToFileSystem( + uri, + new FakeFileSystem, + conf, + FakeFileSystem.scheme, + false) { + + // Implementation copied from RawLocalFs + import org.apache.hadoop.fs.local.LocalConfigKeys + import org.apache.hadoop.fs._ + + override def getUriDefaultPort(): Int = -1 + override def getServerDefaults(): FsServerDefaults = LocalConfigKeys.getServerDefaults + override def isValidName(src: String): Boolean = true +} + +/** + * A file system allowing to track how many times `rename` is called. + * `TrackingRenameFileSystem.numOfRename` should be reset to 0 before starting to trace. + */ +class TrackingRenameFileSystem extends RawLocalFileSystem { + override def rename(src: Path, dst: Path): Boolean = { + TrackingRenameFileSystem.renameCounter.incrementAndGet() + super.rename(src, dst) + } +} + +object TrackingRenameFileSystem { + val renameCounter = new AtomicInteger(0) + def resetCounter(): Unit = renameCounter.set(0) +} + +/** + * A fake AbstractFileSystem to ensure FileSystem.renameInternal(), and thus FileContext.rename(), + * fails. This will be used to test HDFSLogStore.writeInternal corner case. 
+ */ +class FailingRenameAbstractFileSystem(uri: URI, conf: org.apache.hadoop.conf.Configuration) + extends FakeAbstractFileSystem(uri, conf) { + + override def renameInternal(src: Path, dst: Path, overwrite: Boolean): Unit = { + throw new org.apache.hadoop.fs.FileAlreadyExistsException(s"$dst path already exists") + } +} + +//////////////////////////////////////////////////////////////////// +// Public LogStore (Java) suite tests from delta-storage artifact // +//////////////////////////////////////////////////////////////////// + +abstract class PublicLogStoreSuite extends LogStoreSuiteBase { + + protected val publicLogStoreClassName: String + + // The actual type of LogStore created will be LogStoreAdaptor. + override val logStoreClassName: String = classOf[LogStoreAdaptor].getName + + protected override def sparkConf = { + super.sparkConf.set(logStoreClassConfKey, publicLogStoreClassName) + } + + protected override def testInitFromSparkConf(): Unit = { + test("instantiation through SparkConf") { + assert(spark.sparkContext.getConf.get(logStoreClassConfKey) == publicLogStoreClassName) + assert(LogStore(spark).getClass.getName == logStoreClassName) + assert(LogStore(spark).asInstanceOf[LogStoreAdaptor] + .logStoreImpl.getClass.getName == publicLogStoreClassName) + + } + } +} + +class PublicHDFSLogStoreSuite extends PublicLogStoreSuite with HDFSLogStoreSuiteBase { + override protected val publicLogStoreClassName: String = + classOf[io.delta.storage.HDFSLogStore].getName +} + +class PublicS3SingleDriverLogStoreSuite + extends PublicLogStoreSuite + with S3SingleDriverLogStoreSuiteBase { + + override protected val publicLogStoreClassName: String = + classOf[io.delta.storage.S3SingleDriverLogStore].getName + + override protected def canInvalidateCache: Boolean = false +} + +class PublicAzureLogStoreSuite extends PublicLogStoreSuite with AzureLogStoreSuiteBase { + override protected val publicLogStoreClassName: String = + classOf[io.delta.storage.AzureLogStore].getName +} + +class PublicLocalLogStoreSuite extends PublicLogStoreSuite with LocalLogStoreSuiteBase { + override protected val publicLogStoreClassName: String = + classOf[io.delta.storage.LocalLogStore].getName +} + +class PublicGCSLogStoreSuite extends PublicLogStoreSuite with GCSLogStoreSuiteBase { + override protected val publicLogStoreClassName: String = + classOf[io.delta.storage.GCSLogStore].getName +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/LogStoreSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/LogStoreSuiteBase.scala new file mode 100644 index 00000000000..a56281945ab --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/LogStoreSuiteBase.scala @@ -0,0 +1,260 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.{File, IOException} +import java.net.URI +import java.util.concurrent.atomic.AtomicInteger + +import scala.collection.mutable.ArrayBuffer + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.storage._ +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, FSDataOutputStream, Path, RawLocalFileSystem} + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.sql.{LocalSparkSession, QueryTest, SparkSession} +import org.apache.spark.sql.LocalSparkSession.withSparkSession +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +///////////////////// +// Base Test Suite // +///////////////////// + +abstract class LogStoreSuiteBase extends QueryTest + with LogStoreProvider + with SharedSparkSession + with DeltaSQLCommandTest { + + def logStoreClassName: String + + protected override def sparkConf = { + super.sparkConf.set(logStoreClassConfKey, logStoreClassName) + } + + // scalastyle:off deltahadoopconfiguration + def sessionHadoopConf: Configuration = spark.sessionState.newHadoopConf + // scalastyle:on deltahadoopconfiguration + + protected def testInitFromSparkConf(): Unit = { + test("instantiation through SparkConf") { + assert(spark.sparkContext.getConf.get(logStoreClassConfKey) == logStoreClassName) + assert(LogStore(spark).getClass.getName == logStoreClassName) + } + } + + testInitFromSparkConf() + + protected def withTempLogDir(f: File => Unit): Unit = { + val dir = Utils.createTempDir() + val deltaLogDir = new File(dir, "_delta_log") + deltaLogDir.mkdir() + try f(deltaLogDir) finally { + Utils.deleteRecursively(dir) + } + } + + test("read / write") { + def assertNoLeakedCrcFiles(dir: File): Unit = { + // crc file should not be leaked when origin file doesn't exist. + // The implementation of Hadoop filesystem may filter out checksum file, so + // listing files from local filesystem. + val fileNames = dir.listFiles().toSeq.filter(p => p.isFile).map(p => p.getName) + val crcFiles = fileNames.filter(n => n.startsWith(".") && n.endsWith(".crc")) + val originFileNamesForExistingCrcFiles = crcFiles.map { name => + // remove first "." and last ".crc" + name.substring(1, name.length - 4) + } + + // Check all origin files exist for all crc files. 
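+ // An orphaned .<name>.crc without a matching <name> would mean the store leaked a + // checksum file for a write that was renamed away or never materialized.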
+ assert(originFileNamesForExistingCrcFiles.toSet.subsetOf(fileNames.toSet), + s"Some of origin files for crc files don't exist - crc files: $crcFiles / " + + s"expected origin files: $originFileNamesForExistingCrcFiles / actual files: $fileNames") + } + + def pathToFileStatus(path: Path): FileStatus = + path.getFileSystem(sessionHadoopConf).getFileStatus(path) + + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + val deltas = Seq(0, 1) + .map(i => new File(tempLogDir, i.toString)).map(_.toURI).map(new Path(_)) + store.write(deltas.head, Iterator("zero", "none"), overwrite = false, sessionHadoopConf) + store.write(deltas(1), Iterator("one"), overwrite = false, sessionHadoopConf) + + // Test Path based read APIs + assert(store.read(deltas.head, sessionHadoopConf) == Seq("zero", "none")) + assert(store.readAsIterator(deltas.head, sessionHadoopConf).toSeq == Seq("zero", "none")) + assert(store.read(deltas(1), sessionHadoopConf) == Seq("one")) + assert(store.readAsIterator(deltas(1), sessionHadoopConf).toSeq == Seq("one")) + // Test FileStatus based read APIs + assert(store.read(pathToFileStatus(deltas.head), sessionHadoopConf) == Seq("zero", "none")) + assert(store.readAsIterator(pathToFileStatus(deltas.head), sessionHadoopConf).toSeq == + Seq("zero", "none")) + assert(store.read(pathToFileStatus(deltas(1)), sessionHadoopConf) == Seq("one")) + assert(store.readAsIterator(pathToFileStatus(deltas(1)), sessionHadoopConf).toSeq == + Seq("one")) + + assertNoLeakedCrcFiles(tempLogDir) + } + + } + + test("detects conflict") { + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + val deltas = Seq(0, 1) + .map(i => new File(tempLogDir, i.toString)).map(_.toURI).map(new Path(_)) + store.write(deltas.head, Iterator("zero"), overwrite = false, sessionHadoopConf) + store.write(deltas(1), Iterator("one"), overwrite = false, sessionHadoopConf) + + intercept[java.nio.file.FileAlreadyExistsException] { + store.write(deltas(1), Iterator("uno"), overwrite = false, sessionHadoopConf) + } + } + + } + + test("listFrom") { + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + + val deltas = + Seq(0, 1, 2, 3, 4).map(i => new File(tempLogDir, i.toString)).map(_.toURI).map(new Path(_)) + store.write(deltas(1), Iterator("zero"), overwrite = false, sessionHadoopConf) + store.write(deltas(2), Iterator("one"), overwrite = false, sessionHadoopConf) + store.write(deltas(3), Iterator("two"), overwrite = false, sessionHadoopConf) + + assert( + store.listFrom(deltas.head, sessionHadoopConf) + .map(_.getPath.getName).toArray === Seq(1, 2, 3).map(_.toString)) + assert( + store.listFrom(deltas(1), sessionHadoopConf) + .map(_.getPath.getName).toArray === Seq(1, 2, 3).map(_.toString)) + assert(store.listFrom(deltas(2), sessionHadoopConf) + .map(_.getPath.getName).toArray === Seq(2, 3).map(_.toString)) + assert(store.listFrom(deltas(3), sessionHadoopConf) + .map(_.getPath.getName).toArray === Seq(3).map(_.toString)) + assert(store.listFrom(deltas(4), sessionHadoopConf).map(_.getPath.getName).toArray === Nil) + } + } + + test("simple log store test") { + val tempDir = Utils.createTempDir() + val log1 = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + assert(log1.store.getClass.getName == logStoreClassName) + + val txn = log1.startTransaction() + txn.commitManually(createTestAddFile()) + log1.checkpoint() + + DeltaLog.clearCache() + val log2 = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + assert(log2.store.getClass.getName == logStoreClassName) + + 
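+ // After clearing the cache, the reloaded log should still see the checkpoint written above + // (version 0) and the single AddFile committed to it.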
assert(log2.readLastCheckpointFile().map(_.version) === Some(0L)) + assert(log2.snapshot.allFiles.count == 1) + } + + protected def testHadoopConf(expectedErrMsg: String, fsImplConfs: (String, String)*): Unit = { + test("should pick up fs impl conf from session Hadoop configuration") { + withTempDir { tempDir => + // scalastyle:off pathfromuri + val path = new Path(new URI(s"fake://${tempDir.toURI.getRawPath}/1.json")) + // scalastyle:on pathfromuri + + // Make sure it will fail without FakeFileSystem + val e = intercept[IOException] { + createLogStore(spark).listFrom(path, sessionHadoopConf) + } + assert(e.getMessage.matches(expectedErrMsg)) + withSQLConf(fsImplConfs: _*) { + createLogStore(spark).listFrom(path, sessionHadoopConf) + } + } + } + } + } + + /** + * Whether the log store being tested should use rename to write checkpoints or not. The + * following test uses this method to verify the behavior of `checkpoint`. + */ + protected def shouldUseRenameToWriteCheckpoint: Boolean + + test( + "use isPartialWriteVisible to decide whether to use rename") { + withTempDir { tempDir => + import testImplicits._ + // Write 5 files to the Delta table + (1 to 100).toDF().repartition(5).write.format("delta").save(tempDir.getCanonicalPath) + withSQLConf( + "fs.file.impl" -> classOf[TrackingRenameFileSystem].getName, + "fs.file.impl.disable.cache" -> "true") { + val deltaLog = DeltaLog.forTable(spark, tempDir.getCanonicalPath) + TrackingRenameFileSystem.renameCounter.set(0) + deltaLog.checkpoint() + val expectedNumOfRename = if (shouldUseRenameToWriteCheckpoint) 1 else 0 + assert(TrackingRenameFileSystem.renameCounter.get() === expectedNumOfRename) + + withSQLConf(DeltaSQLConf.DELTA_CHECKPOINT_PART_SIZE.key -> "9") { + // Write 5 more files to the Delta table + (1 to 100).toDF().repartition(5).write + .format("delta").mode("append").save(tempDir.getCanonicalPath) + // At this point the table has 10 files in total, which won't fit in 1 checkpoint part file + // (as DELTA_CHECKPOINT_PART_SIZE is set to 9 in this test), so this will end up generating + // 2 PART files. + TrackingRenameFileSystem.renameCounter.set(0) + deltaLog.checkpoint() + val expectedNumOfRename = if (shouldUseRenameToWriteCheckpoint) 2 else 0 + assert(TrackingRenameFileSystem.renameCounter.get() === expectedNumOfRename) + } + } + } + } + + test("readAsIterator should be lazy") { + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + val testFile = new File(tempLogDir, "readAsIterator").getCanonicalPath + store.write(new Path(testFile), Iterator("foo", "bar"), overwrite = false, sessionHadoopConf) + + withSQLConf( + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") { + val fsStats = FileSystem.getStatistics("fake", classOf[FakeFileSystem]) + fsStats.reset() + val iter = store.readAsIterator(new Path(s"fake:///$testFile"), sessionHadoopConf) + try { + // We should not read any data when creating the iterator. + assert(fsStats.getBytesRead == 0) + assert(iter.toList == "foo" :: "bar" :: Nil) + // Verify we are using the correct Statistics instance.
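+ // 8 bytes corresponds to "foo" and "bar", assuming each entry is written as its own + // newline-terminated line.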
+ assert(fsStats.getBytesRead == 8) + } finally { + iter.close() + } + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoAccumulatorSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoAccumulatorSuite.scala new file mode 100644 index 00000000000..a2b7d915088 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoAccumulatorSuite.scala @@ -0,0 +1,102 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.util.concurrent.atomic.AtomicReference + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.commands.MergeIntoCommandBase +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerNodeExcluded, SparkListenerTaskEnd} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.status.TaskDataWrapper +import org.apache.spark.util.JsonProtocol + +/** + * Tests how the accumulator used by the MERGE command reacts with other Spark components such as + * Spark UI. These tests stay in a separated file so that we can use the package name + * `org.apache.spark.sql.delta` to access `private[spark]` APIs. + */ +class MergeIntoAccumulatorSuite + extends SharedSparkSession + with DeltaSQLCommandTest { + + import testImplicits._ + + private def runTestMergeCommand(): Unit = { + // Run a simple merge command + withTempView("source") { + withTempDir { tempDir => + val tempPath = tempDir.getCanonicalPath + Seq((1, 1), (0, 3)).toDF("key", "value").createOrReplaceTempView("source") + Seq((2, 2), (1, 4)).toDF("key", "value").write.format("delta").save(tempPath) + spark.sql(s""" + |MERGE INTO delta.`$tempPath` target + |USING source src + |ON src.key = target.key + |WHEN MATCHED THEN UPDATE SET * + |WHEN NOT MATCHED THEN INSERT * + |""".stripMargin) + } + } + } + + test("accumulators used by MERGE should not be tracked by Spark UI") { + runTestMergeCommand() + + // Make sure all Spark events generated by the above command have been processed + spark.sparkContext.listenerBus.waitUntilEmpty(30000) + + val store = spark.sparkContext.statusStore.store + val iter = store.view(classOf[TaskDataWrapper]).closeableIterator() + try { + // Collect all accumulator names tracked by Spark UI. + val accumNames = iter.asScala.toVector.flatMap { task => + task.accumulatorUpdates.map(_.name) + }.toSet + // Verify accumulators used by MergeIntoCommand are not tracked. + assert(!accumNames.contains(MergeIntoCommandBase.TOUCHED_FILES_ACCUM_NAME)) + } finally { + iter.close() + } + } + + test("accumulators used by MERGE should not fail Spark event log generation") { + // Register a listener to convert `SparkListenerTaskEnd` to json and catch failures. 
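+ // Spark's event log writer performs the same JSON conversion, so an accumulator value + // that cannot be serialized to JSON would surface here as a captured Throwable.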
+ val failure = new AtomicReference[Throwable]() + val listener = new SparkListener { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + try JsonProtocol.sparkEventToJsonString(taskEnd) catch { + case t: Throwable => failure.compareAndSet(null, t) + } + } + } + spark.sparkContext.listenerBus.addToSharedQueue(listener) + try { + runTestMergeCommand() + + // Make sure all Spark events generated by the above command have been processed + spark.sparkContext.listenerBus.waitUntilEmpty(30000) + // Converting `SparkListenerEvent` to json should not fail + assert(failure.get == null) + } finally { + spark.sparkContext.listenerBus.removeListener(listener) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoDVsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoDVsSuite.scala new file mode 100644 index 00000000000..cb8c5d34dbb --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoDVsSuite.scala @@ -0,0 +1,250 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.cdc.MergeCDCTests +import org.apache.spark.sql.delta.commands.{DeletionVectorBitmapGenerator, DMLWithDeletionVectorsHelper} +import org.apache.spark.sql.delta.files.TahoeBatchFileIndex +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException +import org.apache.spark.sql.execution.datasources.FileFormat.FILE_PATH +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.functions.col + +trait MergeIntoDVsTests extends MergeIntoSQLSuite with DeletionVectorsTestUtils { + + override def beforeAll(): Unit = { + super.beforeAll() + enableDeletionVectors(spark, merge = true) + } + + override def excluded: Seq[String] = { + val miscFailures = Seq( + "basic case - merge to view on a Delta table by path, " + + "partitioned: true skippingEnabled: false useSqlView: true", + "basic case - merge to view on a Delta table by path, " + + "partitioned: true skippingEnabled: false useSqlView: false", + "basic case - merge to view on a Delta table by path, " + + "partitioned: false skippingEnabled: false useSqlView: true", + "basic case - merge to view on a Delta table by path, " + + "partitioned: false skippingEnabled: false useSqlView: false", + "basic case - merge to Delta table by name, isPartitioned: false skippingEnabled: false", + "basic case - merge to Delta table by name, isPartitioned: true skippingEnabled: false", + "not matched by source - all 3 clauses - no changes - " + + "isPartitioned: true - cdcEnabled: true", + "not matched by source - all 3 clauses - no changes - " + + "isPartitioned: false - cdcEnabled: true", + "test merge on temp view - view with too many internal aliases - Dataset TempView" + ) + + super.excluded ++ miscFailures + } + + protected override lazy val expectedOpTypes: Set[String] 
= Set( + "delta.dml.merge.findTouchedFiles", + "delta.dml.merge.writeModifiedRowsOnly", + "delta.dml.merge.writeDeletionVectors", + "delta.dml.merge") +} + +class MergeIntoDVsSuite extends MergeIntoDVsTests { + import testImplicits._ + + def assertOperationalDVMetrics( + tablePath: String, + numDeletedRows: Long, + numUpdatedRows: Long, + numCopiedRows: Long, + numTargetFilesRemoved: Long, + numDeletionVectorsAdded: Long, + numDeletionVectorsRemoved: Long, + numDeletionVectorsUpdated: Long): Unit = { + val table = io.delta.tables.DeltaTable.forPath(tablePath) + val mergeMetrics = DeltaMetricsUtils.getLastOperationMetrics(table) + assert(mergeMetrics.getOrElse("numTargetRowsDeleted", -1) === numDeletedRows) + assert(mergeMetrics.getOrElse("numTargetRowsUpdated", -1) === numUpdatedRows) + assert(mergeMetrics.getOrElse("numTargetRowsCopied", -1) === numCopiedRows) + assert(mergeMetrics.getOrElse("numTargetFilesRemoved", -1) === numTargetFilesRemoved) + assert(mergeMetrics.getOrElse("numTargetDeletionVectorsAdded", -1) === numDeletionVectorsAdded) + assert( + mergeMetrics.getOrElse("numTargetDeletionVectorsRemoved", -1) === numDeletionVectorsRemoved) + assert( + mergeMetrics.getOrElse("numTargetDeletionVectorsUpdated", -1) === numDeletionVectorsUpdated) + } + + test(s"Merge with DVs metrics - Incremental Updates") { + withTempDir { dir => + val sourcePath = s"$dir/source" + val targetPath = s"$dir/target" + + spark.range(0, 10, 2).write.format("delta").save(sourcePath) + spark.range(10).write.format("delta").save(targetPath) + + executeMerge( + tgt = s"delta.`$targetPath` t", + src = s"delta.`$sourcePath` s", + cond = "t.id = s.id", + clauses = updateNotMatched(set = "id = t.id * 10")) + + checkAnswer(readDeltaTable(targetPath), Seq(0, 10, 2, 30, 4, 50, 6, 70, 8, 90).toDF("id")) + + assertOperationalDVMetrics( + targetPath, + numDeletedRows = 0, + numUpdatedRows = 5, + numCopiedRows = 0, + numTargetFilesRemoved = 0, // No files were fully deleted. + numDeletionVectorsAdded = 2, + numDeletionVectorsRemoved = 0, + numDeletionVectorsUpdated = 0) + + executeMerge( + tgt = s"delta.`$targetPath` t", + src = s"delta.`$sourcePath` s", + cond = "t.id = s.id", + clauses = delete(condition = "t.id = 2")) + + checkAnswer(readDeltaTable(targetPath), Seq(0, 10, 30, 4, 50, 6, 70, 8, 90).toDF("id")) + + assertOperationalDVMetrics( + targetPath, + numDeletedRows = 1, + numUpdatedRows = 0, + numCopiedRows = 0, + numTargetFilesRemoved = 0, + numDeletionVectorsAdded = 1, // Updating a DV equals removing and adding. + numDeletionVectorsRemoved = 1, // Updating a DV equals removing and adding. + numDeletionVectorsUpdated = 1) + + // Delete all rows from a file. 
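+ // Deleting the last surviving rows of a file should remove the file (and its existing DV) + // outright instead of writing a larger DV, which is what the metrics below assert.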
+ executeMerge( + tgt = s"delta.`$targetPath` t", + src = s"delta.`$sourcePath` s", + cond = "t.id = s.id", + clauses = delete(condition = "t.id < 5")) + + checkAnswer(readDeltaTable(targetPath), Seq(10, 30, 50, 6, 70, 8, 90).toDF("id")) + + assertOperationalDVMetrics( + targetPath, + numDeletedRows = 2, + numUpdatedRows = 0, + numCopiedRows = 0, + numTargetFilesRemoved = 1, + numDeletionVectorsAdded = 0, + numDeletionVectorsRemoved = 1, + numDeletionVectorsUpdated = 0) + } + } + + test(s"Merge with DVs metrics - delete entire file") { + withTempDir { dir => + val sourcePath = s"$dir/source" + val targetPath = s"$dir/target" + + spark.range(0, 7).write.format("delta").save(sourcePath) + spark.range(10).write.format("delta").save(targetPath) + + executeMerge( + tgt = s"delta.`$targetPath` t", + src = s"delta.`$sourcePath` s", + cond = "t.id = s.id", + clauses = update(set = "id = t.id * 10")) + + checkAnswer(readDeltaTable(targetPath), Seq(0, 10, 20, 30, 40, 50, 60, 7, 8, 9).toDF("id")) + + assertOperationalDVMetrics( + targetPath, + numDeletedRows = 0, + numUpdatedRows = 7, + numCopiedRows = 0, // No rows were copied. + numTargetFilesRemoved = 1, // 1 file was removed entirely. + numDeletionVectorsAdded = 1, // 1 file was deleted partially. + numDeletionVectorsRemoved = 0, + numDeletionVectorsUpdated = 0) + } + } + + test(s"Verify error is produced when paths are not joined correctly") { + withTempDir { dir => + val sourcePath = s"$dir/source" + val targetPath = s"$dir/target" + + spark.range(0, 10, 2).write.format("delta").save(sourcePath) + spark.range(10).write.format("delta").save(targetPath) + + // Execute buildRowIndexSetsForFilesMatchingCondition with a corrupted touched files list. + val sourceDF = io.delta.tables.DeltaTable.forPath(sourcePath).toDF + val targetDF = io.delta.tables.DeltaTable.forPath(targetPath).toDF + val targetLog = DeltaLog.forTable(spark, targetPath) + val condition = col("s.id") === col("t.id") + val allFiles = targetLog.update().allFiles.collect().toSeq + assert(allFiles.size === 2) + val corruptedFiles = Seq( + allFiles.head, + allFiles.last.copy(path = "corruptedPath")) + val txn = targetLog.startTransaction(catalogTableOpt = None) + + val fileIndex = new TahoeBatchFileIndex( + spark, + actionType = "merge", + addFiles = allFiles, + deltaLog = targetLog, + path = targetLog.dataPath, + snapshot = txn.snapshot) + + val targetDFWithMetadata = DMLWithDeletionVectorsHelper.createTargetDfForScanningForMatches( + spark, + targetDF.queryExecution.logical, + fileIndex) + val e = intercept[SparkException] { + DeletionVectorBitmapGenerator.buildRowIndexSetsForFilesMatchingCondition( + spark, + txn, + tableHasDVs = true, + targetDf = sourceDF.as("s").join(targetDFWithMetadata.as("t"), condition), + candidateFiles = corruptedFiles, + condition = condition.expr + ) + } + assert(e.getCause.getMessage.contains("Encountered a non matched file path.")) + } + } +} + +trait MergeCDCWithDVsTests extends MergeCDCTests with DeletionVectorsTestUtils { + override def beforeAll(): Unit = { + super.beforeAll() + enableDeletionVectors(spark, merge = true) + } + + override def excluded: Seq[String] = { + /** + * Merge commands that result to no actions do not generate a new commit when DVs are enabled. + * We correct affected tests by changing the expected CDC result (Create table CDC). + */ + val miscFailures = "merge CDC - all conditions failed for all rows" + + super.excluded :+ miscFailures + } +} +/** + * Includes the entire MergeIntoSQLSuite with CDC enabled. 
+ */ +class MergeIntoDVsCDCSuite extends MergeIntoDVsTests with MergeCDCWithDVsTests diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMaterializeSourceSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMaterializeSourceSuite.scala new file mode 100644 index 00000000000..ebb775553a6 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMaterializeSourceSuite.scala @@ -0,0 +1,781 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import scala.collection.mutable +import scala.concurrent.duration._ +import scala.reflect.ClassTag +import scala.util.control.NonFatal + +import com.databricks.spark.util.{Log4jUsageLogger, MetricDefinitions, UsageRecord} +import org.apache.spark.sql.delta.DeltaTestUtils._ +import org.apache.spark.sql.delta.commands.merge.{MergeIntoMaterializeSourceError, MergeIntoMaterializeSourceErrorType, MergeIntoMaterializeSourceReason, MergeStats} +import org.apache.spark.sql.delta.commands.merge.MergeIntoMaterializeSource.mergeMaterializedSourceRddBlockLostErrorRegex +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.util.JsonUtils +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, Literal} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.execution.{FilterExec, LogicalRDD, RDDScanExec, SQLExecution} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.Utils + +trait MergeIntoMaterializeSourceTests + extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest + with SQLTestUtils + with DeltaTestUtilsBase + { + + import testImplicits._ + + override def beforeAll(): Unit = { + super.beforeAll() + // trigger source materialization in all tests + spark.conf.set(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key, "all") + } + + + // Test error message that we check if blocks of materialized source RDD were evicted. 
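+ // The regex returned by mergeMaterializedSourceRddBlockLostErrorRegex should match Spark's + // own error once cached blocks of the checkpointed RDD are dropped; the test simulates the + // loss by unpersisting the RDD and collecting again.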
+ test("missing RDD blocks error message") { + val checkpointedDf = sql("select * from range(10)") + .localCheckpoint(eager = false) + val rdd = checkpointedDf.queryExecution.analyzed.asInstanceOf[LogicalRDD].rdd + checkpointedDf.collect() // trigger lazy materialization + rdd.unpersist() + val ex = intercept[Exception] { + checkpointedDf.collect() + } + assert(ex.isInstanceOf[SparkException], ex) + assert( + ex.getMessage().matches(mergeMaterializedSourceRddBlockLostErrorRegex(rdd.id)), + s"RDD id ${rdd.id}: Message: ${ex.getMessage}") + } + + + + for (eager <- BOOLEAN_DOMAIN) + test(s"merge logs out of disk errors - eager=$eager") { + withSQLConf(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_EAGER.key -> eager.toString) { + val injectEx = new java.io.IOException("No space left on device") + testWithCustomErrorInjected[SparkException](injectEx) { (thrownEx, errorOpt) => + // Compare messages instead of instances, since the equals method for these exceptions + // takes more into account. + assert(thrownEx.getCause.getMessage === injectEx.getMessage) + assert(errorOpt.isDefined) + val error = errorOpt.get + assert(error.errorType == MergeIntoMaterializeSourceErrorType.OUT_OF_DISK.toString) + assert(error.attempt == 1) + val storageLevel = StorageLevel.fromString( + spark.conf.get(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_RDD_STORAGE_LEVEL)) + assert(error.materializedSourceRDDStorageLevel == storageLevel.toString) + } + } + } + + test("merge rethrows arbitrary errors") { + val injectEx = new RuntimeException("test") + testWithCustomErrorInjected[SparkException](injectEx) { (thrownEx, error) => + // Compare messages instead of instances, since the equals method for these exceptions + // takes more into account. + assert(thrownEx.getCause.getMessage === injectEx.getMessage) + assert(error.isEmpty) + } + } + + private def testWithCustomErrorInjected[Intercept <: Exception with AnyRef : ClassTag]( + inject: Exception)( + handle: (Intercept, Option[MergeIntoMaterializeSourceError]) => Unit): Unit = { + { + val tblName = "target" + withTable(tblName) { + val targetDF = spark.range(10).toDF("id").withColumn("value", rand()) + targetDF.write.format("delta").saveAsTable(tblName) + spark + .range(10) + .mapPartitions { x => + throw inject + x + } + .toDF("id") + .withColumn("value", rand()) + .createOrReplaceTempView("s") + // I don't know why it this cast is necessary. `Intercept` is marked as `AnyRef` so + // it should just let me assign `null`, but the compiler keeps rejecting it. + var thrownException: Intercept = null.asInstanceOf[Intercept] + val events = Log4jUsageLogger + .track { + thrownException = intercept[Intercept] { + sql(s"MERGE INTO $tblName t USING s ON t.id = s.id " + + s"WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT *") + } + } + .filter { e => + e.metric == MetricDefinitions.EVENT_TAHOE.name && + e.tags.get("opType").contains(MergeIntoMaterializeSourceError.OP_TYPE) + } + val error = events.headOption + .map(e => JsonUtils.fromJson[MergeIntoMaterializeSourceError](e.blob)) + handle(thrownException, error) + } + } + } + + // Runs a merge query with source materialization, while a killer thread tries to unpersist it. + private def testMergeMaterializedSourceUnpersist( + tblName: String, numKills: Int): Seq[UsageRecord] = { + val maxAttempts = spark.conf.get(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_MAX_ATTEMPTS) + + // when we ask to join the killer thread, it should exit in the next iteration. 
+ val killerThreadJoinTimeoutMs = 10000 + // sleep between attempts to unpersist + val killerIntervalMs = 1 + + // Data does not need to be big; there is enough latency to unpersist even with small data. + val targetDF = spark.range(100).toDF("id") + targetDF.write.format("delta").saveAsTable(tblName) + spark.range(90, 120).toDF("id").createOrReplaceTempView("s") + val mergeQuery = + s"MERGE INTO $tblName t USING s ON t.id = s.id " + + "WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT *" + + // Killer thread tries to unpersist any persisted mergeMaterializedSource RDDs, + // until it has seen more than numKills distinct ones (from distinct Merge retries) + @volatile var finished = false + @volatile var invalidStorageLevel: Option[String] = None + val killerThread = new Thread() { + override def run(): Unit = { + val seenSources = mutable.Set[Int]() + while (!finished) { + sparkContext.getPersistentRDDs.foreach { case (rddId, rdd) => + if (rdd.name == "mergeMaterializedSource") { + if (!seenSources.contains(rddId)) { + logInfo(s"First time seeing mergeMaterializedSource with id=$rddId") + seenSources.add(rddId) + } + if (seenSources.size > numKills) { + // already unpersisted numKills different source materialization attempts, + // the killer can retire + logInfo(s"seenSources.size=${seenSources.size}. Proceeding to finish.") + finished = true + } else { + // Need to wait until it is actually checkpointed, otherwise if we try to unpersist + // before it starts to actually persist it fails with + // java.lang.AssertionError: assumption failed: + // Storage level StorageLevel(1 replicas) is not appropriate for local checkpointing + // (this wouldn't happen in real world scenario of losing the block because executor + // was lost; there nobody manipulates with StorageLevel; if failure happens during + // computation of the materialized rdd, the task would be reattempted using the + // regular task retry mechanism) + if (rdd.isCheckpointed) { + // Use this opportunity to test if the source has the correct StorageLevel. + val expectedStorageLevel = StorageLevel.fromString( + if (seenSources.size == 1) { + spark.conf.get(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_RDD_STORAGE_LEVEL) + } else { + spark.conf.get(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_RDD_STORAGE_LEVEL_RETRY) + } + ) + val rddStorageLevel = rdd.getStorageLevel + if (rddStorageLevel != expectedStorageLevel) { + invalidStorageLevel = + Some(s"For attempt ${seenSources.size} of materialized source expected " + + s"$expectedStorageLevel but got ${rddStorageLevel}") + finished = true + } + logInfo(s"Unpersisting mergeMaterializedSource with id=$rddId") + // don't make it blocking, so that the killer turns around quickly and is ready + // for the next kill when Merge retries + rdd.unpersist(blocking = false) + } + } + } + } + Thread.sleep(killerIntervalMs) + } + logInfo(s"seenSources.size=${seenSources.size}. 
Proceeding to finish.") + } + } + killerThread.start() + + val events = Log4jUsageLogger.track { + try { + sql(mergeQuery) + } catch { + case NonFatal(ex) => + if (numKills < maxAttempts) { + // The merge should succeed with retries + throw ex + } + } finally { + finished = true // put the killer to rest, if it didn't retire already + killerThread.join(killerThreadJoinTimeoutMs) + assert(!killerThread.isAlive) + } + }.filter(_.metric == MetricDefinitions.EVENT_TAHOE.name) + + // If killer thread recorded an invalid StorageLevel, throw it here + assert(invalidStorageLevel.isEmpty, invalidStorageLevel.toString) + + events + } + + private def testMergeMaterializeSourceUnpersistRetries = { + val maxAttempts = DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_MAX_ATTEMPTS.defaultValue.get + val tblName = "target" + + // For 1 to maxAttempts - 1 RDD block lost failures, merge should retry and succeed. + for { + eager <- BOOLEAN_DOMAIN + kills <- 1 to maxAttempts - 1 + } { + test(s"materialize source unpersist with $kills kill attempts succeeds - eager=$eager") { + withTable(tblName) { + withSQLConf(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_EAGER.key -> eager.toString) { + val allDeltaEvents = testMergeMaterializedSourceUnpersist(tblName, kills) + val events = + allDeltaEvents.filter(_.tags.get("opType").contains("delta.dml.merge.stats")) + assert(events.length == 1, s"allDeltaEvents:\n$allDeltaEvents") + val mergeStats = JsonUtils.fromJson[MergeStats](events(0).blob) + assert(mergeStats.materializeSourceAttempts.isDefined, s"MergeStats:\n$mergeStats") + assert( + mergeStats.materializeSourceAttempts.get == kills + 1, + s"MergeStats:\n$mergeStats") + + // Check query result after merge + val tab = sql(s"select * from $tblName order by id") + .collect() + .map(row => row.getLong(0)) + .toSeq + assert(tab == (0L until 90L) ++ (100L until 120L)) + } + } + } + } + + // Eventually it should fail after exceeding maximum number of attempts. + for (eager <- BOOLEAN_DOMAIN) { + test(s"materialize source unpersist with $maxAttempts kill attempts fails - eager=$eager") { + withSQLConf(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_EAGER.key -> eager.toString) { + withTable(tblName) { + val allDeltaEvents = testMergeMaterializedSourceUnpersist(tblName, maxAttempts) + val events = allDeltaEvents + .filter(_.tags.get("opType").contains(MergeIntoMaterializeSourceError.OP_TYPE)) + assert(events.length == 1, s"allDeltaEvents:\n$allDeltaEvents") + val error = JsonUtils.fromJson[MergeIntoMaterializeSourceError](events(0).blob) + assert(error.errorType == MergeIntoMaterializeSourceErrorType.RDD_BLOCK_LOST.toString) + assert(error.attempt == maxAttempts) + } + } + } + } + } + testMergeMaterializeSourceUnpersistRetries + + def getHints(df: => DataFrame): Seq[(Seq[ResolvedHint], JoinHint)] = { + val plans = withAllPlansCaptured(spark) { + df + } + var plansWithMaterializedSource = 0 + val hints = plans.flatMap { p => + val materializedSourceExists = p.analyzed.exists { + case l: LogicalRDD if l.rdd.name == "mergeMaterializedSource" => true + case _ => false + } + if (materializedSourceExists) { + // If it is a plan with materialized source, there should be exactly one join + // of target and source. We collect resolved hints from analyzed plans, and the hint + // applied to the join from optimized plan. 
+ plansWithMaterializedSource += 1 + val hints = p.analyzed.collect { + case h: ResolvedHint => h + } + val joinHints = p.optimized.collect { + case j: Join => j.hint + } + assert(joinHints.length == 1, s"Got $joinHints") + val joinHint = joinHints.head + + // Only preserve join strategy hints, because we are testing with these. + // Other hints may be added by MERGE internally, e.g. hints to force DFP/DPP, that + // we don't want to be considering here. + val retHints = hints + .filter(_.hints.strategy.nonEmpty) + def retJoinHintInfo(hintInfo: Option[HintInfo]): Option[HintInfo] = hintInfo match { + case Some(h) if h.strategy.nonEmpty => Some(HintInfo(strategy = h.strategy)) + case _ => None + } + val retJoinHint = joinHint.copy( + leftHint = retJoinHintInfo(joinHint.leftHint), + rightHint = retJoinHintInfo(joinHint.rightHint) + ) + + Some((retHints, retJoinHint)) + } else { + None + } + } + assert(plansWithMaterializedSource == 2, + s"2 plans should have materialized source, but got: $plans") + hints + } + + for (eager <- BOOLEAN_DOMAIN) + test(s"materialize source preserves dataframe hints - eager=$eager") { + withTable("A", "B", "T") { + sql("select id, id as v from range(50000)").write.format("delta").saveAsTable("T") + sql("select id, id+2 as v from range(10000)").write.format("csv").saveAsTable("A") + sql("select id, id*2 as v from range(1000)").write.format("csv").saveAsTable("B") + + // Manually added broadcast hint will mess up the expected hints hence disable it + withSQLConf( + DeltaSQLConf.MERGE_MATERIALIZE_SOURCE_EAGER.key -> eager.toString, + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + // Simple BROADCAST hint + val hSimple = getHints( + sql("MERGE INTO T USING (SELECT /*+ BROADCAST */ * FROM A) s ON T.id = s.id" + + " WHEN MATCHED THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *") + ) + hSimple.foreach { case (hints, joinHint) => + assert(hints.length == 1) + assert(hints.head.hints == HintInfo(strategy = Some(BROADCAST))) + assert(joinHint == JoinHint(Some(HintInfo(strategy = Some(BROADCAST))), None)) + } + + // Simple MERGE hint + val hSimpleMerge = getHints( + sql("MERGE INTO T USING (SELECT /*+ MERGE */ * FROM A) s ON T.id = s.id" + + " WHEN MATCHED THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *") + ) + hSimpleMerge.foreach { case (hints, joinHint) => + assert(hints.length == 1) + assert(hints.head.hints == HintInfo(strategy = Some(SHUFFLE_MERGE))) + assert(joinHint == JoinHint(Some(HintInfo(strategy = Some(SHUFFLE_MERGE))), None)) + } + + // Aliased hint + val hAliased = getHints( + sql("MERGE INTO T USING " + + "(SELECT /*+ BROADCAST(FOO) */ * FROM (SELECT * FROM A) FOO) s ON T.id = s.id" + + " WHEN MATCHED THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *") + ) + hAliased.foreach { case (hints, joinHint) => + assert(hints.length == 1) + assert(hints.head.hints == HintInfo(strategy = Some(BROADCAST))) + assert(joinHint == JoinHint(Some(HintInfo(strategy = Some(BROADCAST))), None)) + } + + // Aliased hint - hint propagation does not work from under an alias + // (remove if this ever gets implemented in the hint framework) + val hAliasedInner = getHints( + sql("MERGE INTO T USING " + + "(SELECT /*+ BROADCAST(A) */ * FROM (SELECT * FROM A) FOO) s ON T.id = s.id" + + " WHEN MATCHED THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *") + ) + hAliasedInner.foreach { case (hints, joinHint) => + assert(hints.length == 0) + assert(joinHint == JoinHint(None, None)) + } + + // This hint applies to the join inside the source, not to the source as a whole + val hJoinInner 
= getHints( + sql("MERGE INTO T USING " + + "(SELECT /*+ BROADCAST(A) */ A.* FROM A JOIN B WHERE A.id = B.id) s ON T.id = s.id" + + " WHEN MATCHED THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *") + ) + hJoinInner.foreach { case (hints, joinHint) => + assert(hints.length == 0) + assert(joinHint == JoinHint(None, None)) + } + + // Two hints - top one takes effect + val hTwo = getHints( + sql("MERGE INTO T USING (SELECT /*+ BROADCAST, MERGE */ * FROM A) s ON T.id = s.id" + + " WHEN MATCHED THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *") + ) + hTwo.foreach { case (hints, joinHint) => + assert(hints.length == 2) + assert(hints(0).hints == HintInfo(strategy = Some(BROADCAST))) + assert(hints(1).hints == HintInfo(strategy = Some(SHUFFLE_MERGE))) + // top one takes effect + assert(joinHint == JoinHint(Some(HintInfo(strategy = Some(BROADCAST))), None)) + } + } + } + } + + test("materialize source for non-deterministic source formats") { + val targetSchema = StructType(Array( + StructField("id", IntegerType, nullable = false), + StructField("value", StringType, nullable = true))) + val targetData = Seq( + Row(1, "update"), + Row(2, "skip"), + Row(3, "delete")) + val sourceData = Seq(1, 3, 4).toDF("id") + val expectedResult = Seq( + Row(1, "new"), // Updated + Row(2, "skip"), // Copied + // 3 is deleted + Row(4, "new")) // Inserted + + // There are more, but these are easiest to test for. + val nonDeterministicFormats = List("parquet", "json") + + // Return MergeIntoMaterializeSourceReason string + def executeMerge(sourceDf: DataFrame): String = { + val sourceDfWithAction = sourceDf.withColumn("value", lit("new")) + var materializedSource: String = "" + withTable("target") { + val targetRdd = spark.sparkContext.parallelize(targetData) + val targetDf = spark.createDataFrame(targetRdd, targetSchema) + targetDf.write.format("delta").mode("overwrite").saveAsTable("target") + val targetTable = io.delta.tables.DeltaTable.forName("target") + + val events: Seq[UsageRecord] = Log4jUsageLogger.track { + targetTable.merge(sourceDfWithAction, col("target.id") === sourceDfWithAction("id")) + .whenMatched(col("target.value") === lit("update")).updateAll() + .whenMatched(col("target.value") === lit("delete")).delete() + .whenNotMatched().insertAll() + .execute() + } + + // Can't return values out of withTable. + materializedSource = mergeSourceMaterializeReason(events) + + checkAnswer( + spark.read.format("delta").table("target"), + expectedResult) + } + materializedSource + } + + def checkSourceMaterialization( + format: String, + reason: String): Unit = { + // Test once by name and once using path, as they produce different plans. + withTable("source") { + sourceData.write.format(format).saveAsTable("source") + val sourceDf = spark.read.format(format).table("source") + assert(executeMerge(sourceDf) == reason, s"Wrong materialization reason for $format") + } + + withTempPath { sourcePath => + sourceData.write.format(format).save(sourcePath.toString) + val sourceDf = spark.read.format(format).load(sourcePath.toString) + assert(executeMerge(sourceDf) == reason, s"Wrong materialization reason for $format") + } + } + + withSQLConf(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> "auto") { + for (format <- nonDeterministicFormats) { + checkSourceMaterialization( + format, + reason = MergeIntoMaterializeSourceReason.NON_DETERMINISTIC_SOURCE_NON_DELTA.toString) + } + + // Delta should not materialize source. 
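+      // A Delta source is read from a pinned table snapshot, so re-reading it across retries is
+      // deterministic and "auto" does not need to materialize it.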
+      checkSourceMaterialization(
+        "delta", reason = MergeIntoMaterializeSourceReason.NOT_MATERIALIZED_AUTO.toString)
+    }
+
+    // Mixed safe/unsafe queries should materialize source.
+    def checkSourceMaterializationForMixedSources(
+        format1: String,
+        format2: String,
+        shouldMaterializeSource: Boolean): Unit = {
+
+      def checkWithSources(source1Df: DataFrame, source2Df: DataFrame): Unit = {
+        val sourceDf = source1Df.union(source2Df)
+        val materializeReason = executeMerge(sourceDf)
+        if (shouldMaterializeSource) {
+          assert(materializeReason ==
+            MergeIntoMaterializeSourceReason.NON_DETERMINISTIC_SOURCE_NON_DELTA.toString,
+            s"$format1 union $format2 are not deterministic as a source and should materialize.")
+        } else {
+          assert(materializeReason ==
+            MergeIntoMaterializeSourceReason.NOT_MATERIALIZED_AUTO.toString,
+            s"$format1 union $format2 is deterministic as a source and should not materialize.")
+        }
+      }
+
+      // Test once by name and once using path, as they produce different plans.
+      withTable("source1", "source2") {
+        sourceData.filter(col("id") < 2).write.format(format1).saveAsTable("source1")
+        val source1Df = spark.read.format(format1).table("source1")
+        sourceData.filter(col("id") >= 2).write.format(format2).saveAsTable("source2")
+        val source2Df = spark.read.format(format2).table("source2")
+        checkWithSources(source1Df, source2Df)
+      }
+
+      withTempPaths(2) { case Seq(source1, source2) =>
+        sourceData.filter(col("id") < 2).write
+          .mode("overwrite").format(format1).save(source1.toString)
+        val source1Df = spark.read.format(format1).load(source1.toString)
+        sourceData.filter(col("id") >= 2).write
+          .mode("overwrite").format(format2).save(source2.toString)
+        val source2Df = spark.read.format(format2).load(source2.toString)
+        checkWithSources(source1Df, source2Df)
+      }
+    }
+
+    withSQLConf(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> "auto") {
+      val allFormats = "delta" :: nonDeterministicFormats
+      // Try all combinations
+      for {
+        format1 <- allFormats
+        format2 <- allFormats
+      } checkSourceMaterializationForMixedSources(
+        format1 = format1,
+        format2 = format2,
+        shouldMaterializeSource = !(format1 == "delta" && format2 == "delta"))
+    }
+
+    withSQLConf(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> "none") {
+      // With "none", it should not materialize, even though parquet is non-deterministic.
+      checkSourceMaterialization(
+        "parquet",
+        reason = MergeIntoMaterializeSourceReason.NOT_MATERIALIZED_NONE.toString)
+    }
+
+    withSQLConf(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> "all") {
+      // With "all", it should materialize, even though Delta is deterministic.
+ checkSourceMaterialization( + "delta", + reason = MergeIntoMaterializeSourceReason.MATERIALIZE_ALL.toString) + } + } + + test("materialize source for non-deterministic source queries - rand expr") { + val targetSchema = StructType(Array( + StructField("id", IntegerType, nullable = false), + StructField("value", FloatType, nullable = true))) + val targetData = Seq( + Row(1, 0.5f), + Row(2, 0.3f), + Row(3, 0.8f)) + val sourceData = Seq(1, 3).toDF("id") + withSQLConf(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> "auto") { + + def executeMerge(sourceDf: DataFrame): Unit = { + val nonDeterministicSourceDf = sourceDf.withColumn("value", rand()) + withTable("target") { + val targetRdd = spark.sparkContext.parallelize(targetData) + val targetDf = spark.createDataFrame(targetRdd, targetSchema) + targetDf.write.format("delta").mode("overwrite").saveAsTable("target") + val targetTable = io.delta.tables.DeltaTable.forName("target") + + val events: Seq[UsageRecord] = Log4jUsageLogger.track { + targetTable + .merge(nonDeterministicSourceDf, col("target.id") === nonDeterministicSourceDf("id")) + .whenMatched(col("target.value") > nonDeterministicSourceDf("value")).delete() + .whenMatched().updateAll() + .whenNotMatched().insertAll() + .execute() + } + + val materializeReason = mergeSourceMaterializeReason(events) + assert(materializeReason == + MergeIntoMaterializeSourceReason.NON_DETERMINISTIC_SOURCE_OPERATORS.toString, + "Source has non deterministic operations and should have materialized source.") + } + } + + // Test once by name and once using path, as they produce different plans. + withTable("source") { + sourceData.write.format("delta").saveAsTable("source") + val sourceDf = spark.read.format("delta").table("source") + executeMerge(sourceDf) + } + + withTempPath { sourcePath => + sourceData.write.format("delta").save(sourcePath.toString) + val sourceDf = spark.read.format("delta").load(sourcePath.toString) + executeMerge(sourceDf) + } + } + } + + test("don't materialize source for deterministic source queries with current_date") { + val targetSchema = StructType(Array( + StructField("id", IntegerType, nullable = false), + StructField("date", DateType, nullable = true))) + val targetData = Seq( + Row(1, java.sql.Date.valueOf("2022-01-01")), + Row(2, java.sql.Date.valueOf("2022-02-01")), + Row(3, java.sql.Date.valueOf("2022-03-01"))) + val sourceData = Seq(1, 3).toDF("id") + withSQLConf(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> "auto") { + + def executeMerge(sourceDf: DataFrame): Unit = { + val nonDeterministicSourceDf = sourceDf.withColumn("date", current_date()) + withTable("target") { + val targetRdd = spark.sparkContext.parallelize(targetData) + val targetDf = spark.createDataFrame(targetRdd, targetSchema) + targetDf.write.format("delta").mode("overwrite").saveAsTable("target") + val targetTable = io.delta.tables.DeltaTable.forName("target") + + val events: Seq[UsageRecord] = Log4jUsageLogger.track { + targetTable + .merge(nonDeterministicSourceDf, col("target.id") === nonDeterministicSourceDf("id")) + .whenMatched(col("target.date") < nonDeterministicSourceDf("date")).delete() + .whenMatched().updateAll() + .whenNotMatched().insertAll() + .execute() + } + + val materializeReason = mergeSourceMaterializeReason(events) + assert(materializeReason == + MergeIntoMaterializeSourceReason.NOT_MATERIALIZED_AUTO.toString, + "Source query is deterministic and should not be materialized.") + } + } + + // Test once by name and once using path, as they produce different plans. 
+ withTable("source") { + sourceData.write.format("delta").saveAsTable("source") + val sourceDf = spark.read.format("delta").table("source") + executeMerge(sourceDf) + } + + withTempPath { sourcePath => + sourceData.write.format("delta").save(sourcePath.toString) + val sourceDf = spark.read.format("delta").load(sourcePath.toString) + executeMerge(sourceDf) + } + } + } + + test("materialize source for non-deterministic source queries - subquery") { + val sourceDataFrame = spark.range(0, 10) + .toDF("id") + .withColumn("value", rand()) + + val targetDataFrame = spark.range(0, 5) + .toDF("id") + .withColumn("value", rand()) + + withSQLConf(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> "auto") { + + // Return MergeIntoMaterializeSourceReason + def executeMerge(sourceDf: DataFrame): Unit = { + withTable("target") { + targetDataFrame.write + .format("delta") + .saveAsTable("target") + val targetTable = io.delta.tables.DeltaTable.forName("target") + + val events: Seq[UsageRecord] = Log4jUsageLogger.track { + targetTable.merge(sourceDf, col("target.id") === sourceDf("id")) + .whenMatched(col("target.value") > sourceDf("value")).delete() + .whenMatched().updateAll() + .whenNotMatched().insertAll() + .execute() + } + + val materializeReason = mergeSourceMaterializeReason(events) + assert(materializeReason == + MergeIntoMaterializeSourceReason.NON_DETERMINISTIC_SOURCE_OPERATORS.toString, + "Source query has non deterministic subqueries and should materialize.") + } + } + + // Test once by name and once using path, as they produce different plans. + withTable("source") { + sourceDataFrame.write.format("delta").saveAsTable("source") + val sourceDf = spark.sql( + s""" + |SELECT id, 0.5 AS value + |FROM source + |WHERE id IN ( + | SELECT id FROM source + | WHERE id < rand() * ${sourceDataFrame.count()} ) + |""".stripMargin) + executeMerge(sourceDf) + } + + withTempPath { sourcePath => + sourceDataFrame.write.format("delta").save(sourcePath.toString) + val sourceDf = spark.sql( + s""" + |SELECT id, 0.5 AS value + |FROM delta.`$sourcePath` + |WHERE id IN ( + | SELECT id FROM delta.`$sourcePath` + | WHERE id < rand() * ${sourceDataFrame.count()} ) + |""".stripMargin) + executeMerge(sourceDf) + } + } + } + + test("don't materialize insert only merge") { + val tblName = "mergeTarget" + withTable(tblName) { + val targetDF = spark.range(100).toDF("id") + targetDF.write.format("delta").saveAsTable(tblName) + spark.range(90, 120).toDF("id").createOrReplaceTempView("s") + val mergeQuery = + s"MERGE INTO $tblName t USING s ON t.id = s.id WHEN NOT MATCHED THEN INSERT *" + val events: Seq[UsageRecord] = Log4jUsageLogger.track { + withSQLConf(DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> "auto") { + sql(mergeQuery) + } + } + + assert(mergeSourceMaterializeReason(events) == + MergeIntoMaterializeSourceReason.NOT_MATERIALIZED_AUTO_INSERT_ONLY.toString) + + checkAnswer( + spark.read.format("delta").table(tblName), + (0 until 120).map(i => Row(i.toLong))) + } + } + + private def mergeStats(events: Seq[UsageRecord]): MergeStats = { + val mergeStats = events.filter { e => + e.metric == MetricDefinitions.EVENT_TAHOE.name && + e.tags.get("opType").contains("delta.dml.merge.stats") + } + assert(mergeStats.size == 1) + JsonUtils.fromJson[MergeStats](mergeStats.head.blob) + } + + private def mergeSourceMaterializeReason(events: Seq[UsageRecord]): String = { + val stats = mergeStats(events) + assert(stats.materializeSourceReason.isDefined) + stats.materializeSourceReason.get + } +} + +// MERGE + materialize +class 
MergeIntoMaterializeSourceSuite extends MergeIntoMaterializeSourceTests
+{
+  override protected def sparkConf: SparkConf = super.sparkConf
+    .set(DeltaSQLConf.MERGE_USE_PERSISTENT_DELETION_VECTORS.key, "false")
+}
+
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMetricsBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMetricsBase.scala
new file mode 100644
index 00000000000..46e676b6d86
--- /dev/null
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMetricsBase.scala
@@ -0,0 +1,1407 @@
+/*
+ * Copyright (2021) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta
+
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+
+import org.apache.spark.sql.{DataFrame, QueryTest}
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.functions.expr
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * Tests for the metrics of the MERGE INTO command in the Delta log.
+ *
+ * This test suite checks the values of the metrics that the MERGE INTO command emits in the
+ * Delta log, with Change Data Feed (CDF) enabled and disabled.
+ *
+ * Metrics related to the number of affected rows are deterministic, so their expected values are
+ * checked explicitly. Metrics related to the number of affected files and to execution times are
+ * not deterministic, so we only check their presence and some invariants.
+ */
+trait MergeIntoMetricsBase
+  extends QueryTest
+  with SharedSparkSession { self: DescribeDeltaHistorySuiteBase =>
+
+  import MergeIntoMetricsBase._
+  import testImplicits._
+
+  ///////////////////////
+  // container classes //
+  ///////////////////////
+
+  private case class MergeTestConfiguration(partitioned: Boolean, cdfEnabled: Boolean) {
+
+    /** Return a [[MetricValue]] for this config with the provided default value. */
+    def metricValue(defaultValue: Int): MetricValue = {
+      new MetricValue(this, defaultValue)
+    }
+  }
+
+  /**
+   * Helper class to compute values of metrics that depend on the configuration.
+   *
+   * Objects are initialized with a test configuration and a default value. The value can then be
+   * overwritten with helper methods that check the test config, and value() can be called to
+   * retrieve the final expected value for a test.
+   */
+  private class MetricValue(testConfig: MergeTestConfiguration, defaultValue: Int) {
+    private var currentValue: Int = defaultValue
+
+    def value: Int = currentValue
+
+    // e.g. ifCDF
+  }
+
+  ////////////////
+  // test utils //
+  ////////////////
+
+  val testsToIgnore = Seq(
+    // The below tests fail due to incorrect numTargetRowsCopied metric.
+    "delete-only with condition",
+    "delete-only with update with unsatisfied condition",
+    "delete-only with unsatisfied condition",
+    "delete-only with target-only condition",
+    "delete-only with source-only condition",
+    "match-only with unsatisfied condition"
+  )
+
+  // Helper to generate tests with different configurations.
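+  // For every scenario name this helper registers four tests, one per combination of
+  // (partitioned, cdfEnabled), e.g. "merge-metrics: insert-only - Partitioned = true, CDF = false".
+  // Scenarios listed in testsToIgnore are registered as ignored until the metrics are fixed.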
+ private def testMergeMetrics(name: String)(testFn: MergeTestConfiguration => Unit): Unit = { + for { + partitioned <- Seq(true, false) + cdfEnabled <- Seq(true, false) + } { + val testConfig = MergeTestConfiguration(partitioned = partitioned, cdfEnabled = cdfEnabled) + val testName = s"merge-metrics: $name - Partitioned = $partitioned, CDF = $cdfEnabled" + + if (testsToIgnore.contains(name)) { + // Currently multiple metrics are wrong for Merge. We have added tests for these scenarios + // but we need to ignore the failing tests until the metrics are fixed. + ignore(testName) { testFn(testConfig) } + } else { + test(testName) { testFn(testConfig) } + } + } + } + + /** + * Check invariants for row metrics of MERGE INTO command. + * + * @param metrics The merge operation metrics from the Delta history. + */ + private def checkMergeOperationRowMetricsInvariants(metrics: Map[String, String]): Unit = { + assert( + metrics("numTargetRowsUpdated").toLong === + metrics("numTargetRowsMatchedUpdated").toLong + + metrics("numTargetRowsNotMatchedBySourceUpdated").toLong) + assert( + metrics("numTargetRowsDeleted").toLong === + metrics("numTargetRowsMatchedDeleted").toLong + + metrics("numTargetRowsNotMatchedBySourceDeleted").toLong) + } + + /** + * Check invariants for file metrics of MERGE INTO command. + * + * @param metrics The merge operation metrics from the Delta history. + */ + private def checkMergeOperationFileMetricsInvariants(metrics: Map[String, String]): Unit = { + // numTargetFilesAdded should have a positive value if rows were added and be zero + // otherwise. + { + val numFilesAdded = metrics("numTargetFilesAdded").toLong + val numBytesAdded = metrics("numTargetBytesAdded").toLong + val numRowsWritten = + metrics("numTargetRowsInserted").toLong + + metrics("numTargetRowsUpdated").toLong + + metrics("numTargetRowsCopied").toLong + lazy val assertMsgNumFiles = { + val expectedNumFilesAdded = + if (numRowsWritten == 0) "0" else s"between 1 and $numRowsWritten" + s"""Unexpected value for numTargetFilesAdded metric. + | Expected: $expectedNumFilesAdded + | Actual: $numFilesAdded + | numRowsWritten: $numRowsWritten + | Metrics: ${metrics.toString} + |""".stripMargin + + } + lazy val assertMsgBytes = { + val expected = if (numRowsWritten == 0) "0" else "greater than 0" + s"""Unexpected value for numTargetBytesAdded metric. + | Expected: $expected + | Actual: $numBytesAdded + | numRowsWritten: $numRowsWritten + | numFilesAdded: $numFilesAdded + | Metrics: ${metrics.toString} + |""".stripMargin + } + if (numRowsWritten == 0) { + assert(numFilesAdded === 0, assertMsgNumFiles) + assert(numBytesAdded === 0, assertMsgBytes) + } else { + assert(numFilesAdded > 0 && numFilesAdded <= numRowsWritten, assertMsgNumFiles) + assert(numBytesAdded > 0, assertMsgBytes) + } + } + + // numTargetFilesRemoved should have a positive value if rows were updated or deleted and be + // zero otherwise. In case of classic merge we also count copied rows as changed, because if + // match clauses have conditions we may end up copying rows even if no other rows are + // updated/deleted. 
+ { + val numFilesRemoved = metrics("numTargetFilesRemoved").toLong + val numBytesRemoved = metrics("numTargetBytesRemoved").toLong + val numRowsTouched = + metrics("numTargetRowsDeleted").toLong + + metrics("numTargetRowsUpdated").toLong + + metrics("numTargetRowsCopied").toLong + lazy val assertMsgNumFiles = { + val expectedNumFilesRemoved = + if (numRowsTouched == 0) "0" else s"between 1 and $numRowsTouched" + s"""Unexpected value for numTargetFilesRemoved metric. + | Expected: $expectedNumFilesRemoved + | Actual: $numFilesRemoved + | numRowsTouched: $numRowsTouched + | Metrics: ${metrics.toString} + |""".stripMargin + } + lazy val assertMsgBytes = { + val expectedNumBytesRemoved = + if (numRowsTouched == 0) "0" else "greater than 0" + s"""Unexpected value for numTargetBytesRemoved metric. + | Expected: $expectedNumBytesRemoved + | Actual: $numBytesRemoved + | numRowsTouched: $numRowsTouched + | Metrics: ${metrics.toString} + |""".stripMargin + } + + if (numRowsTouched == 0) { + assert(numFilesRemoved === 0, assertMsgNumFiles) + assert(numBytesRemoved === 0, assertMsgBytes) + } else { + assert(numFilesRemoved > 0 && numFilesRemoved <= numRowsTouched, assertMsgNumFiles) + assert(numBytesRemoved > 0, assertMsgBytes) + } + } + } + + /** + * Helper method to create a target table with the desired options, run a merge command and check + * the operation metrics in the Delta history. + * + * For operation metrics the following checks are performed: + * a) The operation metrics in Delta history must match [[DeltaOperationMetrics.MERGE]] schema, + * i.e. no metrics can be missing or unknown metrics can exist. + * b) All operation metrics must have a non-negative values. + * c) The values of metrics that are specified in 'expectedOpMetrics' argument must match the + * operation metrics. Metrics with a value of -1 are ignored, to allow callers always specify + * metrics that don't exist under some configurations. + * d) Row-related operation metrics that are not specified in 'expectedOpMetrics' must be zero. + * e) File/Time-related operation metrics that are not specified in 'expectedOpMetrics' can have + * non-zero values. These metrics are not deterministic and so this method only checks that + * some invariants hold. + * + * @param targetDf The DataFrame to generate the target table for the merge command. + * @param sourceDf The DataFrame to generate the source table for the merge command. + * @param mergeCmdFn The function that actually runs the merge command. + * @param expectedOpMetrics A map with values for expected operation metrics. + * @param testConfig The configuration options for this test + * @param overrideExpectedOpMetrics Sequence of expected operation metric values to override from + * those provided in expectedOpMetrics for specific + * configurations of partitioned and cdfEnabled. 
Elements + * provided as: + * ((partitioned, cdfEnabled), (metric_name, metric_value)) + */ + private def runMergeCmdAndTestMetrics( + targetDf: DataFrame, + sourceDf: DataFrame, + mergeCmdFn: MergeCmd, + expectedOpMetrics: Map[String, Int], + testConfig: MergeTestConfiguration, + overrideExpectedOpMetrics: Seq[((Boolean, Boolean), (String, Int))] = Seq.empty + ): Unit = { + withSQLConf( + DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true", + DeltaSQLConf.DELTA_SKIP_RECORDING_EMPTY_COMMITS.key -> "false", + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> testConfig.cdfEnabled.toString + ) { + withTempDir { tempDir => + def addExtraColumns(tableDf: DataFrame): DataFrame = { + // Add a column to be used for data partitioning and one extra column for filters in + // queries. + val numRows = tableDf.count() + val numPartitions = tableDf.rdd.getNumPartitions + val numRowsPerPart = if (numRows > 0) numRows / numPartitions else 1 + tableDf.withColumn("partCol", expr(s"floor(id / $numRowsPerPart)")) + .withColumn("extraCol", expr(s"$numRows - id")) + } + + // Add extra columns and create target table. + val tempPath = tempDir.getAbsolutePath + val partitionBy = if (testConfig.partitioned) Seq("partCol") else Seq() + val targetDfWithExtraCols = addExtraColumns(targetDf) + targetDfWithExtraCols + .write + .partitionBy(partitionBy: _*) + .format("delta") + .save(tempPath) + val targetTable = io.delta.tables.DeltaTable.forPath(tempPath) + + // Also add extra columns in source to be able to call updateAll()/insertAll(). + val sourceDfWithExtraCols = addExtraColumns(sourceDf) + + // Run MERGE INTO command + mergeCmdFn(targetTable, sourceDfWithExtraCols) + + // Query the operation metrics from the Delta log history. + val operationMetrics: Map[String, String] = getOperationMetrics(targetTable.history(1)) + + // Get the default row operation metrics and override them with the provided ones. + val metricsWithDefaultZeroValue = mergeRowMetrics.map(_ -> "0").toMap + var expectedOpMetricsWithDefaults = metricsWithDefaultZeroValue ++ + expectedOpMetrics.filter(m => m._2 >= 0).mapValues(_.toString) + + overrideExpectedOpMetrics.foreach { case ((partitioned, cdfEnabled), (metric, value)) => + if (partitioned == testConfig.partitioned && cdfEnabled == testConfig.cdfEnabled) { + expectedOpMetricsWithDefaults = expectedOpMetricsWithDefaults + + (metric -> value.toString) + } + } + + // Check that all operation metrics are positive numbers. + for ((metricName, metricValue) <- operationMetrics) { + assert(metricValue.toLong >= 0, + s"Invalid negative value for metric $metricName = $metricValue") + } + + // Check that operation metrics match the schema and that values match the expected ones. + checkOperationMetrics( + expectedOpMetricsWithDefaults, + operationMetrics, + DeltaOperationMetrics.MERGE + ) + // Check row metrics invariants. + checkMergeOperationRowMetricsInvariants(operationMetrics) + // Check file metrics invariants. + checkMergeOperationFileMetricsInvariants(operationMetrics) + // Check time metrics invariants. + checkOperationTimeMetricsInvariant(mergeTimeMetrics, operationMetrics) + // Check CDF metrics invariants. 
+ checkMergeOperationCdfMetricsInvariants(operationMetrics, testConfig.cdfEnabled) + } + } + } + + ///////////////////////////// + // insert-only merge tests // + ///////////////////////////// + + testMergeMetrics("insert-only") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable.as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenNotMatched() + .insertAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numOutputRows" -> 50, + "numTargetRowsInserted" -> 50 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("insert-only with skipping") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 100, end = 200, step = 1, numPartitions = 5).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable.as("t") + .merge(sourceDf.as("s"), "s.id = t.id and t.partCol >= 2") + .whenNotMatched() + .insertAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numOutputRows" -> 100, + "numTargetRowsInserted" -> 100 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("insert-only with condition") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenNotMatched("s.id >= 125") + .insertAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numOutputRows" -> 25, + "numTargetRowsInserted" -> 25 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("insert-only when all rows match") { testConfig => { + val targetDf = spark.range(start = 0, end = 200, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenNotMatched() + .insertAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("insert-only with unsatisfied condition") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenNotMatched("s.id > 150") + .insertAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100 + ) + 
runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("insert-only with empty source") { testConfig => { + val targetDf = spark.range(start = 0, end = 200, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(0).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenNotMatched() + .insertAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 0 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("insert-only with empty target") { testConfig => { + val targetDf = spark.range(0).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenNotMatched() + .insertAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numOutputRows" -> 100, + "numTargetRowsInserted" -> 100 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("insert-only with disjoint tables") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 100, end = 200, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched() + .updateAll() + .whenNotMatched() + .insertAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numOutputRows" -> 100, + "numTargetRowsInserted" -> 100 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("insert-only with update/delete with unsatisfied conditions") { testConfig => { + val targetDf = spark.range(start = 0, end = 50, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 0, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("s.id + t.id > 200") + .updateAll() + .whenMatched("s.id + t.id < 0") + .delete() + .whenNotMatched() + .insertAll() + .execute() + } + // In classic merge we are copying all rows from job1. 
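+    // The target holds ids 0-49 in 5 files and the source holds ids 0-149. No matched row can
+    // satisfy either condition (s.id + t.id is at most 98), so the 50 matched rows are only
+    // copied, while the 100 unmatched source rows are inserted: 50 + 100 = 150 output rows.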
+ val expectedOpMetrics = Map( + "numSourceRows" -> 150, + "numOutputRows" -> 150, + "numTargetRowsInserted" -> 100, + "numTargetRowsCopied" -> 50, + "numTargetFilesRemoved" -> 5 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + ///////////////////////////// + // delete-only merge tests // + ///////////////////////////// + + testMergeMetrics("delete-only") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched() + .delete() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 100, + "numTargetRowsDeleted" -> 50, + "numTargetRowsMatchedDeleted" -> 50, + "numTargetRowsRemoved" -> -1, + "numOutputRows" -> 10, + "numTargetRowsCopied" -> 10, + "numTargetFilesAdded" -> 1, + "numTargetFilesRemoved" -> -1 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only with skipping") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id and t.partCol >= 2") + .whenMatched() + .delete() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 100, + "numOutputRows" -> 10, + "numTargetRowsCopied" -> 10, + "numTargetRowsDeleted" -> 50, + "numTargetRowsMatchedDeleted" -> 50, + "numTargetRowsRemoved" -> -1, + "numTargetFilesAdded" -> 1, + "numTargetFilesRemoved" -> 3 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only with disjoint tables") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 100, end = 200, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched() + .delete() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numTargetFilesAdded" -> 0, + "numTargetFilesRemoved" -> 0 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only delete all rows") { testConfig => { + val targetDf = spark.range(start = 100, end = 200, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 0, end = 300, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched() + .delete() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 300, + "numOutputRows" -> 0, + "numTargetRowsCopied" -> 0, + "numTargetRowsDeleted" -> 100, + 
"numTargetRowsMatchedDeleted" -> 100, + "numTargetRowsRemoved" -> -1, + "numTargetFilesAdded" -> 0, + "numTargetFilesRemoved" -> 5 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only with condition") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 0, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("s.id + t.id < 50") + .delete() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 150, + "numOutputRows" -> 15, + "numTargetRowsCopied" -> 15, + "numTargetRowsDeleted" -> 25, + "numTargetRowsMatchedDeleted" -> 25, + "numTargetRowsRemoved" -> -1, + "numTargetFilesAdded" -> 1, + "numTargetFilesRemoved" -> 2 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only with update with unsatisfied condition") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("s.id + t.id > 1000") + .updateAll() + .whenMatched("s.id + t.id < 50") + .delete() + .execute() + } + // In case of partitioned tables, files are mixed-in even though finally there are no matches. + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 100, + "numOutputRows" -> 15, + "numTargetRowsCopied" -> 15, + "numTargetRowsDeleted" -> 25, + "numTargetRowsMatchedDeleted" -> 25, + "numTargetRowsRemoved" -> -1, + "numTargetFilesAdded" -> 1, + "numTargetFilesRemoved" -> 2 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only with condition on delete and insert with no matching rows") { + testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("s.id + t.id < 50") + .delete() + .whenNotMatched() + .insertAll() + .execute() + } + // In classic merge we are copying all rows from job1. + // In case of partitioned tables, files are mixed-in even though finally there are no matches. 
+ val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 100, + "numOutputRows" -> 75, + "numTargetRowsCopied" -> 75, + "numTargetRowsDeleted" -> 25, + "numTargetRowsMatchedDeleted" -> 25, + "numTargetRowsRemoved" -> -1, + "numTargetFilesRemoved" -> 5 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + } + } + + testMergeMetrics("delete-only with unsatisfied condition") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 0, end = 150, step = 1, numPartitions = 15).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("s.id + t.id > 1000") + .delete() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 150, + "numTargetFilesAdded" -> 0, + "numTargetFilesRemoved" -> 0 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only with target-only condition") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 0, end = 150, step = 1, numPartitions = 15).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("t.id >= 45") + .delete() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 150, + "numOutputRows" -> 5, + "numTargetRowsCopied" -> 5, + "numTargetRowsDeleted" -> 55, + "numTargetRowsMatchedDeleted" -> 55, + "numTargetRowsRemoved" -> -1, + "numTargetFilesAdded" -> 1, + "numTargetFilesRemoved" -> 3 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only with source-only condition") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 100).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("s.id >= 70") + .delete() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 100, + "numOutputRows" -> 10, + "numTargetRowsCopied" -> 10, + "numTargetRowsDeleted" -> 30, + "numTargetRowsMatchedDeleted" -> 30, + "numTargetRowsRemoved" -> -1, + "numTargetFilesAdded" -> 1, + "numTargetFilesRemoved" -> 2 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only with empty source") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 4).toDF() + val sourceDf = spark.range(0).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("t.id > 25") + .delete() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 0, + "numTargetFilesAdded" -> 0, + "numTargetFilesRemoved" -> 0 + ) + 
runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only with empty target") { testConfig => { + val targetDf = spark.range(0).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 3).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched() + .delete() + .execute() + } + val expectedOpMetrics = Map( + // This actually goes through a special code path in MERGE because the optimizer optimizes + // away the join to the source table entirely if the target table is empty. + "numSourceRows" -> 100, + "numTargetFilesAdded" -> 0, + "numTargetFilesRemoved" -> 0 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only without join empty source") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(0).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "t.id >= 50") + .whenMatched() + .delete() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 0, + "numTargetFilesAdded" -> 0, + "numTargetFilesRemoved" -> 0 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only without join with source with 1 row") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 0, end = 1, step = 1, numPartitions = 1).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "t.id >= 50") + .whenMatched() + .delete() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 1, + "numOutputRows" -> 10, + "numTargetRowsCopied" -> 10, + "numTargetRowsDeleted" -> 50, + "numTargetRowsMatchedDeleted" -> 50, + "numTargetRowsRemoved" -> -1, + "numTargetFilesAdded" -> 1, + "numTargetFilesRemoved" -> 3 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only without join") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 0, end = 200, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "t.id >= 50") + .whenMatched() + .delete() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 200, + "numOutputRows" -> 10, + "numTargetRowsCopied" -> 10, + "numTargetRowsDeleted" -> 50, + "numTargetRowsMatchedDeleted" -> 50, + "numTargetRowsRemoved" -> -1, + "numTargetFilesAdded" -> 1, + "numTargetFilesRemoved" -> 3 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("delete-only with duplicates") { testConfig 
=> { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + // This will cause duplicates due to rounding. + val sourceDf = spark + .range(start = 50, end = 150, step = 1, numPartitions = 2) + .toDF() + .select(floor($"id" / 2).as("id")) + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched() + .delete() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 100, + "numOutputRows" -> 10, + "numTargetRowsDeleted" -> 50, + "numTargetRowsMatchedDeleted" -> 50, + "numTargetRowsRemoved" -> -1, + "numTargetRowsCopied" -> 10, + "numTargetFilesAdded" -> 2, + "numTargetFilesRemoved" -> 3 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig, + // When cdf=true in this test we hit the corner case where there are duplicate matches with a + // delete clause and we generate duplicate cdc data. This is further detailed in + // MergeIntoCommand at the definition of isDeleteWithDuplicateMatchesAndCdc. Our fix for this + // scenario includes deduplicating the output data which reshuffles the output data. + // Thus when the table is not partitioned, the data is rewritten into 1 new file rather than 2 + overrideExpectedOpMetrics = Seq(((false, true), ("numTargetFilesAdded", 1))) + ) + }} + + ///////////////////////////// + // match-only merge tests // + ///////////////////////////// + testMergeMetrics("match-only") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched() + .updateAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numOutputRows" -> 60, + "numTargetRowsUpdated" -> 50, + "numTargetRowsMatchedUpdated" -> 50, + "numTargetRowsCopied" -> 10, + "numTargetFilesRemoved" -> 3 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("match-only with skipping") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id and t.partCol >= 2") + .whenMatched() + .updateAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numOutputRows" -> 60, + "numTargetRowsUpdated" -> 50, + "numTargetRowsMatchedUpdated" -> 50, + "numTargetRowsCopied" -> 10, + "numTargetFilesRemoved" -> 3 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("match-only with update/delete with unsatisfied conditions") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + 
.merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("s.id + t.id > 1000") + .delete() + .whenMatched("s.id + t.id < 1000") + .updateAll() + .whenNotMatched("s.id > 1000") + .insertAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numOutputRows" -> 60, + "numTargetRowsUpdated" -> 50, + "numTargetRowsMatchedUpdated" -> 50, + "numTargetRowsCopied" -> 10, + "numTargetFilesRemoved" -> 3 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("match-only with unsatisfied condition") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("s.id + t.id > 1000") + .updateAll() + .execute() + } + + val expectedOpMetrics = Map( + "numSourceRows" -> 100 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + ///////////////////////////////////////////// + // not matched by source only merge tests // + ///////////////////////////////////////////// + testMergeMetrics("not matched by source update only") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenNotMatchedBySource("t.id < 20") + .updateExpr(Map("t.extraCol" -> "t.extraCol + 1")) + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 100, + "numOutputRows" -> 100, + "numTargetRowsUpdated" -> 20, + "numTargetRowsNotMatchedBySourceUpdated" -> 20, + "numTargetRowsCopied" -> 80, + "numTargetFilesRemoved" -> 5 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + ///////////////////////////// + // full merge tests // + ///////////////////////////// + testMergeMetrics("upsert") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched() + .updateAll() + .whenNotMatched() + .insertAll() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numOutputRows" -> 110, + "numTargetRowsInserted" -> 50, + "numTargetRowsUpdated" -> 50, + "numTargetRowsMatchedUpdated" -> 50, + "numTargetRowsCopied" -> 10, + "numTargetFilesRemoved" -> 3 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("replace target with source") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val 
mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched() + .updateAll() + .whenNotMatched() + .insertAll() + .whenNotMatchedBySource() + .delete() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numOutputRows" -> 100, + "numTargetRowsInserted" -> 50, + "numTargetRowsUpdated" -> 50, + "numTargetRowsMatchedUpdated" -> 50, + "numTargetRowsDeleted" -> 50, + "numTargetRowsNotMatchedBySourceDeleted" -> 50, + "numTargetRowsCopied" -> 0, + "numTargetFilesRemoved" -> 5 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics("upsert and delete with conditions") { testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 10).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 3).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("t.id >= 55 and t.id < 60") + .updateAll() + .whenMatched("t.id < 70") + .delete() + .whenNotMatched() + .insertAll() + .whenNotMatchedBySource("t.id < 10") + .updateExpr(Map("t.extraCol" -> "t.extraCol + 1")) + .whenNotMatchedBySource("t.id >= 45") + .delete() + .execute() + } + val expectedOpMetrics = Map( + "numSourceRows" -> 100, + "numOutputRows" -> 130, + "numTargetRowsInserted" -> 50, + "numTargetRowsUpdated" -> 15, + "numTargetRowsMatchedUpdated" -> 5, + "numTargetRowsNotMatchedBySourceUpdated" -> 10, + "numTargetRowsDeleted" -> 20, + "numTargetRowsMatchedDeleted" -> 15, + "numTargetRowsNotMatchedBySourceDeleted" -> 5, + "numTargetRowsCopied" -> 65, + "numTargetFilesRemoved" -> 10 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} + + testMergeMetrics( + "update/delete/insert with some unsatisfied conditions") { + testConfig => { + val targetDf = spark.range(start = 0, end = 100, step = 1, numPartitions = 5).toDF() + val sourceDf = spark.range(start = 50, end = 150, step = 1, numPartitions = 10).toDF() + val mergeCmdFn: MergeCmd = (targetTable, sourceDf) => { + targetTable + .as("t") + .merge(sourceDf.as("s"), "s.id = t.id") + .whenMatched("s.id + t.id > 1000") + .delete() + .whenNotMatchedBySource("t.id > 1000") + .delete() + .whenNotMatchedBySource("t.id < 1000") + .updateExpr(Map("t.extraCol" -> "t.extraCol + 1")) + .whenNotMatched("s.id > 1000") + .insertAll() + .execute() + } + val expectedOpMetrics = Map[String, Int]( + "numSourceRows" -> 100, + "numOutputRows" -> 100, + "numTargetRowsUpdated" -> 50, + "numTargetRowsNotMatchedBySourceUpdated" -> 50, + "numTargetRowsCopied" -> 50, + "numTargetFilesRemoved" -> 5 + ) + runMergeCmdAndTestMetrics( + targetDf = targetDf, + sourceDf = sourceDf, + mergeCmdFn = mergeCmdFn, + expectedOpMetrics = expectedOpMetrics, + testConfig = testConfig + ) + }} +} + +object MergeIntoMetricsBase extends QueryTest with SharedSparkSession { + + /////////////////////// + // helpful constants // + /////////////////////// + + // Metrics related with affected number of rows. Values should always be deterministic. 
+ val mergeRowMetrics = Set( + "numSourceRows", + "numTargetRowsInserted", + "numTargetRowsUpdated", + "numTargetRowsMatchedUpdated", + "numTargetRowsNotMatchedBySourceUpdated", + "numTargetRowsDeleted", + "numTargetRowsMatchedDeleted", + "numTargetRowsNotMatchedBySourceDeleted", + "numTargetRowsCopied", + "numOutputRows" + ) + // Metrics related with affected number of files. Values depend on the file layout. + val mergeFileMetrics = Set( + "numTargetFilesAdded", "numTargetFilesRemoved", "numTargetBytesAdded", "numTargetBytesRemoved") + // Metrics related with execution times. + val mergeTimeMetrics = Set("executionTimeMs", "scanTimeMs", "rewriteTimeMs") + // Metrics related with CDF. Available only when CDF is available. + val mergeCdfMetrics = Set("numTargetChangeFilesAdded") + // DV Metrics. + val mergeDVMetrics = Set( + "numTargetDeletionVectorsAdded", + "numTargetDeletionVectorsUpdated", + "numTargetDeletionVectorsRemoved") + + // Ensure that all metrics are properly copied here. + assert( + DeltaOperationMetrics.MERGE.size == + mergeRowMetrics.size + + mergeFileMetrics.size + + mergeTimeMetrics.size + + mergeCdfMetrics.size + + mergeDVMetrics.size + ) + + /////////////////// + // helpful types // + /////////////////// + + type MergeCmd = (io.delta.tables.DeltaTable, DataFrame) => Unit + + ///////////////////// + // helpful methods // + ///////////////////// + + /** + * Check invariants for the CDF metrics of MERGE INTO command. Checking the actual values + * is avoided since they depend on the file layout and the type of merge. + * + * @param metrics The merge operation metrics from the Delta history. + * @param cdfEnabled Whether CDF was enabled or not. + */ + def checkMergeOperationCdfMetricsInvariants( + metrics: Map[String, String], + cdfEnabled: Boolean): Unit = { + val numRowsUpdated = metrics("numTargetRowsUpdated").toLong + val numRowsDeleted = metrics("numTargetRowsDeleted").toLong + val numRowsInserted = metrics("numTargetRowsInserted").toLong + val numRowsChanged = numRowsUpdated + numRowsDeleted + numRowsInserted + val numTargetChangeFilesAdded = metrics("numTargetChangeFilesAdded").toLong + + lazy val assertMsg = + s"""Unexpected value for numTargetChangeFilesAdded metric: + | Expected : ${if (numRowsChanged == 0) 0 else "Positive integer value"} + | Actual : $numTargetChangeFilesAdded + | cdfEnabled: $cdfEnabled + | numRowsChanged: $numRowsChanged + | Metrics: ${metrics.toString} + |""".stripMargin + + if (!cdfEnabled || numRowsChanged == 0) { + assert(numTargetChangeFilesAdded === 0, assertMsg) + } else { + // In case of insert-only merges where only new files are added, CDF data are not required + // since the CDF reader can read the corresponding added files. However, there are cases + // where we produce CDF data even in insert-only merges (see 'insert-only-dynamic-predicate' + // testcase for an example). Here we skip the assertion, since both behaviours can be + // considered valid. + val isInsertOnly = numRowsInserted > 0 && numRowsChanged == numRowsInserted + if (!isInsertOnly) { + assert(numTargetChangeFilesAdded > 0, assertMsg) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoNotMatchedBySourceSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoNotMatchedBySourceSuite.scala new file mode 100644 index 00000000000..1e2a53317d9 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoNotMatchedBySourceSuite.scala @@ -0,0 +1,520 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.Row + +trait MergeIntoNotMatchedBySourceSuite extends MergeIntoSuiteBase { + import testImplicits._ + + // All CDC suites run using MergeIntoSQLSuite only. The SQL API for NOT MATCHED BY SOURCE will + // only be available with Spark 3.4. In the meantime, we explicitly run NOT MATCHED BY SOURCE + // tests with CDF enabled and disabled against the Scala API. Use [[testExtendedMerge]] + // instead once we can run tests against the SQL API. + protected def testExtendedMergeWithCDC( + name: String, + namePrefix: String = "not matched by source")( + source: Seq[(Int, Int)], + target: Seq[(Int, Int)], + mergeOn: String, + mergeClauses: MergeClause*)( + result: Seq[(Int, Int)], + cdc: Seq[(Int, Int, String)]): Unit = { + + for { + isPartitioned <- BOOLEAN_DOMAIN + cdcEnabled <- BOOLEAN_DOMAIN + } { + test(s"$namePrefix - $name - isPartitioned: $isPartitioned - cdcEnabled: $cdcEnabled") { + withSQLConf( + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> cdcEnabled.toString) { + withKeyValueData(source, target, isPartitioned) { case (sourceName, targetName) => + withSQLConf(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED.key -> "true") { + executeMerge(s"$targetName t", s"$sourceName s", mergeOn, mergeClauses: _*) + } + val deltaPath = if (targetName.startsWith("delta.`")) { + targetName.stripPrefix("delta.`").stripSuffix("`") + } else targetName + checkAnswer(readDeltaTable(deltaPath), result.map { case (k, v) => Row(k, v) }) + } + if (cdcEnabled) { + checkAnswer(getCDCForLatestOperation(deltaLog, DeltaOperations.OP_MERGE), cdc.toDF()) + } + } + } + } + } + + // Test analysis errors with NOT MATCHED BY SOURCE clauses.
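+ // (Illustrative note, for orientation only: in SQL, available from Spark 3.4, these clauses + // take the form WHEN NOT MATCHED BY SOURCE [AND <condition>] THEN UPDATE SET ... | DELETE. + // The updateNotMatched/deleteNotMatched helpers used below build the same clauses that the + // Scala API exposes as whenNotMatchedBySource(<condition>).updateExpr(...) / .delete().)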
+ testErrorsInUnlimitedClauses( + "error on multiple not matched by source update clauses without condition")( + mergeOn = "s.key = t.key", + updateNotMatched(condition = "t.key == 3", set = "value = 2 * value"), + updateNotMatched(set = "value = 3 * value"), + updateNotMatched(set = "value = 4 * value"))( + errorStrs = "when there are more than one not matched by source clauses in a merge " + + "statement, only the last not matched by source clause can omit the condition" :: Nil) + + testErrorsInUnlimitedClauses( + "error on multiple not matched by source update/delete clauses without condition")( + mergeOn = "s.key = t.key", + updateNotMatched(condition = "t.key == 3", set = "value = 2 * value"), + deleteNotMatched(), + updateNotMatched(set = "value = 4 * value"))( + errorStrs = "when there are more than one not matched by source clauses in a merge " + + "statement, only the last not matched by source clause can omit the condition" :: Nil) + + testErrorsInUnlimitedClauses( + "error on non-empty condition following empty condition in not matched by source " + + "update clauses")( + mergeOn = "s.key = t.key", + updateNotMatched(set = "value = 2 * value"), + updateNotMatched(condition = "t.key < 3", set = "value = value"))( + errorStrs = "when there are more than one not matched by source clauses in a merge " + + "statement, only the last not matched by source clause can omit the condition" :: Nil) + + testErrorsInUnlimitedClauses( + "error on non-empty condition following empty condition in not matched by source " + + "delete clauses")( + mergeOn = "s.key = t.key", + deleteNotMatched(), + deleteNotMatched(condition = "t.key < 3"))( + errorStrs = "when there are more than one not matched by source clauses in a merge " + + "statement, only the last not matched by source clause can omit the condition" :: Nil) + + testAnalysisErrorsInExtendedMerge("update not matched condition - unknown reference")( + mergeOn = "s.key = t.key", + updateNotMatched(condition = "unknownAttrib > 1", set = "tgtValue = tgtValue + 1"))( + // Should show unknownAttrib as invalid ref and (key, tgtValue, srcValue) as valid column names. + errorStrs = "UPDATE condition" :: "unknownAttrib" :: "key" :: "tgtValue" :: Nil) + + testAnalysisErrorsInExtendedMerge("update not matched condition - aggregation function")( + mergeOn = "s.key = t.key", + updateNotMatched(condition = "max(0) > 0", set = "tgtValue = tgtValue + 1"))( + errorStrs = "UPDATE condition" :: "aggregate functions are not supported" :: Nil) + + testAnalysisErrorsInExtendedMerge("update not matched condition - subquery")( + mergeOn = "s.key = t.key", + updateNotMatched(condition = "s.value in (select value from t)", set = "tgtValue = 1"))( + errorStrs = Nil + ) // subqueries fail for unresolved reference to `t` + + testAnalysisErrorsInExtendedMerge("delete not matched condition - unknown reference")( + mergeOn = "s.key = t.key", + deleteNotMatched(condition = "unknownAttrib > 1"))( + // Should show unknownAttrib as invalid ref and (key, tgtValue, srcValue) as valid column names. 
+ errorStrs = "DELETE condition" :: "unknownAttrib" :: "key" :: "tgtValue" :: Nil) + + testAnalysisErrorsInExtendedMerge("delete not matched condition - aggregation function")( + mergeOn = "s.key = t.key", + deleteNotMatched(condition = "max(0) > 0"))( + errorStrs = "DELETE condition" :: "aggregate functions are not supported" :: Nil) + + testAnalysisErrorsInExtendedMerge("delete not matched condition - subquery")( + mergeOn = "s.key = t.key", + deleteNotMatched(condition = "s.srcValue in (select tgtValue from t)"))( + errorStrs = Nil) // subqueries fail for unresolved reference to `t` + + // Test correctness with NOT MATCHED BY SOURCE clauses. + testExtendedMergeWithCDC("all 3 types of match clauses without conditions")( + source = (0, 0) :: (1, 1) :: (5, 5) :: Nil, + target = (2, 20) :: (1, 10) :: (5, 50) :: Nil, + mergeOn = "s.key = t.key", + update(set = "*"), + insert(values = "*"), + deleteNotMatched())( + result = Seq( + (0, 0), // No matched by target, inserted + (1, 1), // Matched, updated + // (2, 20) Not matched by source, deleted + (5, 5) // Matched, updated + ), + cdc = Seq( + (0, 0, "insert"), + (1, 10, "update_preimage"), + (1, 1, "update_postimage"), + (2, 20, "delete"), + (5, 50, "update_preimage"), + (5, 5, "update_postimage"))) + + testExtendedMergeWithCDC("all 3 types of match clauses with conditions")( + source = (0, 0) :: (1, 1) :: (5, 5) :: (6, 6) :: Nil, + target = (1, 10) :: (2, 20) :: (5, 50) :: (7, 70) :: Nil, + mergeOn = "s.key = t.key", + update(set = "*", condition = "t.value < 30"), + insert(values = "*", condition = "s.value < 4"), + deleteNotMatched(condition = "t.value > 40"))( + result = Seq( + (0, 0), // Not matched by target, inserted + (1, 1), // Matched, updated + (2, 20), // Not matched by source, no change + (5, 50) // Matched, not updated + // (6, 6) Not matched by target, no change + // (7, 7) Not matched by source, deleted + ), + cdc = Seq( + (0, 0, "insert"), + (1, 10, "update_preimage"), + (1, 1, "update_postimage"), + (7, 70, "delete"))) + + testExtendedMergeWithCDC("unconditional delete only when not matched by source")( + source = (0, 0) :: (1, 1) :: (5, 5) :: Nil, + target = (2, 20) :: (1, 10) :: (5, 50) :: (6, 60) :: Nil, + mergeOn = "s.key = t.key", + deleteNotMatched())( + result = Seq( + (1, 10), // Matched, no change + // (2, 20) Not matched by source, deleted + (5, 50) // Matched, no change + // (6, 60) Not matched by source, deleted + ), + cdc = Seq((2, 20, "delete"), (6, 60, "delete"))) + + testExtendedMergeWithCDC("conditional delete only when not matched by source")( + source = (0, 0) :: (1, 1) :: (5, 5) :: Nil, + target = (1, 10) :: (2, 20) :: (5, 50) :: (6, 60) :: Nil, + mergeOn = "s.key = t.key", + deleteNotMatched(condition = "t.value > 40"))( + result = Seq( + (1, 10), // Matched, no change + (2, 20), // Not matched by source, no change + (5, 50) // Matched, no change + // (6, 60) Not matched by source, deleted + ), + cdc = Seq((6, 60, "delete"))) + + testExtendedMergeWithCDC("delete only matched and not matched by source")( + source = (1, 1) :: (2, 2) :: (5, 5) :: (6, 6) :: Nil, + target = (1, 10) :: (2, 20) :: (3, 30) :: (4, 40) :: Nil, + mergeOn = "s.key = t.key", + delete("s.value % 2 = 0"), + deleteNotMatched("t.value % 20 = 0"))( + result = Seq( + (1, 10), // Matched, no change + // (2, 20) Matched, deleted + (3, 30) // Not matched by source, no change + // (4, 40) Not matched by source, deleted + ), + cdc = Seq((2, 20, "delete"), (4, 40, "delete"))) + + testExtendedMergeWithCDC("unconditionally delete matched and not 
matched by source")( + source = (0, 0) :: (1, 1) :: (5, 5) :: (6, 6) :: Nil, + target = (1, 10) :: (2, 20) :: (5, 50) :: Nil, + mergeOn = "s.key = t.key", + delete(), + deleteNotMatched())( + result = Seq.empty, + cdc = Seq((1, 10, "delete"), (2, 20, "delete"), (5, 50, "delete"))) + + testExtendedMergeWithCDC("unconditional not matched by source update")( + source = (0, 0) :: (1, 1) :: (5, 5) :: Nil, + target = (1, 10) :: (2, 20) :: (4, 40) :: (5, 50) :: Nil, + mergeOn = "s.key = t.key", + updateNotMatched(set = "t.value = t.value + 1"))( + result = Seq( + (1, 10), // Matched, no change + (2, 21), // Not matched by source, updated + (4, 41), // Not matched by source, updated + (5, 50) // Matched, no change + ), + cdc = Seq( + (2, 20, "update_preimage"), + (2, 21, "update_postimage"), + (4, 40, "update_preimage"), + (4, 41, "update_postimage"))) + + testExtendedMergeWithCDC("conditional not matched by source update")( + source = (0, 0) :: (1, 1) :: (5, 5) :: Nil, + target = (1, 10) :: (2, 20) :: (4, 40) :: (5, 50) :: Nil, + mergeOn = "s.key = t.key", + updateNotMatched(condition = "t.value = 20", set = "t.value = t.value + 1"))( + result = Seq( + (1, 10), // Matched, no change + (2, 21), // Not matched by source, updated + (4, 40), // Not matched by source, no change + (5, 50) // Matched, no change + ), + cdc = Seq((2, 20, "update_preimage"), (2, 21, "update_postimage"))) + + testExtendedMergeWithCDC("not matched by source update and delete with skipping")( + source = (0, 0) :: (1, 1) :: (2, 2) :: (5, 5) :: Nil, + target = (1, 10) :: (2, 20) :: (4, 40) :: (5, 50) :: Nil, + mergeOn = "s.key = t.key and t.key > 4", + updateNotMatched(condition = "t.key = 1", set = "t.value = t.value + 1"), + deleteNotMatched(condition = "t.key = 4"))( + result = Seq( + (1, 11), // Not matched by source based on merge condition, updated + (2, 20), // Not matched by source based on merge condition, no change + // (4, 40), Not matched by source, deleted + (5, 50) // Matched, no change + ), + cdc = Seq( + (1, 10, "update_preimage"), + (1, 11, "update_postimage"), + (4, 40, "delete"))) + + testExtendedMergeWithCDC( + "matched delete and not matched by source update with skipping")( + source = (0, 0) :: (1, 1) :: (2, 2) :: (5, 5) :: (6, 6) :: Nil, + target = (1, 10) :: (2, 20) :: (4, 40) :: (5, 50) :: (6, 60) :: Nil, + mergeOn = "s.key = t.key and t.key > 4", + delete(condition = "t.key = 5"), + updateNotMatched(condition = "t.key = 1", set = "t.value = t.value + 1"))( + result = Seq( + (1, 11), // Not matched by source based on merge condition, updated + (2, 20), // Not matched by source based on merge condition, no change + (4, 40), // Not matched by source, no change + // (5, 50), Matched, deleted + (6, 60) // Matched, no change + ), + cdc = Seq( + (1, 10, "update_preimage"), + (1, 11, "update_postimage"), + (5, 50, "delete"))) + + testExtendedMergeWithCDC("not matched by source update + delete clauses")( + source = (0, 0) :: (1, 1) :: (5, 5) :: Nil, + target = (1, 10) :: (2, 20) :: (7, 70) :: Nil, + mergeOn = "s.key = t.key", + deleteNotMatched("t.value % 20 = 0"), + updateNotMatched(set = "t.value = t.value + 1"))( + result = Seq( + (1, 10), // Matched, no change + // (2, 20) Not matched by source, deleted + (7, 71) // Not matched by source, updated + ), + cdc = Seq((2, 20, "delete"), (7, 70, "update_preimage"), (7, 71, "update_postimage"))) + + testExtendedMergeWithCDC("unconditional not matched by source update + not matched insert")( + source = (0, 0) :: (1, 1) :: (4, 4) :: (5, 5) :: Nil, + target = (1, 10) 
:: (2, 20) :: (4, 40) :: (7, 70) :: Nil, + mergeOn = "s.key = t.key", + insert("*"), + updateNotMatched(set = "t.value = t.value + 1"))( + result = Seq( + (0, 0), // Not matched by target, inserted + (1, 10), // Matched, no change + (2, 21), // Not matched by source, updated + (4, 40), // Matched, no change + (5, 5), // Not matched by target, inserted + (7, 71) // Not matched by source, updated + ), + cdc = Seq( + (0, 0, "insert"), + (2, 20, "update_preimage"), + (2, 21, "update_postimage"), + (5, 5, "insert"), + (7, 70, "update_preimage"), + (7, 71, "update_postimage"))) + + testExtendedMergeWithCDC("not matched by source delete + not matched insert")( + source = (0, 0) :: (1, 1) :: (5, 5) :: Nil, + target = (1, 10) :: (2, 20) :: (7, 70) :: Nil, + mergeOn = "s.key = t.key", + insert("*"), + deleteNotMatched("t.value % 20 = 0"))( + result = Seq( + (0, 0), // Not matched by target, inserted + (1, 10), // Matched, no change + // (2, 20), Not matched by source, deleted + (5, 5), // Not matched by target, inserted + (7, 70) // Not matched by source, no change + ), + cdc = Seq((0, 0, "insert"), (2, 20, "delete"), (5, 5, "insert"))) + + testExtendedMergeWithCDC("multiple not matched by source clauses")( + source = (0, 0) :: (1, 1) :: (5, 5) :: Nil, + target = (6, 6) :: (7, 7) :: (8, 8) :: (9, 9) :: (10, 10) :: (11, 11) :: Nil, + mergeOn = "s.key = t.key", + updateNotMatched(condition = "t.key % 6 = 0", set = "t.value = t.value + 5"), + updateNotMatched(condition = "t.key % 6 = 1", set = "t.value = t.value + 4"), + updateNotMatched(condition = "t.key % 6 = 2", set = "t.value = t.value + 3"), + updateNotMatched(condition = "t.key % 6 = 3", set = "t.value = t.value + 2"), + updateNotMatched(condition = "t.key % 6 = 4", set = "t.value = t.value + 1"), + deleteNotMatched())( + result = Seq( + (6, 11), // Not matched by source, updated + (7, 11), // Not matched by source, updated + (8, 11), // Not matched by source, updated + (9, 11), // Not matched by source, updated + (10, 11) // Not matched by source, updated + // (11, 11) Not matched by source, deleted + ), + cdc = Seq( + (6, 6, "update_preimage"), + (6, 11, "update_postimage"), + (7, 7, "update_preimage"), + (7, 11, "update_postimage"), + (8, 8, "update_preimage"), + (8, 11, "update_postimage"), + (9, 9, "update_preimage"), + (9, 11, "update_postimage"), + (10, 10, "update_preimage"), + (10, 11, "update_postimage"), + (11, 11, "delete"))) + + testExtendedMergeWithCDC("not matched by source update + conditional insert")( + source = (1, 1) :: (0, 2) :: (5, 5) :: Nil, + target = (2, 2) :: (1, 4) :: (7, 3) :: Nil, + mergeOn = "s.key = t.key", + insert(condition = "s.value % 2 = 0", values = "*"), + updateNotMatched(set = "t.value = t.value + 1"))( + result = Seq( + (0, 2), // Not matched (by target), inserted + (2, 3), // Not matched by source, updated + (1, 4), // Matched, no change + // (5, 5) // Not matched (by target), not inserted + (7, 4) // Not matched by source, updated + ), + cdc = Seq( + (0, 2, "insert"), + (2, 2, "update_preimage"), + (2, 3, "update_postimage"), + (7, 3, "update_preimage"), + (7, 4, "update_postimage"))) + + testExtendedMergeWithCDC("not matched by source delete + conditional insert")( + source = (1, 1) :: (0, 2) :: (5, 5) :: Nil, + target = (2, 2) :: (1, 4) :: (7, 3) :: Nil, + mergeOn = "s.key = t.key", + insert(condition = "s.value % 2 = 0", values = "*"), + deleteNotMatched(condition = "t.value > 2"))( + result = Seq( + (0, 2), // Not matched (by target), inserted + (2, 2), // Not matched by source, no change + (1, 4) 
// Matched, no change + // (5, 5) // Not matched (by target), not inserted + // (7, 3) Not matched by source, deleted + ), + cdc = Seq((0, 2, "insert"), (7, 3, "delete"))) + + testExtendedMergeWithCDC("when not matched by source updates all rows")( + source = (1, 1) :: (0, 2) :: (5, 5) :: Nil, + target = (3, 3) :: (4, 4) :: (6, 6) :: (7, 7) :: (8, 8) :: (9, 9) :: Nil, + mergeOn = "s.key = t.key", + updateNotMatched(set = "t.value = t.value + 1"))( + result = Seq( + (3, 4), // Not matched by source, updated + (4, 5), // Not matched by source, updated + (6, 7), // Not matched by source, updated + (7, 8), // Not matched by source, updated + (8, 9), // Not matched by source, updated + (9, 10) // Not matched by source, updated + ), + cdc = Seq( + (3, 3, "update_preimage"), + (3, 4, "update_postimage"), + (4, 4, "update_preimage"), + (4, 5, "update_postimage"), + (6, 6, "update_preimage"), + (6, 7, "update_postimage"), + (7, 7, "update_preimage"), + (7, 8, "update_postimage"), + (8, 8, "update_preimage"), + (8, 9, "update_postimage"), + (9, 9, "update_preimage"), + (9, 10, "update_postimage"))) + + testExtendedMergeWithCDC("insert only with dummy not matched by source")( + source = (1, 1) :: (0, 2) :: (5, 5) :: Nil, + target = (2, 2) :: (1, 4) :: (7, 3) :: Nil, + mergeOn = "s.key = t.key", + insert(condition = "s.value % 2 = 0", values = "*"), + deleteNotMatched(condition = "t.value > 10"))( + result = Seq( + (0, 2), // Not matched (by target), inserted + (2, 2), // Not matched by source, no change + (1, 4), // Matched, no change + // (5, 5) // Not matched (by target), not inserted + (7, 3) // Not matched by source, no change + ), + cdc = Seq((0, 2, "insert"))) + + testExtendedMergeWithCDC("empty source")( + source = Nil, + target = (2, 2) :: (1, 4) :: (7, 3) :: Nil, + mergeOn = "s.key = t.key", + updateNotMatched(condition = "t.key = 2", set = "value = t.value + 1"), + deleteNotMatched(condition = "t.key = 7"))( + result = Seq( + (2, 3), // Not matched by source, updated + (1, 4) // Not matched by source, no change + // (7, 3) Not matched by source, deleted + ), + cdc = Seq( + (2, 2, "update_preimage"), + (2, 3, "update_postimage"), + (7, 3, "delete"))) + + testExtendedMergeWithCDC("empty source delete only")( + source = Nil, + target = (2, 2) :: (1, 4) :: (7, 3) :: Nil, + mergeOn = "s.key = t.key", + deleteNotMatched(condition = "t.key = 7"))( + result = Seq( + (2, 2), // Not matched by source, no change + (1, 4) // Not matched by source, no change + // (7, 3) Not matched by source, deleted + ), + cdc = Seq((7, 3, "delete"))) + + testExtendedMergeWithCDC("all 3 clauses - no changes")( + source = (1, 1) :: (0, 2) :: (5, 5) :: Nil, + target = (2, 2) :: (1, 4) :: (7, 3) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "t.value > 10", set = "*"), + insert(condition = "s.value > 10", values = "*"), + deleteNotMatched(condition = "t.value > 10"))( + result = Seq( + (2, 2), // Not matched by source, no change + (1, 4), // Matched, no change + (7, 3) // Not matched by source, no change + ), + cdc = Seq.empty) + + test(s"special character in path - not matched by source delete") { + val source = s"$tempDir/sou rce^" + val target = s"$tempDir/tar get=" + spark.range(0, 10, 2).write.format("delta").save(source) + spark.range(10).write.format("delta").save(target) + executeMerge( + tgt = s"delta.`$target` t", + src = s"delta.`$source` s", + cond = "t.id = s.id", + clauses = deleteNotMatched()) + checkAnswer(readDeltaTable(target), Seq(0, 2, 4, 6, 8).toDF("id")) + } + + test(s"special character in 
path - not matched by source update") { + val source = s"$tempDir/sou rce@" + val target = s"$tempDir/tar get#" + spark.range(0, 10, 2).write.format("delta").save(source) + spark.range(10).write.format("delta").save(target) + executeMerge( + tgt = s"delta.`$target` t", + src = s"delta.`$source` s", + cond = "t.id = s.id", + clauses = updateNotMatched(set = "id = t.id * 10")) + checkAnswer(readDeltaTable(target), Seq(0, 10, 2, 30, 4, 50, 6, 70, 8, 90).toDF("id")) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSQLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSQLSuite.scala new file mode 100644 index 00000000000..c40d0e19fe9 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSQLSuite.scala @@ -0,0 +1,399 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.{DeltaColumnMappingSelectedTestMixin, DeltaSQLCommandTest} + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.{Analyzer, ResolveSessionCatalog} +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.{DeltaMergeInto, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.functions.udf +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} + +class MergeIntoSQLSuite extends MergeIntoSuiteBase + with MergeIntoSQLTestUtils + with MergeIntoNotMatchedBySourceSuite + with DeltaSQLCommandTest + with DeltaTestUtilsForTempViews { + + import testImplicits._ + + test("CTE as a source in MERGE") { + withTable("source") { + Seq((1, 1), (0, 3)).toDF("key1", "value").write.saveAsTable("source") + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + + val cte = "WITH cte1 AS (SELECT key1 + 2 AS key3, value FROM source) " + val merge = basicMergeStmt( + target = s"delta.`$tempPath` as target", + source = "cte1 src", + condition = "src.key3 = target.key2", + update = "key2 = 20 + src.key3, value = 20 + src.value", + insert = "(key2, value) VALUES (src.key3 - 10, src.value + 10)") + + QueryTest.checkAnswer(sql(cte + merge), Seq(Row(2, 1, 0, 1))) + checkAnswer(readDeltaTable(tempPath), + Row(1, 4) :: // No change + Row(22, 23) :: // Update + Row(-7, 11) :: // Insert + Nil) + } + } + + test("inline tables with set operations in source query") { + withTable("source") { + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + + executeMerge( + target = s"delta.`$tempPath` as trg", + source = + """ + |( SELECT * FROM VALUES (1, 6, "a") as t1(key1, value, others) + | UNION + | SELECT * FROM VALUES (0, 3, "b") as t2(key1, value, others) + |) src + """.stripMargin, + 
condition = "src.key1 = trg.key2", + update = "trg.key2 = 20 + key1, value = 20 + src.value", + insert = "(trg.key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(2, 2) :: // No change + Row(21, 26) :: // Update + Row(-10, 13) :: // Insert + Nil) + } + } + + testNestedDataSupport("conflicting assignments between two nested fields")( + source = """{ "key": "A", "value": { "a": { "x": 0 } } }""", + target = """{ "key": "A", "value": { "a": { "x": 1 } } }""", + update = "value.a.x = 2" :: "value.a.x = 3" :: Nil, + errorStrs = "There is a conflict from these SET columns" :: Nil) + + test("Negative case - basic syntax analysis SQL") { + withTable("source") { + Seq((1, 1), (0, 3), (1, 5)).toDF("key1", "value").createOrReplaceTempView("source") + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + + // duplicate column names in update clause + var e = intercept[AnalysisException] { + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key2 = 1, key2 = 2", + insert = "(key2, value) VALUES (3, 4)") + }.getMessage + + errorContains(e, "There is a conflict from these SET columns") + + // duplicate column names in insert clause + e = intercept[AnalysisException] { + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key2 = 1, value = 2", + insert = "(key2, key2) VALUES (3, 4)") + }.getMessage + + errorContains(e, "Duplicate column names in INSERT clause") + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"no column is used from source table - column pruning, isPartitioned: $isPartitioned") { + withTable("source") { + val partitions = if (isPartitioned) "key2" :: Nil else Nil + append(Seq((2, 2), (1, 4)).toDF("key2", "value"), partitions) + Seq((1, 1, "a"), (0, 3, "b")).toDF("key1", "value", "col1") + .createOrReplaceTempView("source") + + // filter pushdown can cause empty join conditions and cross-join being used + withCrossJoinEnabled { + val merge = basicMergeStmt( + target = s"delta.`$tempPath`", + source = "source src", + condition = "key2 < 0", // no row match + update = "key2 = 20, value = 20", + insert = "(key2, value) VALUES (10, 10)") + + val df = sql(merge) + + val readSchema: Seq[StructType] = df.queryExecution.executedPlan.collect { + case f: FileSourceScanExec => f.requiredSchema + } + assert(readSchema.flatten.isEmpty, "column pruning does not work") + } + + checkAnswer(readDeltaTable(tempPath), + Row(2, 2) :: // No change + Row(1, 4) :: // No change + Row(10, 10) :: // Insert + Row(10, 10) :: // Insert + Nil) + } + } + } + + test("negative case - omit multiple insert conditions") { + withTable("source") { + Seq((1, 1), (0, 3)).toDF("srcKey", "srcValue").write.saveAsTable("source") + append(Seq((2, 2), (1, 4)).toDF("trgKey", "trgValue")) + + // only the last NOT MATCHED clause can omit the condition + val e = intercept[ParseException]( + sql(s""" + |MERGE INTO delta.`$tempPath` + |USING source + |ON srcKey = trgKey + |WHEN NOT MATCHED THEN + | INSERT (trgValue, trgKey) VALUES (srcValue, srcKey + 1) + |WHEN NOT MATCHED THEN + | INSERT (trgValue, trgKey) VALUES (srcValue, srcKey) + """.stripMargin)) + assert(e.getMessage.contains( + "only the last NOT MATCHED [BY TARGET] clause can omit the condition")) + } + } + + def testNondeterministicOrder(insertOnly: Boolean): Unit = { + withTable("target") { + // For the spark sql random() function the seed is fixed for both 
invocations + val trueRandom = () => Math.random() + val trueRandomUdf = udf(trueRandom) + spark.udf.register("trueRandom", trueRandomUdf.asNondeterministic()) + + sql("CREATE TABLE target(`trgKey` INT, `trgValue` INT) using delta") + sql("INSERT INTO target VALUES (1,2), (3,4)") + // This generates different data sets on every execution + val sourceSql = + s""" + |(SELECT r.id AS srcKey, r.id AS srcValue + | FROM range(1, 100000) as r + | JOIN (SELECT trueRandom() * 100000 AS bound) ON r.id < bound + |) AS source + |""".stripMargin + + if (insertOnly) { + sql(s""" + |MERGE INTO target + |USING ${sourceSql} + |ON srcKey = trgKey + |WHEN NOT MATCHED THEN + | INSERT (trgValue, trgKey) VALUES (srcValue, srcKey) + |""".stripMargin) + } else { + sql(s""" + |MERGE INTO target + |USING ${sourceSql} + |ON srcKey = trgKey + |WHEN MATCHED THEN + | UPDATE SET trgValue = srcValue + |WHEN NOT MATCHED THEN + | INSERT (trgValue, trgKey) VALUES (srcValue, srcKey) + |""".stripMargin) + } + } + } + + test(s"detect nondeterministic source - flag on") { + withSQLConf( + // materializing source would fix determinism + DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> DeltaSQLConf.MergeMaterializeSource.NONE, + DeltaSQLConf.MERGE_FAIL_IF_SOURCE_CHANGED.key -> "true" + ) { + val e = intercept[UnsupportedOperationException]( + testNondeterministicOrder(insertOnly = false) + ) + assert(e.getMessage.contains("source dataset is not deterministic")) + } + } + + test(s"detect nondeterministic source - flag on - insertOnly") { + withSQLConf( + // materializing source would fix determinism + DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> DeltaSQLConf.MergeMaterializeSource.NONE, + DeltaSQLConf.MERGE_FAIL_IF_SOURCE_CHANGED.key -> "true") { + testNondeterministicOrder(insertOnly = true) + } + } + + test("detect nondeterministic source - flag off") { + withSQLConf( + // materializing source would fix determinism + DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> DeltaSQLConf.MergeMaterializeSource.NONE, + DeltaSQLConf.MERGE_FAIL_IF_SOURCE_CHANGED.key -> "false" + ) { + testNondeterministicOrder(insertOnly = false) + } + } + + test("detect nondeterministic source - flag on, materialized") { + withSQLConf( + // materializing source fixes determinism, so the source is no longer nondeterministic + DeltaSQLConf.MERGE_MATERIALIZE_SOURCE.key -> DeltaSQLConf.MergeMaterializeSource.ALL, + DeltaSQLConf.MERGE_FAIL_IF_SOURCE_CHANGED.key -> "true" + ) { + testNondeterministicOrder(insertOnly = false) + } + } + + test("merge into a dataset temp views with star") { + withTempView("v") { + def testMergeWithView(testClue: String): Unit = { + withClue(testClue) { + withTempView("src") { + sql("CREATE TEMP VIEW src AS SELECT * FROM VALUES (10, 1), (20, 2) AS t(value, key)") + sql( + s""" + |MERGE INTO v + |USING src + |ON src.key = v.key + |WHEN MATCHED THEN + | UPDATE SET * + |WHEN NOT MATCHED THEN + | INSERT * + |""".stripMargin) + checkAnswer(spark.sql(s"select * from v"), Seq(Row(0, 0), Row(1, 10), Row(2, 20))) + } + } + } + + // View on path-based table + append(Seq((0, 0), (1, 1)).toDF("key", "value")) + readDeltaTable(tempPath).createOrReplaceTempView("v") + testMergeWithView("with path-based table") + + // View on catalog table + withTable("tab") { + Seq((0, 0), (1, 1)).toDF("key", "value").write.format("delta").saveAsTable("tab") + spark.table("tab").as("name").createOrReplaceTempView("v") + testMergeWithView(s"delta.`$tempPath`") + } + } + } + + + testWithTempView("Update specific column does not work in temp views") { isSQLTempView => + 
withJsonData( + """{ "key": "A", "value": { "a": { "x": 1 } } }""", + """{ "key": "A", "value": { "a": { "x": 2 } } }""" + ) { (sourceName, targetName) => + createTempViewFromTable(targetName, isSQLTempView) + val fieldNames = spark.table(targetName).schema.fieldNames + val fieldNamesStr = fieldNames.mkString("`", "`, `", "`") + val e = intercept[DeltaAnalysisException] { + executeMerge( + target = "v t", + source = s"$sourceName s", + condition = "s.key = t.key", + update = "value.a.x = s.value.a.x", + insert = s"($fieldNamesStr) VALUES ($fieldNamesStr)") + } + assert(e.getMessage.contains("Unexpected assignment key")) + } + } + + test("Complex Data Type - Array of Struct") { + withTable("source") { + withTable("target") { + // scalastyle:off line.size.limit + sql("CREATE TABLE source(`smtUidNr` STRING,`evt` ARRAY>, `evtShu`: ARRAY>>>, `evtTypCd`: STRING, `evtUsrNr`: STRING, `evtUtcTcfQy`: STRING, `evtUtcTs`: STRING, `evtWstNa`: STRING, `loc`: ARRAY>, `mltDelOdrNr`: STRING, `mltPrfOfDelNa`: STRING, `mltSmtConNr`: STRING, `mnfOidNr`: STRING, `rpnEntLinNr`: STRING, `rpnEntLvlStsCd`: STRING, `rpnGovAcoTe`: STRING, `rpnInfSrcCrtLclTmZnNa`: STRING, `rpnInfSrcCrtLclTs`: STRING, `rpnInfSrcCrtUtcTcfQy`: STRING, `rpnInfSrcCrtUtcTs`: STRING, `rpnLinLvlStsCd`: STRING, `rpnPgaLinNr`: STRING, `smtDcvDt`: STRING, `smtNr`: STRING, `smtUidNr`: STRING, `xcpCtmDspCd`: STRING, `xcpGovAcoTe`: STRING, `xcpPgmCd`: STRING, `xcpRlvCd`: STRING, `xcpRlvDscTe`: STRING, `xcpRlvLclTmZnNa`: STRING, `xcpRlvLclTs`: STRING, `xcpRlvUtcTcfQy`: STRING, `xcpRlvUtcTs`: STRING, `xcpRsnCd`: STRING, `xcpRsnDscTe`: STRING, `xcpStsCd`: STRING, `xcpStsDscTe`: STRING>>,`msgTs` TIMESTAMP) using delta") + sql("CREATE TABLE target(`smtUidNr` STRING,`evt` ARRAY>, `evtShu`: ARRAY>>>, `evtTypCd`: STRING, `evtUsrNr`: STRING, `evtUtcTcfQy`: STRING, `evtUtcTs`: STRING, `evtWstNa`: STRING, `loc`: ARRAY>, `mltDelOdrNr`: STRING, `mltPrfOfDelNa`: STRING, `mltSmtConNr`: STRING, `mnfOidNr`: STRING, `rpnEntLinNr`: STRING, `rpnEntLvlStsCd`: STRING, `rpnGovAcoTe`: STRING, `rpnInfSrcCrtLclTmZnNa`: STRING, `rpnInfSrcCrtLclTs`: STRING, `rpnInfSrcCrtUtcTcfQy`: STRING, `rpnInfSrcCrtUtcTs`: STRING, `rpnLinLvlStsCd`: STRING, `smtDcvDt`: STRING, `smtNr`: STRING, `smtUidNr`: STRING, `xcpCtmDspCd`: STRING, `xcpRlvCd`: STRING, `xcpRlvDscTe`: STRING, `xcpRlvLclTmZnNa`: STRING, `xcpRlvLclTs`: STRING, `xcpRlvUtcTcfQy`: STRING, `xcpRlvUtcTs`: STRING, `xcpRsnCd`: STRING, `xcpRsnDscTe`: STRING, `xcpStsCd`: STRING, `xcpStsDscTe`: STRING, `cmyHdrOidNr`: STRING, `cmyLinNr`: STRING, `coeOidNr`: STRING, `rpnPgaLinNr`: STRING, `xcpGovAcoTe`: STRING, `xcpPgmCd`: STRING>>,`msgTs` TIMESTAMP) using delta") + // scalastyle:on line.size.limit + sql( + s""" + |MERGE INTO target as r + |USING source as u + |ON u.smtUidNr = r.smtUidNr + |WHEN MATCHED and u.msgTs > r.msgTs THEN + | UPDATE SET * + |WHEN NOT MATCHED THEN + | INSERT * + """.stripMargin) + } + } + } + + Seq(true, false).foreach { partitioned => + test(s"User defined _change_type column doesn't get dropped - partitioned=$partitioned") { + withTable("target") { + sql( + s"""CREATE TABLE target USING DELTA + |${if (partitioned) "PARTITIONED BY (part) " else ""} + |TBLPROPERTIES (delta.enableChangeDataFeed = false) + |AS SELECT id, int(id / 10) AS part, 'foo' as _change_type + |FROM RANGE(1000) + |""".stripMargin) + executeMerge( + target = "target as t", + source = + """( + | SELECT id * 42 AS id, int(id / 10) AS part, 'bar' as _change_type FROM RANGE(33) + |) s""".stripMargin, + condition = "t.id = s.id", + update = "*", 
+ insert = "*") + + sql("SELECT id, _change_type FROM target").collect().foreach { row => + val _change_type = row.getString(1) + assert(_change_type === "foo" || _change_type === "bar", + s"Invalid _change_type for id=${row.get(0)}") + } + } + } + } + + test("SET * with schema evolution") { + withTable("tgt", "src") { + withSQLConf("spark.databricks.delta.schema.autoMerge.enabled" -> "true") { + sql("create table tgt(id int, delicious string, dummy_col string) using delta") + sql("create table src(id int, delicious string) using parquet") + // Make sure this MERGE command can resolve + sql( + """ + |merge into tgt as target + |using (select * from src) as source on target.id=source.id + |when matched then update set * + |when not matched then insert *; + |""".stripMargin) + } + } + } +} + +trait MergeIntoSQLColumnMappingSuiteBase extends DeltaColumnMappingSelectedTestMixin { + override protected def runOnlyTests: Seq[String] = + Seq("schema evolution - new nested column with update non-* and insert * - " + + "array of struct - longer target") +} + +class MergeIntoSQLIdColumnMappingSuite extends MergeIntoSQLSuite + with DeltaColumnMappingEnableIdMode + with MergeIntoSQLColumnMappingSuiteBase + +class MergeIntoSQLNameColumnMappingSuite extends MergeIntoSQLSuite + with DeltaColumnMappingEnableNameMode + with MergeIntoSQLColumnMappingSuiteBase diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoScalaSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoScalaSuite.scala new file mode 100644 index 00000000000..9a970104c33 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoScalaSuite.scala @@ -0,0 +1,715 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.util.Locale + +import org.apache.spark.sql.delta.actions.SetTransaction +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.{DeltaExcludedTestMixin, DeltaSQLCommandTest} + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.plans.Inner +import org.apache.spark.sql.catalyst.plans.logical.{Assignment, DeltaMergeIntoClause, Join} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.StructType + +class MergeIntoScalaSuite extends MergeIntoSuiteBase + with MergeIntoScalaTestUtils + with MergeIntoNotMatchedBySourceSuite + with DeltaSQLCommandTest + with DeltaTestUtilsForTempViews + with DeltaExcludedTestMixin { + + import testImplicits._ + + override def excluded: Seq[String] = super.excluded ++ Seq( + // Exclude tempViews, because DeltaTable.forName does not resolve them correctly, so no one can + // use them anyway with the Scala API. 
+ // scalastyle:off line.size.limit + "basic case - merge to view on a Delta table by path, partitioned: true skippingEnabled: true useSqlView: true", + "basic case - merge to view on a Delta table by path, partitioned: true skippingEnabled: true useSqlView: false", + "basic case - merge to view on a Delta table by path, partitioned: false skippingEnabled: true useSqlView: true", + "basic case - merge to view on a Delta table by path, partitioned: false skippingEnabled: true useSqlView: false", + "basic case - merge to view on a Delta table by path, partitioned: true skippingEnabled: false useSqlView: true", + "basic case - merge to view on a Delta table by path, partitioned: true skippingEnabled: false useSqlView: false", + "basic case - merge to view on a Delta table by path, partitioned: false skippingEnabled: false useSqlView: true", + "basic case - merge to view on a Delta table by path, partitioned: false skippingEnabled: false useSqlView: false", + "Negative case - more operations between merge and delta target", + "test merge on temp view - basic - SQL TempView", + "test merge on temp view - basic - Dataset TempView", + "test merge on temp view - subset cols - SQL TempView", + "test merge on temp view - subset cols - Dataset TempView", + "test merge on temp view - superset cols - SQL TempView", + "test merge on temp view - superset cols - Dataset TempView", + "test merge on temp view - nontrivial projection - SQL TempView", + "test merge on temp view - nontrivial projection - Dataset TempView", + "test merge on temp view - view with too many internal aliases - SQL TempView", + "test merge on temp view - view with too many internal aliases - Dataset TempView", + "Update specific column works fine in temp views - SQL TempView", + "Update specific column works fine in temp views - Dataset TempView" + // scalastyle:on line.size.limit + ) + + + test("basic scala API") { + withTable("source") { + append(Seq((1, 10), (2, 20)).toDF("key1", "value1"), Nil) // target + val source = Seq((1, 100), (3, 30)).toDF("key2", "value2") // source + + io.delta.tables.DeltaTable.forPath(spark, tempPath) + .merge(source, "key1 = key2") + .whenMatched().updateExpr(Map("key1" -> "key2", "value1" -> "value2")) + .whenNotMatched().insertExpr(Map("key1" -> "key2", "value1" -> "value2")) + .execute() + + checkAnswer( + readDeltaTable(tempPath), + Row(1, 100) :: // Update + Row(2, 20) :: // No change + Row(3, 30) :: // Insert + Nil) + } + } + + + // test created to validate a fix for a bug where merge command was + // resulting in a empty target table when statistics collection is disabled + test("basic scala API - without stats") { + withSQLConf((DeltaSQLConf.DELTA_COLLECT_STATS.key, "false")) { + withTable("source") { + append(Seq((1, 10), (2, 20)).toDF("key1", "value1"), Nil) // target + val source = Seq((1, 100), (3, 30)).toDF("key2", "value2") // source + + io.delta.tables.DeltaTable.forPath(spark, tempPath) + .merge(source, "key1 = key2") + .whenMatched().updateExpr(Map("key1" -> "key2", "value1" -> "value2")) + .whenNotMatched().insertExpr(Map("key1" -> "key2", "value1" -> "value2")) + .execute() + + checkAnswer( + readDeltaTable(tempPath), + Row(1, 100) :: // Update + Row(2, 20) :: // No change + Row(3, 30) :: // Insert + Nil) + } + } + } + + test("extended scala API") { + withTable("source") { + append(Seq((1, 10), (2, 20), (4, 40)).toDF("key1", "value1"), Nil) // target + val source = Seq((1, 100), (3, 30), (4, 41)).toDF("key2", "value2") // source + + io.delta.tables.DeltaTable.forPath(spark, 
tempPath) + .merge(source, "key1 = key2") + .whenMatched("key1 = 4").delete() + .whenMatched("key2 = 1").updateExpr(Map("key1" -> "key2", "value1" -> "value2")) + .whenNotMatched("key2 = 3").insertExpr(Map("key1" -> "key2", "value1" -> "value2")) + .execute() + + checkAnswer( + readDeltaTable(tempPath), + Row(1, 100) :: // Update + Row(2, 20) :: // No change + Row(3, 30) :: // Insert + Nil) + } + } + + test("extended scala API with Column") { + withTable("source") { + append(Seq((1, 10), (2, 20), (4, 40)).toDF("key1", "value1"), Nil) // target + val source = Seq((1, 100), (3, 30), (4, 41)).toDF("key2", "value2") // source + + io.delta.tables.DeltaTable.forPath(spark, tempPath) + .merge(source, functions.expr("key1 = key2")) + .whenMatched(functions.expr("key1 = 4")).delete() + .whenMatched(functions.expr("key2 = 1")) + .update(Map("key1" -> functions.col("key2"), "value1" -> functions.col("value2"))) + .whenNotMatched(functions.expr("key2 = 3")) + .insert(Map("key1" -> functions.col("key2"), "value1" -> functions.col("value2"))) + .execute() + + checkAnswer( + readDeltaTable(tempPath), + Row(1, 100) :: // Update + Row(2, 20) :: // No change + Row(3, 30) :: // Insert + Nil) + } + } + + test("updateAll and insertAll") { + withTable("source") { + append(Seq((1, 10), (2, 20), (4, 40), (5, 50)).toDF("key", "value"), Nil) + val source = Seq((1, 100), (3, 30), (4, 41), (5, 51), (6, 60)) + .toDF("key", "value").createOrReplaceTempView("source") + + executeMerge( + target = s"delta.`$tempPath` as t", + source = "source s", + condition = "s.key = t.key", + update = "*", + insert = "*") + + checkAnswer( + readDeltaTable(tempPath), + Row(1, 100) :: // Update + Row(2, 20) :: // No change + Row(3, 30) :: // Insert + Row(4, 41) :: // Update + Row(5, 51) :: // Update + Row(6, 60) :: // Insert + Nil) + } + } + + test("updateAll and insertAll with columns containing dot") { + withTable("source") { + append(Seq((1, 10), (2, 20), (4, 40)).toDF("key", "the.value"), Nil) // target + val source = Seq((1, 100), (3, 30), (4, 41)).toDF("key", "the.value") // source + + io.delta.tables.DeltaTable.forPath(spark, tempPath).as("t") + .merge(source.as("s"), "t.key = s.key") + .whenMatched() + .updateAll() + .whenNotMatched() + .insertAll() + .execute() + + checkAnswer( + readDeltaTable(tempPath), + Row(1, 100) :: // Update + Row(2, 20) :: // No change + Row(4, 41) :: // Update + Row(3, 30) :: // Insert + Nil) + } + } + + test("update with empty map should do nothing") { + append(Seq((1, 10), (2, 20)).toDF("trgKey", "trgValue"), Nil) // target + val source = Seq((1, 100), (3, 30)).toDF("srcKey", "srcValue") // source + io.delta.tables.DeltaTable.forPath(spark, tempPath) + .merge(source, "srcKey = trgKey") + .whenMatched().updateExpr(Map[String, String]()) + .whenNotMatched().insertExpr(Map("trgKey" -> "srcKey", "trgValue" -> "srcValue")) + .execute() + + checkAnswer( + readDeltaTable(tempPath), + Row(1, 10) :: // Not updated since no update clause + Row(2, 20) :: // No change due to merge condition + Row(3, 30) :: // Not updated since no update clause + Nil) + + // match condition should not be ignored when map is empty + io.delta.tables.DeltaTable.forPath(spark, tempPath) + .merge(source, "srcKey = trgKey") + .whenMatched("trgKey = 1").updateExpr(Map[String, String]()) + .whenMatched().delete() + .whenNotMatched().insertExpr(Map("trgKey" -> "srcKey", "trgValue" -> "srcValue")) + .execute() + + checkAnswer( + readDeltaTable(tempPath), + Row(1, 10) :: // Neither updated, nor deleted (condition is not ignored) + Row(2, 20) 
:: // No change due to merge condition + Nil) // Deleted (3, 30) + } + + // Checks specific to the APIs that are automatically handled by parser for SQL + test("check invalid merge API calls") { + withTable("source") { + append(Seq((1, 10), (2, 20)).toDF("trgKey", "trgValue"), Nil) // target + val source = Seq((1, 100), (3, 30)).toDF("srcKey", "srcValue") // source + + // There must be at least one WHEN clause in a MERGE statement + var e = intercept[AnalysisException] { + io.delta.tables.DeltaTable.forPath(spark, tempPath) + .merge(source, "srcKey = trgKey") + .execute() + } + errorContains(e.getMessage, "There must be at least one WHEN clause in a MERGE statement") + + // When there are multiple MATCHED clauses in a MERGE statement, + // the first MATCHED clause must have a condition + e = intercept[AnalysisException] { + io.delta.tables.DeltaTable.forPath(spark, tempPath) + .merge(source, "srcKey = trgKey") + .whenMatched().delete() + .whenMatched("trgKey = 1").updateExpr(Map("trgKey" -> "srcKey", "trgValue" -> "srcValue")) + .whenNotMatched().insertExpr(Map("trgKey" -> "srcKey", "trgValue" -> "srcValue")) + .execute() + } + errorContains(e.getMessage, "When there are more than one MATCHED clauses in a MERGE " + + "statement, only the last MATCHED clause can omit the condition.") + + e = intercept[AnalysisException] { + io.delta.tables.DeltaTable.forPath(spark, tempPath) + .merge(source, "srcKey = trgKey") + .whenMatched().updateExpr(Map("trgKey" -> "srcKey", "*" -> "*")) + .whenNotMatched().insertExpr(Map("trgKey" -> "srcKey", "trgValue" -> "srcValue")) + .execute() + } + errorContains(e.getMessage, "cannot resolve `*` in UPDATE clause") + + e = intercept[AnalysisException] { + io.delta.tables.DeltaTable.forPath(spark, tempPath) + .merge(source, "srcKey = trgKey") + .whenMatched().updateExpr(Map("trgKey" -> "srcKey", "trgValue" -> "srcValue")) + .whenNotMatched().insertExpr(Map("*" -> "*")) + .execute() + } + errorContains(e.getMessage, "cannot resolve `*` in INSERT clause") + + e = intercept[AnalysisException] { + io.delta.tables.DeltaTable.forPath(spark, tempPath) + .merge(source, "srcKey = trgKey") + .whenNotMatchedBySource().updateExpr(Map("*" -> "*")) + .execute() + } + errorContains(e.getMessage, "cannot resolve `*` in UPDATE clause") + } + } + + test("merge after schema change") { + withSQLConf((DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key, "true")) { + withTempPath { targetDir => + val targetPath = targetDir.getCanonicalPath + spark.range(10).write.format("delta").save(targetPath) + val t = io.delta.tables.DeltaTable.forPath(spark, targetPath).as("t") + assert(t.toDF.schema == StructType.fromDDL("id LONG")) + + // Do one merge to change the schema. + t.merge(Seq((11L, "newVal11")).toDF("id", "newCol1").as("s"), "t.id = s.id") + .whenMatched().updateAll() + .whenNotMatched().insertAll() + .execute() + // assert(t.toDF.schema == StructType.fromDDL("id LONG, newCol1 STRING")) + + // SC-35564 - ideally this shouldn't throw an error, but right now we can't fix it without + // causing a regression. 
+ val ex = intercept[Exception] { + t.merge(Seq((12L, "newVal12")).toDF("id", "newCol2").as("s"), "t.id = s.id") + .whenMatched().updateAll() + .whenNotMatched().insertAll() + .execute() + } + ex.getMessage.contains("schema of your Delta table has changed in an incompatible way") + } + } + } + + test("merge without table alias") { + withTempDir { dir => + val location = dir.getAbsolutePath + Seq((1, 1, 1), (2, 2, 2)).toDF("part", "id", "n").write + .format("delta") + .partitionBy("part") + .save(location) + val table = io.delta.tables.DeltaTable.forPath(spark, location) + val data1 = Seq((2, 2, 4, 2), (9, 3, 6, 9), (3, 3, 9, 3)).toDF("part", "id", "n", "part2") + table.alias("t").merge( + data1, + "t.part = part2") + .whenMatched().updateAll() + .whenNotMatched().insertAll() + .execute() + } + } + + test("pre-resolved exprs: should work in all expressions in absence of duplicate refs") { + withTempDir { dir => + val location = dir.getAbsolutePath + Seq((1, 1), (2, 2)).toDF("key", "value").write + .format("delta") + .save(location) + val table = io.delta.tables.DeltaTable.forPath(spark, location) + val target = table.toDF + val source = Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value") + + table.merge(source, target("key") === source("key")) + .whenMatched(target("key") === lit(1) && source("value") === lit(10)) + .update(Map("value" -> (target("value") + source("value")))) + .whenMatched(target("key") === lit(2) && source("value") === lit(20)) + .delete() + .whenNotMatched(source("key") === lit(3) && source("value") === lit(30)) + .insert(Map("key" -> source("key"), "value" -> source("value"))) + .execute() + + checkAnswer(table.toDF, Seq((1, 11), (3, 30)).toDF("key", "value")) + } + } + + test("pre-resolved exprs: negative cases with refs resolved to wrong Dataframes") { + withTempDir { dir => + val location = dir.getAbsolutePath + Seq((1, 1), (2, 2)).toDF("key", "value").write + .format("delta") + .save(location) + val table = io.delta.tables.DeltaTable.forPath(spark, location) + val target = table.toDF + val source = Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value") + val dummyDF = Seq((0, 0)).toDF("key", "value") + + def checkError(f: => Unit): Unit = { + val e = intercept[AnalysisException] { f } + Seq("Resolved attribute", "missing from").foreach { m => + assert(e.getMessage.toLowerCase(Locale.ROOT).contains(m.toLowerCase(Locale.ROOT))) + } + } + // Merge condition + checkError { + table.merge(source, target("key") === dummyDF("key")) + .whenMatched().delete().execute() + } + + // Matched clauses + checkError { + table.merge(source, target("key") === source("key")) + .whenMatched(dummyDF("key") === lit(1)).updateAll().execute() + } + + checkError { + table.merge(source, target("key") === source("key")) + .whenMatched().update(Map("key" -> dummyDF("key"))).execute() + } + + // Not matched clauses + checkError { + table.merge(source, target("key") === source("key")) + .whenNotMatched(dummyDF("key") === lit(1)).insertAll().execute() + } + checkError { + table.merge(source, target("key") === source("key")) + .whenNotMatched().insert(Map("key" -> dummyDF("key"))).execute() + } + } + } + + /** Make sure the joins generated by merge do not have the duplicate AttributeReferences */ + private def verifyNoDuplicateRefsAcrossSourceAndTarget(f: => Unit): Unit = { + val executedPlans = DeltaTestUtils.withLogicalPlansCaptured(spark, optimizedPlan = true) { f } + val plansWithInnerJoin = executedPlans.filter { p => + p.collect { case b: Join if b.joinType == Inner => b 
}.nonEmpty + } + assert(plansWithInnerJoin.size == 1, + "multiple plans found with inner join\n" + plansWithInnerJoin.mkString("\n")) + val join = plansWithInnerJoin.head.collect { case j: Join => j }.head + assert(join.left.outputSet.intersect(join.right.outputSet).isEmpty) + } + + test("self-merge: duplicate attrib refs should be removed") { + withTempDir { tempDir => + val df = spark.range(5).selectExpr("id as key", "id as value") + df.write.format("delta").save(tempDir.toString) + + val deltaTable = io.delta.tables.DeltaTable.forPath(tempDir.toString) + val target = deltaTable.toDF + val source = target.filter("key = 4") + + val duplicateRefs = + target.queryExecution.analyzed.outputSet.intersect(source.queryExecution.analyzed.outputSet) + require(duplicateRefs.nonEmpty, "source and target were expected to have duplicate refs") + + verifyNoDuplicateRefsAcrossSourceAndTarget { + deltaTable.as("t") + .merge(source.as("s"), "t.key = s.key") + .whenMatched() + .delete() + .execute() + } + checkAnswer(deltaTable.toDF, spark.range(4).selectExpr("id as key", "id as value")) + } + } + + test( + "self-merge + pre-resolved exprs: merge condition fails with pre-resolved, duplicate refs") { + withTempDir { tempDir => + val df = spark.range(5).selectExpr("id as key", "id as value") + df.write.format("delta").save(tempDir.toString) + + val deltaTable = io.delta.tables.DeltaTable.forPath(tempDir.toString) + val target = deltaTable.toDF + val source = target.filter("key = 4") + val e = intercept[AnalysisException] { + deltaTable.merge(source, target("key") === source("key")) // this is ambiguous + .whenMatched() + .delete() + .execute() + } + assert(e.getMessage.toLowerCase(Locale.ROOT).contains("ambiguous")) + } + } + + test( + "self-merge + pre-resolved exprs: duplicate refs should resolve in not-matched clauses") { + withTempDir { tempDir => + val df = spark.range(5).selectExpr("id as key", "id as value") + df.write.format("delta").save(tempDir.toString) + + val deltaTable = io.delta.tables.DeltaTable.forPath(tempDir.toString) + val target = deltaTable.toDF + val source = target.filter("key = 4") + + // Insert clause can refer to only source attributes, so pre-resolved references, + // even when written as`target("column")`, are actually unambiguous + verifyNoDuplicateRefsAcrossSourceAndTarget { + deltaTable.as("t") + .merge(source.as("s"), "t.key = s.key") + .whenNotMatched(source("value") > 0 && target("key") > 0) + .insert(Map("key" -> source("key"), "value" -> target("value"))) + .whenMatched().update(Map("key" -> $"s.key")) // no-op + .execute() + } + // nothing should be inserted as source matches completely with target + checkAnswer(deltaTable.toDF, spark.range(5).selectExpr("id as key", "id as value")) + } + } + + test( + "self-merge + pre-resolved exprs: non-duplicate but pre-resolved refs should still resolve") { + withTempDir { tempDir => + val df = spark.range(5).selectExpr("id as key", "id as value") + df.write.format("delta").save(tempDir.toString) + + val deltaTable = io.delta.tables.DeltaTable.forPath(tempDir.toString) + val target = deltaTable.toDF + val source = target.filter("key = 0").drop("value") + .withColumn("value", col("key") + lit(0)) + .withColumn("other", lit(0)) + // source is just one row (key, value, other) = (4, 4, 0) + + // `value` should not be duplicate ref as its recreated in the source and have different + // exprIds than the target value. 
+ val duplicateRefs = + target.queryExecution.analyzed.outputSet.intersect(source.queryExecution.analyzed.outputSet) + require(duplicateRefs.map(_.name).toSet == Set("key"), + "unexpected duplicate refs, should be only 'key': " + duplicateRefs) + + // So both `source("value")` and `target("value")` are not ambiguous. + // `source("other")` is obviously not ambiguous. + verifyNoDuplicateRefsAcrossSourceAndTarget { + deltaTable.as("t") + .merge( + source.as("s"), + expr("t.key = s.key") && source("other") === 0 && target("value") === 4) + .whenMatched(source("value") > 0 && target("value") > 0 && source("other") === 0) + .update(Map( + "key" -> expr("s.key"), + "value" -> (target("value") + source("value") + source("other")))) + .whenNotMatched(source("value") > 0 && source("other") === 0) + .insert(Map( + "key" -> expr("s.key"), + "value" -> (source("value") + source("other")))) + .execute() + } + // key = 4 should be updated to same values, and nothing should be inserted + checkAnswer(deltaTable.toDF, spark.range(5).selectExpr("id as key", "id as value")) + } + } + + test("self-merge + pre-resolved exprs: negative cases in matched clauses with duplicate refs") { + // Only matched clauses can have attribute references from both source and target, hence + // pre-resolved expression can be ambiguous in presence of duplicate references from self-merge + withTempDir { tempDir => + val df = spark.range(5).selectExpr("id as key", "id as value") + df.write.format("delta").save(tempDir.toString) + + val deltaTable = io.delta.tables.DeltaTable.forPath(tempDir.toString) + val target = deltaTable.toDF + val source = target.filter("key = 4") + + def checkError(f: => Unit): Unit = { + val e = intercept[AnalysisException] { f } + assert(e.getMessage.toLowerCase(Locale.ROOT).contains("ambiguous")) + } + + checkError { + deltaTable + .merge(source, target("key") === source("key")) // this is ambiguous + .whenMatched() + .delete() + .execute() + } + + // Update + checkError { + deltaTable.as("t").merge(source.as("s"), "t.key = s.key") + .whenMatched(target("key") === functions.lit(4)) // can map to either key column + .updateAll() + .execute() + } + + checkError { + deltaTable.as("t").merge(source.as("s"), "t.key = s.key") + .whenMatched() + .update(Map("value" -> target("value").plus(1))) // can map to either value column + .execute() + } + + // Delete + checkError { + deltaTable.as("t").merge(source.as("s"), "t.key = s.key") + .whenMatched(target("key") === functions.lit(4)) // can map to either key column + .delete() + .execute() + } + } + } + + test("merge clause matched and not matched can interleave") { + append(Seq((1, 10), (2, 20)).toDF("trgKey", "trgValue"), Nil) // target + val source = Seq((1, 100), (2, 200), (3, 300), (4, 400)).toDF("srcKey", "srcValue") // source + io.delta.tables.DeltaTable.forPath(spark, tempPath) + .merge(source, "srcKey = trgKey") + .whenMatched("trgKey = 1").updateExpr(Map("trgKey" -> "srcKey", "trgValue" -> "srcValue")) + .whenNotMatched("srcKey = 3").insertExpr(Map("trgKey" -> "srcKey", "trgValue" -> "srcValue")) + .whenMatched().delete() + .whenNotMatched().insertExpr(Map("trgKey" -> "srcKey", "trgValue" -> "srcValue")) + .execute() + + checkAnswer( + readDeltaTable(tempPath), + Row(1, 100) :: // Update (1, 10) + // Delete (2, 20) + Row(3, 300) :: // Insert (3, 300) + Row(4, 400) :: // Insert (4, 400) + Nil) + } + + test("schema evolution with multiple update clauses") { + withSQLConf(("spark.databricks.delta.schema.autoMerge.enabled", "true")) { + withTable("target", 
"src") { + Seq((1, "a"), (2, "b"), (3, "c")).toDF("id", "targetValue") + .write.format("delta").saveAsTable("target") + val source = Seq((1, "x"), (2, "y"), (4, "z")).toDF("id", "srcValue") + + io.delta.tables.DeltaTable.forName("target") + .merge(source, col("target.id") === source.col("id")) + .whenMatched("target.id = 1").updateExpr(Map("targetValue" -> "srcValue")) + .whenMatched("target.id = 2").updateAll() + .whenNotMatched().insertAll() + .execute() + checkAnswer( + sql("select * from target"), + Row(1, "x", null) +: Row(2, "b", "y") +: Row(3, "c", null) +: Row(4, null, "z") +: Nil) + } + } + } + + // scalastyle:off argcount + override def testNestedDataSupport(name: String, namePrefix: String = "nested data support")( + source: String, + target: String, + update: Seq[String], + insert: String = null, + targetSchema: StructType = null, + sourceSchema: StructType = null, + result: String = null, + errorStrs: Seq[String] = null, + confs: Seq[(String, String)] = Seq.empty): Unit = { + // scalastyle:on argcount + + require(result == null ^ errorStrs == null, "either set the result or the error strings") + + val testName = + if (result != null) s"$namePrefix - $name" else s"$namePrefix - analysis error - $name" + + test(testName) { + withSQLConf(confs: _*) { + withJsonData(source, target, targetSchema, sourceSchema) { case (sourceName, targetName) => + val pathOrName = parsePath(targetName) + val fieldNames = readDeltaTable(pathOrName).schema.fieldNames + val keyName = s"`${fieldNames.head}`" + + def execMerge() = { + val t = DeltaTestUtils.getDeltaTableForIdentifierOrPath( + spark, + DeltaTestUtils.getTableIdentifierOrPath(targetName)) + val m = t.as("t") + .merge( + spark.table(sourceName).as("s"), + s"s.$keyName = t.$keyName") + val withUpdate = if (update == Seq("*")) { + m.whenMatched().updateAll() + } else { + val updateColExprMap = parseUpdate(update) + m.whenMatched().updateExpr(updateColExprMap) + } + + if (insert == "*") { + withUpdate.whenNotMatched().insertAll().execute() + } else { + val insertExprMaps = if (insert != null) { + parseInsert(insert, None) + } else { + fieldNames.map { f => s"t.`$f`" -> s"s.`$f`" }.toMap + } + + withUpdate.whenNotMatched().insertExpr(insertExprMaps).execute() + } + } + + if (result != null) { + execMerge() + val expectedDf = readFromJSON(strToJsonSeq(result), targetSchema) + checkAnswer(readDeltaTable(pathOrName), expectedDf) + } else { + val e = intercept[AnalysisException] { + execMerge() + } + errorStrs.foreach { s => errorContains(e.getMessage, s) } + } + } + } + } + } + + // Scala API won't hit the resolution exception. 
+ testWithTempView("Update specific column works fine in temp views") { isSQLTempView => + withJsonData( + """{ "key": "A", "value": { "a": { "x": 1, "y": 2 } } }""", + """{ "key": "A", "value": { "a": { "x": 2, "y": 1 } } }""" + ) { (sourceName, targetName) => + createTempViewFromTable(targetName, isSQLTempView) + val fieldNames = spark.table(targetName).schema.fieldNames + val fieldNamesStr = fieldNames.mkString("`", "`, `", "`") + executeMerge( + target = "v t", + source = s"$sourceName s", + condition = "s.key = t.key", + update = "value.a.x = s.value.a.x", + insert = s"($fieldNamesStr) VALUES ($fieldNamesStr)") + checkAnswer( + spark.read.format("delta").table("v"), + spark.read.json( + strToJsonSeq("""{ "key": "A", "value": { "a": { "x": 1, "y": 1 } } }""").toDS) + ) + } + } + + test("delta merge into clause with invalid data type.") { + import org.apache.spark.sql.catalyst.dsl.expressions._ + intercept[DeltaAnalysisException] { + DeltaMergeIntoClause.toActions(Seq(Assignment("1".expr, "1".expr))) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSchemaEvolutionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSchemaEvolutionSuite.scala new file mode 100644 index 00000000000..1882884185b --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSchemaEvolutionSuite.scala @@ -0,0 +1,2901 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import scala.language.implicitConversions + +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest} +import org.apache.spark.sql.functions.{array, current_date, lit, struct} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{ArrayType, IntegerType, LongType, MapType, NullType, StringType, StructType} + + +/** + * Trait collecting all other schema evolution test traits for convenience. + */ +trait MergeIntoSchemaEvolutionAllTests extends MergeIntoSchemaEvolutionCoreTests + with MergeIntoSchemaEvolutionBaseTests + with MergeIntoSchemaEvolutionNotMatchedBySourceTests + with MergeIntoNestedStructEvolutionTests { + self: MergeIntoSchemaEvolutionMixin with MergeIntoTestUtils with SharedSparkSession => + } + +/** + * Trait collecting schema evolution test runner methods and other helpers. + */ +trait MergeIntoSchemaEvolutionMixin { + self: QueryTest with SharedSparkSession with MergeIntoTestUtils => + + /** + * Test runner used by most non-nested schema evolution tests. Runs the MERGE operation once with + * schema evolution disabled then with schema evolution enabled. Tests must provide for each case + * either the expected result or the expected error message but not both. 
+ */ + protected def testEvolution(name: String)( + targetData: => DataFrame, + sourceData: => DataFrame, + clauses: Seq[MergeClause] = Seq.empty, + expected: => DataFrame = null, + expectedWithoutEvolution: => DataFrame = null, + expectErrorContains: String = null, + expectErrorWithoutEvolutionContains: String = null, + confs: Seq[(String, String)] = Seq(), + partitionCols: Seq[String] = Seq.empty): Unit = { + test(s"schema evolution - $name - with evolution disabled") { + withSQLConf(confs: _*) { + append(targetData, partitionCols) + withTempView("source") { + sourceData.createOrReplaceTempView("source") + + if (expectErrorWithoutEvolutionContains != null) { + val ex = intercept[AnalysisException] { + executeMerge(s"delta.`$tempPath` t", s"source s", "s.key = t.key", + clauses.toSeq: _*) + } + errorContains(ex.getMessage, expectErrorWithoutEvolutionContains) + } else { + executeMerge(s"delta.`$tempPath` t", s"source s", "s.key = t.key", + clauses.toSeq: _*) + checkAnswer( + spark.read.format("delta").load(tempPath), + expectedWithoutEvolution.collect()) + assert( + spark.read.format("delta").load(tempPath).schema.asNullable === + expectedWithoutEvolution.schema.asNullable) + } + } + } + } + + test(s"schema evolution - $name") { + withSQLConf((confs :+ (DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key, "true")): _*) { + append(targetData, partitionCols) + withTempView("source") { + sourceData.createOrReplaceTempView("source") + + if (expectErrorContains != null) { + val ex = intercept[AnalysisException] { + executeMerge(s"delta.`$tempPath` t", s"source s", "s.key = t.key", + clauses.toSeq: _*) + } + errorContains(ex.getMessage, expectErrorContains) + } else { + executeMerge(s"delta.`$tempPath` t", s"source s", "s.key = t.key", + clauses.toSeq: _*) + checkAnswer( + spark.read.format("delta").load(tempPath), + expected.collect()) + assert(spark.read.format("delta").load(tempPath).schema.asNullable === + expected.schema.asNullable) + } + } + } + } + } + + /** + * Test runner used by most nested schema evolution tests. Similar to `testEvolution()` except + * that the target & source data and expected results are parsed as JSON strings for convenience. + */ + // scalastyle:off argcount + protected def testNestedStructsEvolution(name: String)( + target: Seq[String], + source: Seq[String], + targetSchema: StructType, + sourceSchema: StructType, + clauses: Seq[MergeClause] = Seq.empty, + result: Seq[String] = null, + resultSchema: StructType = null, + resultWithoutEvolution: Seq[String] = null, + expectErrorContains: String = null, + expectErrorWithoutEvolutionContains: String = null, + confs: Seq[(String, String)] = Seq()): Unit = { + testEvolution(name) ( + targetData = readFromJSON(target, targetSchema), + sourceData = readFromJSON(source, sourceSchema), + clauses = clauses, + expected = + if (result != null ) { + val schema = if (resultSchema != null) resultSchema else targetSchema + readFromJSON(result, schema) + } else { + null + }, + expectErrorContains = expectErrorContains, + expectedWithoutEvolution = + if (resultWithoutEvolution != null) { + readFromJSON(resultWithoutEvolution, targetSchema) + } else { + null + }, + expectErrorWithoutEvolutionContains = expectErrorWithoutEvolutionContains, + confs = confs + ) + } +} + +/** + * Trait collecting a subset of tests providing core coverage for schema evolution. Mix this trait + * in other suites to get basic test coverage for schema evolution in combination with other + * features, e.g. CDF, DVs. 
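+ *
+ * A suite mixing this trait in might be declared as follows (hypothetical suite name; the
+ * required mixins mirror the self-type declared below):
+ * {{{
+ *   class MyMergeFeatureSchemaEvolutionSuite extends QueryTest
+ *     with SharedSparkSession
+ *     with MergeIntoTestUtils            // or a concrete SQL/Scala-API implementation of it
+ *     with MergeIntoSchemaEvolutionMixin
+ *     with MergeIntoSchemaEvolutionCoreTests
+ * }}}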
+ */ +trait MergeIntoSchemaEvolutionCoreTests { + self: MergeIntoSchemaEvolutionMixin with MergeIntoTestUtils with SharedSparkSession => + + import testImplicits._ + + testEvolution("new column with only insert *")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = insert("*") :: Nil, + expected = + ((0, 0, null) +: (3, 30, null) +: // unchanged + (1, 10, null) +: // not updated + (2, 2, "extra2") +: Nil // newly inserted + ).toDF("key", "value", "extra"), + expectedWithoutEvolution = + ((0, 0) +: (3, 30) +: (1, 10) +: (2, 2) +: Nil).toDF("key", "value") + ) + + testEvolution("new column with only update *")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = update("*") :: Nil, + expected = + ((0, 0, null) +: (3, 30, null) +: + (1, 1, "extra1") +: // updated + Nil // row 2 not inserted + ).toDF("key", "value", "extra"), + expectedWithoutEvolution = ((0, 0) +: (3, 30) +: (1, 1) +: Nil).toDF("key", "value") + ) + + testEvolution("new column with insert * and delete not matched by source")( + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + clauses = insert("*") :: + deleteNotMatched() :: Nil, + expected = Seq( + // (0, 0) Not matched by source, deleted + (1, 10, null), // Matched, updated + (2, 2, "extra2") // Not matched by target, inserted + // (3, 30) Not matched by source, deleted + ).toDF("key", "value", "extra"), + expectedWithoutEvolution = Seq((1, 10), (2, 2)).toDF("key", "value")) + + testNestedStructsEvolution("new nested source field added when updating top-level column")( + target = Seq("""{ "key": "A", "value": { "a": 1 } }"""), + source = Seq("""{ "key": "A", "value": { "a": 2, "b": 3 } }"""), + targetSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType)), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType) + .add("b", IntegerType)), + clauses = update("value = s.value") :: Nil, + result = Seq("""{ "key": "A", "value": { "a": 2, "b": 3 } }"""), + resultSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType) + .add("b", IntegerType)), + expectErrorWithoutEvolutionContains = "Cannot cast") +} + +/** + * Trait collecting all base and misc tests for schema evolution. 
+ */ +trait MergeIntoSchemaEvolutionBaseTests { + self: MergeIntoSchemaEvolutionMixin with MergeIntoTestUtils with SharedSparkSession => + + import testImplicits._ + + // Schema evolution with UPDATE SET alone + testEvolution("new column with update set")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = update(set = "key = s.key, value = s.value, extra = s.extra") :: Nil, + expected = ((0, 0, null) +: (3, 30, null) +: (1, 1, "extra1") +: Nil) + .toDF("key", "value", "extra"), + expectErrorWithoutEvolutionContains = "cannot resolve extra in UPDATE clause") + + testEvolution("new column updated with value from existing column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, -1), (2, 2, -2)) + .toDF("key", "value", "extra"), + clauses = update(set = "extra = s.value") :: Nil, + expected = ((0, 0, null) +: (1, 10, 1) +: (3, 30, null) +: Nil) + .asInstanceOf[List[(Integer, Integer, Integer)]] + .toDF("key", "value", "extra"), + expectErrorWithoutEvolutionContains = "cannot resolve extra in UPDATE clause") + + // Schema evolution with INSERT alone + testEvolution("new column with insert values")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = insert(values = "(key, value, extra) VALUES (s.key, s.value, s.extra)") :: Nil, + expected = ((0, 0, null) +: (1, 10, null) +: (3, 30, null) +: (2, 2, "extra2") +: Nil) + .toDF("key", "value", "extra"), + expectErrorWithoutEvolutionContains = "cannot resolve extra in INSERT clause") + + testEvolution("new column inserted with value from existing column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, -1), (2, 2, -2)) + .toDF("key", "value", "extra"), + clauses = insert(values = "(key, extra) VALUES (s.key, s.value)") :: Nil, + expected = ((0, 0, null) +: (1, 10, null) +: (3, 30, null) +: (2, null, 2) +: Nil) + .asInstanceOf[List[(Integer, Integer, Integer)]] + .toDF("key", "value", "extra"), + expectErrorWithoutEvolutionContains = "cannot resolve extra in INSERT clause") + + // Schema evolution (UPDATE) with two new columns in the source but only one added to the target. 
+ testEvolution("new column with update set and column not updated")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1", "unused1"), (2, 2, "extra2", "unused2")) + .toDF("key", "value", "extra", "unused"), + clauses = update(set = "extra = s.extra") :: Nil, + expected = ((0, 0, null) +: (1, 10, "extra1") +: (3, 30, null) +: Nil) + .asInstanceOf[List[(Integer, Integer, String)]] + .toDF("key", "value", "extra"), + expectErrorWithoutEvolutionContains = "cannot resolve extra in UPDATE clause") + + testEvolution("new column updated from other new column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1", "unused1"), (2, 2, "extra2", "unused2")) + .toDF("key", "value", "extra", "unused"), + clauses = update(set = "extra = s.unused") :: Nil, + expected = ((0, 0, null) +: (1, 10, "unused1") +: (3, 30, null) +: Nil) + .asInstanceOf[List[(Integer, Integer, String)]] + .toDF("key", "value", "extra"), + expectErrorWithoutEvolutionContains = "cannot resolve extra in UPDATE clause") + + // Schema evolution (INSERT) with two new columns in the source but only one added to the target. + testEvolution("new column with insert values and column not inserted")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1", "unused1"), (2, 2, "extra2", "unused2")) + .toDF("key", "value", "extra", "unused"), + clauses = insert(values = "(key, extra) VALUES (s.key, s.extra)") :: Nil, + expected = ((0, 0, null) +: (1, 10, null) +: (3, 30, null) +: (2, null, "extra2") +: Nil) + .asInstanceOf[List[(Integer, Integer, String)]] + .toDF("key", "value", "extra"), + expectErrorWithoutEvolutionContains = "cannot resolve extra in INSERT clause") + + testEvolution("new column inserted from other new column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1", "unused1"), (2, 2, "extra2", "unused2")) + .toDF("key", "value", "extra", "unused"), + clauses = insert(values = "(key, extra) VALUES (s.key, s.unused)") :: Nil, + expected = ((0, 0, null) +: (1, 10, null) +: (3, 30, null) +: (2, null, "unused2") +: Nil) + .asInstanceOf[List[(Integer, Integer, String)]] + .toDF("key", "value", "extra"), + expectErrorWithoutEvolutionContains = "cannot resolve extra in INSERT clause") + + // Schema evolution with two new columns added by UPDATE and INSERT resp. 
+ testEvolution("new column added by insert and other new column added by update")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1", "other1"), (2, 2, "extra2", "other2")) + .toDF("key", "value", "extra", "other"), + clauses = update(set = "extra = s.extra") :: + insert(values = "(key, other) VALUES (s.key, s.other)") :: Nil, + expected = + ((0, 0, null, null) +: + (1, 10, "extra1", null) +: + (3, 30, null, null) +: + (2, null, null, "other2") +: Nil) + .asInstanceOf[List[(Integer, Integer, String, String)]] + .toDF("key", "value", "extra", "other"), + expectErrorWithoutEvolutionContains = "cannot resolve extra in UPDATE clause") + + // No schema evolution + testEvolution("old column updated from new column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, -1), (2, 2, -2)) + .toDF("key", "value", "extra"), + clauses = update(set = "value = s.extra") :: Nil, + expected = ((0, 0) +: (1, -1) +: (3, 30) +: Nil).toDF("key", "value"), + expectedWithoutEvolution = ((0, 0) +: (1, -1) +: (3, 30) +: Nil).toDF("key", "value")) + + testEvolution("old column inserted from new column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, -1), (2, 2, -2)) + .toDF("key", "value", "extra"), + clauses = insert(values = "(key) VALUES (s.extra)") :: Nil, + expected = ((0, 0) +: (1, 10) +: (3, 30) +: (-2, null) +: Nil) + .asInstanceOf[List[(Integer, Integer)]] + .toDF("key", "value"), + expectedWithoutEvolution = ((0, 0) +: (1, 10) +: (3, 30) +: (-2, null) +: Nil) + .asInstanceOf[List[(Integer, Integer)]] + .toDF("key", "value")) + + testEvolution("new column with insert existing column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = insert(values = "(key) VALUES (s.key)") :: Nil, + expected = ((0, 0) +: (1, 10) +: (2, null) +: (3, 30) +: Nil) + .asInstanceOf[List[(Integer, Integer)]] + .toDF("key", "value"), + expectedWithoutEvolution = ((0, 0) +: (1, 10) +: (2, null) +: (3, 30) +: Nil) + .asInstanceOf[List[(Integer, Integer)]] + .toDF("key", "value")) + + // Column doesn't exist with UPDATE/INSERT alone. 
+ testEvolution("update set nonexistent column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = update(set = "nonexistent = s.extra") :: Nil, + expectErrorContains = "cannot resolve nonexistent in UPDATE clause", + expectErrorWithoutEvolutionContains = "cannot resolve nonexistent in UPDATE clause") + + testEvolution("insert values nonexistent column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = insert(values = "(nonexistent) VALUES (s.extra)") :: Nil, + expectErrorContains = "cannot resolve nonexistent in INSERT clause", + expectErrorWithoutEvolutionContains = "cannot resolve nonexistent in INSERT clause") + + testEvolution("new column with update set and update *")( + targetData = Seq((0, 0), (1, 10), (2, 20)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = update(condition = "s.key < 2", set = "value = s.value") :: update("*") :: Nil, + expected = + ((0, 0, null) +: + (1, 1, null) +: // updated by first clause + (2, 2, "extra2") +: // updated by second clause + Nil + ).toDF("key", "value", "extra"), + expectedWithoutEvolution = ((0, 0) +: (1, 1) +: (2, 2) +: Nil).toDF("key", "value") + ) + + testEvolution("update * with column not in source")( + targetData = Seq((0, 0, 0), (1, 10, 10), (3, 30, 30)).toDF("key", "value", "extra"), + sourceData = Seq((1, 1), (2, 2)).toDF("key", "value"), + clauses = update("*") :: Nil, + // update went through even though `extra` wasn't there + expected = ((0, 0, 0) +: (1, 1, 10) +: (3, 30, 30) +: Nil).toDF("key", "value", "extra"), + expectErrorWithoutEvolutionContains = "cannot resolve extra in UPDATE clause" + ) + + testEvolution("insert * with column not in source")( + targetData = Seq((0, 0, 0), (1, 10, 10), (3, 30, 30)).toDF("key", "value", "extra"), + sourceData = Seq((1, 1), (2, 2)).toDF("key", "value"), + clauses = insert("*") :: Nil, + // insert went through even though `extra` wasn't there + expected = ((0, 0, 0) +: (1, 10, 10) +: (2, 2, null) +: (3, 30, 30) +: Nil) + .asInstanceOf[List[(Integer, Integer, Integer)]] + .toDF("key", "value", "extra"), + expectErrorWithoutEvolutionContains = "cannot resolve extra in INSERT clause" + ) + + testEvolution("explicitly insert subset of columns")( + targetData = Seq((0, 0, 0), (1, 10, 10), (3, 30, 30)).toDF("key", "value", "extra"), + sourceData = Seq((1, 1, 1), (2, 2, 2)).toDF("key", "value", "extra"), + clauses = insert("(key, value) VALUES (s.key, s.value)") :: Nil, + // 2 should have extra = null, since extra wasn't in the insert spec. + expected = ((0, 0, 0) +: (1, 10, 10) +: (2, 2, null) +: (3, 30, 30) +: Nil) + .asInstanceOf[List[(Integer, Integer, Integer)]] + .toDF("key", "value", "extra"), + expectedWithoutEvolution = ((0, 0, 0) +: (1, 10, 10) +: (2, 2, null) +: (3, 30, 30) +: Nil) + .asInstanceOf[List[(Integer, Integer, Integer)]] + .toDF("key", "value", "extra") + ) + + testEvolution("explicitly update one column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, 1), (2, 2, 2)).toDF("key", "value", "extra"), + clauses = update("value = s.value") :: Nil, + // Both results should be the same - we're checking that no evolution logic triggers + // even though there's an extra source column. 
+ expected = ((0, 0) +: (1, 1) +: (3, 30) +: Nil).toDF("key", "value"), + expectedWithoutEvolution = ((0, 0) +: (1, 1) +: (3, 30) +: Nil).toDF("key", "value") + ) + + testEvolution("new column with update non-* and insert *")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, 1), (2, 2, 2)).toDF("key", "value", "extra"), + clauses = update("key = s.key, value = s.value") :: insert("*") :: Nil, + expected = ((0, 0, null) +: (2, 2, 2) +: (3, 30, null) +: + // null because `extra` isn't an update action, even though it's 1 in the source data + (1, 1, null) +: Nil) + .asInstanceOf[List[(Integer, Integer, Integer)]].toDF("key", "value", "extra"), + expectedWithoutEvolution = ((0, 0) +: (2, 2) +: (3, 30) +: (1, 1) +: Nil).toDF("key", "value") + ) + + testEvolution("new column with update * and insert non-*")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, 1), (2, 2, 2)).toDF("key", "value", "extra"), + clauses = update("*") :: insert("(key, value) VALUES (s.key, s.value)") :: Nil, + expected = ((0, 0, null) +: (1, 1, 1) +: (3, 30, null) +: + // null because `extra` isn't an insert action, even though it's 2 in the source data + (2, 2, null) +: Nil) + .asInstanceOf[List[(Integer, Integer, Integer)]].toDF("key", "value", "extra"), + expectedWithoutEvolution = ((0, 0) +: (2, 2) +: (3, 30) +: (1, 1) +: Nil).toDF("key", "value") + ) + + testEvolution(s"case-insensitive insert")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1), (2, 2)).toDF("key", "VALUE"), + clauses = insert("(key, value, VALUE) VALUES (s.key, s.value, s.VALUE)") :: Nil, + expected = ((0, 0) +: (1, 10) +: (3, 30) +: (2, 2) +: Nil).toDF("key", "value"), + expectedWithoutEvolution = ((0, 0) +: (1, 10) +: (3, 30) +: (2, 2) +: Nil).toDF("key", "value"), + confs = Seq(SQLConf.CASE_SENSITIVE.key -> "false") + ) + + testEvolution(s"case-sensitive insert")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1), (2, 2)).toDF("key", "VALUE"), + clauses = insert("(key, value, VALUE) VALUES (s.key, s.value, s.VALUE)") :: Nil, + expectErrorContains = "Cannot resolve s.value in INSERT clause", + expectErrorWithoutEvolutionContains = "Cannot resolve s.value in INSERT clause", + confs = Seq(SQLConf.CASE_SENSITIVE.key -> "true") + ) + + testEvolution("evolve partitioned table")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = update("*") :: insert("*") :: Nil, + expected = ((0, 0, null) +: (1, 1, "extra1") +: (2, 2, "extra2") +: (3, 30, null) +: Nil) + .toDF("key", "value", "extra"), + expectedWithoutEvolution = ((0, 0) +: (2, 2) +: (3, 30) +: (1, 1) +: Nil).toDF("key", "value") + ) + + testEvolution("star expansion with names including dots")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value.with.dotted.name"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF( + "key", "value.with.dotted.name", "extra.dotted"), + clauses = update("*") :: insert("*") :: Nil, + expected = ((0, 0, null) +: (1, 1, "extra1") +: (2, 2, "extra2") +: (3, 30, null) +: Nil) + .toDF("key", "value.with.dotted.name", "extra.dotted"), + expectedWithoutEvolution = ((0, 0) +: (2, 2) +: (3, 30) +: (1, 1) +: Nil) + .toDF("key", "value.with.dotted.name") + ) + + // Note that incompatible types are those where a cast to the target type can't resolve - any + // valid cast will 
be permitted. + testEvolution("incompatible types in update *")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, Array[Byte](1)), (2, Array[Byte](2))).toDF("key", "value"), + clauses = update("*") :: Nil, + expectErrorContains = + "Failed to merge incompatible data types IntegerType and BinaryType", + expectErrorWithoutEvolutionContains = "cannot cast" + ) + + testEvolution("incompatible types in insert *")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, Array[Byte](1)), (2, Array[Byte](2))).toDF("key", "value"), + clauses = insert("*") :: Nil, + expectErrorContains = "Failed to merge incompatible data types IntegerType and BinaryType", + expectErrorWithoutEvolutionContains = "cannot cast" + ) + + // All integral types other than long can be upcasted to integer. + testEvolution("upcast numeric source types into integer target")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1.toByte, 1.toShort), (2.toByte, 2.toShort)).toDF("key", "value"), + clauses = update("*") :: insert("*") :: Nil, + expected = Seq((0, 0), (1, 1), (2, 2), (3, 30)).toDF("key", "value"), + expectedWithoutEvolution = Seq((0, 0), (1, 1), (2, 2), (3, 30)).toDF("key", "value") + ) + + // Delta's automatic schema evolution allows converting table columns with a numeric type narrower + // than integer to integer, because in the underlying Parquet they're all stored as ints. + testEvolution("upcast numeric target types from integer source")( + targetData = Seq((0.toByte, 0.toShort), (1.toByte, 10.toShort)).toDF("key", "value"), + sourceData = Seq((1, 1), (2, 2)).toDF("key", "value"), + clauses = update("*") :: insert("*") :: Nil, + expected = + ((0.toByte, 0.toShort) +: + (1.toByte, 1.toShort) +: + (2.toByte, 2.toShort) +: Nil + ).toDF("key", "value"), + expectedWithoutEvolution = + ((0.toByte, 0.toShort) +: + (1.toByte, 1.toShort) +: + (2.toByte, 2.toShort) +: Nil + ).toDF("key", "value") + ) + + testEvolution("upcast int source type into long target")( + targetData = Seq((0, 0L), (1, 10L), (3, 30L)).toDF("key", "value"), + sourceData = Seq((1, 1), (2, 2)).toDF("key", "value"), + clauses = update("*") :: insert("*") :: Nil, + expected = ((0, 0L) +: (1, 1L) +: (2, 2L) +: (3, 30L) +: Nil).toDF("key", "value"), + expectedWithoutEvolution = + ((0, 0L) +: (1, 1L) +: (2, 2L) +: (3, 30L) +: Nil).toDF("key", "value") + ) + + testEvolution("write string into int column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, "1"), (2, "2"), (5, "notANumber")).toDF("key", "value"), + clauses = insert("*") :: Nil, + expected = ((0, 0) +: (1, 10) +: (2, 2) +: (3, 30) +: (5, null) +: Nil) + .asInstanceOf[List[(Integer, Integer)]].toDF("key", "value"), + expectedWithoutEvolution = + ((0, 0) +: (1, 10) +: (2, 2) +: (3, 30) +: (5, null) +: Nil) + .asInstanceOf[List[(Integer, Integer)]].toDF("key", "value"), + // Disable ANSI as this test needs to cast string "notANumber" to int + confs = Seq(SQLConf.STORE_ASSIGNMENT_POLICY.key -> "LEGACY") + ) + + // Upcasting is always allowed. 
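+ // The loop below registers the same test once per spark.sql.storeAssignmentPolicy value
+ // (LEGACY, ANSI and STRICT), since upcasts are accepted under all of them.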
+ for (storeAssignmentPolicy <- StoreAssignmentPolicy.values) + testEvolution("upcast int source type into long target, storeAssignmentPolicy = " + + s"$storeAssignmentPolicy")( + targetData = Seq((0, 0L), (1, 1L), (3, 3L)).toDF("key", "value"), + sourceData = Seq((1, 1), (2, 2)).toDF("key", "value"), + clauses = update("*") :: insert("*") :: Nil, + expected = + ((0, 0L) +: (1, 1L) +: (2, 2L) +: (3, 3L) +: Nil).toDF("key", "value"), + expectedWithoutEvolution = + ((0, 0L) +: (1, 1L) +: (2, 2L) +: (3, 3L) +: Nil).toDF("key", "value"), + confs = Seq( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> storeAssignmentPolicy.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false") + ) + + // Casts that are not valid implicit casts (e.g. string -> boolean) are never allowed with + // schema evolution enabled and allowed only when storeAssignmentPolicy is LEGACY or ANSI when + // schema evolution is disabled. + for (storeAssignmentPolicy <- StoreAssignmentPolicy.values - StoreAssignmentPolicy.STRICT) + testEvolution("invalid implicit cast string source type into boolean target, " + + s"storeAssignmentPolicy = $storeAssignmentPolicy")( + targetData = Seq((0, true), (1, false), (3, true)).toDF("key", "value"), + sourceData = Seq((1, "true"), (2, "false")).toDF("key", "value"), + clauses = update("*") :: insert("*") :: Nil, + expectErrorContains = "Failed to merge incompatible data types BooleanType and StringType", + expectedWithoutEvolution = ((0, true) +: (1, true) +: (2, false) +: (3, true) +: Nil) + .toDF("key", "value"), + confs = Seq( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> storeAssignmentPolicy.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false") + ) + + // Casts that are not valid implicit casts (e.g. string -> boolean) are not allowed with + // storeAssignmentPolicy = STRICT. + testEvolution("invalid implicit cast string source type into boolean target, " + + s"storeAssignmentPolicy = ${StoreAssignmentPolicy.STRICT}")( + targetData = Seq((0, true), (1, false), (3, true)).toDF("key", "value"), + sourceData = Seq((1, "true"), (2, "false")).toDF("key", "value"), + clauses = update("*") :: insert("*") :: Nil, + expectErrorContains = "Failed to merge incompatible data types BooleanType and StringType", + expectErrorWithoutEvolutionContains = "cannot up cast s.value from \"string\" to \"boolean\"", + confs = Seq( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.STRICT.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false") + ) + + // Valid implicit casts that are not upcasts (e.g. string -> int) are allowed with + // storeAssignmentPolicy = LEGACY or ANSI. + for (storeAssignmentPolicy <- StoreAssignmentPolicy.values - StoreAssignmentPolicy.STRICT) + testEvolution("valid implicit cast string source type into int target, " + + s"storeAssignmentPolicy = ${storeAssignmentPolicy}")( + targetData = Seq((0, 0), (1, 1), (3, 3)).toDF("key", "value"), + sourceData = Seq((1, "1"), (2, "2")).toDF("key", "value"), + clauses = update("*") :: insert("*") :: Nil, + expected = ((0, 0)+: (1, 1) +: (2, 2) +: (3, 3) +: Nil).toDF("key", "value"), + expectedWithoutEvolution = ((0, 0) +: (1, 1) +: (2, 2) +: (3, 3) +: Nil).toDF("key", "value"), + confs = Seq( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> storeAssignmentPolicy.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false") + ) + + // Valid implicit casts that are not upcasts (e.g. 
string -> int) are rejected with + // storeAssignmentPolicy = STRICT. + testEvolution("valid implicit cast string source type into int target, " + + s"storeAssignmentPolicy = ${StoreAssignmentPolicy.STRICT}")( + targetData = Seq((0, 0), (1, 1), (3, 3)).toDF("key", "value"), + sourceData = Seq((1, "1"), (2, "2")).toDF("key", "value"), + clauses = update("*") :: insert("*") :: Nil, + expectErrorContains = "cannot up cast s.value from \"string\" to \"int\"", + expectErrorWithoutEvolutionContains = "cannot up cast s.value from \"string\" to \"int\"", + confs = Seq( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.STRICT.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false") + ) + + // This is kinda bug-for-bug compatibility. It doesn't really make sense that infinity is casted + // to int as Int.MaxValue, but that's the behavior. + testEvolution("write double into int column")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1.1), (2, 2.2), (5, Double.PositiveInfinity)).toDF("key", "value"), + clauses = insert("*") :: Nil, + expected = + ((0, 0) +: (1, 10) +: (2, 2) +: (3, 30) +: (5, Int.MaxValue) +: Nil) + .asInstanceOf[List[(Integer, Integer)]].toDF("key", "value"), + expectedWithoutEvolution = + ((0, 0) +: (1, 10) +: (2, 2) +: (3, 30) +: (5, Int.MaxValue) +: Nil) + .asInstanceOf[List[(Integer, Integer)]].toDF("key", "value"), + // Disable ANSI as this test needs to cast Double.PositiveInfinity to int + confs = Seq(SQLConf.STORE_ASSIGNMENT_POLICY.key -> "LEGACY") + ) + + testEvolution("multiple casts with storeAssignmentPolicy = STRICT")( + targetData = Seq((0L, "0"), (1L, "10"), (3L, "30")).toDF("key", "value"), + sourceData = Seq((1, 1L), (2, 2L)).toDF("key", "value"), + clauses = update("*") :: insert("*") :: Nil, + expected = + ((0L, "0") +: (1L, "1") +: (2L, "2") +: (3L, "30") +: Nil).toDF("key", "value"), + expectedWithoutEvolution = + ((0L, "0") +: (1L, "1") +: (2L, "2") +: (3L, "30") +: Nil).toDF("key", "value"), + confs = Seq( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.STRICT.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false")) + + testEvolution("new column with storeAssignmentPolicy = STRICT")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "one"), (2, 2, "two")).toDF("key", "value", "extra"), + clauses = update("value = CAST(s.value AS short)") :: insert("*") :: Nil, + expected = + ((0, 0, null) +: (1, 1, null) +: (2, 2, "two") +: (3, 30, null) +: Nil) + .toDF("key", "value", "extra"), + expectedWithoutEvolution = + ((0, 0) +: (1, 1) +: (2, 2) +: (3, 30) +: Nil).toDF("key", "value"), + confs = Seq( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.STRICT.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false")) + + testEvolution("extra nested column in source - insert")( + targetData = Seq((1, (1, 10))).toDF("key", "x"), + sourceData = Seq((2, (2, 20, 30))).toDF("key", "x"), + clauses = insert("*") :: Nil, + expected = ((1, (1, 10, null)) +: (2, (2, 20, 30)) +: Nil) + .asInstanceOf[List[(Integer, (Integer, Integer, Integer))]].toDF("key", "x"), + expectErrorWithoutEvolutionContains = "Cannot cast" + ) + + testEvolution("missing nested column in source - insert")( + targetData = Seq((1, (1, 2, 3))).toDF("key", "x"), + sourceData = Seq((2, (2, 3))).toDF("key", "x"), + clauses = insert("*") :: Nil, + expected = ((1, (1, 2, 3)) +: (2, (2, 3, 
null)) +: Nil) + .asInstanceOf[List[(Integer, (Integer, Integer, Integer))]].toDF("key", "x"), + expectErrorWithoutEvolutionContains = "Cannot cast" + ) + + testEvolution("missing nested column resolved by name - insert")( + targetData = Seq((1, 1, 2, 3)).toDF("key", "a", "b", "c") + .selectExpr("key", "named_struct('a', a, 'b', b, 'c', c) as x"), + sourceData = Seq((2, 2, 4)).toDF("key", "a", "c") + .selectExpr("key", "named_struct('a', a, 'c', c) as x"), + clauses = insert("*") :: Nil, + expected = ((1, (1, 2, 3)) +: (2, (2, null, 4)) +: Nil) + .asInstanceOf[List[(Integer, (Integer, Integer, Integer))]].toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'b', x._2, 'c', x._3) as x"), + expectErrorWithoutEvolutionContains = "Cannot cast" + ) + + testEvolution("extra nested column in source - update - single target partition")( + targetData = Seq((1, (1, 10)), (2, (2, 2000))).toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'c', x._2) as x").repartition(1), + sourceData = Seq((1, (10, 100, 1000))).toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'b', x._2, 'c', x._3) as x"), + clauses = update("*") :: Nil, + expected = ((1, (10, 100, 1000)) +: (2, (2, null, 2000)) +: Nil) + .asInstanceOf[List[(Integer, (Integer, Integer, Integer))]].toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'c', x._3, 'b', x._2) as x"), + expectErrorWithoutEvolutionContains = "Cannot cast" + ) + + testEvolution("multiple clauses")( + // 1 and 2 should be updated from the source, 3 and 4 should be deleted. Only 5 is unchanged + targetData = Seq((1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")).toDF("key", "targetVal"), + // 1 and 2 should be updated into the target, 6 and 7 should be inserted. 8 should be ignored + sourceData = Seq((1, "t"), (2, "u"), (3, "v"), (4, "w"), (6, "x"), (7, "y"), (8, "z")) + .toDF("key", "srcVal"), + clauses = + update("targetVal = srcVal", "s.key = 1") :: update("*", "s.key = 2") :: + delete("s.key = 3") :: delete("s.key = 4") :: + insert("(key) VALUES (s.key)", "s.key = 6") :: insert("*", "s.key = 7") :: Nil, + expected = + ((1, "t", null) :: (2, "b", "u") :: (5, "e", null) :: + (6, null, null) :: (7, null, "y") :: Nil) + .asInstanceOf[List[(Integer, String, String)]].toDF("key", "targetVal", "srcVal"), + // The UPDATE * clause won't resolve without evolution because the source and target columns + // don't match. + expectErrorWithoutEvolutionContains = "cannot resolve targetVal" + ) + + testEvolution("multiple INSERT * clauses with UPDATE")( + // 1 and 2 should be updated from the source, 3 and 4 should be deleted. Only 5 is unchanged + targetData = Seq((1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")).toDF("key", "targetVal"), + // 1 and 2 should be updated into the target, 6 and 7 should be inserted. 8 should be ignored + sourceData = Seq((1, "t"), (2, "u"), (3, "v"), (4, "w"), (6, "x"), (7, "y"), (8, "z")) + .toDF("key", "srcVal"), + clauses = + update("targetVal = srcVal", "s.key = 1") :: update("*", "s.key = 2") :: + delete("s.key = 3") :: delete("s.key = 4") :: + insert("*", "s.key = 6") :: insert("*", "s.key = 7") :: Nil, + expected = + ((1, "t", null) :: (2, "b", "u") :: (5, "e", null) :: + (6, null, "x") :: (7, null, "y") :: Nil) + .asInstanceOf[List[(Integer, String, String)]].toDF("key", "targetVal", "srcVal"), + // The UPDATE * clause won't resolve without evolution because the source and target columns + // don't match. 
+ expectErrorWithoutEvolutionContains = "cannot resolve targetVal" + ) + + testEvolution("array of struct should work with containsNull as false")( + Seq(500000).toDF("key"), + Seq(500000, 100000).toDF("key") + .withColumn("generalDeduction", + struct(current_date().as("date"), array(struct(lit(0d).as("data"))))), + update("*") :: insert("*") :: Nil, + Seq(500000, 100000).toDF("key") + .withColumn("generalDeduction", + struct(current_date().as("date"), array(struct(lit(0d).as("data"))))), + Seq(500000, 100000).toDF("key") + ) + + testEvolution("test array_union with schema evolution")( + Seq(1).toDF("key") + .withColumn("openings", + array( + (2010 to 2019).map { i => + struct( + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at"), + lit(null).cast(StringType).as("opened_with"), + lit(s"$i").as("location") + ) + }: _*)), + Seq(1).toDF("key") + .withColumn("openings", + array( + (2020 to 8020).map { i => + struct( + lit(null).cast(StringType).as("opened_with"), + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at") + ) + }: _*)), + update(set = "openings = array_union(s.openings, s.openings)") :: insert("*") :: Nil, + Seq(1).toDF("key") + .withColumn("openings", + array( + (2020 to 8020).map { i => + struct( + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at"), + lit(null).cast(StringType).as("opened_with"), + lit(null).cast(StringType).as("location") + ) + }: _*)), + expectErrorWithoutEvolutionContains = "All nested columns must match" + ) + + testEvolution("test array_intersect with schema evolution")( + Seq(1).toDF("key") + .withColumn("openings", + array( + (2010 to 2019).map { i => + struct( + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at"), + lit(null).cast(StringType).as("opened_with"), + lit(s"$i").as("location") + ) + }: _*)), + Seq(1).toDF("key") + .withColumn("openings", + array( + (2020 to 8020).map { i => + struct( + lit(null).cast(StringType).as("opened_with"), + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at") + ) + }: _*)), + update(set = "openings = array_intersect(s.openings, s.openings)") :: insert("*") :: Nil, + Seq(1).toDF("key") + .withColumn("openings", + array( + (2020 to 8020).map { i => + struct( + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at"), + lit(null).cast(StringType).as("opened_with"), + lit(null).cast(StringType).as("location") + ) + }: _*)), + expectErrorWithoutEvolutionContains = "All nested columns must match" + ) + + testEvolution("test array_except with schema evolution")( + Seq(1).toDF("key") + .withColumn("openings", + array( + (2010 to 2020).map { i => + struct( + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at"), + lit(null).cast(StringType).as("opened_with"), + lit(s"$i").as("location") + ) + }: _*)), + Seq(1).toDF("key") + .withColumn("openings", + array( + (2020 to 8020).map { i => + struct( + lit(null).cast(StringType).as("opened_with"), + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at") + ) + }: _*)), + update(set = "openings = array_except(s.openings, s.openings)") :: insert("*") :: Nil, + Seq(1).toDF("key") + .withColumn( + "openings", + array().cast( + new ArrayType( + new StructType() + .add("opened_at", StringType) + .add("opened_with", StringType) + .add("location", StringType), + true + ) + ) + ), + expectErrorWithoutEvolutionContains = "All nested columns must match" + ) + + testEvolution("test array_remove with schema evolution")( + Seq(1).toDF("key") + .withColumn("openings", + array( + (2010 to 2019).map { i => + struct( + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at"), + lit(null).cast(StringType).as("opened_with"), + 
lit(s"$i").as("location") + ) + }: _*)), + Seq(1).toDF("key") + .withColumn("openings", + array( + (2020 to 8020).map { i => + struct( + lit(null).cast(StringType).as("opened_with"), + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at") + ) + }: _*)), + update( + set = "openings = array_remove(s.openings," + + "named_struct('opened_with', cast(null as string)," + + "'opened_at', '2020-01-19T09:29:00.000+0000'))") :: insert("*") :: Nil, + Seq(1).toDF("key") + .withColumn( + "openings", + array((2021 to 8020).map { i => + struct( + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at"), + lit(null).cast(StringType).as("opened_with"), + lit(null).cast(StringType).as("location") + ) + }: _*)), + expectErrorWithoutEvolutionContains = "All nested columns must match" + ) + + testEvolution("test array_distinct with schema evolution")( + Seq(1).toDF("key") + .withColumn("openings", + array( + (2010 to 2019).map { i => + struct( + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at"), + lit(null).cast(StringType).as("opened_with"), + lit(s"$i").as("location") + ) + }: _* + )), + Seq(1).toDF("key") + .withColumn("openings", + array( + ((2020 to 8020) ++ (2020 to 8020)).map { i => + struct( + lit(null).cast(StringType).as("opened_with"), + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at") + ) + }: _* + )), + update(set = "openings = array_distinct(s.openings)") :: insert("*") :: Nil, + Seq(1).toDF("key") + .withColumn( + "openings", + array((2020 to 8020).map { i => + struct( + lit(s"$i-01-19T09:29:00.000+0000").as("opened_at"), + lit(null).cast(StringType).as("opened_with"), + lit(null).cast(StringType).as("location") + ) + }: _*)), + expectErrorWithoutEvolutionContains = "All nested columns must match" + ) + + testEvolution("void columns are not allowed")( + targetData = Seq((1, 1)).toDF("key", "value"), + sourceData = Seq((1, 100, null), (2, 200, null)).toDF("key", "value", "extra"), + clauses = update("*") :: insert("*") :: Nil, + expectErrorContains = "Cannot add column 'extra' with type 'void'", + expectedWithoutEvolution = Seq((1, 100), (2, 200)).toDF("key", "value") + ) +} + +/** + * Trait collecting tests for schema evolution with a NOT MATCHED BY SOURCE clause. + */ +trait MergeIntoSchemaEvolutionNotMatchedBySourceTests { + self: MergeIntoSchemaEvolutionMixin with MergeIntoTestUtils with SharedSparkSession => + + import testImplicits._ + + // Test schema evolution with NOT MATCHED BY SOURCE clauses. 
+ testEvolution("new column with insert * and conditional update not matched by source")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = insert("*") :: + updateNotMatched(condition = "key > 0", set = "value = value + 1") :: Nil, + expected = Seq( + (0, 0, null), // Not matched by source, no change + (1, 10, null), // Matched, no change + (2, 2, "extra2"), // Not matched by target, inserted + (3, 31, null) // Not matched by source, updated + ).toDF("key", "value", "extra"), + expectedWithoutEvolution = Seq((0, 0), (1, 10), (2, 2), (3, 31)).toDF("key", "value")) + + testEvolution("new column not inserted and conditional update not matched by source")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = updateNotMatched(condition = "key > 0", set = "value = value + 1") :: Nil, + expected = Seq( + (0, 0), // Not matched by source, no change + (1, 10), // Matched, no change + (3, 31) // Not matched by source, updated + ).toDF("key", "value"), + expectedWithoutEvolution = Seq((0, 0), (1, 10), (3, 31)).toDF("key", "value")) + + testEvolution("new column referenced in matched condition but not inserted")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = delete(condition = "extra = 'extra1'") :: + updateNotMatched(condition = "key > 0", set = "value = value + 1") :: Nil, + expected = Seq( + (0, 0), // Not matched by source, no change + // (1, 10), Matched, deleted + (3, 31) // Not matched by source, updated + ).toDF("key", "value"), + expectedWithoutEvolution = Seq((0, 0), (3, 31)).toDF("key", "value")) + + testEvolution("matched update * and conditional update not matched by source")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra1"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = update("*") :: + updateNotMatched(condition = "key > 0", set = "value = value + 1") :: Nil, + expected = Seq( + (0, 0, null), // Not matched by source, no change + (1, 1, "extra1"), // Matched, updated + (3, 31, null) // Not matched by source, updated + ).toDF("key", "value", "extra"), + expectedWithoutEvolution = Seq((0, 0), (1, 1), (3, 31)).toDF("key", "value")) + + // Migrating new column via WHEN NOT MATCHED BY SOURCE is not allowed. + testEvolution("update new column with not matched by source fails")( + targetData = Seq((0, 0), (1, 10), (3, 30)).toDF("key", "value"), + sourceData = Seq((1, 1, "extra3"), (2, 2, "extra2")).toDF("key", "value", "extra"), + clauses = updateNotMatched("extra = s.extra") :: Nil, + expectErrorContains = "cannot resolve extra in UPDATE clause", + expectErrorWithoutEvolutionContains = "cannot resolve extra in UPDATE clause") +} + +/** + * Trait collecting all tests for nested struct evolution. 
+ */ +trait MergeIntoNestedStructEvolutionTests { + self: MergeIntoSchemaEvolutionMixin with MergeIntoTestUtils with SharedSparkSession => + + import testImplicits._ + + private implicit def strToJsonSeq(str: String): Seq[String] = { + str.split("\n").filter(_.trim.length > 0) + } + + // Nested Schema evolution with UPDATE alone + testNestedStructsEvolution("new nested source field not in update is ignored")( + target = """{ "key": "A", "value": { "a": 1 } }""", + source = """{ "key": "A", "value": { "a": 2, "b": 3 } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType)), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType) + .add("b", IntegerType)), + clauses = update("value.a = s.value.a") :: Nil, + result = """{ "key": "A", "value": { "a": 2 } }""", + resultWithoutEvolution = """{ "key": "A", "value": { "a": 2 } }""") + + testNestedStructsEvolution("two new nested source fields with update: one added, one ignored")( + target = """{ "key": "A", "value": { "a": 1 } }""", + source = """{ "key": "A", "value": { "a": 2, "b": 3, "c": 4 } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType)), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType) + .add("b", IntegerType) + .add("c", IntegerType)), + clauses = update("value.b = s.value.b") :: Nil, + result = """{ "key": "A", "value": { "a": 1, "b": 3 } }""", + resultSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType) + .add("b", IntegerType)), + expectErrorWithoutEvolutionContains = "No such struct field") + + // Nested Schema evolution with INSERT alone + testNestedStructsEvolution("new nested source field added when inserting top-level column")( + target = """{ "key": "A", "value": { "a": 1 } }""", + source = """{ "key": "B", "value": { "a": 2, "b": 3 } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType)), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType) + .add("b", IntegerType)), + clauses = insert("(value) VALUES (s.value)") :: Nil, + result = + """{ "key": "A", "value": { "a": 1, "b": null } } + { "key": null, "value": { "a": 2, "b": 3 } }""".stripMargin, + resultSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType) + .add("b", IntegerType)), + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("insert new nested source field not supported")( + target = """{ "key": "A", "value": { "a": 1 } }""", + source = """{ "key": "A", "value": { "a": 2, "b": 3, "c": 4 } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType)), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", IntegerType) + .add("b", IntegerType) + .add("c", IntegerType)), + clauses = insert("(value.b) VALUES (s.value.b)") :: Nil, + expectErrorContains = "Nested field is not supported in the INSERT clause of MERGE operation", + expectErrorWithoutEvolutionContains = "No such struct field") + + // scalastyle:off line.size.limit + testNestedStructsEvolution("new nested column with update non-* and insert * - array of 
struct - longer source")( + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2 }, "b": 1, "c": 2 } ] }""", + source = + """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": 10 } }, { "b": "3", "a": { "y": 30, "x": 20 } }, { "b": "4", "a": { "y": 30, "x": 20 } } ] } + { "key": "B", "value": [ { "b": "3", "a": { "y": 30, "x": 40 } }, { "b": "4", "a": { "y": 30, "x": 40 } } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType)) + .add("b", IntegerType) + .add("c", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType)))), + clauses = update("value = s.value") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20 }, "b": 2, "c": null}, { "a": { "x": 20, "y": 30}, "b": 3, "c": null }, { "a": { "x": 20, "y": 30}, "b": 4, "c": null } ] } + { "key": "B", "value": [ { "a": { "x": 40, "y": 30 }, "b": 3, "c": null }, { "a": { "x": 40, "y": 30}, "b": 4, "c": null } ] }""".stripMargin, + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("new nested column with update non-* and insert * - array of struct - longer target")( + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2 }, "b": 1, "c": 2 }, { "a": { "x": 3, "y": 2 }, "b": 2, "c": 2 } ] }""", + source = + """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": 10 } } ] } + { "key": "B", "value": [ { "b": "3", "a": { "y": 30, "x": 40 } } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType)) + .add("b", IntegerType) + .add("c", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType)))), + clauses = update("value = s.value") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20}, "b": 2, "c": null } ] } + { "key": "B", "value": [ { "a": { "x": 40, "y": 30}, "b": 3, "c": null } ] }""".stripMargin, + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("new nested column with update non-* and insert * - nested array of struct - longer source")( + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3 } ] }, "b": 1, "c": 4 } ] }""", + source = + """{ "key": "A", "value": [ { "b": "2", "a": {"x": [ { "d": "30", "c": 10 }, { "d": "20", "c": 10 }, { "d": "20", "c": 10 } ], "y": 20 } } ] } + { "key": "B", "value": [ { "b": "3", "a": {"x": [ { "d": "50", "c": 20 }, { "d": "20", "c": 10 } ], "y": 60 } } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + ))) + .add("b", IntegerType) + .add("c", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType() + .add("x", ArrayType( + new StructType() + .add("d", StringType) + .add("c", IntegerType) + )) + .add("y", 
IntegerType)))), + clauses = update("value = s.value") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30 }, { "c": 10, "d": 20 }, { "c": 10, "d": 20 } ] }, "b": 2, "c": null}] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": 50 }, { "c": 10, "d": 20 } ] }, "b": 3, "c": null } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("new nested column with update non-* and insert * - nested array of struct - longer target")( + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3}, { "c": 2, "d": 3 } ] }, "b": 1, "c": 4 } ] }""", + source = + """{ "key": "A", "value": [ { "b": "2", "a": {"x": [ { "d": "30", "c": 10 } ], "y": 20 } } ] } + { "key": "B", "value": [ { "b": "3", "a": {"x": [ { "d": "50", "c": 20 } ], "y": 60 } } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + ))) + .add("b", IntegerType) + .add("c", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType() + .add("x", ArrayType( + new StructType() + .add("d", StringType) + .add("c", IntegerType) + )) + .add("y", IntegerType)))), + clauses = update("value = s.value") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30} ] }, "b": 2, "c": null}] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": 50 } ] }, "b": 3, "c": null } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + // scalastyle:on line.size.limit + + testEvolution("new nested-nested column with update non-* and insert *")( + targetData = Seq((1, 1, 2, 3)).toDF("key", "a", "b", "c") + .selectExpr("key", "named_struct('y', named_struct('a', a, 'b', b, 'c', c)) as x"), + sourceData = Seq((1, 10, 30), (2, 20, 40)).toDF("key", "a", "c") + .selectExpr("key", "named_struct('y', named_struct('a', a, 'c', c)) as x"), + clauses = update("x.y.a = s.x.y.a") :: insert("*") :: Nil, + expected = Seq((1, 10, 2, 3), (2, 20, null, 40)) + .asInstanceOf[List[(Integer, Integer, Integer, Integer)]] + .toDF("key", "a", "b", "c") + .selectExpr("key", "named_struct('y', named_struct('a', a, 'b', b, 'c', c)) as x"), + expectErrorWithoutEvolutionContains = "Cannot cast" + ) + + testNestedStructsEvolution("nested void columns are not allowed")( + target = """{ "key": "A", "value": { "a": { "x": 1 }, "b": 1 } }""", + source = """{ "key": "A", "value": { "a": { "x": 2, "z": null } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", + new StructType() + .add("a", new StructType().add("x", IntegerType)) + .add("b", IntegerType)), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", + new StructType() + .add("a", new StructType().add("x", IntegerType).add("z", NullType))), + clauses = update("*") :: Nil, + expectErrorContains = "Cannot add column 'value.a.z' with type 'void'", + expectErrorWithoutEvolutionContains = "All nested columns must match") + + // scalastyle:off line.size.limit + testNestedStructsEvolution("new nested-nested column with update non-* and insert * - array of struct - longer source")( + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2, "z": 3 }, "b": 1 } ] }""", + 
source = + """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": 10 } }, { "b": "3", "a": { "y": 20, "x": 30 } }, { "b": "4", "a": { "y": 20, "x": 30 } } ] } + { "key": "B", "value": [ { "b": "3", "a": { "y": 30, "x": 40 } }, { "b": "4", "a": { "y": 30, "x": 40 } } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType)))), + clauses = update("value = s.value") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "z": null }, "b": 2 }, { "a": { "x": 30, "y": 20, "z": null }, "b": 3}, { "a": { "x": 30, "y": 20, "z": null }, "b": 4 } ] } + { "key": "B", "value": [ { "a": { "x": 40, "y": 30, "z": null }, "b": 3 }, { "a": { "x": 40, "y": 30, "z": null }, "b": 4 } ] }""".stripMargin, + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("new nested-nested column with update non-* and insert * - array of struct - longer target")( + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2, "z": 3 }, "b": 1 }, { "a": { "x": 2, "y": 3, "z": 4 }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": 10 } } ] } + { "key": "B", "value": [ { "b": "3", "a": { "y": 30, "x": 40 } } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType)))), + clauses = update("value = s.value") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [{ "a": { "x": 10, "y": 20, "z": null }, "b": 2 }] } + { "key": "B", "value": [{ "a": { "x": 40, "y": 30, "z": null }, "b": 3 }] }""".stripMargin, + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("new nested-nested column with update non-* and insert * - nested array of struct - longer source")( + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3, "e": 1 } ] }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "b": "2", "a": {"x": [ { "d": "30", "c": 10 }, { "d": "30", "c": 40 }, { "d": "30", "c": 50 } ], "y": 20 } } ] } + { "key": "B", "value": [ { "b": "3", "a": {"x": [ { "d": "50", "c": 20 }, { "d": "50", "c": 30 } ], "y": 60 } } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType() + .add("x", ArrayType( + new StructType() + .add("d", StringType) + .add("c", IntegerType) + )) + .add("y", IntegerType)))), + clauses = update("value = s.value") :: insert("*") :: Nil, + result = + """{ "key": 
"A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30, "e": null }, { "c": 40, "d": 30, "e": null }, { "c": 50, "d": 30, "e": null } ] }, "b": 2 } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": 50, "e": null }, { "c": 30, "d": 50, "e": null } ] }, "b": 3 } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("new nested-nested column with update non-* and insert * - nested array of struct - longer target")( + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3, "e": 1 }, { "c": 2, "d": 3, "e": 4 } ] }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "b": "2", "a": { "x": [ { "d": "30", "c": 10 } ], "y": 20 } } ] } + { "key": "B", "value": [ { "b": "3", "a": { "x": [ { "d": "50", "c": 20 } ], "y": 60 } } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType() + .add("x", ArrayType( + new StructType() + .add("d", StringType) + .add("c", IntegerType) + )) + .add("y", IntegerType)))), + clauses = update("value = s.value") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30, "e": null } ] }, "b": 2 } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": 50, "e": null } ] }, "b": 3 } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + // Struct evolution inside of map values. 
+ testNestedStructsEvolution("new source column in map struct value")( + target = + """{ "key": "A", "map": { "key": { "a": 1 } } } + { "key": "C", "map": { "key": { "a": 3 } } }""", + source = + """{ "key": "A", "map": { "key": { "a": 2, "b": 2 } } } + { "key": "B", "map": { "key": { "a": 1, "b": 2 } } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType).add("b", IntegerType))), + resultSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType).add("b", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{ "key": "A", "map": { "key": { "a": 2, "b": 2 } } } + { "key": "B", "map": { "key": { "a": 1, "b": 2 } } } + { "key": "C", "map": { "key": { "a": 3, "b": null } } }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("new source column in nested map struct value")( + target = + """{"key": "A", "map": { "key": { "innerKey": { "a": 1 } } } } + {"key": "C", "map": { "key": { "innerKey": { "a": 3 } } } }""", + source = + """{"key": "A", "map": { "key": { "innerKey": { "a": 2, "b": 3 } } } } + {"key": "B", "map": { "key": { "innerKey": { "a": 2, "b": 3 } } } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + MapType(StringType, new StructType().add("a", IntegerType)))), + sourceSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + MapType(StringType, new StructType().add("a", IntegerType).add("b", IntegerType)))), + resultSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + MapType(StringType, new StructType().add("a", IntegerType).add("b", IntegerType)))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{"key": "A", "map": { "key": { "innerKey": { "a": 2, "b": 3 } } } } + {"key": "B", "map": { "key": { "innerKey": { "a": 2, "b": 3 } } } } + {"key": "C", "map": { "key": { "innerKey": { "a": 3, "b": null } } } }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("source map struct value contains less columns than target")( + target = + """{ "key": "A", "map": { "key": { "a": 1, "b": 1 } } } + { "key": "C", "map": { "key": { "a": 3, "b": 1 } } }""", + source = + """{ "key": "A", "map": { "key": { "a": 2 } } } + { "key": "B", "map": { "key": { "a": 1 } } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType).add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType))), + resultSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType).add("b", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{ "key": "A", "map": { "key": { "a": 2, "b": null } } } + { "key": "B", "map": { "key": { "a": 1, "b": null } } } + { "key": "C", "map": { "key": { "a": 3, "b": 1 } } }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("source nested map struct value contains less columns than target")( + target = + """{"key": "A", "map": { "key": { 
"innerKey": { "a": 1, "b": 1 } } } } + {"key": "C", "map": { "key": { "innerKey": { "a": 3, "b": 1 } } } }""", + source = + """{"key": "A", "map": { "key": { "innerKey": { "a": 2 } } } } + {"key": "B", "map": { "key": { "innerKey": { "a": 2 } } } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + MapType(StringType, new StructType().add("a", IntegerType).add("b", IntegerType)))), + sourceSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + MapType(StringType, new StructType().add("a", IntegerType)))), + resultSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + MapType(StringType, new StructType().add("a", IntegerType).add("b", IntegerType)))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{"key": "A", "map": { "key": { "innerKey": { "a": 2, "b": null } } } } + {"key": "B", "map": { "key": { "innerKey": { "a": 2, "b": null } } } } + {"key": "C", "map": { "key": { "innerKey": { "a": 3, "b": 1 } } } }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("source nested map struct value contains different type than target")( + target = + """{"key": "A", "map": { "key": { "a": 1, "b" : 1 } } } + {"key": "C", "map": { "key": { "a": 3, "b" : 1 } } }""", + source = + """{"key": "A", "map": { "key": { "a": 1, "b" : "2" } } } + {"key": "B", "map": { "key": { "a": 2, "b" : "2" } } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType).add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType).add("b", StringType))), + resultSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType).add("b", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{"key": "A", "map": { "key": { "a": 1, "b" : 2 } } } + {"key": "B", "map": { "key": { "a": 2, "b" : 2 } } } + {"key": "C", "map": { "key": { "a": 3, "b" : 1 } } }""", + resultWithoutEvolution = + """{"key": "A", "map": { "key": { "a": 1, "b" : 2 } } } + {"key": "B", "map": { "key": { "a": 2, "b" : 2 } } } + {"key": "C", "map": { "key": { "a": 3, "b" : 1 } } }""") + + testNestedStructsEvolution("source nested map struct value in different order")( + target = + """{"key": "A", "map": { "key": { "a" : 1, "b" : 1 } } } + {"key": "C", "map": { "key": { "a" : 3, "b" : 1 } } }""", + source = + """{"key": "A", "map": { "key": { "b" : 2, "a" : 1, "c" : 3 } } } + {"key": "B", "map": { "key": { "b" : 2, "a" : 2, "c" : 4 } } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType).add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType).add("b", IntegerType).add("c", IntegerType))), + resultSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType).add("b", IntegerType).add("c", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{"key": "A", "map": { "key": { "a": 1, "b" : 2, "c" : 3 } } } + {"key": "B", "map": { "key": { "a": 2, "b" : 2, "c" : 4 } } } + {"key": "C", "map": { "key": { "a": 3, "b" : 1, "c" : null } } }""", 
+ expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("source map struct value to map array value")( + target = + """{ "key": "A", "map": { "key": [ 1, 2 ] } } + { "key": "C", "map": { "key": [ 3, 4 ] } }""", + source = + """{ "key": "A", "map": { "key": { "a": 2 } } } + { "key": "B", "map": { "key": { "a": 1 } } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + ArrayType(IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + new StructType().add("a", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + expectErrorContains = "Failed to merge incompatible data types", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("source struct nested in map array values contains more columns in different order")( + target = + """{ "key": "A", "map": { "key": [ { "a": 1, "b": 2 } ] } } + { "key": "C", "map": { "key": [ { "a": 3, "b": 4 } ] } }""", + source = + """{ "key": "A", "map": { "key": [ { "b": 6, "c": 7, "a": 5 } ] } } + { "key": "B", "map": { "key": [ { "b": 9, "c": 10, "a": 8 } ] } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + ArrayType( + new StructType().add("a", IntegerType).add("b", IntegerType)))), + sourceSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + ArrayType( + new StructType().add("a", IntegerType).add("b", IntegerType).add("c", IntegerType)))), + resultSchema = new StructType() + .add("key", StringType) + .add("map", MapType( + StringType, + ArrayType( + new StructType().add("a", IntegerType).add("b", IntegerType).add("c", IntegerType)))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{ "key": "A", "map": { "key": [ { "a": 5, "b": 6, "c": 7 } ] } } + { "key": "B", "map": { "key": [ { "a": 8, "b": 9, "c": 10 } ] } } + { "key": "C", "map": { "key": [ { "a": 3, "b": 4, "c": null } ] } }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + // Struct evolution inside of map keys. 
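+  // The same idea applies to structs used as map keys. A minimal sketch with hypothetical
+  // column types:
+  //
+  //   target x: MAP<STRUCT<a: INT, b: INT>, INT>
+  //   source x: MAP<STRUCT<a: INT, b: INT, c: INT>, INT>
+  //   after MERGE with evolution: MAP<STRUCT<a: INT, b: INT, c: INT>, INT>, with c = null in the
+  //   keys of pre-existing target rows.
+  //
+  // Without evolution the key structs cannot be cast, so these cases expect a cast failure.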
+ testEvolution("new source column in map struct key")( + targetData = Seq((1, 2, 3, 4), (3, 5, 6, 7)).toDF("key", "a", "b", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b), value) as x"), + sourceData = Seq((1, 10, 30, 50, 1), (2, 20, 40, 60, 2)).toDF("key", "a", "b", "c", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b, 'c', c), value) as x"), + clauses = update("*") :: insert("*") :: Nil, + expected = Seq((1, 10, 30, 50, 1), (2, 20, 40, 60, 2), (3, 5, 6, null, 7)) + .asInstanceOf[List[(Integer, Integer, Integer, Integer, Integer)]] + .toDF("key", "a", "b", "c", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b, 'c', c), value) as x"), + expectErrorWithoutEvolutionContains = "Cannot cast" + ) + + testEvolution("source nested map struct key contains less columns than target")( + targetData = Seq((1, 2, 3, 4, 5), (3, 6, 7, 8, 9)).toDF("key", "a", "b", "c", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b, 'c', c), value) as x"), + sourceData = Seq((1, 10, 50, 1), (2, 20, 60, 2)).toDF("key", "a", "c", "value") + .selectExpr("key", "map(named_struct('a', a, 'c', c), value) as x"), + clauses = update("*") :: insert("*") :: Nil, + expected = Seq((1, 10, null, 50, 1), (2, 20, null, 60, 2), (3, 6, 7, 8, 9)) + .asInstanceOf[List[(Integer, Integer, Integer, Integer, Integer)]] + .toDF("key", "a", "b", "c", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b, 'c', c), value) as x"), + expectErrorWithoutEvolutionContains = "Cannot cast" + ) + + testEvolution("source nested map struct key contains different type than target")( + targetData = Seq((1, 2, 3, 4), (3, 5, 6, 7)).toDF("key", "a", "b", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b), value) as x"), + sourceData = Seq((1, 10, "30", 1), (2, 20, "40", 2)).toDF("key", "a", "b", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b), value) as x"), + clauses = update("*") :: insert("*") :: Nil, + expected = Seq((1, 10, 30, 1), (2, 20, 40, 2), (3, 5, 6, 7)) + .asInstanceOf[List[(Integer, Integer, Integer, Integer)]] + .toDF("key", "a", "b", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b), value) as x"), + expectedWithoutEvolution = Seq((1, 10, 30, 1), (2, 20, 40, 2), (3, 5, 6, 7)) + .asInstanceOf[List[(Integer, Integer, Integer, Integer)]] + .toDF("key", "a", "b", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b), value) as x") + ) + + testEvolution("source nested map struct key in different order")( + targetData = Seq((1, 2, 3, 4), (3, 5, 6, 7)).toDF("key", "a", "b", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b), value) as x"), + sourceData = Seq((1, 10, 30, 1), (2, 20, 40, 2)).toDF("key", "a", "b", "value") + .selectExpr("key", "map(named_struct('b', b, 'a', a), value) as x"), + clauses = update("*") :: insert("*") :: Nil, + expected = Seq((1, 30, 10, 1), (2, 40, 20, 2), (3, 5, 6, 7)) + .asInstanceOf[List[(Integer, Integer, Integer, Integer)]] + .toDF("key", "a", "b", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b), value) as x"), + expectedWithoutEvolution = Seq((1, 30, 10, 1), (2, 40, 20, 2), (3, 5, 6, 7)) + .asInstanceOf[List[(Integer, Integer, Integer, Integer)]] + .toDF("key", "a", "b", "value") + .selectExpr("key", "map(named_struct('a', a, 'b', b), value) as x") + ) + + testEvolution( + "source struct nested in map array keys contains more columns")( + targetData = Seq((1, 2, 3, 4), (3, 5, 6, 7)).toDF("key", "a", "b", "value") + .selectExpr("key", "map(array(named_struct('a', a, 'b', 
b)), value) as x"), + sourceData = Seq((1, 10, 30, 50, 1), (2, 20, 40, 60, 2)).toDF("key", "a", "b", "c", "value") + .selectExpr("key", "map(array(named_struct('a', a, 'b', b, 'c', c)), value) as x"), + clauses = update("*") :: insert("*") :: Nil, + expected = Seq((1, 10, 30, 50, 1), (2, 20, 40, 60, 2), (3, 5, 6, null, 7)) + .asInstanceOf[List[(Integer, Integer, Integer, Integer, Integer)]] + .toDF("key", "a", "b", "c", "value") + .selectExpr("key", "map(array(named_struct('a', a, 'b', b, 'c', c)), value) as x"), + expectErrorWithoutEvolutionContains = "cannot cast" + ) + + testEvolution("struct evolution in both map keys and values")( + targetData = Seq((1, 2, 3, 4, 5), (3, 6, 7, 8, 9)).toDF("key", "a", "b", "d", "e") + .selectExpr("key", "map(named_struct('a', a, 'b', b), named_struct('d', d, 'e', e)) as x"), + sourceData = Seq((1, 10, 30, 50, 70, 90, 110), (2, 20, 40, 60, 80, 100, 120)) + .toDF("key", "a", "b", "c", "d", "e", "f") + .selectExpr("key", "map(named_struct('a', a, 'b', b, 'c', c), named_struct('d', d, 'e', e, 'f', f)) as x"), + clauses = update("*") :: insert("*") :: Nil, + expected = Seq((1, 10, 30, 50, 70, 90, 110), (2, 20, 40, 60, 80, 100, 120), (3, 6, 7, null, 8, 9, null)) + .asInstanceOf[List[(Integer, Integer, Integer, Integer, Integer, Integer, Integer)]] + .toDF("key", "a", "b", "c", "d", "e", "f") + .selectExpr("key", "map(named_struct('a', a, 'b', b, 'c', c), named_struct('d', d, 'e', e, 'f', f)) as x"), + expectErrorWithoutEvolutionContains = "cannot cast" + ) + // scalastyle:on line.size.limit + + // Note that the obvious dual of this test, "update * and insert non-*", doesn't exist + // because nested columns can't be explicitly INSERTed to. + testEvolution("new nested column with update non-* and insert *")( + targetData = Seq((1, 1, 2, 3)).toDF("key", "a", "b", "c") + .selectExpr("key", "named_struct('a', a, 'b', b, 'c', c) as x"), + sourceData = Seq((1, 10, 30), (2, 20, 40)).toDF("key", "a", "c") + .selectExpr("key", "named_struct('a', a, 'c', c) as x"), + clauses = update("x.a = s.x.a") :: insert("*") :: Nil, + expected = Seq((1, 10, 2, 3), (2, 20, null, 40)) + .asInstanceOf[List[(Integer, Integer, Integer, Integer)]] + .toDF("key", "a", "b", "c") + .selectExpr("key", "named_struct('a', a, 'b', b, 'c', c) as x"), + expectErrorWithoutEvolutionContains = "Cannot cast" + ) + + // scalastyle:off line.size.limit + testNestedStructsEvolution("missing nested column resolved by name - insert - array of struct")( + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2, "z": 1 }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "x": 10, "z": 20 }, "b": "2" } ] } + { "key": "B", "value": [ { "a": { "x": 40, "z": 30 }, "b": "3" }, { "a": { "x": 40, "z": 30 }, "b": "4" }, { "a": { "x": 40, "z": 30 }, "b": "5" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("z", IntegerType)) + .add("b", StringType))), + clauses = insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2, "z": 1 }, "b": 1 } ] } + { "key": "B", "value": [ { "a": { "x": 40, "y": null, "z": 30 }, "b": 3 }, { "a": { "x": 40, "y": null, "z": 30 }, "b": 4 }, { "a": { "x": 40, "y": null, "z": 30 }, 
"b": 5 } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("missing nested column resolved by name - insert - nested array of struct")( + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3, "e": 1 } ] }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "e": "30" } ] }, "b": "2" } ] } + { "key": "B", "value": [ {"a": {"y": 60, "x": [ { "c": 20, "e": "50" }, { "c": 20, "e": "60" }, { "c": 20, "e": "80" } ] }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("e", StringType) + ))) + .add("b", StringType))), + clauses = insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3, "e": 1 } ] }, "b": 1 } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": null, "e": 50 }, { "c": 20, "d": null, "e": 60 }, { "c": 20, "d": null, "e": 80 } ] }, "b": 3 } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + // scalastyle:on line.size.limit + + testEvolution("additional nested column in source resolved by name - insert")( + targetData = Seq((1, 10, 30)).toDF("key", "a", "c") + .selectExpr("key", "named_struct('a', a, 'c', c) as x"), + sourceData = Seq((2, 20, 30, 40)).toDF("key", "a", "b", "c") + .selectExpr("key", "named_struct('a', a, 'b', b, 'c', c) as x"), + clauses = insert("*") :: Nil, + expected = ((1, (10, null, 30)) +: ((2, (20, 30, 40)) +: Nil)) + .asInstanceOf[List[(Integer, (Integer, Integer, Integer))]].toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'c', x._3, 'b', x._2) as x"), + expectErrorWithoutEvolutionContains = "Cannot cast" + ) + + // scalastyle:off line.size.limit + testNestedStructsEvolution("additional nested column in source resolved by name - insert - array of struct")( + target = """{ "key": "A", "value": [ { "a": { "x": 1, "z": 2 }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "z": 2 }, "b": "2 "} ] } + { "key": "B", "value": [ {"a": { "x": 40, "y": 30, "z": 3 }, "b": "3" }, {"a": { "x": 40, "y": 30, "z": 3 }, "b": "4" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType).add("y", IntegerType).add("z", IntegerType)) + .add("b", StringType))), + clauses = insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "x": 1, "z": 2, "y": null }, "b": 1 } ] } + { "key": "B", "value": [ { "a": { "x": 40, "z": 3, "y": 30 }, "b": 3 }, { "a": { "x": 40, "z": 3, "y": 30 }, "b": 4 } ] }""", + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("z", IntegerType) + .add("y", IntegerType)) + 
.add("b", IntegerType))), + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("additional nested column in source resolved by name - insert - nested array of struct")( + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "e": 3 } ] }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": "30", "e": 1 } ] }, "b": "2" } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": "50", "e": 2 }, { "c": 20, "d": "50", "e": 3 } ] }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + .add("e", IntegerType) + ))) + .add("b", StringType))), + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("e", IntegerType) + .add("d", StringType) + ))) + .add("b", IntegerType))), + clauses = insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "e": 3, "d": null } ] }, "b": 1 } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "e": 2, "d": "50" }, { "c": 20, "e": 3, "d": "50" } ] }, "b": 3 } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + // scalastyle:on line.size.limit + + for (isPartitioned <- BOOLEAN_DOMAIN) + testEvolution(s"extra nested column in source - update, isPartitioned=$isPartitioned")( + targetData = Seq((1, (1, 10)), (2, (2, 2000))).toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'c', x._2) as x"), + sourceData = Seq((1, (10, 100, 1000))).toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'b', x._2, 'c', x._3) as x"), + clauses = update("*") :: Nil, + expected = ((1, (10, 100, 1000)) +: (2, (2, null, 2000)) +: Nil) + .asInstanceOf[List[(Integer, (Integer, Integer, Integer))]].toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'c', x._3, 'b', x._2) as x"), + expectErrorWithoutEvolutionContains = "Cannot cast", + partitionCols = if (isPartitioned) Seq("key") else Seq.empty + ) + + testEvolution("extra nested column in source - update, partition on unused column")( + targetData = Seq((1, 2, (1, 10)), (2, 2, (2, 2000))).toDF("key", "part", "x") + .selectExpr("part", "key", "named_struct('a', x._1, 'c', x._2) as x"), + sourceData = Seq((1, 2, (10, 100, 1000))).toDF("key", "part", "x") + .selectExpr("key", "part", "named_struct('a', x._1, 'b', x._2, 'c', x._3) as x"), + clauses = update("*") :: Nil, + expected = ((1, 2, (10, 100, 1000)) +: (2, 2, (2, null, 2000)) +: Nil) + .asInstanceOf[List[(Integer, Integer, (Integer, Integer, Integer))]].toDF("key", "part", "x") + .selectExpr("part", "key", "named_struct('a', x._1, 'c', x._3, 'b', x._2) as x"), + expectErrorWithoutEvolutionContains = "Cannot cast", + partitionCols = Seq("part") + ) + + // scalastyle:off line.size.limit + testNestedStructsEvolution("extra nested column in source - update - array of struct - longer source")( + target = + """{ "key": "A", 
"value": [ { "a": { "x": 1, "y": 2 }, "b": 1 } ] } + { "key": "B", "value": [ { "a": { "x": 40, "y": 30 }, "b": 3 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "z": 2 }, "b": "2" }, { "a": { "x": 10, "y": 20, "z": 2 }, "b": "3" }, { "a": { "x": 10, "y": 20, "z": 2 }, "b": "4" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType).add("y", IntegerType).add("z", IntegerType)) + .add("b", StringType))), + clauses = update("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "z": 2 }, "b": 2 }, { "a": { "x": 10, "y": 20, "z": 2 }, "b": 3 }, { "a": { "x": 10, "y": 20, "z": 2 }, "b": 4 } ] } + { "key": "B", "value": [ { "a": { "x": 40, "y": 30, "z": null }, "b": 3 } ] }""", + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("extra nested column in source - update - array of struct - longer target")( + target = + """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2 }, "b": 1 }, { "a": { "x": 1, "y": 2 }, "b": 2 } ] } + { "key": "B", "value": [ { "a": { "x": 40, "y": 30 }, "b": 3 }, { "a": { "x": 40, "y": 30 }, "b": 4 }, { "a": { "x": 40, "y": 30 }, "b": 5 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "z": 2 }, "b": "2" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType).add("y", IntegerType).add("z", IntegerType)) + .add("b", StringType))), + clauses = update("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "z": 2 }, "b": 2 } ] } + { "key": "B", "value": [ { "a": { "x": 40, "y": 30, "z": null }, "b": 3 }, { "a": { "x": 40, "y": 30, "z": null }, "b": 4 }, { "a": { "x": 40, "y": 30, "z": null }, "b": 5 } ] }""".stripMargin, + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("extra nested column in source - update - nested array of struct - longer source")( + target = + """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3 } ] }, "b": 1 } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": 50 } ] }, "b": 3 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": "30", "e": 1 }, { "c": 10, "d": "30", "e": 2 }, { "c": 10, "d": "30", "e": 3 } ] }, "b": 2 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + 
.add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + .add("e", IntegerType) + ))) + .add("b", StringType))), + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + clauses = update("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30, "e": 1 }, { "c": 10, "d": 30, "e": 2 }, { "c": 10, "d": 30, "e": 3 } ] }, "b": 2 } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": 50, "e": null } ] }, "b": 3 } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("extra nested column in source - update - nested array of struct - longer target")( + target = + """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3 }, { "c": 1, "d": 2 } ] }, "b": 1 } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": 50 }, { "c": 20, "d": 40 }, { "c": 20, "d": 60 } ] }, "b": 3 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": "30", "e": 1 } ] }, "b": "2" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + .add("e", IntegerType) + ))) + .add("b", StringType))), + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + clauses = update("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30, "e": 1 } ] }, "b": 2 } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": 50, "e": null }, { "c": 20, "d": 40, "e": null }, { "c": 20, "d": 60, "e": null } ] }, "b": 3 } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + // scalastyle:on line.size.limit + + testEvolution("missing nested column in source - update")( + targetData = Seq((1, (1, 10, 100)), (2, (2, 20, 200))).toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'b', x._2, 'c', x._3) as x"), + sourceData = Seq((1, (0, 0))).toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'c', x._2) as x"), + clauses = update("*") :: Nil, + expected = ((1, (0, 10, 0)) +: (2, (2, 20, 200)) +: Nil).toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'b', x._2, 'c', x._3) as x"), + expectErrorWithoutEvolutionContains = "Cannot cast" + ) + + // scalastyle:off line.size.limit + 
testNestedStructsEvolution("missing nested column in source - update - array of struct - longer source")( + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2, "z": 3 }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "x": 10, "z": 2 }, "b": "2" }, { "a": { "x": 10, "z": 2 }, "b": "3" }, { "a": { "x": 10, "z": 2 }, "b": "4" } ] } + { "key": "B", "value": [ { "a": { "x": 40, "z": 3 }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType).add("z", IntegerType)) + .add("b", StringType))), + clauses = update("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": null, "z": 2 }, "b": 2 }, { "a": { "x": 10, "y": null, "z": 2 }, "b": 3 }, { "a": { "x": 10, "y": null, "z": 2 }, "b": 4 } ] }""", + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + expectErrorWithoutEvolutionContains = "Cannot cast") + + // scalastyle:off line.size.limit + testNestedStructsEvolution("missing nested column in source - update - array of struct - longer target")( + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2, "z": 3 }, "b": 1 }, { "a": { "x": 1, "y": 2, "z": 3 }, "b": 2 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "x": 10, "z": 2 }, "b": "2" } ] } + { "key": "B", "value": [ { "a": { "x": 40, "z": 3 }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType).add("z", IntegerType)) + .add("b", StringType))), + clauses = update("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": null, "z": 2 }, "b": 2 } ] }""", + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("missing nested column in source - update - nested array of struct - longer source")( + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3, "e": 4 } ] }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "a": {"y": 20, "x": [ { "c": 10, "e": 1 }, { "c": 10, "e": 2 }, { "c": 10, "e": 3 } ] }, "b": "2" } ] } + { "key": "B", "value": [ { "a": {"y": 60, "x": [{ "c": 20, "e": 2 } ] }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", 
StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("e", IntegerType) + ))) + .add("b", StringType))), + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + clauses = update("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": null, "e": 1 }, { "c": 10, "d": null, "e": 2 }, { "c": 10, "d": null, "e": 3} ] }, "b": 2 } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("missing nested column in source - update - nested array of struct - longer target")( + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3, "e": 4 }, { "c": 1, "d": 3, "e": 5 } ] }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "e": 1 } ] }, "b": "2" } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "e": 2 } ] }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("e", IntegerType) + ))) + .add("b", StringType))), + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + clauses = update("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": null, "e": 1 } ] }, "b": 2 } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + // scalastyle:on line.size.limit + + testEvolution("nested columns resolved by name with same column count but different names")( + targetData = Seq((1, 1, 2, 3)).toDF("key", "a", "b", "c") + .selectExpr("key", "struct(a, b, c) as x"), + sourceData = Seq((1, 10, 20, 30), (2, 20, 30, 40)).toDF("key", "a", "b", "d") + .selectExpr("key", "struct(a, b, d) as x"), + clauses = update("*") :: insert("*") :: Nil, + // We evolve to the schema (key, x.{a, b, c, d}). + expected = ((1, (10, 20, 3, 30)) +: (2, (20, 30, null, 40)) +: Nil) + .asInstanceOf[List[(Integer, (Integer, Integer, Integer, Integer))]] + .toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'b', x._2, 'c', x._3, 'd', x._4) as x"), + expectErrorWithoutEvolutionContains = "All nested columns must match." 
+ ) + + // scalastyle:off line.size.limit + testNestedStructsEvolution("nested columns resolved by name with same column count but different names - array of struct - longer source")( + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2, "o": 4 }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "z": 2 }, "b": "2" }, { "a": { "x": 10, "y": 20, "z": 2 }, "b": "3" }, { "a": { "x": 10, "y": 20, "z": 2 }, "b": "4" } ] } + { "key": "B", "value": [ {"a": { "x": 40, "y": 30, "z": 3 }, "b": "3" }, {"a": { "x": 40, "y": 30, "z": 3 }, "b": "4" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("o", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType).add("y", IntegerType).add("z", IntegerType)) + .add("b", StringType))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "o": null, "z": 2 }, "b": 2 }, { "a": { "x": 10, "y": 20, "o": null, "z": 2 }, "b": 3 }, { "a": { "x": 10, "y": 20, "o": null, "z": 2 }, "b": 4 } ] } + { "key": "B", "value": [ {"a": { "x": 40, "y": 30, "o": null, "z": 3 }, "b": 3 }, {"a": { "x": 40, "y": 30, "o": null, "z": 3 }, "b": 4 } ] }""", + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("o", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("nested columns resolved by name with same column count but different names - array of struct - longer target")( + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2, "o": 4 }, "b": 1 }, { "a": { "x": 1, "y": 2, "o": 4 }, "b": 2 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "z": 2 }, "b": "2" } ] } + { "key": "B", "value": [ {"a": { "x": 40, "y": 30, "z": 3 }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("o", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType).add("y", IntegerType).add("z", IntegerType)) + .add("b", StringType))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "o": null, "z": 2 }, "b": 2 } ] } + { "key": "B", "value": [ {"a": { "x": 40, "y": 30, "o": null, "z": 3 }, "b": 3 } ] }""", + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("o", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType))), + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("nested columns resolved by name with same column count but different names - nested array of struct - longer source")( + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3, "f": 4 } ] }, "b": 1 } ] }""", + 
source = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": "30", "e": 1 }, { "c": 10, "d": "30", "e": 2 }, { "c": 10, "d": "30", "e": 3 } ] }, "b": "2" } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": "50", "e": 2 }, { "c": 20, "d": "50", "e": 3 } ] }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("f", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + .add("e", IntegerType) + ))) + .add("b", StringType))), + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("f", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30, "f": null, "e": 1 }, { "c": 10, "d": 30, "f": null, "e": 2 }, { "c": 10, "d": 30, "f": null, "e": 3 } ] }, "b": 2 } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": 50, "f": null, "e": 2 }, { "c": 20, "d": 50, "f": null, "e": 3 } ] }, "b": 3} ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("nested columns resolved by name with same column count but different names - nested array of struct - longer target")( + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3, "f": 4 }, { "c": 1, "d": 3, "f": 4 } ] }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": "30", "e": 1 } ] }, "b": "2" } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": "50", "e": 2 } ] }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("f", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + .add("e", IntegerType) + ))) + .add("b", StringType))), + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("f", IntegerType) + .add("e", IntegerType) + ))) + .add("b", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30, "f": null, "e": 1 } ] }, "b": 2 } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": 50, "f": null, "e": 2 } ] }, "b": 3} ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + // 
scalastyle:on line.size.limit + + testEvolution("nested columns resolved by position with same column count but different names")( + targetData = Seq((1, 1, 2, 3)).toDF("key", "a", "b", "c") + .selectExpr("key", "struct(a, b, c) as x"), + sourceData = Seq((1, 10, 20, 30), (2, 20, 30, 40)).toDF("key", "a", "b", "d") + .selectExpr("key", "struct(a, b, d) as x"), + clauses = update("*") :: insert("*") :: Nil, + expectErrorContains = "cannot cast", + expectedWithoutEvolution = ((1, (10, 20, 30)) +: (2, (20, 30, 40)) +: Nil) + .asInstanceOf[List[(Integer, (Integer, Integer, Integer))]] + .toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'b', x._2, 'c', x._3) as x"), + confs = (DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME.key, "false") +: Nil + ) + + // scalastyle:off line.size.limit + testNestedStructsEvolution("nested columns resolved by position with same column count but different names - array of struct - longer source")( + target = """{ "key": "A", "value": [{ "a": { "x": 1, "y": 2, "o": 4 }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "z": 2 }, "b": "2" }, { "a": { "x": 10, "y": 20, "z": 3 }, "b": "2" }, { "a": { "x": 10, "y": 20, "z": 3 }, "b": "3" } ] } + { "key": "B", "value": [ { "a": { "x": 40, "y": 30, "z": 3 }, "b": "3" }, { "a": { "x": 40, "y": 30, "z": 3 }, "b": "4" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("o", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType).add("y", IntegerType).add("z", IntegerType)) + .add("b", StringType))), + clauses = update("*") :: insert("*") :: Nil, + resultWithoutEvolution = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20, "o": 2 }, "b": 2 }, { "a": { "x": 10, "y": 20, "o": 3 }, "b": 2 }, { "a": { "x": 10, "y": 20, "o": 3 }, "b": 3 } ] } + { "key": "B", "value": [ {"a": { "x": 40, "y": 30, "o": 3 }, "b": 3 }, {"a": { "x": 40, "y": 30, "o": 3 }, "b": 4 } ] }""", + expectErrorContains = "cannot cast", + confs = (DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME.key, "false") +: Nil) + + testNestedStructsEvolution("nested columns resolved by position with same column count but different names - array of struct - longer target")( + target = """{ "key": "A", "value": [{ "a": { "x": 1, "y": 2, "o": 4 }, "b": 1}, { "a": { "x": 1, "y": 2, "o": 4 }, "b": 2}] }""", + source = + """{ "key": "A", "value": [{ "a": { "x": 10, "y": 20, "z": 2 }, "b": "2" } ] } + { "key": "B", "value": [{"a": { "x": 40, "y": 30, "z": 3 }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("o", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("x", IntegerType).add("y", IntegerType).add("z", IntegerType)) + .add("b", StringType))), + clauses = update("*") :: insert("*") :: Nil, + resultWithoutEvolution = + """{ "key": "A", "value": [{ "a": { "x": 10, "y": 20, "o": 2}, "b": 2}] } + { "key": "B", "value": [{"a": { "x": 40, "y": 30, "o": 3}, "b": 3}] }""", + expectErrorContains = "cannot cast", + confs = 
(DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME.key, "false") +: Nil) + + testNestedStructsEvolution("nested columns resolved by position with same column count but different names - nested array of struct - longer source")( + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3, "f": 4 } ] }, "b": 1 } ] }""", + source = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": "30", "e": 1 }, { "c": 10, "d": "30", "e": 2 }, { "c": 10, "d": "30", "e": 3} ] }, "b": "2" } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": "50", "e": 2 }, { "c": 20, "d": "50", "e": 3 } ] }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("f", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + .add("e", IntegerType) + ))) + .add("b", StringType))), + clauses = update("*") :: insert("*") :: Nil, + resultWithoutEvolution = + """{ "key": "A", "value": [ { "a": {"y": 20, "x": [ { "c": 10, "d": 30, "f": 1 }, { "c": 10, "d": 30, "f": 2 }, { "c": 10, "d": 30, "f": 3 } ] }, "b": 2 } ] } + { "key": "B", "value": [ { "a": {"y": 60, "x": [ { "c": 20, "d": 50, "f": 2 }, { "c": 20, "d": 50, "f": 3 } ] }, "b": 3}]}""", + expectErrorContains = "cannot cast", + confs = (DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME.key, "false") +: Nil) + + testNestedStructsEvolution("nested columns resolved by position with same column count but different names - nested array of struct - longer target")( + target = """{ "key": "A", "value": [{ "a": { "y": 2, "x": [ { "c": 1, "d": 3, "f": 5 }, { "c": 1, "d": 3, "f": 6 } ] }, "b": 1}] }""", + source = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": "30", "e": 1 } ] }, "b": "2" } ] } + { "key": "B", "value": [ { "a": { "y": 60, "x": [ { "c": 20, "d": "50", "e": 2 } ] }, "b": "3" } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType) + .add("f", IntegerType) + ))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + .add("e", IntegerType) + ))) + .add("b", StringType))), + clauses = update("*") :: insert("*") :: Nil, + resultWithoutEvolution = + """{ "key": "A", "value": [ { "a": {"y": 20, "x": [ { "c": 10, "d": 30, "f": 1 } ] }, "b": 2 } ] } + { "key": "B", "value": [ { "a": {"y": 60, "x": [ { "c": 20, "d": 50, "f": 2 } ] }, "b": 3 } ] }""", + expectErrorContains = "cannot cast", + confs = (DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME.key, "false") +: Nil) + // scalastyle:on line.size.limit + + testEvolution("struct in different order")( + targetData = Seq((1, (1, 10, 100)), (2, (2, 20, 200))).toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'b', x._2, 'c', x._3) as x"), + 
sourceData = Seq((1, (100, 10, 1)), (3, (300, 30, 3))).toDF("key", "x") + .selectExpr("key", "named_struct('c', x._1, 'b', x._2, 'a', x._3) as x"), + clauses = update("*") :: insert("*") :: Nil, + expected = ((1, (1, 10, 100)) +: (2, (2, 20, 200)) +: (3, (3, 30, 300)) +: Nil).toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'b', x._2, 'c', x._3) as x"), + expectedWithoutEvolution = + ((1, (1, 10, 100)) +: (2, (2, 20, 200)) +: (3, (3, 30, 300)) +: Nil).toDF("key", "x") + .selectExpr("key", "named_struct('a', x._1, 'b', x._2, 'c', x._3) as x") + ) + + // scalastyle:off line.size.limit + testNestedStructsEvolution("struct in different order - array of struct")( + target = """{ "key": "A", "value": [{ "a": { "x": 1, "y": 2 }, "b": 1 }] }""", + source = """{ "key": "A", "value": [{ "b": "2", "a": { "y": 20, "x": 10}}, { "b": "3", "a": { "y": 30, "x": 40}}] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType)))), + clauses = update("*") :: insert("*") :: Nil, + result = """{ "key": "A", "value": [{ "a": { "x": 10, "y": 20 }, "b": 2 }, { "a": { "y": 30, "x": 40}, "b": 3 }] }""", + resultWithoutEvolution = """{ "key": "A", "value": [{ "a": { "x": 10, "y": 20 }, "b": 2 }, { "a": { "y": 30, "x": 40}, "b": 3 }] }""") + + testNestedStructsEvolution("struct in different order - nested array of struct")( + target = """{ "key": "A", "value": [{ "a": { "y": 2, "x": [{ "c": 1, "d": 3}]}, "b": 1 }] }""", + source = """{ "key": "A", "value": [{ "b": "2", "a": {"x": [{ "d": "30", "c": 10}, { "d": "40", "c": 3}], "y": 20}}]}""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType)))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType() + .add("x", ArrayType( + new StructType() + .add("d", StringType) + .add("c", IntegerType) + )) + .add("y", IntegerType)))), + clauses = update("*") :: insert("*") :: Nil, + result = """{ "key": "A", "value": [{ "a": { "y": 20, "x": [{ "c": 10, "d": 30}, { "c": 3, "d": 40}]}, "b": 2 }]}""", + resultWithoutEvolution = """{ "key": "A", "value": [{ "a": { "y": 20, "x": [{ "c": 10, "d": 30}, { "c": 3, "d": 40}]}, "b": 2 }]}""") + // scalastyle:on line.size.limit + + testNestedStructsEvolution("array of struct with same columns but in different order" + + " which can be casted implicitly - by name")( + target = """{ "key": "A", "value": [ { "a": 1, "b": 2 } ] }""", + source = + """{ "key": "A", "value": [ { "b": 4, "a": 3 } ] } + { "key": "B", "value": [ { "b": 2, "a": 5 } ] }""".stripMargin, + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", IntegerType) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", IntegerType) + .add("a", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + result = """{ "key": "A", "value": 
[ { "a": 3, "b": 4 } ] } + { "key": "B", "value": [ { "a": 5, "b": 2 } ] }""".stripMargin, + resultWithoutEvolution = """{ "key": "A", "value": [ { "a": 3, "b": 4 } ] } + { "key": "B", "value": [ { "a": 5, "b": 2 } ] }""".stripMargin) + + testNestedStructsEvolution("array of struct with same columns but in different order" + + " which can be casted implicitly - by position")( + target = """{ "key": "A", "value": [ { "a": 1, "b": 2 } ] }""", + source = """{ "key": "A", "value": [ { "b": 4, "a": 3 } ] } + { "key": "B", "value": [ { "b": 2, "a": 5 } ] }""".stripMargin, + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", IntegerType) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", IntegerType) + .add("a", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + result = + """{ "key": "A", "value": [ { "a": 4, "b": 3 } ] } + { "key": "B", "value": [ { "a": 2, "b": 5 } ] }""".stripMargin, + resultWithoutEvolution = + """{ "key": "A", "value": [ { "a": 4, "b": 3 } ] } + { "key": "B", "value": [ { "a": 2, "b": 5 } ] }""".stripMargin, + confs = (DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME.key, "false") +: Nil) + + testNestedStructsEvolution("array of struct with same column count but all different names" + + " - by name")( + target = """{ "key": "A", "value": [ { "a": 1, "b": 2 } ] }""", + source = + """{ "key": "A", "value": [ { "c": 4, "d": 3 } ] } + { "key": "B", "value": [ { "c": 2, "d": 5 } ] }""".stripMargin, + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", IntegerType) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + resultSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", IntegerType) + .add("b", IntegerType) + .add("c", IntegerType) + .add("d", IntegerType))), + result = + """{ "key": "A", "value": [ { "a": null, "b": null, "c": 4, "d": 3 } ] } + { "key": "B", "value": [ { "a": null, "b": null, "c": 2, "d": 5 } ] }""".stripMargin, + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("array of struct with same column count but all different names" + + " - by position")( + target = """{ "key": "A", "value": [ { "a": 1, "b": 2 } ] }""", + source = """{ "key": "A", "value": [ { "c": 4, "d": 3 } ] } + { "key": "B", "value": [ { "c": 2, "d": 5 } ] }""".stripMargin, + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", IntegerType) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + resultWithoutEvolution = + """{ "key": "A", "value": [ { "a": 4, "b": 3 } ] } + { "key": "B", "value": [ { "a": 2, "b": 5 } ] }""".stripMargin, + expectErrorContains = " cannot cast", + confs = (DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME.key, "false") +: Nil) + + testNestedStructsEvolution("array of struct with same columns but in different order" + + " which cannot be casted implicitly - by name")( + target = """{ "key": "A", "value": [ { 
"a": {"c" : 1}, "b": 2 } ] }""", + source = """{ "key": "A", "value": [ { "b": 4, "a": {"c" : 3 } } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType().add("c", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", IntegerType) + .add("a", new StructType().add("c", IntegerType)))), + clauses = update("*") :: insert("*") :: Nil, + result = """{ "key": "A", "value": [ { "a": { "c" : 3 }, "b": 4 } ] }""", + resultWithoutEvolution = """{ "key": "A", "value": [ { "a": { "c" : 3 }, "b": 4 } ] }""") + + testNestedStructsEvolution("array of struct with same columns but in different order" + + " which cannot be casted implicitly - by position")( + target = """{ "key": "A", "value": [ { "a": {"c" : 1}, "b": 2 } ] }""", + source = """{ "key": "A", "value": [ { "b": 4, "a": {"c" : 3 } } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType().add("c", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", IntegerType) + .add("a", new StructType().add("c", IntegerType)))), + clauses = update("*") :: insert("*") :: Nil, + expectErrorContains = " cannot cast", + expectErrorWithoutEvolutionContains = " cannot cast", + confs = (DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME.key, "false") +: Nil) + + testNestedStructsEvolution("array of struct with additional column in target - by name")( + target = """{ "key": "A", "value": [ { "a": 1, "b": 2, "c": 3 } ] }""", + source = """{ "key": "A", "value": [ { "b": 4, "a": 3 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", IntegerType) + .add("b", IntegerType) + .add("c", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", IntegerType) + .add("a", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + result = """{ "key": "A", "value": [ { "a": 3, "b": 4, "c": null } ] }""", + expectErrorWithoutEvolutionContains = "Cannot cast") + + testNestedStructsEvolution("array of struct with additional column in target - by position")( + target = """{ "key": "A", "value": [ { "a": 1, "b": 2, "c": 3 } ] }""", + source = """{ "key": "A", "value": [ { "b": 4, "a": 3 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", IntegerType) + .add("b", IntegerType) + .add("c", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", IntegerType) + .add("a", IntegerType))), + clauses = update("*") :: insert("*") :: Nil, + expectErrorContains = " cannot cast", + expectErrorWithoutEvolutionContains = "cannot cast", + confs = (DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME.key, "false") +: Nil) + + testNestedStructsEvolution("add non-nullable column to target schema")( + target = """{ "key": "A" }""", + source = """{ "key": "B", "value": 4}""", + targetSchema = new StructType() + .add("key", StringType), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", IntegerType, nullable = false), + clauses = update("*") :: Nil, + result = """{ "key": "A", "value": 
null }""".stripMargin, + resultSchema = new StructType() + .add("key", StringType) + .add("value", IntegerType, nullable = false), + resultWithoutEvolution = """{ "key": "A" }""") + + testNestedStructsEvolution("struct in array with storeAssignmentPolicy = STRICT")( + target = """{ "key": "A", "value": [ { "a": 1 } ] }""", + source = """{ "key": "A", "value": [ { "a": 2 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", + ArrayType(new StructType() + .add("a", LongType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", + ArrayType(new StructType() + .add("a", IntegerType))), + clauses = update("*") :: Nil, + result = """{ "key": "A", "value": [ { "a": 2 } ] }""", + resultWithoutEvolution = """{ "key": "A", "value": [ { "a": 2 } ] }""", + confs = Seq( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.STRICT.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false")) +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSuiteBase.scala new file mode 100644 index 00000000000..cdead65864e --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSuiteBase.scala @@ -0,0 +1,3349 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.io.File +import java.lang.{Integer => JInt} +import java.util.Locale + +import scala.language.implicitConversions + +import com.databricks.spark.util.{Log4jUsageLogger, MetricDefinitions, UsageRecord} +import org.apache.spark.sql.delta.commands.merge.MergeStats +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.ScanReportHelper +import org.apache.spark.sql.delta.util.JsonUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{functions, AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeArrayData} +import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecution +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + +abstract class MergeIntoSuiteBase + extends QueryTest + with SharedSparkSession + with SQLTestUtils + with ScanReportHelper + with DeltaTestUtilsForTempViews + with MergeIntoTestUtils + with MergeIntoSchemaEvolutionMixin + with MergeIntoSchemaEvolutionAllTests { + import testImplicits._ + + Seq(true, false).foreach { isPartitioned => + test(s"basic case - merge to Delta table by path, isPartitioned: $isPartitioned") { + withTable("source") { + val partitions = if (isPartitioned) "key2" :: Nil else Nil + append(Seq((2, 2), (1, 4)).toDF("key2", "value"), partitions) + Seq((1, 1), (0, 3)).toDF("key1", "value").createOrReplaceTempView("source") + + executeMerge( + target = s"delta.`$tempPath`", + source = "source src", + condition = "src.key1 = key2", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(2, 2) :: // No change + Row(21, 21) :: // Update + Row(-10, 13) :: // Insert + Nil) + } + } + } + + Seq(true, false).foreach { skippingEnabled => + Seq(true, false).foreach { partitioned => + Seq(true, false).foreach { useSQLView => + test("basic case - merge to view on a Delta table by path, " + + s"partitioned: $partitioned skippingEnabled: $skippingEnabled useSqlView: $useSQLView") { + withTable("delta_target", "source") { + withSQLConf(DeltaSQLConf.DELTA_STATS_SKIPPING.key -> skippingEnabled.toString) { + Seq((1, 1), (0, 3), (1, 6)).toDF("key1", "value").createOrReplaceTempView("source") + val partitions = if (partitioned) "key2" :: Nil else Nil + append(Seq((2, 2), (1, 4)).toDF("key2", "value"), partitions) + if (useSQLView) { + sql(s"CREATE OR REPLACE TEMP VIEW delta_target AS " + + s"SELECT * FROM delta.`$tempPath` t") + } else { + readDeltaTable(tempPath).createOrReplaceTempView("delta_target") + } + + executeMerge( + target = "delta_target", + source = "source src", + condition = "src.key1 = key2 AND src.value < delta_target.value", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(sql("SELECT key2, value FROM delta_target"), + Row(2, 2) :: // No change + Row(21, 21) :: // Update + Row(-10, 13) :: // Insert + Row(-9, 16) :: // Insert + Nil) + } + } + } + } + } + } + + Seq(true, false).foreach { skippingEnabled => + Seq(true, false).foreach { isPartitioned => + test("basic case - merge to Delta table by name, " + + s"isPartitioned: $isPartitioned skippingEnabled: $skippingEnabled") { + withTable("delta_target", "source") { + 
withSQLConf(DeltaSQLConf.DELTA_STATS_SKIPPING.key -> skippingEnabled.toString) { + Seq((1, 1), (0, 3), (1, 6)).toDF("key1", "value").createOrReplaceTempView("source") + val partitionByClause = if (isPartitioned) "PARTITIONED BY (key2)" else "" + sql( + s""" + |CREATE TABLE delta_target(key2 INT, value INT) + |USING delta + |OPTIONS('path'='$tempPath') + |$partitionByClause + """.stripMargin) + + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + + executeMerge( + target = "delta_target", + source = "source src", + condition = "src.key1 = key2 AND src.value < delta_target.value", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(sql("SELECT key2, value FROM delta_target"), + Row(2, 2) :: // No change + Row(21, 21) :: // Update + Row(-10, 13) :: // Insert + Row(-9, 16) :: // Insert + Nil) + } + } + } + } + } + + test("basic case - update value from both source and target table") { + withTable("source") { + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + Seq((1, 1), (0, 3)).toDF("key1", "value").createOrReplaceTempView("source") + + executeMerge( + target = s"delta.`$tempPath` as trgNew", + source = "source src", + condition = "src.key1 = key2", + update = "key2 = 20 + key2, value = trgNew.value + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(2, 2) :: // No change + Row(21, 5) :: // Update + Row(-10, 13) :: // Insert + Nil) + } + } + + test("basic case - columns are specified in wrong order") { + withTable("source") { + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + Seq((1, 1), (0, 3)).toDF("key1", "value").createOrReplaceTempView("source") + + executeMerge( + target = s"delta.`$tempPath` as trgNew", + source = "source src", + condition = "src.key1 = key2", + update = "value = trgNew.value + src.value, key2 = 20 + key2", + insert = "(value, key2) VALUES (src.value + 10, key1 - 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(2, 2) :: // No change + Row(21, 5) :: // Update + Row(-10, 13) :: // Insert + Nil) + } + } + + test("basic case - not all columns are specified in update") { + withTable("source") { + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + Seq((1, 1), (0, 3)).toDF("key1", "value").createOrReplaceTempView("source") + + executeMerge( + target = s"delta.`$tempPath` as trgNew", + source = "source src", + condition = "src.key1 = key2", + update = "value = trgNew.value + 3", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(2, 2) :: // No change + Row(1, 7) :: // Update + Row(-10, 13) :: // Insert + Nil) + } + } + + test("basic case - multiple inserts") { + withTable("source") { + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + Seq((1, 1), (0, 3), (3, 5)).toDF("key1", "value").createOrReplaceTempView("source") + + executeMerge( + tgt = s"delta.`$tempPath` as trgNew", + src = "source src", + cond = "src.key1 = key2", + insert(condition = "key1 = 0", values = "(key2, value) VALUES (src.key1, src.value + 3)"), + insert(values = "(key2, value) VALUES (src.key1 - 10, src.value + 10)")) + + checkAnswer(readDeltaTable(tempPath), + Row(2, 2) :: // No change + Row(1, 4) :: // No change + Row(0, 6) :: // Insert + Row(-7, 15) :: // Insert + Nil) + } + } + + test("basic case - upsert with only rows inserted") { + withTable("source") { + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + Seq((1, 1), (0, 3)).toDF("key1", 
"value").createOrReplaceTempView("source") + + executeMerge( + tgt = s"delta.`$tempPath` as trgNew", + src = "source src", + cond = "src.key1 = key2", + update(condition = "key2 = 5", set = "value = src.value + 3"), + insert(values = "(key2, value) VALUES (src.key1 - 10, src.value + 10)")) + + checkAnswer(readDeltaTable(tempPath), + Row(2, 2) :: // No change + Row(1, 4) :: // No change + Row(-10, 13) :: // Insert + Nil) + } + } + + protected def testNullCase(name: String)( + target: Seq[(JInt, JInt)], + source: Seq[(JInt, JInt)], + condition: String, + expectedResults: Seq[(JInt, JInt)]) = { + Seq(true, false).foreach { isPartitioned => + test(s"basic case - null handling - $name, isPartitioned: $isPartitioned") { + withView("sourceView") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(target.toDF("key", "value"), partitions) + source.toDF("key", "value").createOrReplaceTempView("sourceView") + + executeMerge( + target = s"delta.`$tempPath` as t", + source = "sourceView s", + condition = condition, + update = "t.value = s.value", + insert = "(t.key, t.value) VALUES (s.key, s.value)") + + checkAnswer( + readDeltaTable(tempPath), + expectedResults.map { r => Row(r._1, r._2) } + ) + + Utils.deleteRecursively(new File(tempPath)) + } + } + } + } + + testNullCase("null value in target")( + target = Seq((null, null), (1, 1)), + source = Seq((1, 10), (2, 20)), + condition = "s.key = t.key", + expectedResults = Seq( + (null, null), // No change + (1, 10), // Update + (2, 20) // Insert + )) + + testNullCase("null value in source")( + target = Seq((1, 1)), + source = Seq((1, 10), (2, 20), (null, null)), + condition = "s.key = t.key", + expectedResults = Seq( + (1, 10), // Update + (2, 20), // Insert + (null, null) // Insert + )) + + testNullCase("null value in both source and target")( + target = Seq((1, 1), (null, null)), + source = Seq((1, 10), (2, 20), (null, 0)), + condition = "s.key = t.key", + expectedResults = Seq( + (null, null), // No change as null in source does not match null in target + (1, 10), // Update + (2, 20), // Insert + (null, 0) // Insert + )) + + testNullCase("null value in both source and target + IS NULL in condition")( + target = Seq((1, 1), (null, null)), + source = Seq((1, 10), (2, 20), (null, 0)), + condition = "s.key = t.key AND s.key IS NULL", + expectedResults = Seq( + (null, null), // No change as s.key != t.key + (1, 1), // No change as s.key is not null + (null, 0), // Insert + (1, 10), // Insert + (2, 20) // Insert + )) + + testNullCase("null value in both source and target + IS NOT NULL in condition")( + target = Seq((1, 1), (null, null)), + source = Seq((1, null), (2, 20), (null, 0)), + condition = "s.key = t.key AND t.value IS NOT NULL", + expectedResults = Seq( + (null, null), // No change as t.value is null + (1, null), // Update as t.value is not null + (null, 0), // Insert + (2, 20) // Insert + )) + + testNullCase("null value in both source and target + <=> in condition")( + target = Seq((1, 1), (null, null)), + source = Seq((1, 10), (2, 20), (null, 0)), + condition = "s.key <=> t.key", + expectedResults = Seq( + (null, 0), // Update + (1, 10), // Update + (2, 20) // Insert + )) + + testNullCase("NULL in condition")( + target = Seq((1, 1), (null, null)), + source = Seq((1, 10), (2, 20), (null, 0)), + condition = "s.key = t.key AND NULL", + expectedResults = Seq( + (null, null), // No change as NULL condition did not match anything + (1, 1), // No change as NULL condition did not match anything + (null, 0), // Insert + (1, 10), // 
Insert + (2, 20) // Insert + )) + + test("basic case - only insert") { + withTable("source") { + Seq((5, 5)).toDF("key1", "value").createOrReplaceTempView("source") + append(Seq.empty[(Int, Int)].toDF("key2", "value")) + + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(-5, 15) :: // Insert + Nil) + } + } + + test("basic case - both source and target are empty") { + withTable("source") { + Seq.empty[(Int, Int)].toDF("key1", "value").createOrReplaceTempView("source") + append(Seq.empty[(Int, Int)].toDF("key2", "value")) + + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(readDeltaTable(tempPath), Nil) + } + } + + test("basic case - only update") { + withTable("source") { + Seq((1, 5), (2, 9)).toDF("key1", "value").createOrReplaceTempView("source") + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(21, 25) :: // Update + Row(22, 29) :: // Update + Nil) + } + } + + test("same column names in source and target") { + withTable("source") { + Seq((1, 5), (2, 9)).toDF("key", "value").createOrReplaceTempView("source") + append(Seq((2, 2), (1, 4)).toDF("key", "value")) + + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key = target.key", + update = "target.key = 20 + src.key, target.value = 20 + src.value", + insert = "(key, value) VALUES (src.key - 10, src.value + 10)") + + checkAnswer( + readDeltaTable(tempPath), + Row(21, 25) :: // Update + Row(22, 29) :: // Update + Nil) + } + } + + test("Source is a query") { + withTable("source") { + Seq((1, 6, "a"), (0, 3, "b")).toDF("key1", "value", "others") + .createOrReplaceTempView("source") + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + + executeMerge( + target = s"delta.`$tempPath` as trg", + source = "(SELECT key1, value, others FROM source) src", + condition = "src.key1 = trg.key2", + update = "trg.key2 = 20 + key1, value = 20 + src.value", + insert = "(trg.key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer( + readDeltaTable(tempPath), + Row(2, 2) :: // No change + Row(21, 26) :: // Update + Row(-10, 13) :: // Insert + Nil) + + withCrossJoinEnabled { + executeMerge( + target = s"delta.`$tempPath` as trg", + source = "(SELECT 5 as key1, 5 as value) src", + condition = "src.key1 = trg.key2", + update = "trg.key2 = 20 + key1, value = 20 + src.value", + insert = "(trg.key2, value) VALUES (key1 - 10, src.value + 10)") + } + + checkAnswer(readDeltaTable(tempPath), + Row(2, 2) :: + Row(21, 26) :: + Row(-10, 13) :: + Row(-5, 15) :: // new row + Nil) + } + } + + test("self merge") { + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + + executeMerge( + target = s"delta.`$tempPath` as target", + source = s"delta.`$tempPath` as src", + condition = "src.key2 = target.key2", + update = "key2 = 20 + src.key2, value = 20 + src.value", + insert = "(key2, value) 
VALUES (src.key2 - 10, src.value + 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(22, 22) :: // UPDATE + Row(21, 24) :: // UPDATE + Nil) + } + + test("order by + limit in source query #1") { + withTable("source") { + Seq((1, 6, "a"), (0, 3, "b")).toDF("key1", "value", "others") + .createOrReplaceTempView("source") + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + + executeMerge( + target = s"delta.`$tempPath` as trg", + source = "(SELECT key1, value, others FROM source order by key1 limit 1) src", + condition = "src.key1 = trg.key2", + update = "trg.key2 = 20 + key1, value = 20 + src.value", + insert = "(trg.key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(1, 4) :: // No change + Row(2, 2) :: // No change + Row(-10, 13) :: // Insert + Nil) + } + } + + test("order by + limit in source query #2") { + withTable("source") { + Seq((1, 6, "a"), (0, 3, "b")).toDF("key1", "value", "others") + .createOrReplaceTempView("source") + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + + executeMerge( + target = s"delta.`$tempPath` as trg", + source = "(SELECT key1, value, others FROM source order by value DESC limit 1) src", + condition = "src.key1 = trg.key2", + update = "trg.key2 = 20 + key1, value = 20 + src.value", + insert = "(trg.key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(2, 2) :: // No change + Row(21, 26) :: // UPDATE + Nil) + } + } + + testQuietly("Negative case - more than one source rows match the same target row") { + withTable("source") { + Seq((1, 1), (0, 3), (1, 5)).toDF("key1", "value").createOrReplaceTempView("source") + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + + val e = intercept[Exception] { + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + }.toString + + val expectedEx = DeltaErrors.multipleSourceRowMatchingTargetRowInMergeException(spark) + assert(e.contains(expectedEx.getMessage)) + } + } + + test("More than one target rows match the same source row") { + withTable("source") { + Seq((1, 5), (2, 9)).toDF("key1", "value").createOrReplaceTempView("source") + append(Seq((2, 2), (1, 4)).toDF("key2", "value"), Seq("key2")) + + withCrossJoinEnabled { + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "key1 = 1", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + } + + checkAnswer(readDeltaTable(tempPath), + Row(-8, 19) :: // Insert + Row(21, 25) :: // Update + Row(21, 25) :: // Update + Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"Merge table using different data types - implicit casting, parts: $isPartitioned") { + withTable("source") { + Seq((1, "5"), (3, "9"), (3, "a")).toDF("key1", "value").createOrReplaceTempView("source") + val partitions = if (isPartitioned) "key2" :: Nil else Nil + append(Seq((2, 2), (1, 4)).toDF("key2", "value"), partitions) + + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "key1 = key2", + update = "key2 = 33 + cast(key2 as double), value = '20'", + insert = "(key2, value) VALUES ('44', try_cast(src.value as double) + 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(44, 19) :: // Insert + // NULL is generated when the type casting does not work for some 
values) + Row(44, null) :: // Insert + Row(34, 20) :: // Update + Row(2, 2) :: // No change + Nil) + } + } + } + + def errorNotContains(errMsg: String, str: String): Unit = { + assert(!errMsg.toLowerCase(Locale.ROOT).contains(str.toLowerCase(Locale.ROOT))) + } + + + test("Negative case - basic syntax analysis") { + withTable("source") { + Seq((1, 1), (0, 3)).toDF("key1", "value").createOrReplaceTempView("source") + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + + // insert expressions have target table reference + var e = intercept[AnalysisException] { + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key2 = key1, value = src.value", + insert = "(key2, value) VALUES (3, src.value + key2)") + }.getMessage + + errorContains(e, "cannot resolve key2") + + // to-update columns have source table reference + e = intercept[AnalysisException] { + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key1 = 1, value = 2", + insert = "(key2, value) VALUES (3, 4)") + }.getMessage + + errorContains(e, "Cannot resolve key1 in UPDATE clause") + errorContains(e, "key2") // should show key2 as a valid name in target columns + + // to-insert columns have source table reference + e = intercept[AnalysisException] { + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key2 = 1, value = 2", + insert = "(key1, value) VALUES (3, 4)") + }.getMessage + + errorContains(e, "Cannot resolve key1 in INSERT clause") + errorContains(e, "key2") // should contain key2 as a valid name in target columns + + // ambiguous reference + e = intercept[AnalysisException] { + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key2 = 1, value = value", + insert = "(key2, value) VALUES (3, 4)") + }.getMessage + + Seq("value", "is ambiguous", "could be").foreach(x => errorContains(e, x)) + + // non-deterministic search condition + e = intercept[AnalysisException] { + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2 and rand() > 0.5", + update = "key2 = 1, value = 2", + insert = "(key2, value) VALUES (3, 4)") + }.getMessage + + errorContains(e, "Non-deterministic functions are not supported in the search condition") + + // aggregate function + e = intercept[AnalysisException] { + executeMerge( + target = s"delta.`$tempPath` as target", + source = "source src", + condition = "src.key1 = target.key2 and max(target.key2) > 20", + update = "key2 = 1, value = 2", + insert = "(key2, value) VALUES (3, 4)") + }.getMessage + + errorContains(e, "Aggregate functions are not supported in the search condition") + } + } + + test("Merge should use the same SparkSession consistently") { + withTempDir { dir => + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> "false") { + val r = dir.getCanonicalPath + val sourcePath = s"$r/source" + val targetPath = s"$r/target" + val numSourceRecords = 20 + spark.range(numSourceRecords) + .withColumn("x", $"id") + .withColumn("y", $"id") + .write.mode("overwrite").format("delta").save(sourcePath) + spark.range(1) + .withColumn("x", $"id") + .write.mode("overwrite").format("delta").save(targetPath) + val spark2 = spark.newSession + spark2.conf.set(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key, "true") + val 
target = io.delta.tables.DeltaTable.forPath(spark2, targetPath) + val source = spark.read.format("delta").load(sourcePath).alias("s") + val merge = target.alias("t") + .merge(source, "t.id = s.id") + .whenMatched.updateExpr(Map("t.x" -> "t.x + 1")) + .whenNotMatched.insertAll() + .execute() + // The target table should have the same number of rows as the source after the merge + assert(spark.read.format("delta").load(targetPath).count() == numSourceRecords) + } + } + } + + // Enable this test in OSS when Spark has the change to report better errors + // when MERGE is not supported. + ignore("Negative case - non-delta target") { + withTable("source", "target") { + Seq((1, 1), (0, 3), (1, 5)).toDF("key1", "value").createOrReplaceTempView("source") + Seq((1, 1), (0, 3), (1, 5)).toDF("key2", "value").write.saveAsTable("target") + + val e = intercept[AnalysisException] { + executeMerge( + target = "target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + }.getMessage + assert(e.contains("does not support MERGE") || + // The MERGE Scala API is for Delta only and reports error differently. + e.contains("is not a Delta table") || + e.contains("MERGE destination only supports Delta sources")) + } + } + + test("Negative case - update assignments conflict because " + + "same column with different references") { + withTable("source") { + Seq((1, 1), (0, 3), (1, 5)).toDF("key1", "value").createOrReplaceTempView("source") + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + val e = intercept[AnalysisException] { + executeMerge( + target = s"delta.`$tempPath` as t", + source = "source s", + condition = "s.key1 = t.key2", + update = "key2 = key1, t.key2 = key1", + insert = "(key2, value) VALUES (3, 4)") + }.getMessage + + errorContains(e, "there is a conflict from these set columns") + } + } + + test("Negative case - more operations between merge and delta target") { + withTempView("source", "target") { + Seq((1, 1), (0, 3), (1, 5)).toDF("key1", "value").createOrReplaceTempView("source") + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + spark.read.format("delta").load(tempPath).filter("value <> 0").createTempView("target") + + val e = intercept[AnalysisException] { + executeMerge( + target = "target", + source = "source src", + condition = "src.key1 = target.key2", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + }.getMessage + errorContains(e, "Expect a full scan of Delta sources, but found a partial scan") + } + } + + test("Negative case - MERGE to the child directory") { + val df = Seq((1, 1), (0, 3), (1, 5)).toDF("key2", "value") + val partitions = "key2" :: Nil + append(df, partitions) + + val e = intercept[AnalysisException] { + executeMerge( + target = s"delta.`$tempPath/key2=1` target", + source = "(SELECT 5 as key1, 5 as value) src", + condition = "src.key1 = target.key2", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + }.getMessage + errorContains(e, "Expect a full scan of Delta sources, but found a partial scan") + } + + test(s"special character in path - matched delete") { + val source = s"$tempDir/sou rce~" + val target = s"$tempDir/tar get>" + spark.range(0, 10, 2).write.format("delta").save(source) + spark.range(10).write.format("delta").save(target) + executeMerge( + tgt = s"delta.`$target` t", + src = 
s"delta.`$source` s", + cond = "t.id = s.id", + clauses = delete()) + checkAnswer(readDeltaTable(target), Seq(1, 3, 5, 7, 9).toDF("id")) + } + + test(s"special character in path - matched update") { + val source = s"$tempDir/sou rce(" + val target = s"$tempDir/tar get*" + spark.range(0, 10, 2).write.format("delta").save(source) + spark.range(10).write.format("delta").save(target) + executeMerge( + tgt = s"delta.`$target` t", + src = s"delta.`$source` s", + cond = "t.id = s.id", + clauses = update(set = "id = t.id * 10")) + checkAnswer(readDeltaTable(target), Seq(0, 1, 20, 3, 40, 5, 60, 7, 80, 9).toDF("id")) + } + + Seq(true, false).foreach { isPartitioned => + test(s"single file, isPartitioned: $isPartitioned") { + withTable("source") { + val df = spark.range(5).selectExpr("id as key1", "id as key2", "id as col1").repartition(1) + + val partitions = if (isPartitioned) "key1" :: "key2" :: Nil else Nil + append(df, partitions) + + df.createOrReplaceTempView("source") + + executeMerge( + target = s"delta.`$tempPath` target", + source = "(SELECT key1 as srcKey, key2, col1 FROM source where key1 < 3) AS source", + condition = "srcKey = target.key1", + update = "target.key1 = srcKey - 1000, target.key2 = source.key2 + 1000, " + + "target.col1 = source.col1", + insert = "(key1, key2, col1) VALUES (srcKey, source.key2, source.col1)") + + checkAnswer(readDeltaTable(tempPath), + Row(-998, 1002, 2) :: // Update + Row(-999, 1001, 1) :: // Update + Row(-1000, 1000, 0) :: // Update + Row(4, 4, 4) :: // No change + Row(3, 3, 3) :: // No change + Nil) + } + } + } + + protected def testLocalPredicates(name: String)( + target: Seq[(String, String, String)], + source: Seq[(String, String)], + condition: String, + expectedResults: Seq[(String, String, String)], + numFilesPerPartition: Int = 2) = { + Seq(true, false).foreach { isPartitioned => + test(s"$name, isPartitioned: $isPartitioned") { withTable("source") { + val partitions = if (isPartitioned) "key2" :: Nil else Nil + append(target.toDF("key2", "value", "op").repartition(numFilesPerPartition), partitions) + source.toDF("key1", "value").createOrReplaceTempView("source") + + // Local predicates are likely to be pushed down leading empty join conditions + // and cross-join being used + withCrossJoinEnabled { executeMerge( + target = s"delta.`$tempPath` trg", + source = "source src", + condition = condition, + update = "key2 = src.key1, value = src.value, op = 'update'", + insert = "(key2, value, op) VALUES (src.key1, src.value, 'insert')") + } + + checkAnswer( + readDeltaTable(tempPath), + expectedResults.map { r => Row(r._1, r._2, r._3) } + ) + + Utils.deleteRecursively(new File(tempPath)) + } + }} + } + + testLocalPredicates("basic case - local predicates - predicate has no matches, only inserts")( + target = Seq(("2", "2", "noop"), ("1", "4", "noop"), ("3", "2", "noop"), ("4", "4", "noop")), + source = Seq(("1", "8"), ("0", "3")), + condition = "src.key1 = key2 and key2 != '1'", + expectedResults = + ("2", "2", "noop") :: + ("1", "4", "noop") :: + ("3", "2", "noop") :: + ("4", "4", "noop") :: + ("1", "8", "insert") :: + ("0", "3", "insert") :: + Nil) + + testLocalPredicates("basic case - local predicates - predicate has matches, updates and inserts")( + target = Seq(("1", "2", "noop"), ("1", "4", "noop"), ("3", "2", "noop"), ("4", "4", "noop")), + source = Seq(("1", "8"), ("0", "3")), + condition = "src.key1 = key2 and key2 < '3'", + expectedResults = + ("3", "2", "noop") :: + ("4", "4", "noop") :: + ("1", "8", "update") :: + ("1", "8", "update") :: + 
("0", "3", "insert") :: + Nil) + + testLocalPredicates("basic case - local predicates - predicate has matches, only updates")( + target = Seq(("1", "2", "noop"), ("1", "4", "noop"), ("3", "2", "noop"), ("4", "4", "noop")), + source = Seq(("1", "8")), + condition = "key2 < '3'", + expectedResults = + ("3", "2", "noop") :: + ("4", "4", "noop") :: + ("1", "8", "update") :: + ("1", "8", "update") :: + Nil) + + testLocalPredicates("basic case - local predicates - always false predicate, only inserts")( + target = Seq(("1", "2", "noop"), ("1", "4", "noop"), ("3", "2", "noop"), ("4", "4", "noop")), + source = Seq(("1", "8"), ("0", "3")), + condition = "1 != 1", + expectedResults = + ("1", "2", "noop") :: + ("1", "4", "noop") :: + ("3", "2", "noop") :: + ("4", "4", "noop") :: + ("1", "8", "insert") :: + ("0", "3", "insert") :: + Nil) + + testLocalPredicates("basic case - local predicates - always true predicate, all updated")( + target = Seq(("1", "2", "noop"), ("1", "4", "noop"), ("3", "2", "noop"), ("4", "4", "noop")), + source = Seq(("1", "8")), + condition = "1 = 1", + expectedResults = + ("1", "8", "update") :: + ("1", "8", "update") :: + ("1", "8", "update") :: + ("1", "8", "update") :: + Nil) + + testLocalPredicates("basic case - local predicates - single file, updates and inserts")( + target = Seq(("1", "2", "noop"), ("1", "4", "noop"), ("3", "2", "noop"), ("4", "4", "noop")), + source = Seq(("1", "8"), ("3", "10"), ("0", "3")), + condition = "src.key1 = key2 and key2 < '3'", + expectedResults = + ("3", "2", "noop") :: + ("4", "4", "noop") :: + ("1", "8", "update") :: + ("1", "8", "update") :: + ("0", "3", "insert") :: + ("3", "10", "insert") :: + Nil, + numFilesPerPartition = 1 + ) + + Seq(true, false).foreach { isPartitioned => + test(s"basic case - column pruning, isPartitioned: $isPartitioned") { + withTable("source") { + val partitions = if (isPartitioned) "key2" :: Nil else Nil + append(Seq((2, 2), (1, 4)).toDF("key2", "value"), partitions) + Seq((1, 1, "a"), (0, 3, "b")).toDF("key1", "value", "col1") + .createOrReplaceTempView("source") + + executeMerge( + target = s"delta.`$tempPath`", + source = "source src", + condition = "src.key1 = key2", + update = "key2 = 20 + key1, value = 20 + src.value", + insert = "(key2, value) VALUES (key1 - 10, src.value + 10)") + + checkAnswer(readDeltaTable(tempPath), + Row(2, 2) :: // No change + Row(21, 21) :: // Update + Row(-10, 13) :: // Insert + Nil) + } + } + } + + test("merge into cached table") { + // Merge with a cached target only works in the join-based implementation right now + withTable("source") { + append(Seq((2, 2), (1, 4)).toDF("key2", "value")) + Seq((1, 1), (0, 3), (3, 3)).toDF("key1", "value").createOrReplaceTempView("source") + spark.table(s"delta.`$tempPath`").cache() + spark.table(s"delta.`$tempPath`").collect() + + append(Seq((100, 100), (3, 5)).toDF("key2", "value")) + // cache is in effect, as the above change is not reflected + checkAnswer(spark.table(s"delta.`$tempPath`"), + Row(2, 2) :: Row(1, 4) :: Row(100, 100) :: Row(3, 5) :: Nil) + + executeMerge( + target = s"delta.`$tempPath` as trgNew", + source = "source src", + condition = "src.key1 = key2", + update = "value = trgNew.value + 3", + insert = "(key2, value) VALUES (key1, src.value + 10)") + + checkAnswer(spark.table(s"delta.`$tempPath`"), + Row(100, 100) :: // No change (newly inserted record) + Row(2, 2) :: // No change + Row(1, 7) :: // Update + Row(3, 8) :: // Update (on newly inserted record) + Row(0, 13) :: // Insert + Nil) + } + } + + // scalastyle:off 
argcount + def testNestedDataSupport(name: String, namePrefix: String = "nested data support")( + source: String, + target: String, + update: Seq[String], + insert: String = null, + targetSchema: StructType = null, + sourceSchema: StructType = null, + result: String = null, + errorStrs: Seq[String] = null, + confs: Seq[(String, String)] = Seq.empty): Unit = { + // scalastyle:on argcount + + require(result == null ^ errorStrs == null, "either set the result or the error strings") + + val testName = + if (result != null) s"$namePrefix - $name" else s"$namePrefix - analysis error - $name" + + test(testName) { + withSQLConf(confs: _*) { + withJsonData(source, target, targetSchema, sourceSchema) { case (sourceName, targetName) => + val fieldNames = spark.table(targetName).schema.fieldNames + val fieldNamesStr = fieldNames.mkString("`", "`, `", "`") + val keyName = s"`${fieldNames.head}`" + + def execMerge() = executeMerge( + target = s"$targetName t", + source = s"$sourceName s", + condition = s"s.$keyName = t.$keyName", + update = update.mkString(", "), + insert = Option(insert).getOrElse(s"($fieldNamesStr) VALUES ($fieldNamesStr)")) + + if (result != null) { + execMerge() + val expectedDf = readFromJSON(strToJsonSeq(result), targetSchema) + checkAnswer(spark.table(targetName), expectedDf) + } else { + val e = intercept[AnalysisException] { + execMerge() + } + errorStrs.foreach { s => errorContains(e.getMessage, s) } + } + } + } + } + } + + testNestedDataSupport("no update when not matched, only insert")( + source = """ + { "key": { "x": "X3", "y": 3}, "value": { "a": 300, "b": "B300" } }""", + target = """ + { "key": { "x": "X1", "y": 1}, "value": { "a": 1, "b": "B1" } } + { "key": { "x": "X2", "y": 2}, "value": { "a": 2, "b": "B2" } }""", + update = "value.b = 'UPDATED'" :: Nil, + result = """ + { "key": { "x": "X1", "y": 1}, "value": { "a": 1, "b": "B1" } } + { "key": { "x": "X2", "y": 2}, "value": { "a": 2, "b": "B2" } } + { "key": { "x": "X3", "y": 3}, "value": { "a": 300, "b": "B300" } }""") + + testNestedDataSupport("update entire nested column")( + source = """ + { "key": { "x": "X1", "y": 1}, "value": { "a": 100, "b": "B100" } }""", + target = """ + { "key": { "x": "X1", "y": 1}, "value": { "a": 1, "b": "B1" } } + { "key": { "x": "X2", "y": 2}, "value": { "a": 2, "b": "B2" } }""", + update = "value = s.value" :: Nil, + result = """ + { "key": { "x": "X1", "y": 1}, "value": { "a": 100, "b": "B100" } } + { "key": { "x": "X2", "y": 2}, "value": { "a": 2, "b": "B2" } }""") + + testNestedDataSupport("update one nested field")( + source = """ + { "key": { "x": "X1", "y": 1}, "value": { "a": 100, "b": "B100" } }""", + target = """ + { "key": { "x": "X1", "y": 1}, "value": { "a": 1, "b": "B1" } } + { "key": { "x": "X2", "y": 2}, "value": { "a": 2, "b": "B2" } }""", + update = "value.b = s.value.b" :: Nil, + result = """ + { "key": { "x": "X1", "y": 1}, "value": { "a": 1, "b": "B100" } } + { "key": { "x": "X2", "y": 2}, "value": { "a": 2, "b": "B2" } }""") + + testNestedDataSupport("update multiple fields at different levels")( + source = """ + { "key": { "x": "X1", "y": { "i": 1.0 } }, "value": { "a": 100, "b": "B100" } }""", + target = """ + { "key": { "x": "X1", "y": { "i": 1.0 } }, "value": { "a": 1, "b": "B1" } } + { "key": { "x": "X2", "y": { "i": 2.0 } }, "value": { "a": 2, "b": "B2" } }""", + update = + "key.x = 'XXX'" :: "key.y.i = 9000" :: + "value = named_struct('a', 9000, 'b', s.value.b)" :: Nil, + result = """ + { "key": { "x": "XXX", "y": { "i": 9000 } }, "value": { "a": 9000, 
"b": "B100" } } + { "key": { "x": "X2" , "y": { "i": 2.0 } }, "value": { "a": 2, "b": "B2" } }""") + + testNestedDataSupport("update multiple fields at different levels to NULL")( + source = """ + { "key": { "x": "X1", "y": { "i": 1.0 } }, "value": { "a": 100, "b": "B100" } }""", + target = """ + { "key": { "x": "X1", "y": { "i": 1.0 } }, "value": { "a": 1, "b": "B1" } } + { "key": { "x": "X2", "y": { "i": 2.0 } }, "value": { "a": 2, "b": "B2" } }""", + update = "value = NULL" :: "key.x = NULL" :: "key.y.i = NULL" :: Nil, + result = """ + { "key": { "x": null, "y": { "i" : null } }, "value": null } + { "key": { "x": "X2" , "y": { "i" : 2.0 } }, "value": { "a": 2, "b": "B2" } }""") + + testNestedDataSupport("update multiple fields at different levels with implicit casting")( + source = """ + { "key": { "x": "X1", "y": { "i": 1.0 } }, "value": { "a": 100, "b": "B100" } }""", + target = """ + { "key": { "x": "X1", "y": { "i": 1.0 } }, "value": { "a": 1, "b": "B1" } } + { "key": { "x": "X2", "y": { "i": 2.0 } }, "value": { "a": 2, "b": "B2" } }""", + update = + "key.x = 'XXX' " :: "key.y.i = '9000'" :: + "value = named_struct('a', '9000', 'b', s.value.b)" :: Nil, + result = """ + { "key": { "x": "XXX", "y": { "i": 9000 } }, "value": { "a": 9000, "b": "B100" } } + { "key": { "x": "X2" , "y": { "i": 2.0 } }, "value": { "a": 2, "b": "B2" } }""") + + testNestedDataSupport("update array fields at different levels")( + source = """ + { "key": { "x": "X1", "y": [ 1, 11 ] }, "value": [ -1, -10 , -100 ] }""", + target = """ + { "key": { "x": "X1", "y": [ 1, 11 ] }, "value": [ 1, 10 , 100 ]} } + { "key": { "x": "X2", "y": [ 2, 22 ] }, "value": [ 2, 20 , 200 ]} }""", + update = "value = array(-9000)" :: "key.y = array(-1, -11)" :: Nil, + result = """ + { "key": { "x": "X1", "y": [ -1, -11 ] }, "value": [ -9000 ]} } + { "key": { "x": "X2", "y": [ 2, 22 ] }, "value": [ 2, 20 , 200 ]} }""") + + testNestedDataSupport("update using quoted names at different levels", "dotted name support")( + source = """ + { "key": { "x": "X1", "y.i": 1.0 }, "value.a": "A" }""", + target = """ + { "key": { "x": "X1", "y.i": 1.0 }, "value.a": "A1" } + { "key": { "x": "X2", "y.i": 2.0 }, "value.a": "A2" }""", + update = "`t`.key.`y.i` = 9000" :: "t.`value.a` = 'UPDATED'" :: Nil, + result = """ + { "key": { "x": "X1", "y.i": 9000 }, "value.a": "UPDATED" } + { "key": { "x": "X2", "y.i" : 2.0 }, "value.a": "A2" }""") + + testNestedDataSupport("unknown nested field")( + source = """{ "key": "A", "value": { "a": 0 } }""", + target = """{ "key": "A", "value": { "a": 1 } }""", + update = "value.c = 'UPDATED'" :: Nil, + errorStrs = "No such struct field" :: Nil) + + testNestedDataSupport("assigning simple type to struct field")( + source = """{ "key": "A", "value": { "a": { "x": 1 } } }""", + target = """{ "key": "A", "value": { "a": { "x": 1 } } }""", + update = "value.a = 'UPDATED'" :: Nil, + errorStrs = "data type mismatch" :: Nil) + + testNestedDataSupport("conflicting assignments between two nested fields at different levels")( + source = """{ "key": "A", "value": { "a": { "x": 0 } } }""", + target = """{ "key": "A", "value": { "a": { "x": 1 } } }""", + update = "value.a.x = 2" :: "value.a = named_struct('x', 3)" :: Nil, + errorStrs = "There is a conflict from these SET columns" :: Nil) + + testNestedDataSupport("conflicting assignments between nested field and top-level column")( + source = """{ "key": "A", "value": { "a": 0 } }""", + target = """{ "key": "A", "value": { "a": 1 } }""", + update = "value.a = 2" :: "value = 
named_struct('a', 3)" :: Nil, + errorStrs = "There is a conflict from these SET columns" :: Nil) + + testNestedDataSupport("nested field not supported in INSERT")( + source = """{ "key": "A", "value": { "a": 0 } }""", + target = """{ "key": "B", "value": { "a": 1 } }""", + update = "value.a = 2" :: Nil, + insert = """(key, value.a) VALUES (s.key, s.value.a)""", + errorStrs = "Nested field is not supported in the INSERT clause" :: Nil) + + testNestedDataSupport("updating map type")( + source = """{ "key": "A", "value": { "a": 0 } }""", + target = """{ "key": "A", "value": { "a": 1 } }""", + update = "value.a = 2" :: Nil, + targetSchema = + new StructType().add("key", StringType).add("value", MapType(StringType, IntegerType)), + errorStrs = "Updating nested fields is only supported for StructType" :: Nil) + + testNestedDataSupport("updating array type")( + source = """{ "key": "A", "value": [ { "a": 0 } ] }""", + target = """{ "key": "A", "value": [ { "a": 1 } ] }""", + update = "value.a = 2" :: Nil, + targetSchema = + new StructType().add("key", StringType).add("value", MapType(StringType, IntegerType)), + errorStrs = "Updating nested fields is only supported for StructType" :: Nil) + + testNestedDataSupport("resolution by name - update specific column")( + source = """{ "key": "A", "value": { "b": 2, "a": { "y": 20, "x": 10} } }""", + target = """{ "key": "A", "value": { "a": { "x": 1, "y": 2 }, "b": 1 }}""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", IntegerType)), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("b", IntegerType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType))), + update = "value.a = s.value.a", + result = """{ "key": "A", "value": { "a": { "x": 10, "y": 20 }, "b": 1 } }""") + + // scalastyle:off line.size.limit + testNestedDataSupport("resolution by name - update specific column - array of struct - longer source")( + source = """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": 10 } }, { "b": "3", "a": { "y": 30, "x": 40 } } ] }""", + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2 }, "b": 1 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType)))), + update = "value = s.value", + result = """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20 }, "b": 2 }, { "a": { "y": 30, "x": 40}, "b": 3 } ] }""") + + testNestedDataSupport("resolution by name - update specific column - array of struct - longer target")( + source = """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": 10 } } ] }""", + target = """{ "key": "A", "value": [{ "a": { "x": 1, "y": 2 }, "b": 1 }, { "a": { "x": 2, "y": 3 }, "b": 2 }] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType().add("y", 
IntegerType).add("x", IntegerType)))), + update = "value = s.value", + result = """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20 }, "b": 2 } ] }""") + + testNestedDataSupport("resolution by name - update specific column - nested array of struct - longer source")( + source = """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": [ { "c": 10, "d": "30" }, { "c": 3, "d": "40" } ] } } ] }""", + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3} ] }, "b": 1 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType)))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + ))))), + update = "value = s.value", + result = + """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30 }, { "c": 3, "d": 40 } ] }, "b": 2 } ] }""") + + testNestedDataSupport("resolution by name - update specific column - nested array of struct - longer target")( + source = """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": [ { "c": 10, "d": "30" } ] } } ] }""", + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3 }, { "c": 2, "d": 4 } ] }, "b": 1 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType)))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + ))))), + update = "value = s.value", + result = """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30}]}, "b": 2 } ] }""") + // scalastyle:on line.size.limit + + testNestedDataSupport("resolution by name - update *")( + source = """{ "key": "A", "value": { "b": 2, "a": { "y": 20, "x": 10} } }""", + target = """{ "key": "A", "value": { "a": { "x": 1, "y": 2 }, "b": 1 }}""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", IntegerType)), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("b", IntegerType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType))), + update = "*", + result = """{ "key": "A", "value": { "a": { "x": 10, "y": 20 } , "b": 2} }""") + + // scalastyle:off line.size.limit + testNestedDataSupport("resolution by name - update * - array of struct - longer source")( + source = """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": 10 } }, { "b": "3", "a": { "y": 30, "x": 40 } } ] }""", + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2 }, "b": 1 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", 
IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType)))), + update = "*", + result = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20 }, "b": 2 }, { "a": { "y": 30, "x": 40}, "b": 3 } ] }""".stripMargin) + + testNestedDataSupport("resolution by name - update * - array of struct - longer target")( + source = """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": 10 } } ] }""", + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2 }, "b": 1 }, { "a": { "x": 2, "y": 3 }, "b": 4 }] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType)))), + update = "*", + result = + """{ "key": "A", "value": [ { "a": { "x": 10, "y": 20 }, "b": 2 } ] }""".stripMargin) + + testNestedDataSupport("resolution by name - update * - nested array of struct - longer source")( + source = """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": [{ "c": 10, "d": "30"}, { "c": 3, "d": "40" } ] } } ] }""", + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3 } ] }, "b": 1 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType)))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + ))))), + update = "*", + result = """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30}, { "c": 3, "d": 40 } ] }, "b": 2 } ] }""") + + testNestedDataSupport("resolution by name - update * - nested array of struct - longer target")( + source = """{ "key": "A", "value": [ { "b": "2", "a": { "y": 20, "x": [ { "c": 10, "d": "30" } ] } } ] }""", + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3}, { "c": 2, "d": 4} ] }, "b": 1 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType)))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + ))))), + update = "*", + result = """{ "key": "A", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30 } ] }, "b": 2 } ] }""") + // scalastyle:on line.size.limit + + testNestedDataSupport("resolution by name - insert specific column")( + source = """{ "key": "B", "value": { "b": 2, "a": { "y": 20, "x": 10 } } }""", + target = """{ "key": "A", "value": { "a": { "x": 1, "y": 2 }, "b": 1 } }""", + 
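+ // Note (grounded in the schemas below): the source schema deliberately lists its fields in a + // different order than the target (`b` before `a`, and `y` before `x` inside `a`) so that this + // test exercises by-name resolution rather than positional matching.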
targetSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", IntegerType)), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("b", IntegerType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType))), + update = "*", + insert = "(key, value) VALUES (s.key, s.value)", + result = + """ + |{ "key": "A", "value": { "a": { "x": 1, "y": 2 }, "b": 1 } }, + |{ "key": "B", "value": { "a": { "x": 10, "y": 20 }, "b": 2 } }""".stripMargin) + + // scalastyle:off line.size.limit + testNestedDataSupport("resolution by name - insert specific column - array of struct")( + source = """{ "key": "B", "value": [ { "b": "2", "a": { "y": 20, "x": 10 } }, { "b": "3", "a": { "y": 30, "x": 40 } } ] }""", + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2 }, "b": 1 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType)))), + update = "*", + insert = "(key, value) VALUES (s.key, s.value)", + result = + """ { "key": "A", "value": [ { "a": { "x": 1, "y": 2 }, "b": 1 } ] }, + { "key": "B", "value": [ { "a": { "x": 10, "y": 20 }, "b": 2 }, { "a": { "y": 30, "x": 40}, "b": 3 } ] }""") + + testNestedDataSupport("resolution by name - insert specific column - nested array of struct")( + source = """{ "key": "B", "value": [ { "b": "2", "a": { "y": 20, "x": [ { "c": 10, "d": "30" }, { "c": 3, "d": "40" } ] } } ] }""", + target = """{ "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3 } ] }, "b": 1 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType)))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + ))))), + update = "*", + insert = "(key, value) VALUES (s.key, s.value)", + result = + """ + { "key": "A", "value": [ { "a": { "y": 2, "x": [ { "c": 1, "d": 3 } ] }, "b": 1 } ] }, + { "key": "B", "value": [ { "a": { "y": 20, "x": [ { "c": 10, "d": 30 }, { "c": 3, "d": 40 } ] }, "b": 2 } ] }""") + // scalastyle:on line.size.limit + + testNestedDataSupport("resolution by name - insert *")( + source = """{ "key": "B", "value": { "b": 2, "a": { "y": 20, "x": 10} } }""", + target = """{ "key": "A", "value": { "a": { "x": 1, "y": 2 }, "b": 1 } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", IntegerType)), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("b", IntegerType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType))), + update = "*", + insert = "*", + result = + """ + |{ "key": "A", "value": { "a": { 
"x": 1, "y": 2 }, "b": 1 } }, + |{ "key": "B", "value": { "a": { "x": 10, "y": 20 }, "b": 2 } }""".stripMargin) + + // scalastyle:off line.size.limit + testNestedDataSupport("resolution by name - insert * - array of struct")( + source = """{ "key": "B", "value": [ { "b": "2", "a": { "y": 20, "x": 10} }, { "b": "3", "a": { "y": 30, "x": 40 } } ] }""", + target = """{ "key": "A", "value": [ { "a": { "x": 1, "y": 2 }, "b": 1 } ] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType)))), + update = "*", + insert = "*", + result = + """ + |{ "key": "A", "value": [ { "a": { "x": 1, "y": 2 }, "b": 1 } ] }, + |{ "key": "B", "value": [ { "a": { "x": 10, "y": 20 }, "b": 2 }, { "a": { "y": 30, "x": 40}, "b": 3 } ] }""".stripMargin) + + testNestedDataSupport("resolution by name - insert * - nested array of struct")( + source = """{ "key": "B", "value": [{ "b": "2", "a": { "y": 20, "x": [ { "c": 10, "d": "30"}, { "c": 3, "d": "40"} ] } } ] }""", + target = """{ "key": "A", "value": [{ "a": { "y": 2, "x": [ { "c": 1, "d": 3} ] }, "b": 1 }] }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", IntegerType)))) + .add("b", IntegerType))), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", ArrayType( + new StructType() + .add("b", StringType) + .add("a", new StructType() + .add("y", IntegerType) + .add("x", ArrayType( + new StructType() + .add("c", IntegerType) + .add("d", StringType) + ))))), + update = "*", + insert = "*", + result = + """ + |{ "key": "A", "value": [{ "a": { "y": 2, "x": [{ "c": 1, "d": 3}]}, "b": 1 }] }, + |{ "key": "B", "value": [{ "a": { "y": 20, "x": [{ "c": 10, "d": 30}, { "c": 3, "d": 40}]}, "b": 2 }]}""".stripMargin) + // scalastyle:on line.size.limit + + // Note that value.b has to be in the right position for this test to avoid throwing an error + // trying to write its integer value into the value.a struct. 
+ testNestedDataSupport("update resolution by position with conf")( + source = """{ "key": "A", "value": { "a": { "y": 20, "x": 10}, "b": 2 }}""", + target = """{ "key": "A", "value": { "a": { "x": 1, "y": 2 }, "b": 1 } }""", + targetSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", new StructType().add("x", IntegerType).add("y", IntegerType)) + .add("b", IntegerType)), + sourceSchema = new StructType() + .add("key", StringType) + .add("value", new StructType() + .add("a", new StructType().add("y", IntegerType).add("x", IntegerType)) + .add("b", IntegerType)), + update = "*", + insert = "(key, value) VALUES (s.key, s.value)", + result = """{ "key": "A", "value": { "a": { "x": 20, "y": 10 }, "b": 2 } }""", + confs = (DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME.key, "false") +: Nil) + + protected def testAnalysisErrorsInExtendedMerge( + name: String, + namePrefix: String = "extended syntax")( + mergeOn: String, + mergeClauses: MergeClause*)( + errorStrs: Seq[String], + notErrorStrs: Seq[String] = Nil): Unit = { + test(s"$namePrefix - analysis errors - $name") { + withKeyValueData( + source = Seq.empty, + target = Seq.empty, + sourceKeyValueNames = ("key", "srcValue"), + targetKeyValueNames = ("key", "tgtValue") + ) { case (sourceName, targetName) => + val errMsg = intercept[AnalysisException] { + executeMerge(s"$targetName t", s"$sourceName s", mergeOn, mergeClauses: _*) + }.getMessage + errorStrs.foreach { s => errorContains(errMsg, s) } + notErrorStrs.foreach { s => errorNotContains(errMsg, s) } + } + } + } + + testAnalysisErrorsInExtendedMerge("update condition - ambiguous reference")( + mergeOn = "s.key = t.key", + update(condition = "key > 1", set = "tgtValue = srcValue"))( + errorStrs = "reference" :: "key" :: "is ambiguous" :: Nil) + + testAnalysisErrorsInExtendedMerge("update condition - unknown reference")( + mergeOn = "s.key = t.key", + update(condition = "unknownAttrib > 1", set = "tgtValue = srcValue"))( + // Should show unknownAttrib as invalid ref and (key, tgtValue, srcValue) as valid column names. + errorStrs = "UPDATE condition" :: "unknownAttrib" :: "key" :: "tgtValue" :: "srcValue" :: Nil) + + testAnalysisErrorsInExtendedMerge("update condition - aggregation function")( + mergeOn = "s.key = t.key", + update(condition = "max(0) > 0", set = "tgtValue = srcValue"))( + errorStrs = "UPDATE condition" :: "aggregate functions are not supported" :: Nil) + + testAnalysisErrorsInExtendedMerge("update condition - subquery")( + mergeOn = "s.key = t.key", + update(condition = "s.value in (select value from t)", set = "tgtValue = srcValue"))( + errorStrs = Nil) // subqueries fail for unresolved reference to `t` + + testAnalysisErrorsInExtendedMerge("delete condition - ambiguous reference")( + mergeOn = "s.key = t.key", + delete(condition = "key > 1"))( + errorStrs = "reference" :: "key" :: "is ambiguous" :: Nil) + + testAnalysisErrorsInExtendedMerge("delete condition - unknown reference")( + mergeOn = "s.key = t.key", + delete(condition = "unknownAttrib > 1"))( + // Should show unknownAttrib as invalid ref and (key, tgtValue, srcValue) as valid column names. 
+ errorStrs = "DELETE condition" :: "unknownAttrib" :: "key" :: "tgtValue" :: "srcValue" :: Nil) + + testAnalysisErrorsInExtendedMerge("delete condition - aggregation function")( + mergeOn = "s.key = t.key", + delete(condition = "max(0) > 0"))( + errorStrs = "DELETE condition" :: "aggregate functions are not supported" :: Nil) + + testAnalysisErrorsInExtendedMerge("delete condition - subquery")( + mergeOn = "s.key = t.key", + delete(condition = "s.srcValue in (select tgtValue from t)"))( + errorStrs = Nil) // subqueries fail for unresolved reference to `t` + + testAnalysisErrorsInExtendedMerge("insert condition - unknown reference")( + mergeOn = "s.key = t.key", + insert(condition = "unknownAttrib > 1", values = "(key, tgtValue) VALUES (s.key, s.srcValue)"))( + // Should show unknownAttrib as invalid ref and (key, srcValue) as valid column names, + // but not show tgtValue as a valid name as target columns cannot be present in insert clause. + errorStrs = "INSERT condition" :: "unknownAttrib" :: "key" :: "srcValue" :: Nil, + notErrorStrs = "tgtValue") + + testAnalysisErrorsInExtendedMerge("insert condition - reference to target table column")( + mergeOn = "s.key = t.key", + insert(condition = "tgtValue > 1", values = "(key, tgtValue) VALUES (s.key, s.srcValue)"))( + // Should show tgtValue as invalid ref and (key, srcValue) as valid column names + errorStrs = "INSERT condition" :: "tgtValue" :: "key" :: "srcValue" :: Nil) + + testAnalysisErrorsInExtendedMerge("insert condition - aggregation function")( + mergeOn = "s.key = t.key", + insert(condition = "max(0) > 0", values = "(key, tgtValue) VALUES (s.key, s.srcValue)"))( + errorStrs = "INSERT condition" :: "aggregate functions are not supported" :: Nil) + + testAnalysisErrorsInExtendedMerge("insert condition - subquery")( + mergeOn = "s.key = t.key", + insert( + condition = "s.srcValue in (select srcValue from s)", + values = "(key, tgtValue) VALUES (s.key, s.srcValue)"))( + errorStrs = Nil) // subqueries fail for unresolved reference to `s` + + + protected def testExtendedMerge( + name: String, + namePrefix: String = "extended syntax")( + source: Seq[(Int, Int)], + target: Seq[(Int, Int)], + mergeOn: String, + mergeClauses: MergeClause*)( + result: Seq[(Int, Int)]): Unit = { + Seq(true, false).foreach { isPartitioned => + test(s"$namePrefix - $name - isPartitioned: $isPartitioned ") { + withKeyValueData(source, target, isPartitioned) { case (sourceName, targetName) => + withSQLConf(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED.key -> "true") { + executeMerge(s"$targetName t", s"$sourceName s", mergeOn, mergeClauses: _*) + } + val deltaPath = if (targetName.startsWith("delta.`")) { + targetName.stripPrefix("delta.`").stripSuffix("`") + } else targetName + checkAnswer( + readDeltaTable(deltaPath), + result.map { case (k, v) => Row(k, v) }) + } + } + } + } + + protected def testExtendedMergeErrorOnMultipleMatches( + name: String)( + source: Seq[(Int, Int)], + target: Seq[(Int, Int)], + mergeOn: String, + mergeClauses: MergeClause*): Unit = { + test(s"extended syntax - $name") { + withKeyValueData(source, target) { case (sourceName, targetName) => + val errMsg = intercept[UnsupportedOperationException] { + executeMerge(s"$targetName t", s"$sourceName s", mergeOn, mergeClauses: _*) + }.getMessage.toLowerCase(Locale.ROOT) + assert(errMsg.contains("cannot perform merge as multiple source rows matched")) + } + } + } + + testExtendedMerge("only update")( + source = (0, 0) :: (1, 10) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = 
"s.key = t.key", + update(set = "key = s.key, value = s.value"))( + result = Seq( + (1, 10), // (1, 1) updated + (2, 2) + )) + + testExtendedMergeErrorOnMultipleMatches("only update with multiple matches")( + source = (0, 0) :: (1, 10) :: (1, 11) :: (2, 20) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + update(set = "key = s.key, value = s.value")) + + testExtendedMerge("only conditional update")( + source = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: (3, 3) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "s.value <> 20 AND t.value <> 3", set = "key = s.key, value = s.value"))( + result = Seq( + (1, 10), // updated + (2, 2), // not updated due to source-only condition `s.value <> 20` + (3, 3) // not updated due to target-only condition `t.value <> 3` + )) + + testExtendedMergeErrorOnMultipleMatches("only conditional update with multiple matches")( + source = (0, 0) :: (1, 10) :: (1, 11) :: (2, 20) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "s.value = 10", set = "key = s.key, value = s.value")) + + testExtendedMerge("only delete")( + source = (0, 0) :: (1, 10) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + delete())( + result = Seq( + (2, 2) // (1, 1) deleted + )) // (3, 30) not inserted as not insert clause + + // This is not ambiguous even when there are multiple matches + testExtendedMerge(s"only delete with multiple matches")( + source = (0, 0) :: (1, 10) :: (1, 100) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + delete())( + result = Seq( + (2, 2) // (1, 1) matches multiple source rows but unambiguously deleted + ) + ) + + testExtendedMerge("only conditional delete")( + source = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: (3, 3) :: Nil, + mergeOn = "s.key = t.key", + delete(condition = "s.value <> 20 AND t.value <> 3"))( + result = Seq( + (2, 2), // not deleted due to source-only condition `s.value <> 20` + (3, 3) // not deleted due to target-only condition `t.value <> 3` + )) // (1, 1) deleted + + testExtendedMergeErrorOnMultipleMatches("only conditional delete with multiple matches")( + source = (0, 0) :: (1, 10) :: (1, 100) :: (2, 20) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + delete(condition = "s.value = 10")) + + testExtendedMerge("conditional update + delete")( + source = (0, 0) :: (1, 10) :: (2, 20) :: Nil, + target = (1, 1) :: (2, 2) :: (3, 3) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "s.key <> 1", set = "key = s.key, value = s.value"), + delete())( + result = Seq( + (2, 20), // (2, 2) updated, (1, 1) deleted as it did not match update condition + (3, 3) + )) + + testExtendedMergeErrorOnMultipleMatches("conditional update + delete with multiple matches")( + source = (0, 0) :: (1, 10) :: (2, 20) :: (2, 200) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "s.value = 20", set = "key = s.key, value = s.value"), + delete()) + + testExtendedMerge("conditional update + conditional delete")( + source = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: (3, 3) :: (4, 4) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "s.key <> 1", set = "key = s.key, value = s.value"), + delete(condition = "s.key <> 2"))( + result = Seq( + (2, 20), // (2, 2) updated as it matched update condition + (3, 30), // (3, 3) updated even though it 
matched update and delete conditions, as update 1st + (4, 4) + )) // (1, 1) deleted as it matched delete condition + + testExtendedMergeErrorOnMultipleMatches( + "conditional update + conditional delete with multiple matches")( + source = (0, 0) :: (1, 10) :: (1, 100) :: (2, 20) :: (2, 200) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "s.value = 20", set = "key = s.key, value = s.value"), + delete(condition = "s.value = 10")) + + testExtendedMerge("conditional delete + conditional update (order matters)")( + source = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: (3, 3) :: (4, 4) :: Nil, + mergeOn = "s.key = t.key", + delete(condition = "s.key <> 2"), + update(condition = "s.key <> 1", set = "key = s.key, value = s.value"))( + result = Seq( + (2, 20), // (2, 2) updated as it matched update condition + (4, 4) // (4, 4) unchanged + )) // (1, 1) and (3, 3) deleted as they matched delete condition (before update cond) + + testExtendedMerge("only insert")( + source = (0, 0) :: (1, 10) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + insert(values = "(key, value) VALUES (s.key, s.value)"))( + result = Seq( + (0, 0), // (0, 0) inserted + (1, 1), // (1, 1) not updated as no update clause + (2, 2), // (2, 2) not updated as no update clause + (3, 30) // (3, 30) inserted + )) + + testExtendedMerge("only conditional insert")( + source = (0, 0) :: (1, 10) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + insert(condition = "s.value <> 30", values = "(key, value) VALUES (s.key, s.value)"))( + result = Seq( + (0, 0), // (0, 0) inserted by condition but not (3, 30) + (1, 1), + (2, 2) + )) + + testExtendedMerge("update + conditional insert")( + source = (0, 0) :: (1, 10) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + update("key = s.key, value = s.value"), + insert(condition = "s.value <> 30", values = "(key, value) VALUES (s.key, s.value)"))( + result = Seq( + (0, 0), // (0, 0) inserted by condition but not (3, 30) + (1, 10), // (1, 1) updated + (2, 2) + )) + + testExtendedMerge("conditional update + conditional insert")( + source = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "s.key > 1", set = "key = s.key, value = s.value"), + insert(condition = "s.key > 1", values = "(key, value) VALUES (s.key, s.value)"))( + result = Seq( + (1, 1), // (1, 1) not updated by condition + (2, 20), // (2, 2) updated by condition + (3, 30) // (3, 30) inserted by condition but not (0, 0) + )) + + // This is specifically to test the MergeIntoDeltaCommand.writeOnlyInserts code paths + testExtendedMerge("update + conditional insert clause with data to only insert, no updates")( + source = (0, 0) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + update("key = s.key, value = s.value"), + insert(condition = "s.value <> 30", values = "(key, value) VALUES (s.key, s.value)"))( + result = Seq( + (0, 0), // (0, 0) inserted by condition but not (3, 30) + (1, 1), + (2, 2) + )) + + testExtendedMerge(s"delete + insert with multiple matches for both") ( + source = (1, 10) :: (1, 100) :: (3, 30) :: (3, 300) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + delete(), + insert(values = "(key, value) VALUES (s.key, s.value)")) ( + result = Seq( + // (1, 1) matches multiple source rows but unambiguously 
deleted + (2, 2), // existed previously + (3, 30), // inserted + (3, 300) // inserted + ) + ) + + testExtendedMerge("conditional update + conditional delete + conditional insert")( + source = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: (4, 40) :: Nil, + target = (1, 1) :: (2, 2) :: (3, 3) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "s.key < 2", set = "key = s.key, value = s.value"), + delete(condition = "s.key < 3"), + insert(condition = "s.key > 1", values = "(key, value) VALUES (s.key, s.value)"))( + result = Seq( + (1, 10), // (1, 1) updated by condition, but not (2, 2) or (3, 3) + (3, 3), // neither updated nor deleted as it matched neither condition + (4, 40) // (4, 40) inserted by condition, but not (0, 0) + )) // (2, 2) deleted by condition but not (1, 1) or (3, 3) + + testExtendedMergeErrorOnMultipleMatches( + "conditional update + conditional delete + conditional insert with multiple matches")( + source = (0, 0) :: (1, 10) :: (1, 100) :: (2, 20) :: (2, 200) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "s.value = 20", set = "key = s.key, value = s.value"), + delete(condition = "s.value = 10"), + insert(condition = "s.value = 0", values = "(key, value) VALUES (s.key, s.value)")) + + // complex merge condition = has target-only and source-only predicates + testExtendedMerge( + "conditional update + conditional delete + conditional insert + complex merge condition ")( + source = (-1, -10) :: (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: (4, 40) :: (5, 50) :: Nil, + target = (-1, -1) :: (1, 1) :: (2, 2) :: (3, 3) :: (5, 5) :: Nil, + mergeOn = "s.key = t.key AND t.value > 0 AND s.key < 5", + update(condition = "s.key < 2", set = "key = s.key, value = s.value"), + delete(condition = "s.key < 3"), + insert(condition = "s.key > 1", values = "(key, value) VALUES (s.key, s.value)"))( + result = Seq( + (-1, -1), // (-1, -1) not matched with (-1, -10) by target-only condition 't.value > 0', so + // not updated, But (-1, -10) not inserted as insert condition is 's.key > 1' + // (0, 0) not matched any target but not inserted as insert condition is 's.key > 1' + (1, 10), // (1, 1) matched with (1, 10) and updated as update condition is 's.key < 2' + // (2, 2) matched with (2, 20) and deleted as delete condition is 's.key < 3' + (3, 3), // (3, 3) matched with (3, 30) but neither updated nor deleted as it did not + // satisfy update or delete condition + (4, 40), // (4, 40) not matched any target, so inserted as insert condition is 's.key > 1' + (5, 5), // (5, 5) not matched with (5, 50) by source-only condition 's.key < 5', no update + (5, 50) // (5, 50) inserted as inserted as insert condition is 's.key > 1' + )) + + test("extended syntax - different # cols in source than target") { + val sourceData = + (0, 0, 0) :: (1, 10, 100) :: (2, 20, 200) :: (3, 30, 300) :: (4, 40, 400) :: Nil + val targetData = (1, 1) :: (2, 2) :: (3, 3) :: Nil + + withTempView("source") { + append(targetData.toDF("key", "value"), Nil) + sourceData.toDF("key", "value", "extra").createOrReplaceTempView("source") + executeMerge( + s"delta.`$tempPath` t", + "source s", + cond = "s.key = t.key", + update(condition = "s.key < 2", set = "key = s.key, value = s.value + s.extra"), + delete(condition = "s.key < 3"), + insert(condition = "s.key > 1", values = "(key, value) VALUES (s.key, s.value + s.extra)")) + + checkAnswer( + readDeltaTable(tempPath), + Seq( + Row(1, 110), // (1, 1) updated by condition, but not (2, 2) or (3, 3) + Row(3, 3), // neither updated nor deleted 
as it matched neither condition + Row(4, 440) // (4, 40) inserted by condition, but not (0, 0) + )) // (2, 2) deleted by condition but not (1, 1) or (3, 3) + } + } + + protected def withJsonData( + source: Seq[String], + target: Seq[String], + schema: StructType = null, + sourceSchema: StructType = null)( + thunk: (String, String) => Unit): Unit = { + + def toDF(strs: Seq[String]) = { + if (sourceSchema != null && strs == source) { + spark.read.schema(sourceSchema).json(strs.toDS) + } else if (schema != null) { + spark.read.schema(schema).json(strs.toDS) + } else { + spark.read.json(strs.toDS) + } + } + append(toDF(target), Nil) + withTempView("source") { + toDF(source).createOrReplaceTempView("source") + thunk("source", s"delta.`$tempPath`") + } + } + + test("extended syntax - nested data - conditions and actions") { + withJsonData( + source = + """{ "key": { "x": "X1", "y": 1}, "value" : { "a": 100, "b": "B100" } } + { "key": { "x": "X2", "y": 2}, "value" : { "a": 200, "b": "B200" } } + { "key": { "x": "X3", "y": 3}, "value" : { "a": 300, "b": "B300" } } + { "key": { "x": "X4", "y": 4}, "value" : { "a": 400, "b": "B400" } }""", + target = + """{ "key": { "x": "X1", "y": 1}, "value" : { "a": 1, "b": "B1" } } + { "key": { "x": "X2", "y": 2}, "value" : { "a": 2, "b": "B2" } }""" + ) { case (sourceName, targetName) => + executeMerge( + s"$targetName t", + s"$sourceName s", + cond = "s.key = t.key", + update(condition = "s.key.y < 2", set = "key = s.key, value = s.value"), + insert(condition = "s.key.x < 'X4'", values = "(key, value) VALUES (s.key, s.value)")) + + checkAnswer( + readDeltaTable(tempPath), + spark.read.json(Seq( + """{ "key": { "x": "X1", "y": 1}, "value" : { "a": 100, "b": "B100" } }""", // updated + """{ "key": { "x": "X2", "y": 2}, "value" : { "a": 2, "b": "B2" } }""", // not updated + """{ "key": { "x": "X3", "y": 3}, "value" : { "a": 300, "b": "B300" } }""" // inserted + ).toDS)) + } + } + + protected implicit def strToJsonSeq(str: String): Seq[String] = { + str.split("\n").filter(_.trim.length > 0) + } + + def testStar( + name: String)( + source: Seq[String], + target: Seq[String], + mergeClauses: MergeClause*)( + result: Seq[String] = null, + errorStrs: Seq[String] = null) { + + require(result == null ^ errorStrs == null, "either set the result or the error strings") + val testName = + if (result != null) s"star syntax - $name" else s"star syntax - analysis error - $name" + + test(testName) { + withJsonData(source, target) { case (sourceName, targetName) => + def execMerge() = + executeMerge(s"$targetName t", s"$sourceName s", "s.key = t.key", mergeClauses: _*) + if (result != null) { + execMerge() + val deltaPath = if (targetName.startsWith("delta.`")) { + targetName.stripPrefix("delta.`").stripSuffix("`") + } else targetName + checkAnswer( + readDeltaTable(deltaPath), + readFromJSON(result)) + } else { + val e = intercept[AnalysisException] { execMerge() } + errorStrs.foreach { s => errorContains(e.getMessage, s) } + } + } + } + } + + testStar("basic star expansion")( + source = + """{ "key": "a", "value" : 10 } + { "key": "c", "value" : 30 }""", + target = + """{ "key": "a", "value" : 1 } + { "key": "b", "value" : 2 }""", + update(set = "*"), + insert(values = "*"))( + result = + """{ "key": "a", "value" : 10 } + { "key": "b", "value" : 2 } + { "key": "c", "value" : 30 }""") + + testStar("multiples columns and extra columns in source")( + source = + """{ "key": "a", "value" : 10, "value2" : 100, "value3" : 1000 } + { "key": "c", "value" : 30, "value2" : 300, "value3" 
: 3000 }""", + target = + """{ "key": "a", "value" : 1, "value2" : 1 } + { "key": "b", "value" : 2, "value2" : 2 }""", + update(set = "*"), + insert(values = "*"))( + result = + """{ "key": "a", "value" : 10, "value2" : 100 } + { "key": "b", "value" : 2, "value2" : 2 } + { "key": "c", "value" : 30, "value2" : 300 }""") + + testExtendedMerge("insert only merge")( + source = (0, 0) :: (1, 10) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + insert(values = "*"))( + result = Seq( + (0, 0), // inserted + (1, 1), // existed previously + (2, 2), // existed previously + (3, 30) // inserted + )) + + testExtendedMerge("insert only merge with insert condition on source")( + source = (0, 0) :: (1, 10) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + insert(values = "*", condition = "s.key = s.value"))( + result = Seq( + (0, 0), // inserted + (1, 1), // existed previously + (2, 2) // existed previously + )) + + testExtendedMerge("insert only merge with predicate insert")( + source = (0, 0) :: (1, 10) :: (3, 30) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + insert(values = "(t.key, t.value) VALUES (s.key + 10, s.value + 10)"))( + result = Seq( + (10, 10), // inserted + (1, 1), // existed previously + (2, 2), // existed previously + (13, 40) // inserted + )) + + testExtendedMerge(s"insert only merge with multiple matches") ( + source = (0, 0) :: (1, 10) :: (1, 100) :: (3, 30) :: (3, 300) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + mergeOn = "s.key = t.key", + insert(values = "(key, value) VALUES (s.key, s.value)")) ( + result = Seq( + (0, 0), // inserted + (1, 1), // existed previously + (2, 2), // existed previously + (3, 30), // inserted + (3, 300) // key exists but still inserted + ) + ) + + + protected def testNullCaseInsertOnly(name: String)( + target: Seq[(JInt, JInt)], + source: Seq[(JInt, JInt)], + condition: String, + expectedResults: Seq[(JInt, JInt)], + insertCondition: Option[String] = None) = { + Seq(true, false).foreach { isPartitioned => + test(s"basic case - null handling - $name, isPartitioned: $isPartitioned") { + withView("sourceView") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(target.toDF("key", "value"), partitions) + source.toDF("key", "value").createOrReplaceTempView("sourceView") + withSQLConf(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED.key -> "true") { + if (insertCondition.isDefined) { + executeMerge( + s"delta.`$tempPath` as t", + "sourceView s", + condition, + insert("(t.key, t.value) VALUES (s.key, s.value)", + condition = insertCondition.get)) + } else { + executeMerge( + s"delta.`$tempPath` as t", + "sourceView s", + condition, + insert("(t.key, t.value) VALUES (s.key, s.value)")) + } + } + checkAnswer( + readDeltaTable(tempPath), + expectedResults.map { r => Row(r._1, r._2) } + ) + + Utils.deleteRecursively(new File(tempPath)) + } + } + } + } + + testNullCaseInsertOnly("insert only merge - null in source") ( + target = Seq((1, 1)), + source = Seq((1, 10), (2, 20), (null, null)), + condition = "s.key = t.key", + expectedResults = Seq( + (1, 1), // Existing value + (2, 20), // Insert + (null, null) // Insert + )) + + testNullCaseInsertOnly("insert only merge - null value in both source and target")( + target = Seq((1, 1), (null, null)), + source = Seq((1, 10), (2, 20), (null, 0)), + condition = "s.key = t.key", + expectedResults = Seq( + (null, null), // No change as null in source does not match null in target + (1, 1), // Existing value + (2, 
20), // Insert + (null, 0) // Insert + )) + + testNullCaseInsertOnly("insert only merge - null in insert clause")( + target = Seq((1, 1), (2, 20)), + source = Seq((1, 10), (3, 30), (null, 0)), + condition = "s.key = t.key", + expectedResults = Seq( + (1, 1), // Existing value + (2, 20), // Existing value + (null, 0) // Insert + ), + insertCondition = Some("s.key IS NULL") + ) + + test("insert only merge - turn off feature flag") { + withSQLConf(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED.key -> "false") { + withKeyValueData( + source = (1, 10) :: (3, 30) :: Nil, + target = (1, 1) :: Nil + ) { case (sourceName, targetName) => + insertOnlyMergeFeatureFlagOff(sourceName, targetName) + } + } + } + + protected def insertOnlyMergeFeatureFlagOff(sourceName: String, targetName: String): Unit = { + executeMerge( + tgt = s"$targetName t", + src = s"$sourceName s", + cond = "s.key = t.key", + insert(values = "(key, value) VALUES (s.key, s.value)")) + + checkAnswer(sql(s"SELECT key, value FROM $targetName"), + Row(1, 1) :: Row(3, 30) :: Nil) + + val metrics = spark.sql(s"DESCRIBE HISTORY $targetName LIMIT 1") + .select("operationMetrics") + .collect().head.getMap(0).asInstanceOf[Map[String, String]] + assert(metrics.contains("numTargetFilesRemoved")) + // If insert-only code path is not used, then the general code path will rewrite existing + // target files when DVs are not enabled. + if (!spark.conf.get(DeltaSQLConf.MERGE_USE_PERSISTENT_DELETION_VECTORS)) { + assert(metrics("numTargetFilesRemoved").toInt > 0) + } + } + + test("insert only merge - multiple matches when feature flag off") { + withSQLConf(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED.key -> "false") { + // Verify that in case of multiple matches, it throws error rather than producing + // incorrect results. + withKeyValueData( + source = (1, 10) :: (1, 100) :: (2, 20) :: Nil, + target = (1, 1) :: Nil + ) { case (sourceName, targetName) => + val errMsg = intercept[UnsupportedOperationException] { + executeMerge( + s"$targetName t", + s"$sourceName s", + "s.key = t.key", + insert(values = "(key, value) VALUES (s.key, s.value)")) + }.getMessage.toLowerCase(Locale.ROOT) + assert(errMsg.contains("cannot perform merge as multiple source rows matched")) + } + + // Verify that in case of multiple matches, it throws error rather than producing + // incorrect results. 
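+ // Same duplicate-key scenario as above, but with a condition on the insert clause; the assertion + // below still expects the multiple-source-rows-matched error.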
+ withKeyValueData( + source = (1, 10) :: (1, 100) :: (2, 20) :: (2, 200) :: Nil, + target = (1, 1) :: Nil + ) { case (sourceName, targetName) => + val errMsg = intercept[UnsupportedOperationException] { + executeMerge( + s"$targetName t", + s"$sourceName s", + "s.key = t.key", + insert(condition = "s.value = 20", values = "(key, value) VALUES (s.key, s.value)")) + }.getMessage.toLowerCase(Locale.ROOT) + assert(errMsg.contains("cannot perform merge as multiple source rows matched")) + } + } + } + + def testMergeWithRepartition( + name: String, + partitionColumns: Seq[String], + srcRange: Range, + expectLessFilesWithRepartition: Boolean, + clauses: MergeClause*): Unit = { + test(s"merge with repartition - $name", + DisableAdaptiveExecution("AQE coalesce would change the partition number")) { + withTempView("source") { + withTempDir { basePath => + val tgt1 = basePath + "target" + val tgt2 = basePath + "targetRepartitioned" + + val df = spark.range(100).withColumn("part1", 'id % 5).withColumn("part2", 'id % 3) + df.write.format("delta").partitionBy(partitionColumns: _*).save(tgt1) + df.write.format("delta").partitionBy(partitionColumns: _*).save(tgt2) + val cond = "src.id = t.id" + val src = srcRange.toDF("id") + .withColumn("part1", 'id % 5) + .withColumn("part2", 'id % 3) + .createOrReplaceTempView("source") + // execute merge without repartition + withSQLConf(DeltaSQLConf.MERGE_REPARTITION_BEFORE_WRITE.key -> "false") { + executeMerge( + tgt = s"delta.`$tgt1` as t", + src = "source src", + cond = cond, + clauses = clauses: _*) + } + // execute merge with repartition - default behavior + executeMerge( + tgt = s"delta.`$tgt2` as t", + src = "source src", + cond = cond, + clauses = clauses: _*) + checkAnswer( + io.delta.tables.DeltaTable.forPath(tgt2).toDF, + io.delta.tables.DeltaTable.forPath(tgt1).toDF + ) + val filesAfterNoRepartition = DeltaLog.forTable(spark, tgt1).snapshot.numOfFiles + val filesAfterRepartition = DeltaLog.forTable(spark, tgt2).snapshot.numOfFiles + // check if the merge with repartition produced fewer files + if (expectLessFilesWithRepartition) { + assert(filesAfterNoRepartition > filesAfterRepartition) + } else { + assert(filesAfterNoRepartition === filesAfterRepartition) + } + } + } + } + } + + testMergeWithRepartition( + name = "partition on multiple columns", + partitionColumns = Seq("part1", "part2"), + srcRange = Range(80, 110), + expectLessFilesWithRepartition = true, + update("t.part2 = 1"), + insert("(id, part1, part2) VALUES (id, part1, part2)") + ) + + testMergeWithRepartition( + name = "insert only merge", + partitionColumns = Seq("part1"), + srcRange = Range(110, 150), + expectLessFilesWithRepartition = true, + insert("(id, part1, part2) VALUES (id, part1, part2)") + ) + + testMergeWithRepartition( + name = "non partitioned table", + partitionColumns = Seq(), + srcRange = Range(80, 180), + expectLessFilesWithRepartition = false, + update("t.part2 = 1"), + insert("(id, part1, part2) VALUES (id, part1, part2)") + ) + + protected def testMatchedOnlyOptimization( + name: String)( + source: Seq[(Int, Int)], + target: Seq[(Int, Int)], + mergeOn: String, + mergeClauses: MergeClause*) ( + result: Seq[(Int, Int)]): Unit = { + Seq(true, false).foreach { matchedOnlyEnabled => + Seq(true, false).foreach { isPartitioned => + val s = if (matchedOnlyEnabled) "enabled" else "disabled" + test(s"matched only merge - $s - $name - isPartitioned: $isPartitioned ") { + withKeyValueData(source, target, isPartitioned) { case (sourceName, targetName) => +
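+ // Run the same merge with the matched-only optimization enabled and disabled; the checkAnswer + // below expects an identical result either way, only the amount of scanning/rewriting may differ.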
withSQLConf(DeltaSQLConf.MERGE_MATCHED_ONLY_ENABLED.key -> s"$matchedOnlyEnabled") { + executeMerge(s"$targetName t", s"$sourceName s", mergeOn, mergeClauses: _*) + } + val deltaPath = if (targetName.startsWith("delta.`")) { + targetName.stripPrefix("delta.`").stripSuffix("`") + } else targetName + checkAnswer( + readDeltaTable(deltaPath), + result.map { case (k, v) => Row(k, v) }) + } + } + } + } + } + + testMatchedOnlyOptimization("with update") ( + source = Seq((1, 100), (3, 300), (5, 500)), + target = Seq((1, 10), (2, 20), (3, 30)), + mergeOn = "s.key = t.key", + update("t.key = s.key, t.value = s.value")) ( + result = Seq( + (1, 100), // updated + (2, 20), // existed previously + (3, 300) // updated + ) + ) + + testMatchedOnlyOptimization("with delete") ( + source = Seq((1, 100), (3, 300), (5, 500)), + target = Seq((1, 10), (2, 20), (3, 30)), + mergeOn = "s.key = t.key", + delete()) ( + result = Seq( + (2, 20) // existed previously + ) + ) + + testMatchedOnlyOptimization("with update and delete")( + source = Seq((1, 100), (3, 300), (5, 500)), + target = Seq((1, 10), (3, 30), (5, 30)), + mergeOn = "s.key = t.key", + update("t.value = s.value", "t.key < 3"), delete("t.key > 3")) ( + result = Seq( + (1, 100), // updated + (3, 30) // existed previously + ) + ) + + protected def testNullCaseMatchedOnly(name: String) ( + source: Seq[(JInt, JInt)], + target: Seq[(JInt, JInt)], + mergeOn: String, + result: Seq[(JInt, JInt)]) = { + Seq(true, false).foreach { isPartitioned => + withSQLConf(DeltaSQLConf.MERGE_MATCHED_ONLY_ENABLED.key -> "true") { + test(s"matched only merge - null handling - $name, isPartitioned: $isPartitioned") { + withView("sourceView") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(target.toDF("key", "value"), partitions) + source.toDF("key", "value").createOrReplaceTempView("sourceView") + + executeMerge( + tgt = s"delta.`$tempPath` as t", + src = "sourceView s", + cond = mergeOn, + update("t.value = s.value")) + + checkAnswer( + readDeltaTable(tempPath), + result.map { r => Row(r._1, r._2) } + ) + + Utils.deleteRecursively(new File(tempPath)) + } + } + } + } + } + + testNullCaseMatchedOnly("null in source") ( + source = Seq((1, 10), (2, 20), (null, null)), + target = Seq((1, 1)), + mergeOn = "s.key = t.key", + result = Seq( + (1, 10) // update + ) + ) + + testNullCaseMatchedOnly("null value in both source and target") ( + source = Seq((1, 10), (2, 20), (null, 0)), + target = Seq((1, 1), (null, null)), + mergeOn = "s.key = t.key", + result = Seq( + (null, null), // No change as null in source does not match null in target + (1, 10) // update + ) + ) + + test("data skipping - target-only condition") { + withKeyValueData( + source = (1, 10) :: Nil, + target = (1, 1) :: (2, 2) :: Nil, + isKeyPartitioned = true) { case (sourceName, targetName) => + + val report = getScanReport { + executeMerge( + target = s"$targetName t", + source = s"$sourceName s", + condition = "s.key = t.key AND t.key <= 1", + update = "t.key = s.key, t.value = s.value", + insert = "(key, value) VALUES (s.key, s.value)") + }.head + + checkAnswer(sql(getDeltaFileStmt(tempPath)), + Row(1, 10) :: // Updated + Row(2, 2) :: // File should be skipped + Nil) + + assert(report.size("scanned").bytesCompressed != report.size("total").bytesCompressed) + } + } + + test("insert only merge - target data skipping") { + val tblName = "merge_target" + withTable(tblName) { + spark.range(10).withColumn("part", 'id % 5).withColumn("value", 'id + 'id) + 
.write.format("delta").partitionBy("part").mode("append").saveAsTable(tblName) + + val source = "source" + withTable(source) { + spark.range(20).withColumn("part", functions.lit(1)).withColumn("value", 'id + 'id) + .write.format("delta").saveAsTable(source) + + val scans = getScanReport { + withSQLConf(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED.key -> "true") { + executeMerge( + s"$tblName t", + s"$source s", + "s.id = t.id AND t.part = 1", + insert(condition = "s.id % 5 = s.part", values = "*")) + } + } + checkAnswer( + spark.table(tblName).where("part = 1"), + Row(1, 1, 2) :: Row(6, 1, 12) :: Row(11, 1, 22) :: Row(16, 1, 32) :: Nil + ) + + assert(scans.length === 2, "We should scan the source and target " + + "data once in an insert only optimization") + + // check if the source and target tables are scanned just once + val sourceRoot = DeltaTableUtils.findDeltaTableRoot( + spark, new Path(spark.table(source).inputFiles.head)).get.toString + val targetRoot = DeltaTableUtils.findDeltaTableRoot( + spark, new Path(spark.table(tblName).inputFiles.head)).get.toString + assert(scans.map(_.path).toSet == Set(sourceRoot, targetRoot)) + + // check scanned files + val targetScans = scans.find(_.path == targetRoot) + val deltaLog = DeltaLog.forTable(spark, targetScans.get.path) + val numTargetFiles = deltaLog.snapshot.numOfFiles + assert(targetScans.get.metrics("numFiles") < numTargetFiles) + // check scanned sizes + val scanSizes = targetScans.head.size + assert(scanSizes("total").bytesCompressed.get > scanSizes("scanned").bytesCompressed.get, + "Should have partition pruned target table") + } + } + } + + /** + * Test whether data skipping on matched predicates of a merge command is performed. + * @param name The name of the test case. + * @param source The source for merge. + * @param target The target for merge. + * @param dataSkippingOnTargetOnly The boolean variable indicates whether + * when matched clauses are on target fields only. + * Data Skipping should be performed before inner join if + * this variable is true. + * @param isMatchedOnly The boolean variable indicates whether the merge command only + * contains when matched clauses. + * @param mergeClauses Merge Clauses. + */ + protected def testMergeDataSkippingOnMatchPredicates( + name: String)( + source: Seq[(Int, Int)], + target: Seq[(Int, Int)], + dataSkippingOnTargetOnly: Boolean, + isMatchedOnly: Boolean, + mergeClauses: MergeClause*)( + result: Seq[(Int, Int)]): Unit = { + test(s"data skipping with matched predicates - $name") { + withKeyValueData(source, target) { case (sourceName, targetName) => + val stats = performMergeAndCollectStatsForDataSkippingOnMatchPredicates( + sourceName, + targetName, + result, + mergeClauses) + // Data skipping on match predicates should only be performed when it's a + // matched only merge. + if (isMatchedOnly) { + // The number of files removed/added should be 0 because of the additional predicates. + assert(stats.targetFilesRemoved == 0) + assert(stats.targetFilesAdded == 0) + // Verify that the additional predicates on data skipping + // before inner join filters file out for match predicates only + // on target. + if (dataSkippingOnTargetOnly) { + assert(stats.targetBeforeSkipping.files.get > stats.targetAfterSkipping.files.get) + } + } else { + if (!spark.conf.get(DeltaSQLConf.MERGE_USE_PERSISTENT_DELETION_VECTORS)) { + assert(stats.targetFilesRemoved > 0) + } + // If there is no insert clause and the flag is enabled, data skipping should be + // performed on targetOnly predicates. 
+ // However, with insert clauses, it's expected that no additional data skipping + // is performed on matched clauses. + assert(stats.targetBeforeSkipping.files.get == stats.targetAfterSkipping.files.get) + assert(stats.targetRowsUpdated == 0) + } + } + } + } + + protected def performMergeAndCollectStatsForDataSkippingOnMatchPredicates( + sourceName: String, + targetName: String, + result: Seq[(Int, Int)], + mergeClauses: Seq[MergeClause]): MergeStats = { + var events: Seq[UsageRecord] = Seq.empty + // Perform merge on merge condition with matched clauses. + events = Log4jUsageLogger.track { + executeMerge(s"$targetName t", s"$sourceName s", "s.key = t.key", mergeClauses: _*) + } + val deltaPath = if (targetName.startsWith("delta.`")) { + targetName.stripPrefix("delta.`").stripSuffix("`") + } else targetName + + checkAnswer( + readDeltaTable(deltaPath), + result.map { case (k, v) => Row(k, v) }) + + // Verify merge stats from usage events + val mergeStats = events.filter { e => + e.metric == MetricDefinitions.EVENT_TAHOE.name && + e.tags.get("opType").contains("delta.dml.merge.stats") + } + + assert(mergeStats.size == 1) + + JsonUtils.fromJson[MergeStats](mergeStats.head.blob) + } + + testMergeDataSkippingOnMatchPredicates("match conditions on target fields only")( + source = Seq((1, 100), (3, 300), (5, 500)), + target = Seq((1, 10), (2, 20), (3, 30)), + dataSkippingOnTargetOnly = true, + isMatchedOnly = true, + update(condition = "t.key == 10", set = "*"), + update(condition = "t.value == 100", set = "*"))( + result = Seq((1, 10), (2, 20), (3, 30)) + ) + + testMergeDataSkippingOnMatchPredicates("match conditions on source fields only")( + source = Seq((1, 100), (3, 300), (5, 500)), + target = Seq((1, 10), (2, 20), (3, 30)), + dataSkippingOnTargetOnly = false, + isMatchedOnly = true, + update(condition = "s.key == 10", set = "*"), + update(condition = "s.value == 10", set = "*"))( + result = Seq((1, 10), (2, 20), (3, 30)) + ) + + testMergeDataSkippingOnMatchPredicates("match on source and target fields")( + source = Seq((1, 100), (3, 300), (5, 500)), + target = Seq((1, 10), (2, 20), (3, 30)), + dataSkippingOnTargetOnly = false, + isMatchedOnly = true, + update(condition = "s.key == 10", set = "*"), + update(condition = "s.value == 10", set = "*"), + delete(condition = "t.key == 4"))( + result = Seq((1, 10), (2, 20), (3, 30)) + ) + + testMergeDataSkippingOnMatchPredicates("with insert clause")( + source = Seq((1, 100), (3, 300), (5, 500)), + target = Seq((1, 10), (2, 20), (3, 30)), + dataSkippingOnTargetOnly = false, + isMatchedOnly = false, + update(condition = "t.key == 10", set = "*"), + insert(condition = null, values = "(key, value) VALUES (s.key, s.value)"))( + result = Seq((1, 10), (2, 20), (3, 30), (5, 500)) + ) + + testMergeDataSkippingOnMatchPredicates("when matched and conjunction")( + source = Seq((1, 100), (3, 300), (5, 500)), + target = Seq((1, 10), (2, 20), (3, 30)), + dataSkippingOnTargetOnly = true, + isMatchedOnly = true, + update(condition = "t.key == 1 AND t.value == 5", set = "*"))( + result = Seq((1, 10), (2, 20), (3, 30))) + + /* unlimited number of merge clauses tests */ + + protected def testUnlimitedClauses( + name: String)( + source: Seq[(Int, Int)], + target: Seq[(Int, Int)], + mergeOn: String, + mergeClauses: MergeClause*)( + result: Seq[(Int, Int)]): Unit = + testExtendedMerge(name, "unlimited clauses")(source, target, mergeOn, mergeClauses : _*)(result) + + protected def testErrorsInUnlimitedClauses( + name: String)( + mergeOn: String, + mergeClauses: 
MergeClause*)( + errorStrs: Seq[String], + notErrorStrs: Seq[String] = Nil): Unit = { + test(s"unlimited clauses - analysis errors - $name") { + withKeyValueData( + source = Seq.empty, + target = Seq.empty, + sourceKeyValueNames = ("key", "srcValue"), + targetKeyValueNames = ("key", "tgtValue") + ) { case (sourceName, targetName) => + val errMsg = intercept[Exception] { + executeMerge(s"$targetName t", s"$sourceName s", mergeOn, mergeClauses: _*) + }.getMessage + errorStrs.foreach { s => errorContains(errMsg, s) } + notErrorStrs.foreach { s => errorNotContains(errMsg, s) } + } + } + } + + testUnlimitedClauses("two conditional update + two conditional delete + insert")( + source = (0, 0) :: (1, 100) :: (3, 300) :: (4, 400) :: (5, 500) :: Nil, + target = (1, 10) :: (2, 20) :: (3, 30) :: (4, 40) :: Nil, + mergeOn = "s.key = t.key", + delete(condition = "s.key < 2"), + delete(condition = "s.key > 4"), + update(condition = "s.key == 3", set = "key = s.key, value = s.value"), + update(condition = "s.key == 4", set = "key = s.key, value = 2 * s.value"), + insert(condition = null, values = "(key, value) VALUES (s.key, s.value)"))( + result = Seq( + (0, 0), // insert (0, 0) + // delete (1, 10) + (2, 20), // neither updated nor deleted as it didn't match + (3, 300), // update (3, 30) + (4, 800), // update (4, 40) + (5, 500) // insert (5, 500) + )) + + testUnlimitedClauses("two conditional delete + conditional update + update + insert")( + source = (0, 0) :: (1, 100) :: (2, 200) :: (3, 300) :: (4, 400) :: Nil, + target = (1, 10) :: (2, 20) :: (3, 30) :: (4, 40) :: Nil, + mergeOn = "s.key = t.key", + delete(condition = "s.key < 2"), + delete(condition = "s.key > 3"), + update(condition = "s.key == 2", set = "key = s.key, value = s.value"), + update(condition = null, set = "key = s.key, value = 2 * s.value"), + insert(condition = null, values = "(key, value) VALUES (s.key, s.value)"))( + result = Seq( + (0, 0), // insert (0, 0) + // delete (1, 10) + (2, 200), // update (2, 20) + (3, 600) // update (3, 30) + // delete (4, 40) + )) + + testUnlimitedClauses("conditional delete + two conditional update + two conditional insert")( + source = (1, 100) :: (2, 200) :: (3, 300) :: (4, 400) :: (6, 600) :: Nil, + target = (1, 10) :: (2, 20) :: (3, 30) :: Nil, + mergeOn = "s.key = t.key", + delete(condition = "s.key < 2"), + update(condition = "s.key == 2", set = "key = s.key, value = s.value"), + update(condition = "s.key == 3", set = "key = s.key, value = 2 * s.value"), + insert(condition = "s.key < 5", values = "(key, value) VALUES (s.key, s.value)"), + insert(condition = "s.key > 5", values = "(key, value) VALUES (s.key, 1 + s.value)"))( + result = Seq( + // delete (1, 10) + (2, 200), // update (2, 20) + (3, 600), // update (3, 30) + (4, 400), // insert (4, 400) + (6, 601) // insert (6, 600) + )) + + testUnlimitedClauses("conditional update + update + conditional delete + conditional insert")( + source = (1, 100) :: (2, 200) :: (3, 300) :: (4, 400) :: (5, 500) :: Nil, + target = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "s.key < 2", set = "key = s.key, value = s.value"), + update(condition = "s.key < 3", set = "key = s.key, value = 2 * s.value"), + delete(condition = "s.key < 4"), + insert(condition = "s.key > 4", values = "(key, value) VALUES (s.key, s.value)"))( + result = Seq( + (0, 0), // no change + (1, 100), // (1, 10) updated by matched_0 + (2, 400), // (2, 20) updated by matched_1 + // (3, 30) deleted by matched_2 + (5, 500) // (5, 500) inserted + 
)) + + testUnlimitedClauses("conditional insert + insert")( + source = (1, 100) :: (2, 200) :: (3, 300) :: (4, 400) :: (5, 500) :: Nil, + target = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: Nil, + mergeOn = "s.key = t.key", + insert(condition = "s.key < 5", values = "(key, value) VALUES (s.key, s.value)"), + insert(condition = null, values = "(key, value) VALUES (s.key, s.value + 1)"))( + result = Seq( + (0, 0), // no change + (1, 10), // no change + (2, 20), // no change + (3, 30), // no change + (4, 400), // (4, 400) inserted by notMatched_0 + (5, 501) // (5, 501) inserted by notMatched_1 + )) + + testUnlimitedClauses("2 conditional inserts")( + source = (1, 100) :: (2, 200) :: (3, 300) :: (4, 400) :: (5, 500) :: (6, 600) :: Nil, + target = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: Nil, + mergeOn = "s.key = t.key", + insert(condition = "s.key < 5", values = "(key, value) VALUES (s.key, s.value)"), + insert(condition = "s.key = 5", values = "(key, value) VALUES (s.key, s.value + 1)"))( + result = Seq( + (0, 0), // no change + (1, 10), // no change + (2, 20), // no change + (3, 30), // no change + (4, 400), // (4, 400) inserted by notMatched_0 + (5, 501) // (5, 501) inserted by notMatched_1 + // (6, 600) not inserted as not insert condition matched + )) + + testUnlimitedClauses("update/delete (no matches) + conditional insert + insert")( + source = (4, 400) :: (5, 500) :: Nil, + target = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "t.key = 0", set = "key = s.key, value = s.value"), + delete(condition = null), + insert(condition = "s.key < 5", values = "(key, value) VALUES (s.key, s.value)"), + insert(condition = null, values = "(key, value) VALUES (s.key, s.value + 1)"))( + result = Seq( + (0, 0), // no change + (1, 10), // no change + (2, 20), // no change + (3, 30), // no change + (4, 400), // (4, 400) inserted by notMatched_0 + (5, 501) // (5, 501) inserted by notMatched_1 + )) + + testUnlimitedClauses("update/delete (no matches) + 2 conditional inserts")( + source = (4, 400) :: (5, 500) :: (6, 600) :: Nil, + target = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "t.key = 0", set = "key = s.key, value = s.value"), + delete(condition = null), + insert(condition = "s.key < 5", values = "(key, value) VALUES (s.key, s.value)"), + insert(condition = "s.key = 5", values = "(key, value) VALUES (s.key, s.value + 1)"))( + result = Seq( + (0, 0), // no change + (1, 10), // no change + (2, 20), // no change + (3, 30), // no change + (4, 400), // (4, 400) inserted by notMatched_0 + (5, 501) // (5, 501) inserted by notMatched_1 + // (6, 600) not inserted as not insert condition matched + )) + + testUnlimitedClauses("2 update + 2 delete + 4 insert")( + source = (1, 100) :: (2, 200) :: (3, 300) :: (4, 400) :: (5, 500) :: (6, 600) :: (7, 700) :: + (8, 800) :: (9, 900) :: Nil, + target = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: (4, 40) :: Nil, + mergeOn = "s.key = t.key", + update(condition = "s.key == 1", set = "key = s.key, value = s.value"), + delete(condition = "s.key == 2"), + update(condition = "s.key == 3", set = "key = s.key, value = 2 * s.value"), + delete(condition = null), + insert(condition = "s.key == 5", values = "(key, value) VALUES (s.key, s.value)"), + insert(condition = "s.key == 6", values = "(key, value) VALUES (s.key, 1 + s.value)"), + insert(condition = "s.key == 7", values = "(key, value) VALUES (s.key, 2 + s.value)"), + insert(condition = null, values = "(key, value) 
VALUES (s.key, 3 + s.value)"))( + result = Seq( + (0, 0), // no change + (1, 100), // (1, 10) updated by matched_0 + // (2, 20) deleted by matched_1 + (3, 600), // (3, 30) updated by matched_2 + // (4, 40) deleted by matched_3 + (5, 500), // (5, 500) inserted by notMatched_0 + (6, 601), // (6, 600) inserted by notMatched_1 + (7, 702), // (7, 700) inserted by notMatched_2 + (8, 803), // (8, 800) inserted by notMatched_3 + (9, 903) // (9, 900) inserted by notMatched_3 + )) + + testErrorsInUnlimitedClauses("error on multiple insert clauses without condition")( + mergeOn = "s.key = t.key", + update(condition = "s.key == 3", set = "key = s.key, value = 2 * srcValue"), + insert(condition = null, values = "(key, value) VALUES (s.key, srcValue)"), + insert(condition = null, values = "(key, value) VALUES (s.key, 1 + srcValue)"))( + errorStrs = "when there are more than one not matched" :: + "clauses in a merge statement, only the last not matched" :: + "clause can omit the condition" :: Nil) + + testErrorsInUnlimitedClauses("error on multiple update clauses without condition")( + mergeOn = "s.key = t.key", + update(condition = "s.key == 3", set = "key = s.key, value = 2 * srcValue"), + update(condition = null, set = "key = s.key, value = 3 * srcValue"), + update(condition = null, set = "key = s.key, value = 4 * srcValue"), + insert(condition = null, values = "(key, value) VALUES (s.key, srcValue)"))( + errorStrs = "when there are more than one matched clauses in a merge statement, " + + "only the last matched clause can omit the condition" :: Nil) + + testErrorsInUnlimitedClauses("error on multiple update/delete clauses without condition")( + mergeOn = "s.key = t.key", + update(condition = "s.key == 3", set = "key = s.key, value = 2 * srcValue"), + delete(condition = null), + update(condition = null, set = "key = s.key, value = 4 * srcValue"), + insert(condition = null, values = "(key, value) VALUES (s.key, srcValue)"))( + errorStrs = "when there are more than one matched clauses in a merge statement, " + + "only the last matched clause can omit the condition" :: Nil) + + testErrorsInUnlimitedClauses( + "error on non-empty condition following empty condition for update clauses")( + mergeOn = "s.key = t.key", + update(condition = null, set = "key = s.key, value = 2 * srcValue"), + update(condition = "s.key < 3", set = "key = s.key, value = srcValue"), + insert(condition = null, values = "(key, value) VALUES (s.key, srcValue)"))( + errorStrs = "when there are more than one matched clauses in a merge statement, " + + "only the last matched clause can omit the condition" :: Nil) + + testErrorsInUnlimitedClauses( + "error on non-empty condition following empty condition for insert clauses")( + mergeOn = "s.key = t.key", + update(condition = null, set = "key = s.key, value = srcValue"), + insert(condition = null, values = "(key, value) VALUES (s.key, srcValue)"), + insert(condition = "s.key < 3", values = "(key, value) VALUES (s.key, 1 + srcValue)"))( + errorStrs = "when there are more than one not matched" :: + "clauses in a merge statement, only the last not matched" :: + "clause can omit the condition" :: Nil) + + /* end unlimited number of merge clauses tests */ + + test("SC-70829 - prevent re-resolution with star and schema evolution") { + val source = "source" + val target = "target" + withTable(source, target) { + + sql(s"""CREATE TABLE $source (id string, new string, old string, date DATE) USING delta""") + sql(s"""CREATE TABLE $target (id string, old string, date DATE) USING delta""") + + 
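+      // With schema auto-merge enabled below, the star clauses can evolve the target schema
+      // to pick up the source-only `new` column instead of failing to resolve it.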
withSQLConf("spark.databricks.delta.schema.autoMerge.enabled" -> "true") { + executeMerge( + tgt = s"$target t", + src = s"$source s", + // functions like date_sub requires additional work to resolve + cond = "s.id = t.id AND t.date >= date_sub(current_date(), 3)", + update(set = "*"), + insert(values = "*")) + } + } + } + + /** + * @param function the unsupported function. + * @param functionType The type of the unsupported expression to be tested. + * @param sourceData the data in the source table. + * @param targetData the data in the target table. + * @param mergeCondition the merge condition containing the unsupported expression. + * @param clauseCondition the clause condition containing the unsupported expression. + * @param clauseAction the clause action containing the unsupported expression. + * @param expectExceptionInAction whether expect exception thrown in action. + * @param customConditionErrorRegex the customized error regex for condition. + * @param customActionErrorRegex the customized error regex for action. + */ + def testUnsupportedExpression( + function: String, + functionType: String, + sourceData: => DataFrame, + targetData: => DataFrame, + mergeCondition: String, + clauseCondition: String, + clauseAction: String, + expectExceptionInAction: Option[Boolean] = None, + customConditionErrorRegex: Option[String] = None, + customActionErrorRegex: Option[String] = None) { + test(s"$functionType functions in merge" + + s" - expect exception in action: ${expectExceptionInAction.getOrElse(true)}") { + withTable("source", "target") { + sourceData.write.format("delta").saveAsTable("source") + targetData.write.format("delta").saveAsTable("target") + + val expectedErrorRegex = "(?s).*(?i)unsupported.*(?i).*Invalid expressions.*" + + def checkExpression( + expectException: Boolean, + condition: Option[String] = None, + clause: Option[MergeClause] = None, + expectedRegex: Option[String] = None) { + if (expectException) { + val dataBeforeException = spark.read.format("delta").table("target").collect() + val e = intercept[Exception] { + executeMerge( + tgt = "target as t", + src = "source as s", + cond = condition.getOrElse("s.a = t.a"), + clause.getOrElse(update(set = "b = s.b")) + ) + } + + def extractErrorClass(e: Throwable): String = + e match { + case dt: DeltaThrowable => s"\\[${dt.getErrorClass}\\] " + case _ => "" + } + + val (message, errorClass) = if (e.getCause != null) { + (e.getCause.getMessage, extractErrorClass(e.getCause)) + } else (e.getMessage, extractErrorClass(e)) + assert(message.matches(errorClass + expectedRegex.getOrElse(expectedErrorRegex))) + checkAnswer(spark.read.format("delta").table("target"), dataBeforeException) + } else { + executeMerge( + tgt = "target as t", + src = "source as s", + cond = condition.getOrElse("s.a = t.a"), + clause.getOrElse(update(set = "b = s.b")) + ) + } + } + + // on merge condition + checkExpression( + expectException = true, + condition = Option(mergeCondition), + expectedRegex = customConditionErrorRegex + ) + + // on update condition + checkExpression( + expectException = true, + clause = Option(update(condition = clauseCondition, set = "b = s.b")), + expectedRegex = customConditionErrorRegex + ) + + // on update action + checkExpression( + expectException = expectExceptionInAction.getOrElse(true), + clause = Option(update(set = s"b = $clauseAction")), + expectedRegex = customActionErrorRegex + ) + + // on insert condition + checkExpression( + expectException = true, + clause = Option( + insert(values = "(a, b, c) VALUES (s.a, 
s.b, s.c)", condition = clauseCondition)), + expectedRegex = customConditionErrorRegex + ) + + sql("update source set a = 2") + // on insert action + checkExpression( + expectException = expectExceptionInAction.getOrElse(true), + clause = Option(insert(values = s"(a, b, c) VALUES ($clauseAction, s.b, s.c)")), + expectedRegex = customActionErrorRegex + ) + } + } + } + + testUnsupportedExpression( + function = "row_number", + functionType = "Window", + sourceData = Seq((1, 2, 3)).toDF("a", "b", "c"), + targetData = Seq((1, 5, 6)).toDF("a", "b", "c"), + mergeCondition = "(row_number() over (order by s.c)) = (row_number() over (order by t.c))", + clauseCondition = "row_number() over (order by s.c) > 1", + clauseAction = "row_number() over (order by s.c)" + ) + + testUnsupportedExpression( + function = "max", + functionType = "Aggregate", + sourceData = Seq((1, 2, 3)).toDF("a", "b", "c"), + targetData = Seq((1, 5, 6)).toDF("a", "b", "c"), + mergeCondition = "t.a = max(s.a)", + clauseCondition = "max(s.b) > 1", + clauseAction = "max(s.c)", + customConditionErrorRegex = + Option("Aggregate functions are not supported in the .* condition of MERGE operation.*") + ) + + testWithTempView("test merge on temp view - basic") { isSQLTempView => + withTable("tab") { + withTempView("src") { + Seq((0, 3), (1, 2)).toDF("key", "value").write.format("delta").saveAsTable("tab") + createTempViewFromTable("tab", isSQLTempView) + sql("CREATE TEMP VIEW src AS SELECT * FROM VALUES (1, 2), (3, 4) AS t(a, b)") + executeMerge( + target = "v", + source = "src", + condition = "src.a = v.key AND src.b = v.value", + update = "v.value = src.b + 1", + insert = "(v.key, v.value) VALUES (src.a, src.b)") + checkAnswer(spark.table("v"), Seq(Row(0, 3), Row(1, 3), Row(3, 4))) + } + } + } + + protected def testInvalidTempViews(name: String)( + text: String, + expectedErrorMsgForSQLTempView: String = null, + expectedErrorMsgForDataSetTempView: String = null, + expectedErrorClassForSQLTempView: String = null, + expectedErrorClassForDataSetTempView: String = null): Unit = { + testWithTempView(s"test merge on temp view - $name") { isSQLTempView => + withTable("tab") { + withTempView("src") { + Seq((0, 3), (1, 2)).toDF("key", "value").write.format("delta").saveAsTable("tab") + createTempViewFromSelect(text, isSQLTempView) + sql("CREATE TEMP VIEW src AS SELECT * FROM VALUES (1, 2), (3, 4) AS t(a, b)") + val doesExpectError = if (isSQLTempView) { + expectedErrorMsgForSQLTempView != null || expectedErrorClassForSQLTempView != null + } else { + expectedErrorMsgForDataSetTempView != null || + expectedErrorClassForDataSetTempView != null + } + if (doesExpectError) { + val ex = intercept[AnalysisException] { + executeMerge( + target = "v", + source = "src", + condition = "src.a = v.key AND src.b = v.value", + update = "v.value = src.b + 1", + insert = "(v.key, v.value) VALUES (src.a, src.b)") + } + testErrorMessageAndClass( + isSQLTempView, + ex, + expectedErrorMsgForSQLTempView, + expectedErrorMsgForDataSetTempView, + expectedErrorClassForSQLTempView, + expectedErrorClassForDataSetTempView) + } else { + executeMerge( + target = "v", + source = "src", + condition = "src.a = v.key AND src.b = v.value", + update = "v.value = src.b + 1", + insert = "(v.key, v.value) VALUES (src.a, src.b)") + checkAnswer(spark.table("v"), Seq(Row(0, 3), Row(1, 3), Row(3, 4))) + } + } + } + } + } + + testInvalidTempViews("subset cols")( + text = "SELECT key FROM tab", + expectedErrorMsgForSQLTempView = "cannot", + expectedErrorMsgForDataSetTempView = "cannot" + ) + 
+ testInvalidTempViews("superset cols")( + text = "SELECT key, value, 1 FROM tab", + // The analyzer can't tell whether the table originally had the extra column or not. + expectedErrorMsgForSQLTempView = + "The schema of your Delta table has changed in an incompatible way", + expectedErrorMsgForDataSetTempView = + "The schema of your Delta table has changed in an incompatible way" + ) + + testInvalidTempViews("nontrivial projection")( + text = "SELECT value as key, key as value FROM tab", + expectedErrorMsgForSQLTempView = "Attribute(s) with the same name appear", + expectedErrorMsgForDataSetTempView = "Attribute(s) with the same name appear" + ) + + testInvalidTempViews("view with too many internal aliases")( + text = "SELECT * FROM (SELECT * FROM tab AS t1) AS t2", + expectedErrorMsgForSQLTempView = "Attribute(s) with the same name appear", + expectedErrorMsgForDataSetTempView = null + ) + + test("UDT Data Types - simple and nested") { + withTable("source") { + withTable("target") { + // scalastyle:off line.size.limit + val targetData = Seq( + Row(SimpleTest(0), ComplexTest(10, Array(1, 2, 3))), + Row(SimpleTest(1), ComplexTest(20, Array(4, 5))), + Row(SimpleTest(2), ComplexTest(30, Array(6, 7, 8)))) + val sourceData = Seq( + Row(SimpleTest(0), ComplexTest(40, Array(9, 10))), + Row(SimpleTest(3), ComplexTest(50, Array(11)))) + val resultData = Seq( + Row(SimpleTest(0), ComplexTest(40, Array(9, 10))), + Row(SimpleTest(1), ComplexTest(20, Array(4, 5))), + Row(SimpleTest(2), ComplexTest(30, Array(6, 7, 8))), + Row(SimpleTest(3), ComplexTest(50, Array(11)))) + + val schema = StructType(Array( + StructField("id", new SimpleTestUDT), + StructField("complex", new ComplexTestUDT))) + + val df = spark.createDataFrame(sparkContext.parallelize(targetData), schema) + df.collect() + + spark.createDataFrame(sparkContext.parallelize(targetData), schema) + .write.format("delta").saveAsTable("target") + + spark.createDataFrame(sparkContext.parallelize(sourceData), schema) + .write.format("delta").saveAsTable("source") + // scalastyle:on line.size.limit + sql( + s""" + |MERGE INTO target as t + |USING source as s + |ON t.id = s.id + |WHEN MATCHED THEN + | UPDATE SET * + |WHEN NOT MATCHED THEN + | INSERT * + """.stripMargin) + + checkAnswer(sql("select * from target"), resultData) + } + } + } + test("recorded operations - write all changes") { + var events: Seq[UsageRecord] = Seq.empty + withKeyValueData( + source = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: (4, 40) :: Nil, + target = (1, 1) :: (2, 2) :: (3, 3) :: (5, 5) :: (6, 6) :: Nil, + isKeyPartitioned = true) { case (sourceName, targetName) => + + events = Log4jUsageLogger.track { + executeMerge( + tgt = s"$targetName t", + src = s"$sourceName s", + cond = "s.key = t.key", + update(condition = "s.key > 1", set = "key = s.key, value = s.value"), + insert(condition = "s.key < 1", values = "(key, value) VALUES (s.key, s.value)"), + deleteNotMatched(condition = "t.key > 5")) + } + + checkAnswer(sql(getDeltaFileStmt(tempPath)), Seq( + Row(0, 0), // inserted + Row(1, 1), // existed previously + Row(2, 20), // updated + Row(3, 30), // updated + Row(5, 5) // existed previously + // Row(6, 6) deleted + )) + } + + // Get recorded operations from usage events + val opTypes = events.filter { e => + e.metric == "sparkOperationDuration" && e.opType.get.typeName.contains("delta.dml.merge") + }.map(_.opType.get.typeName).toSet + + assert(opTypes == expectedOpTypes) + } + + protected lazy val expectedOpTypes: Set[String] = Set( + "delta.dml.merge.findTouchedFiles", 
"delta.dml.merge.writeAllChanges", "delta.dml.merge") + + test("insert only merge - recorded operation") { + var events: Seq[UsageRecord] = Seq.empty + withKeyValueData( + source = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: (4, 40) :: Nil, + target = (1, 1) :: (2, 2) :: (3, 3) :: Nil, + isKeyPartitioned = true) { case (sourceName, targetName) => + + withSQLConf(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED.key -> "true") { + events = Log4jUsageLogger.track { + executeMerge( + tgt = s"$targetName t", + src = s"$sourceName s", + cond = "s.key = t.key AND t.key > 1", + insert(condition = "s.key = 4", values = "(key, value) VALUES (s.key, s.value)")) + } + } + + checkAnswer(sql(getDeltaFileStmt(tempPath)), Seq( + Row(1, 1), // existed previously + Row(2, 2), // existed previously + Row(3, 3), // existed previously + Row(4, 40) // inserted + )) + } + + // Get recorded operations from usage events + val opTypes = events.filter { e => + e.metric == "sparkOperationDuration" && e.opType.get.typeName.contains("delta.dml.merge") + }.map(_.opType.get.typeName).toSet + + assert(opTypes == Set( + "delta.dml.merge", "delta.dml.merge.writeInsertsOnlyWhenNoMatchedClauses")) + } + + test("recorded operations - write inserts only") { + var events: Seq[UsageRecord] = Seq.empty + withKeyValueData( + source = (0, 0) :: (1, 10) :: (2, 20) :: (3, 30) :: (4, 40) :: Nil, + target = (1, 1) :: (2, 2) :: (3, 3) :: Nil, + isKeyPartitioned = true) { case (sourceName, targetName) => + + events = Log4jUsageLogger.track { + executeMerge( + tgt = s"$targetName t", + src = s"$sourceName s", + cond = "s.key = t.key AND s.key > 5", + update(condition = "s.key > 10", set = "key = s.key, value = s.value"), + insert(condition = "s.key < 1", values = "(key, value) VALUES (s.key, s.value)")) + } + + checkAnswer(sql(getDeltaFileStmt(tempPath)), Seq( + Row(0, 0), // inserted + Row(1, 1), // existed previously + Row(2, 2), // existed previously + Row(3, 3) // existed previously + )) + } + + // Get recorded operations from usage events + val opTypes = events.filter { e => + e.metric == "sparkOperationDuration" && e.opType.get.typeName.contains("delta.dml.merge") + }.map(_.opType.get.typeName).toSet + + assert(opTypes == expectedOpTypesInsertOnly) + } + + protected lazy val expectedOpTypesInsertOnly: Set[String] = Set( + "delta.dml.merge.findTouchedFiles", + "delta.dml.merge.writeInsertsOnlyWhenNoMatches", + "delta.dml.merge") +} + + +@SQLUserDefinedType(udt = classOf[SimpleTestUDT]) +case class SimpleTest(value: Int) + +class SimpleTestUDT extends UserDefinedType[SimpleTest] { + override def sqlType: DataType = IntegerType + + override def serialize(input: SimpleTest): Any = input.value + + override def deserialize(datum: Any): SimpleTest = datum match { + case a: Int => SimpleTest(a) + } + + override def userClass: Class[SimpleTest] = classOf[SimpleTest] +} + +@SQLUserDefinedType(udt = classOf[ComplexTestUDT]) +case class ComplexTest(key: Int, values: Array[Int]) + +class ComplexTestUDT extends UserDefinedType[ComplexTest] { + override def sqlType: DataType = StructType(Seq( + StructField("key", IntegerType), + StructField("values", ArrayType(IntegerType, containsNull = false)))) + + override def serialize(input: ComplexTest): Any = { + val row = new GenericInternalRow(2) + row.setInt(0, input.key) + row.update(1, UnsafeArrayData.fromPrimitiveArray(input.values)) + row + } + + override def deserialize(datum: Any): ComplexTest = datum match { + case row: InternalRow => + ComplexTest(row.getInt(0), row.getArray(1).toIntArray()) + } + + 
override def userClass: Class[ComplexTest] = classOf[ComplexTest] +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoTestUtils.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoTestUtils.scala new file mode 100644 index 00000000000..89cc74c75c9 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoTestUtils.scala @@ -0,0 +1,280 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import io.delta.tables._ + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.catalyst.util.FailFastMode +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types.StructType + +/** + * Base trait collecting helper methods to run MERGE tests. Merge test suite will want to mix in + * either [[MergeIntoSQLTestUtils]] or [[MergeIntoScalaTestUtils]] to run merge tests using the SQL + * or Scala API resp. + */ +trait MergeIntoTestUtils extends DeltaDMLTestUtils with MergeHelpers { + self: SharedSparkSession => + + import testImplicits._ + + protected def executeMerge( + target: String, + source: String, + condition: String, + update: String, + insert: String): Unit + + protected def executeMerge( + tgt: String, + src: String, + cond: String, + clauses: MergeClause*): Unit + + protected def withCrossJoinEnabled(body: => Unit): Unit = { + withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { body } + } + + /** + * Parse the input JSON data into a dataframe, one row per input element. + * Throws an exception on malformed inputs or records that don't comply with the provided schema. 
+ */ + protected def readFromJSON(data: Seq[String], schema: StructType = null): DataFrame = { + if (schema != null) { + spark.read + .schema(schema) + .option("mode", FailFastMode.name) + .json(data.toDS) + } else { + spark.read + .option("mode", FailFastMode.name) + .json(data.toDS) + } + } +} + +trait MergeIntoSQLTestUtils extends SQLTestUtils with MergeIntoTestUtils { + self: SharedSparkSession => + + protected def basicMergeStmt( + target: String, + source: String, + condition: String, + update: String, + insert: String): String = { + s""" + |MERGE INTO $target + |USING $source + |ON $condition + |WHEN MATCHED THEN UPDATE SET $update + |WHEN NOT MATCHED THEN INSERT $insert + """.stripMargin + } + + override protected def executeMerge( + target: String, + source: String, + condition: String, + update: String, + insert: String): Unit = + sql(basicMergeStmt(target, source, condition, update, insert)) + + protected def mergeStmt( + target: String, + source: String, + condition: String, + clauses: MergeClause*): String = + s"MERGE INTO $target USING $source ON $condition\n" + clauses.map(_.sql).mkString("\n") + + override protected def executeMerge( + tgt: String, + src: String, + cond: String, + clauses: MergeClause*): Unit = sql(mergeStmt(tgt, src, cond, clauses: _*)) +} + +trait MergeIntoScalaTestUtils extends MergeIntoTestUtils { + self: SharedSparkSession => + + override protected def executeMerge( + target: String, + source: String, + condition: String, + update: String, + insert: String): Unit = { + executeMerge( + tgt = target, + src = source, + cond = condition, + this.update(set = update), + this.insert(values = insert)) + } + + override protected def executeMerge( + tgt: String, + src: String, + cond: String, + clauses: MergeClause*): Unit = { + + def buildClause(clause: MergeClause, mergeBuilder: DeltaMergeBuilder) + : DeltaMergeBuilder = clause match { + case _: MatchedClause => + val actionBuilder: DeltaMergeMatchedActionBuilder = + if (clause.condition != null) mergeBuilder.whenMatched(clause.condition) + else mergeBuilder.whenMatched() + if (clause.action.startsWith("DELETE")) { // DELETE clause + actionBuilder.delete() + } else { // UPDATE clause + val setColExprStr = clause.action.trim.stripPrefix("UPDATE SET") + if (setColExprStr.trim == "*") { // UPDATE SET * + actionBuilder.updateAll() + } else if (setColExprStr.contains("array_")) { // UPDATE SET x = array_union(..) 
+ val setColExprPairs = parseUpdate(Seq(setColExprStr)) + actionBuilder.updateExpr(setColExprPairs) + } else { // UPDATE SET x = a, y = b, z = c + val setColExprPairs = parseUpdate(setColExprStr.split(",")) + actionBuilder.updateExpr(setColExprPairs) + } + } + case _: NotMatchedClause => // INSERT clause + val actionBuilder: DeltaMergeNotMatchedActionBuilder = + if (clause.condition != null) mergeBuilder.whenNotMatched(clause.condition) + else mergeBuilder.whenNotMatched() + val valueStr = clause.action.trim.stripPrefix("INSERT") + if (valueStr.trim == "*") { // INSERT * + actionBuilder.insertAll() + } else { // INSERT (x, y, z) VALUES (a, b, c) + val valueColExprsPairs = parseInsert(valueStr, Some(clause)) + actionBuilder.insertExpr(valueColExprsPairs) + } + case _: NotMatchedBySourceClause => + val actionBuilder: DeltaMergeNotMatchedBySourceActionBuilder = + if (clause.condition != null) mergeBuilder.whenNotMatchedBySource(clause.condition) + else mergeBuilder.whenNotMatchedBySource() + if (clause.action.startsWith("DELETE")) { // DELETE clause + actionBuilder.delete() + } else { // UPDATE clause + val setColExprStr = clause.action.trim.stripPrefix("UPDATE SET") + if (setColExprStr.contains("array_")) { // UPDATE SET x = array_union(..) + val setColExprPairs = parseUpdate(Seq(setColExprStr)) + actionBuilder.updateExpr(setColExprPairs) + } else { // UPDATE SET x = a, y = b, z = c + val setColExprPairs = parseUpdate(setColExprStr.split(",")) + actionBuilder.updateExpr(setColExprPairs) + } + } + } + + val deltaTable = DeltaTestUtils.getDeltaTableForIdentifierOrPath( + spark, + DeltaTestUtils.getTableIdentifierOrPath(tgt)) + + val sourceDataFrame: DataFrame = { + val (tableOrQuery, optionalAlias) = DeltaTestUtils.parseTableAndAlias(src) + var df = + if (tableOrQuery.startsWith("(")) spark.sql(tableOrQuery) else spark.table(tableOrQuery) + optionalAlias.foreach { alias => df = df.as(alias) } + df + } + + var mergeBuilder = deltaTable.merge(sourceDataFrame, cond) + clauses.foreach { clause => + mergeBuilder = buildClause(clause, mergeBuilder) + } + mergeBuilder.execute() + deltaTable.toDF + } + + protected def parseUpdate(update: Seq[String]): Map[String, String] = { + update.map { _.split("=").toList }.map { + case setCol :: setExpr :: Nil => setCol.trim -> setExpr.trim + case _ => fail("error parsing update actions " + update) + }.toMap + } + + protected def parseInsert(valueStr: String, clause: Option[MergeClause]): Map[String, String] = { + valueStr.split("VALUES").toList match { + case colsStr :: exprsStr :: Nil => + def parse(str: String): Seq[String] = { + str.trim.stripPrefix("(").stripSuffix(")").split(",").map(_.trim) + } + val cols = parse(colsStr) + val exprs = parse(exprsStr) + require(cols.size == exprs.size, + s"Invalid insert action ${clause.get.action}: cols = $cols, exprs = $exprs") + cols.zip(exprs).toMap + + case list => + fail(s"Invalid insert action ${clause.get.action} split into $list") + } + } + + protected def parsePath(nameOrPath: String): String = { + if (nameOrPath.startsWith("delta.`")) { + nameOrPath.stripPrefix("delta.`").stripSuffix("`") + } else nameOrPath + } +} + +trait MergeHelpers { + /** A simple representative of a any WHEN clause in a MERGE statement */ + protected sealed trait MergeClause { + def condition: String + def action: String + def clause: String + def sql: String = { + assert(action != null, "action not specified yet") + val cond = if (condition != null) s"AND $condition" else "" + s"WHEN $clause $cond THEN $action" + } + } + + protected case 
class MatchedClause(condition: String, action: String) extends MergeClause { + override def clause: String = "MATCHED" + } + + protected case class NotMatchedClause(condition: String, action: String) extends MergeClause { + override def clause: String = "NOT MATCHED" + } + + protected case class NotMatchedBySourceClause(condition: String, action: String) + extends MergeClause { + override def clause: String = "NOT MATCHED BY SOURCE" + } + + protected def update(set: String = null, condition: String = null): MergeClause = { + MatchedClause(condition, s"UPDATE SET $set") + } + + protected def delete(condition: String = null): MergeClause = { + MatchedClause(condition, s"DELETE") + } + + protected def insert(values: String = null, condition: String = null): MergeClause = { + NotMatchedClause(condition, s"INSERT $values") + } + + protected def updateNotMatched(set: String = null, condition: String = null): MergeClause = { + NotMatchedBySourceClause(condition, s"UPDATE SET $set") + } + + protected def deleteNotMatched(condition: String = null): MergeClause = { + NotMatchedBySourceClause(condition, s"DELETE") + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoTimestampConsistencySuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoTimestampConsistencySuite.scala new file mode 100644 index 00000000000..fc3ee94db4b --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoTimestampConsistencySuite.scala @@ -0,0 +1,173 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.sql.Timestamp + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.expressions.{CurrentTimestamp, Now} +import org.apache.spark.sql.functions.{current_timestamp, lit, timestamp_seconds} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +class MergeIntoTimestampConsistencySuite extends MergeIntoTimestampConsistencySuiteBase { + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set(DeltaSQLConf.MERGE_USE_PERSISTENT_DELETION_VECTORS.key, "false") + } +} + + +abstract class MergeIntoTimestampConsistencySuiteBase extends QueryTest + with SharedSparkSession with DeltaSQLCommandTest { + private def withTestTables(block: => Unit): Unit = { + def setupTablesAndRun(): Unit = { + spark.range(0, 5) + .toDF("id") + .withColumn("updated", lit(false)) + .withColumn("timestampOne", timestamp_seconds(lit(1))) + .withColumn("timestampTwo", timestamp_seconds(lit(1337))) + .write + .format("delta") + .saveAsTable("target") + spark.range(0, 10) + .toDF("id") + .withColumn("updated", lit(true)) + .withColumn("timestampOne", current_timestamp()) + .withColumn("timestampTwo", current_timestamp()) + .createOrReplaceTempView("source") + + block + } + + Utils.tryWithSafeFinally(setupTablesAndRun) { + sql("DROP VIEW IF EXISTS source") + sql("DROP TABLE IF EXISTS target") + } + } + + test("Consistent timestamps between source and ON condition") { + withTestTables { + sql(s"""MERGE INTO target t + | USING source s + | ON s.id = t.id AND s.timestampOne = now() + | WHEN MATCHED THEN UPDATE SET *""".stripMargin) + + assertAllRowsAreUpdated() + } + } + + test("Consistent timestamps between source and WHEN MATCHED condition") { + withTestTables { + sql(s"""MERGE INTO target t + | USING source s + | ON s.id = t.id + | WHEN MATCHED AND s.timestampOne = now() AND s.timestampTwo = now() + | THEN UPDATE SET *""".stripMargin) + + assertAllRowsAreUpdated() + } + } + + test("Consistent timestamps between source and UPDATE SET") { + withTestTables { + sql( + s"""MERGE INTO target t + | USING source s + | ON s.id = t.id + | WHEN MATCHED THEN UPDATE + | SET updated = s.updated, t.timestampOne = s.timestampOne, t.timestampTwo = now() + |""".stripMargin) + + assertUpdatedTimestampsInTargetAreAllEqual() + } + } + + test("Consistent timestamps between source and WHEN NOT MATCHED condition") { + withTestTables { + sql(s"""MERGE INTO target t + | USING source s + | ON s.id = t.id + | WHEN NOT MATCHED AND s.timestampOne = now() AND s.timestampTwo = now() + | THEN INSERT * + |""".stripMargin) + + assertNewSourceRowsInserted() + } + } + + test("Consistent timestamps between source and INSERT VALUES") { + withTestTables { + sql( + s"""MERGE INTO target t + | USING source s + | ON s.id = t.id + | WHEN NOT MATCHED THEN INSERT (id, updated, timestampOne, timestampTwo) + | VALUES (s.id, s.updated, s.timestampOne, now()) + |""".stripMargin) + + assertUpdatedTimestampsInTargetAreAllEqual() + } + } + + test("Consistent timestamps with subquery in source") { + withTestTables { + val sourceWithSubqueryTable = "source_with_subquery" + withTempView(s"$sourceWithSubqueryTable") { + sql( + s"""CREATE OR REPLACE TEMPORARY VIEW $sourceWithSubqueryTable + | AS SELECT * FROM source WHERE timestampOne IN (SELECT now()) + |""".stripMargin).collect() + + sql(s"""MERGE INTO target t + | USING 
$sourceWithSubqueryTable s + | ON s.id = t.id + | WHEN MATCHED THEN UPDATE SET *""".stripMargin) + + assertAllRowsAreUpdated() + } + } + } + + + private def assertAllRowsAreUpdated(): Unit = { + val nonUpdatedRowsCount = sql("SELECT * FROM target WHERE updated = FALSE").count() + assert(0 === nonUpdatedRowsCount, "Un-updated rows in target table") + } + + private def assertNewSourceRowsInserted(): Unit = { + val numNotInsertedSourceRows = + sql("SELECT * FROM source s LEFT ANTI JOIN target t ON s.id = t.id").count() + assert(0 === numNotInsertedSourceRows, "Un-inserted rows in source table") + } + + private def assertUpdatedTimestampsInTargetAreAllEqual(): Unit = { + import testImplicits._ + + val timestampCombinations = + sql(s"""SELECT timestampOne, timestampTwo + | FROM target WHERE updated = TRUE GROUP BY timestampOne, timestampTwo + |""".stripMargin) + val rows = timestampCombinations.as[(Timestamp, Timestamp)].collect() + assert(1 === rows.length, "Multiple combinations of timestamp values in target table") + assert(rows(0)._1 === rows(0)._2, + "timestampOne and timestampTwo are not equal in target table") + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/NonFateSharingFutureSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/NonFateSharingFutureSuite.scala new file mode 100644 index 00000000000..d1d89baf565 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/NonFateSharingFutureSuite.scala @@ -0,0 +1,92 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.util.concurrent.atomic.AtomicInteger + +import scala.concurrent.duration._ +import scala.util.control.ControlThrowable + +import org.apache.spark.sql.delta.util.threads.DeltaThreadPool + +import org.apache.spark.{SparkException, SparkFunSuite} +import org.apache.spark.sql.test.SharedSparkSession + +class NonFateSharingFutureSuite extends SparkFunSuite with SharedSparkSession { + test("function only runs once on success") { + val count = new AtomicInteger + val future = DeltaThreadPool("test", 1).submitNonFateSharing { _ => count.incrementAndGet } + assert(future.get(10.seconds) === 1) + assert(future.get(10.seconds) === 1) + spark.cloneSession().withActive { + assert(future.get(10.seconds) === 1) + } + } + + test("non-fatal exception in future is ignored") { + val count = new AtomicInteger + val future = DeltaThreadPool("test", 1).submitNonFateSharing { _ => + count.incrementAndGet match { + case 1 => throw new Exception + case i => i + } + } + + // Make sure the future already failed before waiting on it. This should happen ~immediately + // unless the test runner is horribly overloaded/slow/etc, and stabilizes the assertions below. 
+ eventually(timeout(100.seconds)) { + assert(count.get == 1) + } + + spark.cloneSession().withActive { + assert(future.get(1.seconds) === 2) + } + assert(future.get(1.seconds) === 3) + + spark.cloneSession().withActive { + assert(future.get(1.seconds) === 4) + } + assert(future.get(1.seconds) === 5) + } + + test("fatal exception in future only propagates once, and only to owning session") { + val count = new AtomicInteger + val future = DeltaThreadPool("test", 1).submitNonFateSharing { _ => + count.incrementAndGet match { + case 1 => throw new InternalError + case i => i + } + } + + // Make sure the future already failed before waiting on it. This should happen ~immediately + // unless the test runner is horribly overloaded/slow/etc, and stabilizes the assertions below. + eventually(timeout(100.seconds)) { + assert(count.get == 1) + } + + spark.cloneSession().withActive { + assert(future.get(1.seconds) === 2) + } + intercept[InternalError] { + future.get(1.seconds) + } + spark.cloneSession().withActive { + assert(future.get(1.seconds) === 3) + } + assert(future.get(1.seconds) === 4) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/OptimisticTransactionLegacyTests.scala b/spark/src/test/scala/org/apache/spark/sql/delta/OptimisticTransactionLegacyTests.scala new file mode 100644 index 00000000000..e6a3181368d --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/OptimisticTransactionLegacyTests.scala @@ -0,0 +1,711 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.DeltaOperations.ManualUpdate +import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile +import org.apache.spark.sql.delta.actions.{Action, AddCDCFile, AddFile, FileAction, Metadata, RemoveFile, SetTransaction} +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{StringType, StructField, StructType} + +// These tests are potentially a subset of the tests already in OptimisticTransactionSuite. +// These tests can potentially be removed but only after confirming that these tests are +// truly a subset of the tests in OptimisticTransactionSuite. +trait OptimisticTransactionLegacyTests + extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + + private val addA = createTestAddFile(path = "a") + private val addB = createTestAddFile(path = "b") + private val addC = createTestAddFile(path = "c") + + import testImplicits._ + + test("block append against metadata change") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, tempDir) + // Initialize the log. 
+ log.startTransaction().commitManually() + + val txn = log.startTransaction() + val winningTxn = log.startTransaction() + winningTxn.commit(Metadata() :: Nil, ManualUpdate) + intercept[MetadataChangedException] { + txn.commit(addA :: Nil, ManualUpdate) + } + } + } + + test("block read+append against append") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, tempDir) + // Initialize the log. + log.startTransaction().commitManually() + + val txn = log.startTransaction() + // reads the table + txn.filterFiles() + val winningTxn = log.startTransaction() + winningTxn.commit(addA :: Nil, ManualUpdate) + intercept[ConcurrentAppendException] { + txn.commit(addB :: Nil, ManualUpdate) + } + } + } + + test("allow blind-append against any data change") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, tempDir) + // Initialize the log and add data. + log.startTransaction().commitManually(addA) + + val txn = log.startTransaction() + val winningTxn = log.startTransaction() + winningTxn.commit(addA.remove :: addB :: Nil, ManualUpdate) + txn.commit(addC :: Nil, ManualUpdate) + checkAnswer(log.update().allFiles.select("path"), Row("b") :: Row("c") :: Nil) + } + } + + test("allow read+append+delete against no data change") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, tempDir) + // Initialize the log and add data. ManualUpdate is just a no-op placeholder. + log.startTransaction().commitManually(addA) + + val txn = log.startTransaction() + txn.filterFiles() + val winningTxn = log.startTransaction() + winningTxn.commit(Nil, ManualUpdate) + txn.commit(addA.remove :: addB :: Nil, ManualUpdate) + checkAnswer(log.update().allFiles.select("path"), Row("b") :: Nil) + } + } + + + val A_P1 = "part=1/a" + val B_P1 = "part=1/b" + val C_P1 = "part=1/c" + val C_P2 = "part=2/c" + val D_P2 = "part=2/d" + val E_P3 = "part=3/e" + val F_P3 = "part=3/f" + val G_P4 = "part=4/g" + + private val addA_P1 = AddFile(A_P1, Map("part" -> "1"), 1, 1, dataChange = true) + private val addB_P1 = AddFile(B_P1, Map("part" -> "1"), 1, 1, dataChange = true) + private val addC_P1 = AddFile(C_P1, Map("part" -> "1"), 1, 1, dataChange = true) + private val addC_P2 = AddFile(C_P2, Map("part" -> "2"), 1, 1, dataChange = true) + private val addD_P2 = AddFile(D_P2, Map("part" -> "2"), 1, 1, dataChange = true) + private val addE_P3 = AddFile(E_P3, Map("part" -> "3"), 1, 1, dataChange = true) + private val addF_P3 = AddFile(F_P3, Map("part" -> "3"), 1, 1, dataChange = true) + private val addG_P4 = AddFile(G_P4, Map("part" -> "4"), 1, 1, dataChange = true) + + test("allow concurrent commit on disjoint partitions") { + withLog(addA_P1 :: addE_P3 :: Nil) { log => + val tx1 = log.startTransaction() + // TX1 reads P3 (but not P1) + val tx1Read = tx1.filterFiles(('part === 3).expr :: Nil) + assert(tx1Read.map(_.path) == E_P3 :: Nil) + + val tx2 = log.startTransaction() + tx2.filterFiles() + // TX2 modifies only P1 + tx2.commit(addB_P1 :: Nil, ManualUpdate) + + // free to commit because P1 modified by TX2 was not read + tx1.commit(addC_P2 :: addE_P3.remove :: Nil, ManualUpdate) + checkAnswer( + log.update().allFiles.select("path"), + Row(A_P1) :: // start (E_P3 was removed by TX1) + Row(B_P1) :: // TX2 + Row(C_P2) :: Nil) // TX1 + } + } + + test("allow concurrent commit on disjoint partitions reading all partitions") { + withLog(addA_P1 :: addD_P2 :: Nil) { log => + val tx1 = log.startTransaction() + // TX1 read P1 + tx1.filterFiles(('part isin 1).expr :: Nil) + + val tx2 = log.startTransaction() + 
tx2.filterFiles() + tx2.commit(addC_P2 :: addD_P2.remove :: Nil, ManualUpdate) + + tx1.commit(addE_P3 :: addF_P3 :: Nil, ManualUpdate) + + checkAnswer( + log.update().allFiles.select("path"), + Row(A_P1) :: // start + Row(C_P2) :: // TX2 + Row(E_P3) :: Row(F_P3) :: Nil) // TX1 + } + } + + test("block concurrent commit when read partition was appended to by concurrent write") { + withLog(addA_P1 :: addD_P2 :: addE_P3 :: Nil) { log => + val tx1 = log.startTransaction() + // TX1 reads only P1 + val tx1Read = tx1.filterFiles(('part === 1).expr :: Nil) + assert(tx1Read.map(_.path) == A_P1 :: Nil) + + val tx2 = log.startTransaction() + tx2.filterFiles() + // TX2 modifies only P1 + tx2.commit(addB_P1 :: Nil, ManualUpdate) + + intercept[ConcurrentAppendException] { + // P1 was modified + tx1.commit(addC_P2 :: addE_P3 :: Nil, ManualUpdate) + } + } + } + + test("block concurrent commit on full table scan") { + withLog(addA_P1 :: addD_P2 :: Nil) { log => + val tx1 = log.startTransaction() + // TX1 full table scan + tx1.filterFiles() + tx1.filterFiles(('part === 1).expr :: Nil) + + val tx2 = log.startTransaction() + tx2.filterFiles() + tx2.commit(addC_P2 :: addD_P2.remove :: Nil, ManualUpdate) + + intercept[ConcurrentAppendException] { + tx1.commit(addE_P3 :: addF_P3 :: Nil, ManualUpdate) + } + } + } + + val A_1_1 = "a=1/b=1/a" + val B_1_2 = "a=1/b=2/b" + val C_2_1 = "a=2/b=1/c" + val D_3_1 = "a=3/b=1/d" + + val addA_1_1_nested = AddFile( + A_1_1, Map("a" -> "1", "b" -> "1"), + 1, 1, dataChange = true) + val addB_1_2_nested = AddFile( + B_1_2, Map("a" -> "1", "b" -> "2"), + 1, 1, dataChange = true) + val addC_2_1_nested = AddFile( + C_2_1, Map("a" -> "2", "b" -> "1"), + 1, 1, dataChange = true) + val addD_3_1_nested = AddFile( + D_3_1, Map("a" -> "3", "b" -> "1"), + 1, 1, dataChange = true) + + test("allow concurrent adds to disjoint nested partitions when read is disjoint from write") { + withLog(addA_1_1_nested :: Nil, partitionCols = "a" :: "b" :: Nil) { log => + val tx1 = log.startTransaction() + // TX1 reads a=1/b=1 + val tx1Read = tx1.filterFiles(('a === 1 and 'b === 1).expr :: Nil) + assert(tx1Read.map(_.path) == A_1_1 :: Nil) + + val tx2 = log.startTransaction() + tx2.filterFiles() + // TX2 reads all partitions and modifies only a=1/b=2 + tx2.commit(addB_1_2_nested :: Nil, ManualUpdate) + + // TX1 reads a=1/b=1 which was not modified by TX2, hence TX1 can write to a=2/b=1 + tx1.commit(addC_2_1_nested :: Nil, ManualUpdate) + checkAnswer( + log.update().allFiles.select("path"), + Row(A_1_1) :: // start + Row(B_1_2) :: // TX2 + Row(C_2_1) :: Nil) // TX1 + } + } + + test("allow concurrent adds to same nested partitions when read is disjoint from write") { + withLog(addA_1_1_nested :: Nil, partitionCols = "a" :: "b" :: Nil) { log => + val tx1 = log.startTransaction() + // TX1 reads a=1/b=1 + val tx1Read = tx1.filterFiles(('a === 1 and 'b === 1).expr :: Nil) + assert(tx1Read.map(_.path) == A_1_1 :: Nil) + + val tx2 = log.startTransaction() + tx2.filterFiles() + // TX2 modifies a=1/b=2 + tx2.commit(addB_1_2_nested :: Nil, ManualUpdate) + + // TX1 reads a=1/b=1 which was not modified by TX2, hence TX1 can write to a=2/b=1 + val add = AddFile( + "a=1/b=2/x", Map("a" -> "1", "b" -> "2"), + 1, 1, dataChange = true) + tx1.commit(add :: Nil, ManualUpdate) + checkAnswer( + log.update().allFiles.select("path"), + Row(A_1_1) :: // start + Row(B_1_2) :: // TX2 + Row("a=1/b=2/x") :: Nil) // TX1 + } + } + + test("allow concurrent add when read at lvl1 partition is disjoint from concur. 
write at lvl2") { + withLog( + addA_1_1_nested :: addB_1_2_nested :: Nil, + partitionCols = "a" :: "b" :: Nil) { log => + val tx1 = log.startTransaction() + // TX1 reads a=1 + val tx1Read = tx1.filterFiles(('a === 1).expr :: Nil) + assert(tx1Read.map(_.path).toSet == Set(A_1_1, B_1_2)) + + val tx2 = log.startTransaction() + tx2.filterFiles() + // TX2 modifies only a=2/b=1 + tx2.commit(addC_2_1_nested :: Nil, ManualUpdate) + + // free to commit a=2/b=1 + tx1.commit(addD_3_1_nested :: Nil, ManualUpdate) + checkAnswer( + log.update().allFiles.select("path"), + Row(A_1_1) :: Row(B_1_2) :: // start + Row(C_2_1) :: // TX2 + Row(D_3_1) :: Nil) // TX1 + } + } + + test("block commit when read at lvl1 partition reads lvl2 file concur. deleted") { + withLog( + addA_1_1_nested :: addB_1_2_nested :: Nil, + partitionCols = "a" :: "b" :: Nil) { log => + + val tx1 = log.startTransaction() + // TX1 reads a=1 + val tx1Read = tx1.filterFiles(('a === 1).expr :: Nil) + assert(tx1Read.map(_.path).toSet == Set(A_1_1, B_1_2)) + + val tx2 = log.startTransaction() + tx2.filterFiles() + // TX2 modifies a=1/b=1 + tx2.commit(addA_1_1_nested.remove :: Nil, ManualUpdate) + + intercept[ConcurrentDeleteReadException] { + // TX2 modified a=1, which was read by TX1 + tx1.commit(addD_3_1_nested :: Nil, ManualUpdate) + } + } + } + + test("block commit when full table read conflicts with concur. write in lvl2 nested partition") { + withLog(addA_1_1_nested :: Nil, partitionCols = "a" :: "b" :: Nil) { log => + val tx1 = log.startTransaction() + // TX1 full table scan + tx1.filterFiles() + + val tx2 = log.startTransaction() + tx2.filterFiles() + // TX2 modifies only a=1/b=2 + tx2.commit(addB_1_2_nested :: Nil, ManualUpdate) + + intercept[ConcurrentAppendException] { + // TX2 modified table all of which was read by TX1 + tx1.commit(addC_2_1_nested :: Nil, ManualUpdate) + } + } + } + + test("block commit when part. range read conflicts with concur. write in lvl2 nested partition") { + withLog( + addA_1_1_nested :: Nil, + partitionCols = "a" :: "b" :: Nil) { log => + + val tx1 = log.startTransaction() + // TX1 reads multiple nested partitions a >= 1 or b > 1 + val tx1Read = tx1.filterFiles(('a >= 1 or 'b > 1).expr :: Nil) + assert(tx1Read.map(_.path).toSet == Set(A_1_1)) + + val tx2 = log.startTransaction() + tx2.filterFiles() + // TX2 modifies a=1/b=2 + tx2.commit(addB_1_2_nested :: Nil, ManualUpdate) + + intercept[ConcurrentAppendException] { + // partition a=1/b=2 conflicts with our read a >= 1 or 'b > 1 + tx1.commit(addD_3_1_nested :: Nil, ManualUpdate) + } + } + } + + test("block commit with concurrent removes on same file") { + withLog(addB_1_2_nested :: Nil, partitionCols = "a" :: "b" :: Nil) { log => + val tx1 = log.startTransaction() + // TX1 reads a=2 so that read is disjoint with write partition. 
tx1.filterFiles(('a === 2).expr :: Nil) + + val tx2 = log.startTransaction() + tx2.filterFiles() + // TX2 modifies a=1/b=2 + tx2.commit(addB_1_2_nested.remove :: Nil, ManualUpdate) + + intercept[ConcurrentDeleteDeleteException] { + // TX1's read does not conflict with TX2's write since the partitions are disjoint, + // but TX2 removed the same file that TX1 is trying to remove + tx1.commit(addB_1_2_nested.remove :: Nil, ManualUpdate) + } + } + } + + test("block commit when full table read conflicts with add in any partition") { + withLog(addA_P1 :: addC_P2 :: Nil) { log => + val tx1 = log.startTransaction() + tx1.filterFiles() + + val tx2 = log.startTransaction() + tx2.filterFiles() + tx2.commit(addC_P2.remove :: addB_P1 :: Nil, ManualUpdate) + + intercept[ConcurrentAppendException] { + // TX1 read whole table but TX2 concurrently modified partition P2 + tx1.commit(addD_P2 :: Nil, ManualUpdate) + } + } + } + + test("block commit when full table read conflicts with delete in any partition") { + withLog(addA_P1 :: addC_P2 :: Nil) { log => + val tx1 = log.startTransaction() + tx1.filterFiles() + + val tx2 = log.startTransaction() + tx2.filterFiles() + tx2.commit(addA_P1.remove :: Nil, ManualUpdate) + + intercept[ConcurrentDeleteReadException] { + // TX1 read whole table but TX2 concurrently modified partition P1 + tx1.commit(addB_P1.remove :: Nil, ManualUpdate) + } + } + } + + test("block concurrent replaceWhere initial empty") { + withLog(addA_P1 :: Nil) { log => + val tx1 = log.startTransaction() + // replaceWhere (part >= 2) -> empty read + val tx1Read = tx1.filterFiles(('part >= 2).expr :: Nil) + assert(tx1Read.isEmpty) + + val tx2 = log.startTransaction() + // replaceWhere (part >= 2) -> empty read + val tx2Read = tx2.filterFiles(('part >= 2).expr :: Nil) + assert(tx2Read.isEmpty) + tx2.commit(addE_P3 :: Nil, ManualUpdate) + + intercept[ConcurrentAppendException] { + // TX2 has modified P3, which conflicts with our read (part >= 2) + tx1.commit(addC_P2 :: Nil, ManualUpdate) + } + } + } + + test("allow concurrent replaceWhere disjoint partitions initial empty") { + withLog(addA_P1 :: Nil) { log => + val tx1 = log.startTransaction() + // replaceWhere (part > 1 and part <= 3) -> empty read + val tx1Read = tx1.filterFiles(('part > 1 and 'part <= 3).expr :: Nil) + assert(tx1Read.isEmpty) + + val tx2 = log.startTransaction() + // replaceWhere (part > 3) -> empty read + val tx2Read = tx2.filterFiles(('part > 3).expr :: Nil) + assert(tx2Read.isEmpty) + + tx1.commit(addC_P2 :: Nil, ManualUpdate) + // P2 doesn't conflict with read predicate (part > 3) + tx2.commit(addG_P4 :: Nil, ManualUpdate) + checkAnswer( + log.update().allFiles.select("path"), + Row(A_P1) :: // start + Row(C_P2) :: // TX1 + Row(G_P4) :: Nil) // TX2 + } + } + + test("block concurrent replaceWhere NOT empty but conflicting predicate") { + withLog(addA_P1 :: addG_P4 :: Nil) { log => + val tx1 = log.startTransaction() + // replaceWhere (part <= 3) -> read P1 + val tx1Read = tx1.filterFiles(('part <= 3).expr :: Nil) + assert(tx1Read.map(_.path) == A_P1 :: Nil) + val tx2 = log.startTransaction() + // replaceWhere (part >= 2) -> read P4 + val tx2Read = tx2.filterFiles(('part >= 2).expr :: Nil) + assert(tx2Read.map(_.path) == G_P4 :: Nil) + + tx1.commit(addA_P1.remove :: addC_P2 :: Nil, ManualUpdate) + intercept[ConcurrentAppendException] { + // TX1 has modified P2, which conflicts with our read (part >= 2) + tx2.commit(addG_P4.remove :: addE_P3 :: Nil, ManualUpdate) + } + } + } + + test("block concurrent commit on read & add conflicting partitions") { + 
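+    // TX1 reads P1; TX2 appends to P1 and commits first, so TX1's commit must fail
+    // with ConcurrentAppendException.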
withLog(addA_P1 :: Nil) { log => + val tx1 = log.startTransaction() + // read P1 + val tx1Read = tx1.filterFiles(('part === 1).expr :: Nil) + assert(tx1Read.map(_.path) == A_P1 :: Nil) + + // tx2 commits before tx1 + val tx2 = log.startTransaction() + tx2.filterFiles() + tx2.commit(addB_P1 :: Nil, ManualUpdate) + + intercept[ConcurrentAppendException] { + // P1 read by TX1 was modified by TX2 + tx1.commit(addE_P3 :: Nil, ManualUpdate) + } + } + } + + test("block concurrent commit on read & delete conflicting partitions") { + withLog(addA_P1 :: addB_P1 :: Nil) { log => + val tx1 = log.startTransaction() + // read P1 + tx1.filterFiles(('part === 1).expr :: Nil) + + // tx2 commits before tx1 + val tx2 = log.startTransaction() + tx2.filterFiles() + tx2.commit(addA_P1.remove :: Nil, ManualUpdate) + + intercept[ConcurrentDeleteReadException] { + // P1 read by TX1 was removed by TX2 + tx1.commit(addE_P3 :: Nil, ManualUpdate) + } + } + } + + test("block 2 concurrent replaceWhere transactions") { + withLog(addA_P1 :: Nil) { log => + val tx1 = log.startTransaction() + // read P1 + tx1.filterFiles(('part === 1).expr :: Nil) + + val tx2 = log.startTransaction() + // read P1 + tx2.filterFiles(('part === 1).expr :: Nil) + + // tx1 commits before tx2 + tx1.commit(addA_P1.remove :: addB_P1 :: Nil, ManualUpdate) + + intercept[ConcurrentAppendException] { + // P1 read & deleted by TX1 is being modified by TX2 + tx2.commit(addA_P1.remove :: addC_P1 :: Nil, ManualUpdate) + } + } + } + + test("block 2 concurrent replaceWhere transactions changing partitions") { + withLog(addA_P1 :: addC_P2 :: addE_P3 :: Nil) { log => + val tx1 = log.startTransaction() + // read P3 + tx1.filterFiles(('part === 3 or 'part === 1).expr :: Nil) + + val tx2 = log.startTransaction() + // read P3 + tx2.filterFiles(('part === 3 or 'part === 2).expr :: Nil) + + // tx1 commits before tx2 + tx1.commit(addA_P1.remove :: addE_P3.remove :: addB_P1 :: Nil, ManualUpdate) + + intercept[ConcurrentDeleteReadException] { + // P3 read & deleted by TX1 is being modified by TX2 + tx2.commit(addC_P2.remove :: addE_P3.remove :: addD_P2 :: Nil, ManualUpdate) + } + } + } + + test("block concurrent full table scan after concurrent write completes") { + withLog(addA_P1 :: addE_P3 :: Nil) { log => + val tx1 = log.startTransaction() + + val tx2 = log.startTransaction() + tx2.filterFiles() + tx2.commit(addC_P2 :: Nil, ManualUpdate) + + tx1.filterFiles(('part === 1).expr :: Nil) + // full table scan + tx1.filterFiles() + + intercept[ConcurrentAppendException] { + tx1.commit(addA_P1.remove :: Nil, ManualUpdate) + } + } + } + + test("block concurrent commit mixed metadata and data predicate") { + withLog(addA_P1 :: addE_P3 :: Nil) { log => + val tx1 = log.startTransaction() + + val tx2 = log.startTransaction() + tx2.filterFiles() + tx2.commit(addC_P2 :: Nil, ManualUpdate) + + // actually a full table scan + tx1.filterFiles(('part === 1 or 'year > 2019).expr :: Nil) + + intercept[ConcurrentAppendException] { + tx1.commit(addA_P1.remove :: Nil, ManualUpdate) + } + } + } + + test("block concurrent read (2 scans) and add when read partition was changed by concur. 
write") { + withLog(addA_P1 :: addE_P3 :: Nil) { log => + val tx1 = log.startTransaction() + tx1.filterFiles(('part === 1).expr :: Nil) + + val tx2 = log.startTransaction() + tx2.filterFiles() + tx2.commit(addC_P2 :: Nil, ManualUpdate) + + tx1.filterFiles(('part > 1 and 'part < 3).expr :: Nil) + + intercept[ConcurrentAppendException] { + // P2 added by TX2 conflicts with our read condition 'part > 1 and 'part < 3 + tx1.commit(addA_P1.remove :: Nil, ManualUpdate) + } + } + } + + def setDataChangeFalse(fileActions: Seq[FileAction]): Seq[FileAction] = { + fileActions.map { + case a: AddFile => a.copy(dataChange = false) + case r: RemoveFile => r.copy(dataChange = false) + case cdc: AddCDCFile => cdc // change files are always dataChange = false + } + } + + test("no data change: allow data rearrange when new files concurrently added") { + withLog(addA_P1 :: addB_P1 :: Nil) { log => + val tx1 = log.startTransaction() + tx1.filterFiles() + + val tx2 = log.startTransaction() + tx2.filterFiles() + tx2.commit( + addE_P3 :: Nil, + ManualUpdate) + + // tx1 rearranges files + tx1.commit( + setDataChangeFalse(addA_P1.remove :: addB_P1.remove :: addC_P1 :: Nil), + ManualUpdate) + + checkAnswer( + log.update().allFiles.select("path"), + Row(C_P1) :: Row(E_P3) :: Nil) + } + } + + test("no data change: block data rearrange when concurrently delete removes same file") { + withLog(addA_P1 :: addB_P1 :: Nil) { log => + val tx1 = log.startTransaction() + tx1.filterFiles() + + // tx2 removes file + val tx2 = log.startTransaction() + tx2.filterFiles() + tx2.commit(addA_P1.remove :: Nil, ManualUpdate) + + intercept[ConcurrentDeleteReadException] { + // tx1 reads to rearrange the same file that tx2 deleted + tx1.commit( + setDataChangeFalse(addA_P1.remove :: addB_P1.remove :: addC_P1 :: Nil), + ManualUpdate) + } + } + } + + test("readWholeTable should block concurrent delete") { + withLog(addA_P1 :: Nil) { log => + val tx1 = log.startTransaction() + tx1.readWholeTable() + + // tx2 removes file + val tx2 = log.startTransaction() + tx2.commit(addA_P1.remove :: Nil, ManualUpdate) + + intercept[ConcurrentDeleteReadException] { + // tx1 reads the whole table but tx2 removes files before tx1 commits + tx1.commit(addB_P1 :: Nil, ManualUpdate) + } + } + } + + def withLog( + actions: Seq[Action], + partitionCols: Seq[String] = "part" :: Nil)( + test: DeltaLog => Unit): Unit = { + + val schema = StructType(partitionCols.map(p => StructField(p, StringType)).toArray) + val metadata = Metadata(partitionColumns = partitionCols, schemaString = schema.json) + val actionWithMetaData = actions :+ metadata + + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, tempDir) + // Initialize the log and add data. ManualUpdate is just a no-op placeholder. + log.startTransaction().commit(Seq(metadata), ManualUpdate) + log.startTransaction().commitManually(actionWithMetaData: _*) + test(log) + } + } + + test("allow concurrent set-txns with different app ids") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, tempDir) + // Initialize the log. + log.startTransaction().commitManually() + + val txn = log.startTransaction() + txn.txnVersion("t1") + val winningTxn = log.startTransaction() + winningTxn.commit(SetTransaction("t2", 1, Some(1234L)) :: Nil, ManualUpdate) + txn.commit(Nil, ManualUpdate) + + assert(log.update().transactions === Map("t2" -> 1)) + } + } + + test("block concurrent set-txns with the same app id") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, tempDir) + // Initialize the log. 
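+      // txnVersion("t1") below registers a read of app id "t1", so the winning transaction's
+      // SetTransaction for the same app id must fail this transaction with
+      // ConcurrentTransactionException.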
+ log.startTransaction().commitManually() + + val txn = log.startTransaction() + txn.txnVersion("t1") + val winningTxn = log.startTransaction() + winningTxn.commit(SetTransaction("t1", 1, Some(1234L)) :: Nil, ManualUpdate) + + intercept[ConcurrentTransactionException] { + txn.commit(Nil, ManualUpdate) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/OptimisticTransactionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/OptimisticTransactionSuite.scala new file mode 100644 index 00000000000..2ad167f4313 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/OptimisticTransactionSuite.scala @@ -0,0 +1,702 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaOperations.ManualUpdate +import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile +import org.apache.spark.sql.delta.actions.{Action, AddFile, CommitInfo, Metadata, Protocol, RemoveFile, SetTransaction} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.Row +import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} +import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.util.ManualClock + + +class OptimisticTransactionSuite + extends OptimisticTransactionLegacyTests + with OptimisticTransactionSuiteBase { + + import testImplicits._ + + // scalastyle:off: removeFile + private val addA = createTestAddFile(path = "a") + private val addB = createTestAddFile(path = "b") + + /* ************************** * + * Allowed concurrent actions * + * ************************** */ + + check( + "append / append", + conflicts = false, + reads = Seq( + t => t.metadata + ), + concurrentWrites = Seq( + addA), + actions = Seq( + addB)) + + check( + "disjoint txns", + conflicts = false, + reads = Seq( + t => t.txnVersion("t1") + ), + concurrentWrites = Seq( + SetTransaction("t2", 0, Some(1234L))), + actions = Nil) + + check( + "disjoint delete / read", + conflicts = false, + setup = Seq( + Metadata( + schemaString = new StructType().add("x", IntegerType).json, + partitionColumns = Seq("x")), + AddFile("a", Map("x" -> "2"), 1, 1, dataChange = true) + ), + reads = Seq( + t => t.filterFiles(EqualTo('x, Literal(1)) :: Nil) + ), + concurrentWrites = Seq( + RemoveFile("a", Some(4))), + actions = Seq()) + + check( + "disjoint add / read", + conflicts = false, + setup = Seq( + Metadata( + schemaString = new StructType().add("x", IntegerType).json, + partitionColumns = Seq("x")) + ), + reads = Seq( + t => t.filterFiles(EqualTo('x, Literal(1)) :: Nil) + ), + concurrentWrites = Seq( + AddFile("a", Map("x" -> 
"2"), 1, 1, dataChange = true)), + actions = Seq()) + + /* ***************************** * + * Disallowed concurrent actions * + * ***************************** */ + + check( + "delete / delete", + conflicts = true, + reads = Nil, + concurrentWrites = Seq( + RemoveFile("a", Some(4))), + actions = Seq( + RemoveFile("a", Some(5)))) + + check( + "add / read + write", + conflicts = true, + setup = Seq( + Metadata( + schemaString = new StructType().add("x", IntegerType).json, + partitionColumns = Seq("x")) + ), + reads = Seq( + t => t.filterFiles(EqualTo('x, Literal(1)) :: Nil) + ), + concurrentWrites = Seq( + AddFile("a", Map("x" -> "1"), 1, 1, dataChange = true)), + actions = Seq(AddFile("b", Map("x" -> "1"), 1, 1, dataChange = true)), + // commit info should show operation as truncate, because that's the operation used by the + // harness + errorMessageHint = Some("[x=1]" :: "TRUNCATE" :: Nil)) + + check( + "add / read + no write", // no write = no real conflicting change even though data was added + conflicts = false, // so this should not conflict + setup = Seq( + Metadata( + schemaString = new StructType().add("x", IntegerType).json, + partitionColumns = Seq("x")) + ), + reads = Seq( + t => t.filterFiles(EqualTo('x, Literal(1)) :: Nil) + ), + concurrentWrites = Seq( + AddFile("a", Map("x" -> "1"), 1, 1, dataChange = true)), + actions = Seq()) + + check( + "add in part=2 / read from part=1,2 and write to part=1", + conflicts = true, + setup = Seq( + Metadata( + schemaString = new StructType().add("x", IntegerType).json, + partitionColumns = Seq("x")) + ), + reads = Seq( + t => { + // Filter files twice - once for x=1 and again for x=2 + t.filterFiles(Seq(EqualTo('x, Literal(1)))) + t.filterFiles(Seq(EqualTo('x, Literal(2)))) + } + ), + concurrentWrites = Seq( + AddFile( + path = "a", + partitionValues = Map("x" -> "1"), + size = 1, + modificationTime = 1, + dataChange = true) + ), + actions = Seq( + AddFile( + path = "b", + partitionValues = Map("x" -> "2"), + size = 1, + modificationTime = 1, + dataChange = true) + )) + + check( + "delete / read", + conflicts = true, + setup = Seq( + Metadata( + schemaString = new StructType().add("x", IntegerType).json, + partitionColumns = Seq("x")), + AddFile("a", Map("x" -> "1"), 1, 1, dataChange = true) + ), + reads = Seq( + t => t.filterFiles(EqualTo('x, Literal(1)) :: Nil) + ), + concurrentWrites = Seq( + RemoveFile("a", Some(4))), + actions = Seq(), + errorMessageHint = Some("a in partition [x=1]" :: "TRUNCATE" :: Nil)) + + check( + "schema change", + conflicts = true, + reads = Seq( + t => t.metadata + ), + concurrentWrites = Seq( + Metadata()), + actions = Nil) + + check( + "conflicting txns", + conflicts = true, + reads = Seq( + t => t.txnVersion("t1") + ), + concurrentWrites = Seq( + SetTransaction("t1", 0, Some(1234L))), + actions = Nil) + + check( + "upgrade / upgrade", + conflicts = true, + reads = Seq( + t => t.metadata + ), + concurrentWrites = Seq( + Action.supportedProtocolVersion()), + actions = Seq( + Action.supportedProtocolVersion())) + + check( + "taint whole table", + conflicts = true, + setup = Seq( + Metadata( + schemaString = new StructType().add("x", IntegerType).json, + partitionColumns = Seq("x")), + AddFile("a", Map("x" -> "2"), 1, 1, dataChange = true) + ), + reads = Seq( + t => t.filterFiles(EqualTo('x, Literal(1)) :: Nil), + // `readWholeTable` should disallow any concurrent change, even if the change + // is disjoint with the earlier filter + t => t.readWholeTable() + ), + concurrentWrites = Seq( + AddFile("b", Map("x" 
-> "3"), 1, 1, dataChange = true)), + actions = Seq( + AddFile("c", Map("x" -> "4"), 1, 1, dataChange = true))) + + check( + "taint whole table + concurrent remove", + conflicts = true, + setup = Seq( + Metadata(schemaString = new StructType().add("x", IntegerType).json), + AddFile("a", Map.empty, 1, 1, dataChange = true) + ), + reads = Seq( + // `readWholeTable` should disallow any concurrent `RemoveFile`s. + t => t.readWholeTable() + ), + concurrentWrites = Seq( + RemoveFile("a", Some(4L))), + actions = Seq( + AddFile("b", Map.empty, 1, 1, dataChange = true))) + + test("initial commit without metadata should fail") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + val txn = log.startTransaction() + withSQLConf(DeltaSQLConf.DELTA_COMMIT_VALIDATION_ENABLED.key -> "true") { + val e = intercept[DeltaIllegalStateException] { + txn.commit(Nil, ManualUpdate) + } + assert(e.getMessage == DeltaErrors.metadataAbsentException().getMessage) + } + } + } + + test("initial commit with multiple metadata actions should fail") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, new Path(tempDir.getAbsolutePath)) + val txn = log.startTransaction() + val e = intercept[AssertionError] { + txn.commit(Seq(Metadata(), Metadata()), ManualUpdate) + } + assert(e.getMessage.contains("Cannot change the metadata more than once in a transaction.")) + } + } + + test("AddFile with different partition schema compared to metadata should fail") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, new Path(tempDir.getAbsolutePath)) + log.startTransaction().commit(Seq(Metadata( + schemaString = StructType.fromDDL("col2 string, a int").json, + partitionColumns = Seq("col2"))), ManualUpdate) + withSQLConf(DeltaSQLConf.DELTA_COMMIT_VALIDATION_ENABLED.key -> "true") { + val e = intercept[IllegalStateException] { + log.startTransaction().commit(Seq(AddFile( + log.dataPath.toString, Map("col3" -> "1"), 12322, 0L, true, null, null)), ManualUpdate) + } + assert(e.getMessage == DeltaErrors.addFilePartitioningMismatchException( + Seq("col3"), Seq("col2")).getMessage) + } + } + } + + test("isolation level shouldn't be null") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + log.startTransaction().commit(Seq(Metadata()), ManualUpdate) + + val txn = log.startTransaction() + txn.commit(addA :: Nil, ManualUpdate) + + val isolationLevels = log.history.getHistory(Some(10)).map(_.isolationLevel) + assert(isolationLevels.size == 2) + assert(isolationLevels(0).exists(_.contains("Serializable"))) + assert(isolationLevels(0).exists(_.contains("Serializable"))) + } + } + + test("every transaction should use a unique identifier in the commit") { + withTempDir { tempDir => + // Initialize delta table. + val clock = new ManualClock() + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath), clock) + log.startTransaction().commit(Seq(Metadata()), ManualUpdate) + clock.advance(100) + + // Start two transactions which commits at same time with same content. + val txn1 = log.startTransaction() + val txn2 = log.startTransaction() + clock.advance(100) + val version1 = txn1.commit(Seq(), ManualUpdate) + val version2 = txn2.commit(Seq(), ManualUpdate) + + // Validate that actions in both transactions are not exactly same. 
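+      // (The two commits should differ only in the per-commit txnId and operation metrics
+      // inside CommitInfo, which removeTxnIdAndMetricsFromActions below strips out before
+      // comparing the remaining actions for equality.)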
+ def readActions(version: Long): Seq[Action] = { + log.store.read(FileNames.deltaFile(log.logPath, version), log.newDeltaHadoopConf()) + .map(Action.fromJson) + } + def removeTxnIdAndMetricsFromActions(actions: Seq[Action]): Seq[Action] = actions.map { + case c: CommitInfo => c.copy(txnId = None, operationMetrics = None) + case other => other + } + val actions1 = readActions(version1) + val actions2 = readActions(version2) + val actionsWithoutTxnId1 = removeTxnIdAndMetricsFromActions(actions1) + val actionsWithoutTxnId2 = removeTxnIdAndMetricsFromActions(actions2) + assert(actions1 !== actions2) + // Without the txn id, the actions are same as of today but they need not be in future. In + // future we might have other fields which may make these actions from two different + // transactions different. In that case, the below assertion can be removed. + assert(actionsWithoutTxnId1 === actionsWithoutTxnId2) + } + } + + test("pre-command actions committed") { + withTempDir { tempDir => + // Initialize delta table. + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + log.startTransaction().commit(Seq(Metadata()), ManualUpdate) + + val txn = log.startTransaction() + txn.updateSetTransaction("TestAppId", 1L, None) + val version = txn.commit(Seq(), ManualUpdate) + + def readActions(version: Long): Seq[Action] = { + log.store.read(FileNames.deltaFile(log.logPath, version), log.newDeltaHadoopConf()) + .map(Action.fromJson) + } + val actions = readActions(version) + assert(actions.collectFirst { + case SetTransaction("TestAppId", 1L, _) => + }.isDefined) + } + } + + test("has SetTransaction version conflicts") { + withTempDir { tempDir => + // Initialize delta table. + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + log.startTransaction().commit(Seq(Metadata()), ManualUpdate) + + val txn = log.startTransaction() + txn.updateSetTransaction("TestAppId", 1L, None) + val e = intercept[IllegalArgumentException] { + txn.commit(Seq(SetTransaction("TestAppId", 2L, None)), ManualUpdate) + } + assert(e.getMessage == DeltaErrors.setTransactionVersionConflict("TestAppId", 2L, 1L) + .getMessage) + } + } + + test("removes duplicate SetTransactions") { + withTempDir { tempDir => + // Initialize delta table. 
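+      // The SetTransaction registered via updateSetTransaction and the identical one passed to
+      // commit() below are expected to be collapsed so that the app id is recorded only once.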
+ val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + log.startTransaction().commit(Seq(Metadata()), ManualUpdate) + + val txn = log.startTransaction() + txn.updateSetTransaction("TestAppId", 1L, None) + val version = txn.commit(Seq(SetTransaction("TestAppId", 1L, None)), ManualUpdate) + def readActions(version: Long): Seq[Action] = { + log.store.read(FileNames.deltaFile(log.logPath, version), log.newDeltaHadoopConf()) + .map(Action.fromJson) + } + assert(readActions(version).collectFirst { + case SetTransaction("TestAppId", 1L, _) => + }.isDefined) + } + } + + test("preCommitLogSegment is updated during conflict checking") { + withTempDir { tempDir => + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + log.startTransaction().commit(Seq(Metadata()), ManualUpdate) + sql(s"ALTER TABLE delta.`${tempDir.getAbsolutePath}` " + + s"SET TBLPROPERTIES (${DeltaConfigs.CHECKPOINT_INTERVAL.key} = 10)") + val testTxn = log.startTransaction() + val testTxnStartTs = System.currentTimeMillis() + for (_ <- 1 to 11) { + log.startTransaction().commit(Seq.empty, ManualUpdate) + } + val testTxnEndTs = System.currentTimeMillis() + + // preCommitLogSegment should not get updated until a commit is triggered + assert(testTxn.preCommitLogSegment.version == 1) + assert(testTxn.preCommitLogSegment.lastCommitTimestamp < testTxnStartTs) + assert(testTxn.preCommitLogSegment.deltas.size == 2) + assert(testTxn.preCommitLogSegment.checkpointProvider.isEmpty) + + testTxn.commit(Seq.empty, ManualUpdate) + + // preCommitLogSegment should get updated to the version right before the txn commits + assert(testTxn.preCommitLogSegment.version == 12) + assert(testTxn.preCommitLogSegment.lastCommitTimestamp < testTxnEndTs) + assert(testTxn.preCommitLogSegment.deltas.size == 2) + assert(testTxn.preCommitLogSegment.checkpointProvider.version == 10) + } + } + + /** + * Here we test whether ConflictChecker correctly resolves conflicts when using + * OptimisticTransaction.filterFiles(partitions) to perform dynamic partition overwrites. 
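+   * The helper drives roughly the following flow (a sketch; `replaced` is just a local name
+   * here, the real code below uses `addFiles`):
+   * {{{
+   *   val txn = log.startTransaction()
+   *   val replaced = txn.filterFiles(newData.map(_.partitionValues).toSet)
+   *   // ... concurrent transaction commits here ...
+   *   txn.commit(replaced.map(_.remove) ++ newData, ManualUpdate)
+   * }}}
+   * and then asserts whether that final commit succeeds or hits the expected conflict.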
+ * + */ + private def testDynamicPartitionOverwrite( + caseName: String, + concurrentActions: String => Seq[Action], + expectedException: Option[String => String] = None) = { + + // We test with a partition column named "partitionValues" to make sure we correctly skip + // rewriting the filters + for (partCol <- Seq("part", "partitionValues")) { + test("filterFiles(partitions) correctly updates readPredicates and ConflictChecker " + + s"correctly detects conflicts for $caseName with partition column [$partCol]") { + withTempDir { tempDir => + + val tablePath = tempDir.getCanonicalPath + val log = DeltaLog.forTable(spark, tablePath) + // set up + log.startTransaction.commit(Seq( + Metadata( + schemaString = new StructType() + .add(partCol, IntegerType) + .add("value", IntegerType).json, + partitionColumns = Seq(partCol)) + ), ManualUpdate) + log.startTransaction.commit( + Seq(AddFile("a", Map(partCol -> "0"), 1, 1, dataChange = true), + AddFile("b", Map(partCol -> "1"), 1, 1, dataChange = true)), + ManualUpdate) + + + // new data we want to overwrite dynamically to the table + val newData = Seq(AddFile("x", Map(partCol -> "0"), 1, 1, dataChange = true)) + + // txn1: read files in partitions of our new data (part=0) + val txn = log.startTransaction() + val addFiles = + txn.filterFiles(newData.map(_.partitionValues).toSet) + + // txn2 + log.startTransaction().commit(concurrentActions(partCol), ManualUpdate) + + // txn1: remove files read in the partition and commit newData + def commitTxn1 = { + txn.commit(addFiles.map(_.remove) ++ newData, ManualUpdate) + } + + if (expectedException.nonEmpty) { + val e = intercept[DeltaConcurrentModificationException] { + commitTxn1 + } + assert(e.getMessage.contains(expectedException.get(partCol))) + } else { + commitTxn1 + } + } + } + } + } + + testDynamicPartitionOverwrite( + caseName = "concurrent append in same partition", + concurrentActions = partCol => Seq(AddFile("y", Map(partCol -> "0"), 1, 1, dataChange = true)), + expectedException = Some(partCol => + s"Files were added to partition [$partCol=0] by a concurrent update.") + ) + + testDynamicPartitionOverwrite( + caseName = "concurrent append in different partition", + concurrentActions = partCol => Seq(AddFile("y", Map(partCol -> "1"), 1, 1, dataChange = true)) + ) + + testDynamicPartitionOverwrite( + caseName = "concurrent delete in same partition", + concurrentActions = partCol => Seq( + RemoveFile("a", None, partitionValues = Map(partCol -> "0"))), + expectedException = Some(partCol => + "This transaction attempted to delete one or more files that were deleted (for example a) " + + "by a concurrent update") + ) + + testDynamicPartitionOverwrite( + caseName = "concurrent delete in different partition", + concurrentActions = partCol => Seq( + RemoveFile("b", None, partitionValues = Map(partCol -> "1"))) + ) + + test("can set partition columns in first commit") { + withTempDir { tableDir => + val partitionColumns = Array("part") + val exampleAddFile = AddFile( + path = "test-path", + partitionValues = Map("part" -> "one"), + size = 1234, + modificationTime = 5678, + dataChange = true, + stats = """{"numRecords": 1}""", + tags = Map.empty) + val deltaLog = DeltaLog.forTable(spark, tableDir) + val schema = new StructType() + .add("id", "long") + .add("part", "string") + deltaLog.withNewTransaction { txn => + val protocol = Action.supportedProtocolVersion() + val metadata = Metadata( + schemaString = schema.json, + partitionColumns = partitionColumns) + txn.commit(Seq(protocol, metadata, 
exampleAddFile), DeltaOperations.ManualUpdate) + } + val snapshot = deltaLog.update() + assert(snapshot.metadata.partitionColumns.sameElements(partitionColumns)) + } + } + + test("only single Protocol action per commit - implicit") { + withTempDir { tableDir => + val deltaLog = DeltaLog.forTable(spark, tableDir) + val schema = new StructType() + .add("id", "long") + .add("col", "string") + val e = intercept[java.lang.AssertionError] { + deltaLog.withNewTransaction { txn => + val protocol = Protocol(2, 3) + val metadata = Metadata( + schemaString = schema.json, + configuration = Map("delta.enableChangeDataFeed" -> "true")) + txn.commit(Seq(protocol, metadata), DeltaOperations.ManualUpdate) + } + } + assert(e.getMessage.contains( + "assertion failed: Cannot change the protocol more than once in a transaction.")) + } + } + + test("only single Protocol action per commit - explicit") { + withTempDir { tableDir => + val deltaLog = DeltaLog.forTable(spark, tableDir) + val e = intercept[java.lang.AssertionError] { + deltaLog.withNewTransaction { txn => + val protocol1 = Protocol(2, 3) + val protocol2 = Protocol(1, 4) + txn.commit(Seq(protocol1, protocol2), DeltaOperations.ManualUpdate) + } + } + assert(e.getMessage.contains( + "assertion failed: Cannot change the protocol more than once in a transaction.")) + } + } + + test("DVs cannot be added to files without numRecords stat") { + withTempPath { tempPath => + val path = tempPath.getPath + val deltaLog = DeltaLog.forTable(spark, path) + val firstFile = writeDuplicateActionsData(path).head + enableDeletionVectorsInTable(deltaLog) + val (addFileWithDV, removeFile) = addDVToFileInTable(path, firstFile) + val addFileWithDVWithoutStats = addFileWithDV.copy(stats = null) + testRuntimeErrorOnCommit(Seq(addFileWithDVWithoutStats, removeFile), deltaLog) { e => + val expErrorClass = "DELTA_DELETION_VECTOR_MISSING_NUM_RECORDS" + assert(e.getErrorClass == expErrorClass) + assert(e.getSqlState == "2D521") + } + } + } + + test("commitInfo tags") { + withTempDir { tableDir => + val deltaLog = DeltaLog.forTable(spark, tableDir) + val schema = new StructType().add("id", "long") + + def checkLastCommitTags(expectedTags: Option[Map[String, String]]): Unit = { + val ci = deltaLog.getChanges(deltaLog.update().version).map(_._2).flatten.collectFirst { + case ci: CommitInfo => ci + }.head + assert(ci.tags === expectedTags) + } + + val metadata = Metadata(schemaString = schema.json) + // Check empty tags + deltaLog.withNewTransaction { txn => + txn.commit(metadata :: Nil, DeltaOperations.ManualUpdate, tags = Map.empty) + } + checkLastCommitTags(expectedTags = None) + + deltaLog.withNewTransaction { txn => + txn.commit(addA :: Nil, DeltaOperations.Write(SaveMode.Append), tags = Map.empty) + } + checkLastCommitTags(expectedTags = None) + + // Check non-empty tags + val tags1 = Map("testTag1" -> "testValue1") + deltaLog.withNewTransaction { txn => + txn.commit(metadata :: Nil, DeltaOperations.ManualUpdate, tags = tags1) + } + checkLastCommitTags(expectedTags = Some(tags1)) + + val tags2 = Map("testTag1" -> "testValue1", "testTag2" -> "testValue2") + deltaLog.withNewTransaction { txn => + txn.commit(addB :: Nil, DeltaOperations.Write(SaveMode.Append), tags = tags2) + } + checkLastCommitTags(expectedTags = Some(tags2)) + } + } + + + test("empty commits are elided on write by default") { + withTempDir { tableDir => + val df = Seq((1, 0), (2, 1)).toDF("key", "value") + df.write.format("delta").mode("append").save(tableDir.getCanonicalPath) + + val deltaLog = 
DeltaLog.forTable(spark, tableDir) + + val expectedSnapshot = deltaLog.update() + val expectedDeltaVersion = expectedSnapshot.version + + val emptyDf = Seq.empty[(Integer, Integer)].toDF("key", "value") + emptyDf.write.format("delta").mode("append").save(tableDir.getCanonicalPath) + + val actualSnapshot = deltaLog.update() + val actualDeltaVersion = actualSnapshot.version + + checkAnswer(spark.read.format("delta").load(tableDir.getCanonicalPath), + Row(1, 0) :: Row(2, 1) :: Nil) + + assert(expectedDeltaVersion === actualDeltaVersion) + } + } + + Seq(true, false).foreach { skip => + test(s"Elide empty commits when requested - skipRecordingEmptyCommits=$skip") { + withSQLConf(DeltaSQLConf.DELTA_SKIP_RECORDING_EMPTY_COMMITS.key -> skip.toString) { + withTempDir { tableDir => + val df = Seq((1, 0), (2, 1)).toDF("key", "value") + df.write.format("delta").mode("append").save(tableDir.getCanonicalPath) + + val deltaLog = DeltaLog.forTable(spark, tableDir) + + val expectedSnapshot = deltaLog.update() + val expectedDeltaVersion = if (skip) { + expectedSnapshot.version + } else { + expectedSnapshot.version + 1 + } + + val emptyDf = Seq.empty[(Integer, Integer)].toDF("key", "value") + emptyDf.write.format("delta").mode("append").save(tableDir.getCanonicalPath) + + val actualSnapshot = deltaLog.update() + val actualDeltaVersion = actualSnapshot.version + + checkAnswer(spark.read.format("delta").load(tableDir.getCanonicalPath), + Row(1, 0) :: Row(2, 1) :: Nil) + + assert(expectedDeltaVersion === actualDeltaVersion) + } + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/OptimisticTransactionSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/OptimisticTransactionSuiteBase.scala new file mode 100644 index 00000000000..05e92508e46 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/OptimisticTransactionSuiteBase.scala @@ -0,0 +1,189 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.util.ConcurrentModificationException + +import org.apache.spark.sql.delta.DeltaOperations.{ManualUpdate, Truncate} +import org.apache.spark.sql.delta.actions.{Action, AddFile, FileAction, Metadata, RemoveFile} +import org.apache.spark.sql.delta.deletionvectors.RoaringBitmapArray +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +trait OptimisticTransactionSuiteBase + extends QueryTest + with SharedSparkSession + with DeletionVectorsTestUtils { + + + /** + * Check whether the test transaction conflict with the concurrent writes by executing the + * given params in the following order: + * - setup (including setting table isolation level + * - reads + * - concurrentWrites + * - actions + * + * When `conflicts` == true, this function checks to make sure the commit of `actions` fails with + * [[ConcurrentModificationException]], otherwise checks that the commit is successful. + * + * @param name test name + * @param conflicts should test transaction is expected to conflict or not + * @param setup sets up the initial delta log state (set schema, partitioning, etc.) + * @param reads reads made in the test transaction + * @param concurrentWrites writes made by concurrent transactions after the test txn reads + * @param actions actions to be committed by the test transaction + * @param errorMessageHint What to expect in the error message + * @param exceptionClass A substring to expect in the exception class name + */ + protected def check( + name: String, + conflicts: Boolean, + setup: Seq[Action] = Seq(Metadata(), Action.supportedProtocolVersion()), + reads: Seq[OptimisticTransaction => Unit], + concurrentWrites: Seq[Action], + actions: Seq[Action], + errorMessageHint: Option[Seq[String]] = None, + exceptionClass: Option[String] = None): Unit = { + + val concurrentTxn: OptimisticTransaction => Unit = + (opt: OptimisticTransaction) => opt.commit(concurrentWrites, Truncate()) + + def initialSetup(log: DeltaLog): Unit = { + // Setup the log + setup.foreach { action => + log.startTransaction().commit(Seq(action), ManualUpdate) + } + } + check( + name, + conflicts, + initialSetup _, + reads, + Seq(concurrentTxn), + actions, + operation = Truncate(), // a data-changing operation + errorMessageHint = errorMessageHint, + exceptionClass = exceptionClass, + additionalSQLConfs = Seq.empty + ) + } + + /** + * Check whether the test transaction conflict with the concurrent writes by executing the + * given params in the following order: + * - sets up the initial delta log state using `initialSetup` (set schema, partitioning, etc.) + * - reads + * - concurrentWrites + * - actions + * + * When `conflicts` == true, this function checks to make sure the commit of `actions` fails with + * [[ConcurrentModificationException]], otherwise checks that the commit is successful. + * + * @param name test name + * @param conflicts should test transaction is expected to conflict or not + * @param initialSetup sets up the initial delta log state (set schema, partitioning, etc.) 
+ * @param reads reads made in the test transaction + * @param concurrentTxns concurrent txns that may write data after the test txn reads + * @param actions actions to be committed by the test transaction + * @param errorMessageHint What to expect in the error message + * @param exceptionClass A substring to expect in the exception class name + */ + protected def check( + name: String, + conflicts: Boolean, + initialSetup: DeltaLog => Unit, + reads: Seq[OptimisticTransaction => Unit], + concurrentTxns: Seq[OptimisticTransaction => Unit], + actions: Seq[Action], + operation: DeltaOperations.Operation, + errorMessageHint: Option[Seq[String]], + exceptionClass: Option[String], + additionalSQLConfs: Seq[(String, String)]): Unit = { + + val conflict = if (conflicts) "should conflict" else "should not conflict" + test(s"$name - $conflict") { + withSQLConf(additionalSQLConfs: _*) { + val tempDir = Utils.createTempDir() + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + // Setup the log + initialSetup(log) + + // Perform reads + val txn = log.startTransaction() + reads.foreach(_ (txn)) + + // Execute concurrent txn while current transaction is active + concurrentTxns.foreach(txn => txn(log.startTransaction())) + + // Try commit and check expected conflict behavior + if (conflicts) { + val e = intercept[ConcurrentModificationException] { + txn.commit(actions, operation) + } + errorMessageHint.foreach { expectedParts => + assert(expectedParts.forall(part => e.getMessage.contains(part))) + } + if (exceptionClass.nonEmpty) { + assert(e.getClass.getName.contains(exceptionClass.get)) + } + } else { + txn.commit(actions, operation) + } + } + } + } + + /** + * Write 3 files at target path and return AddFiles. + */ + protected def writeDuplicateActionsData(path: String): Seq[AddFile] = { + val deltaLog = DeltaLog.forTable(spark, path) + spark.range(start = 0, end = 6, step = 1, numPartitions = 3) + .write.format("delta").save(path) + val files = deltaLog.update().allFiles.collect().sortBy(_.insertionTime) + for (file <- files) { + assert(file.numPhysicalRecords.isDefined) + } + files + } + + protected def addDVToFileInTable(path: String, file: AddFile): (AddFile, RemoveFile) = { + val deltaLog = DeltaLog.forTable(spark, path) + val dv = writeDV(deltaLog, RoaringBitmapArray(0L)) + updateFileDV(file, dv) + } + + protected def testRuntimeErrorOnCommit( + actions: Seq[FileAction], + deltaLog: DeltaLog)( + checkErrorFun: DeltaRuntimeException => Unit): Unit = { + val operation = DeltaOperations.Optimize(Seq.empty, zOrderBy = Seq.empty) + val txn = deltaLog.startTransaction() + val e = intercept[DeltaRuntimeException] { + withSQLConf(DeltaSQLConf.DELTA_DUPLICATE_ACTION_CHECK_ENABLED.key -> "true") { + txn.commit(actions, operation) + } + } + checkErrorFun(e) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/RestoreTableSQLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/RestoreTableSQLSuite.scala new file mode 100644 index 00000000000..cee1fd1849c --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/RestoreTableSQLSuite.scala @@ -0,0 +1,131 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + + +import org.apache.spark.sql.{AnalysisException, DataFrame} + +/** Restore tests using the SQL. */ +class RestoreTableSQLSuite extends RestoreTableSuiteBase { + + override def restoreTableToVersion( + tblId: String, + version: Int, + isTable: Boolean, + expectNoOp: Boolean = false): DataFrame = { + val identifier = if (isTable) { + tblId + } else { + s"delta.`$tblId`" + } + spark.sql(s"RESTORE TABLE $identifier VERSION AS OF ${version}") + } + + override def restoreTableToTimestamp( + tblId: String, + timestamp: String, + isTable: Boolean, + expectNoOp: Boolean = false): DataFrame = { + val identifier = if (isTable) { + tblId + } else { + s"delta.`$tblId`" + } + spark.sql(s"RESTORE $identifier TO TIMESTAMP AS OF '${timestamp}'") + } + + test("restoring a table that doesn't exist") { + val ex = intercept[AnalysisException] { + sql(s"RESTORE TABLE not_exists VERSION AS OF 0") + } + assert(ex.getMessage.contains("Table not found") + || ex.getMessage.contains("TABLE_OR_VIEW_NOT_FOUND")) + } + + test("restoring a view") { + withTempView("tmp") { + sql("CREATE OR REPLACE TEMP VIEW tmp AS SELECT * FROM range(10)") + val ex = intercept[AnalysisException] { + sql(s"RESTORE tmp TO VERSION AS OF 0") + } + assert(ex.getMessage.contains("only supported for Delta tables")) + } + } + + test("restoring a view over a Delta table") { + withTable("delta_table") { + withView("tmp") { + sql("CREATE TABLE delta_table USING delta AS SELECT * FROM range(10)") + sql("CREATE VIEW tmp AS SELECT * FROM delta_table") + val ex = intercept[AnalysisException] { + sql(s"RESTORE TABLE tmp VERSION AS OF 0") + } + assert(ex.getMessage.contains("only supported for Delta tables")) + } + } + } +} + + +class RestoreTableSQLNameColumnMappingSuite extends RestoreTableSQLSuite + with DeltaColumnMappingEnableNameMode { + + import testImplicits._ + + override protected def runOnlyTests = Seq( + "path based table", + "metastore based table" + ) + + + test("restore prior to column mapping upgrade should fail") { + withTempDir { tempDir => + val df1 = Seq(1, 2, 3).toDF("id") + val df2 = Seq(4, 5, 6).toDF("id") + + def deltaLog: DeltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + + withColumnMappingConf("none") { + df1.write.format("delta").save(tempDir.getAbsolutePath) + require(deltaLog.update().version == 0) + + df2.write.format("delta").mode("append").save(tempDir.getAbsolutePath) + assert(deltaLog.update().version == 1) + } + + // upgrade to column mapping mode + sql( + s""" + |ALTER TABLE delta.`$tempDir` + |SET TBLPROPERTIES ( + | ${DeltaConfigs.COLUMN_MAPPING_MODE.key} = '$columnMappingModeString', + | ${DeltaConfigs.MIN_READER_VERSION.key} = '2', + | ${DeltaConfigs.MIN_WRITER_VERSION.key} = '5' + |) + |""".stripMargin) + + assert(deltaLog.update().version == 2) + + // try restore back to version 1 before column mapping should fail + intercept[ColumnMappingUnsupportedException] { + restoreTableToVersion(tempDir.getAbsolutePath, version = 1, isTable = false) + } + } + } + +} + diff --git 
a/spark/src/test/scala/org/apache/spark/sql/delta/RestoreTableScalaSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/RestoreTableScalaSuite.scala new file mode 100644 index 00000000000..05748891ffe --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/RestoreTableScalaSuite.scala @@ -0,0 +1,273 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import com.databricks.spark.util.Log4jUsageLogger +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.commands.DeletionVectorUtils +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.test.DeltaExcludedTestMixin +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.util.Utils + +/** Restore tests using the Scala APIs. */ +class RestoreTableScalaSuite extends RestoreTableSuiteBase { + + override def restoreTableToVersion( + tblId: String, + version: Int, + isTable: Boolean, + expectNoOp: Boolean = false): DataFrame = { + val deltaTable = if (isTable) { + io.delta.tables.DeltaTable.forName(spark, tblId) + } else { + io.delta.tables.DeltaTable.forPath(spark, tblId) + } + + deltaTable.restoreToVersion(version) + } + + override def restoreTableToTimestamp( + tblId: String, + timestamp: String, + isTable: Boolean, + expectNoOp: Boolean = false): DataFrame = { + val deltaTable = if (isTable) { + io.delta.tables.DeltaTable.forName(spark, tblId) + } else { + io.delta.tables.DeltaTable.forPath(spark, tblId) + } + + deltaTable.restoreToTimestamp(timestamp) + } +} + +class RestoreTableScalaDeletionVectorSuite + extends RestoreTableScalaSuite + with DeletionVectorsTestUtils + with DeltaExcludedTestMixin { + + import testImplicits._ + + override def beforeAll(): Unit = { + super.beforeAll() + enableDeletionVectors(spark.conf) + } + override def excluded: Seq[String] = super.excluded ++ + Seq( + // These tests perform a delete to produce a file to vacuum, but with persistent DVs enabled, + // we actually just add a DV to the file instead, so there's no unreferenced file for vacuum. + "restore after vacuum", + "restore after vacuum - cloned table", + // These rely on the new-table protocol version to be lower than the latest, + // but this isn't true for DVs. + "restore downgrade protocol (allowed=true)", + "restore downgrade protocol (allowed=false)", + "restore downgrade protocol with table features (allowed=true)", + "restore downgrade protocol with table features (allowed=false)", + "cdf + RESTORE with write amplification reduction", + "RESTORE doesn't account for session defaults" + ) + + case class RestoreAndCheckArgs(versionToRestore: Int, expectedResult: DataFrame) + type RestoreAndCheckFunction = RestoreAndCheckArgs => Unit + + /** + * Tests `testFun` once by restoring to version and once to timestamp. 
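+   * For the restore-by-timestamp variant, the commit-file timestamps are first rewritten to
+   * fixed dates so that the timestamp-based lookup resolves deterministically.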
+ * + * `testFun` is expected to perform setup before executing the `RestoreAndTestFunction` and + * cleanup afterwards. + */ + protected def testRestoreByTimestampAndVersion + (testName: String) + (testFun: (String, RestoreAndCheckFunction) => Unit): Unit = { + for (restoreToVersion <- BOOLEAN_DOMAIN) { + val restoringTo = if (restoreToVersion) "version" else "timestamp" + test(testName + s" - restoring to $restoringTo") { + withTempDir{ dir => + val path = dir.toString + val restoreAndCheck: RestoreAndCheckFunction = (args: RestoreAndCheckArgs) => { + val deltaLog = DeltaLog.forTable(spark, path) + if (restoreToVersion) { + restoreTableToVersion(path, args.versionToRestore, isTable = false) + } else { + // Set a custom timestamp for the commit + val desiredDateS = "1996-01-12" + setTimestampToCommitFileAtVersion( + deltaLog, + version = args.versionToRestore, + date = desiredDateS) + // Set all previous versions to something lower, so we don't error out. + for (version <- 0 until args.versionToRestore) { + val previousDateS = "1996-01-11" + setTimestampToCommitFileAtVersion( + deltaLog, + version = version, + date = previousDateS) + } + + restoreTableToTimestamp(path, desiredDateS, isTable = false) + } + checkAnswer(spark.read.format("delta").load(path), args.expectedResult) + } + testFun(path, restoreAndCheck) + } + } + } + } + + testRestoreByTimestampAndVersion( + "Restoring table with persistent DVs to version without DVs") { (path, restoreAndCheck) => + val deltaLog = DeltaLog.forTable(spark, path) + val df1 = Seq(1, 2, 3, 4, 5).toDF("id") + val values2 = Seq(6, 7, 8, 9, 10) + val df2 = values2.toDF("id") + + // Write all values into version 0. + df1.union(df2).coalesce(1).write.format("delta").save(path) // version 0 + checkAnswer(spark.read.format("delta").load(path), expectedAnswer = df1.union(df2)) + val snapshotV0 = deltaLog.update() + assert(snapshotV0.version === 0) + + // Delete values 2 so that version 1 is `df1`. + spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (${values2.mkString(", ")})") // version 1 + assert(getFilesWithDeletionVectors(deltaLog).size > 0) + checkAnswer(spark.read.format("delta").load(path), expectedAnswer = df1) + val snapshotV1 = deltaLog.snapshot + assert(snapshotV1.version === 1) + + restoreAndCheck(RestoreAndCheckArgs(versionToRestore = 0, expectedResult = df1.union(df2))) + assert(getFilesWithDeletionVectors(deltaLog).size === 0) + } + + testRestoreByTimestampAndVersion( + "Restoring table with persistent DVs to version with DVs") { (path, restoreAndCheck) => + val deltaLog = DeltaLog.forTable(spark, path) + val df1 = Seq(1, 2, 3, 4, 5).toDF("id") + val values2 = Seq(6, 7) + val df2 = values2.toDF("id") + val values3 = Seq(8, 9, 10) + val df3 = values3.toDF("id") + + // Write all values into version 0. + df1.union(df2).union(df3).coalesce(1).write.format("delta").save(path) // version 0 + + // Delete values 2 and 3 in reverse order, so that version 1 is `df1.union(df2)`. 
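+    // Each DELETE below should attach deletion vectors to the existing file rather than
+    // rewrite it, which is what the getFilesWithDeletionVectors assertions check.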
+    spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (${values3.mkString(", ")})") // version 1
+    assert(getFilesWithDeletionVectors(deltaLog).size > 0)
+    checkAnswer(spark.read.format("delta").load(path), expectedAnswer = df1.union(df2))
+    spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (${values2.mkString(", ")})") // version 2
+    assert(getFilesWithDeletionVectors(deltaLog).size > 0)
+
+    restoreAndCheck(RestoreAndCheckArgs(versionToRestore = 1, expectedResult = df1.union(df2)))
+    assert(getFilesWithDeletionVectors(deltaLog).size > 0)
+  }
+
+  testRestoreByTimestampAndVersion("Restoring table with persistent DVs to version " +
+    "without persistent DVs enabled") { (path, restoreAndCheck) =>
+    withSQLConf(
+      DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.defaultTablePropertyKey -> "false",
+      // Disable log clean up. The test sets commit file timestamps far back in time, which
+      // would otherwise trigger commit file clean up as part of [[MetadataCleanup]].
+      DeltaConfigs.ENABLE_EXPIRED_LOG_CLEANUP.defaultTablePropertyKey -> "false") {
+      val deltaLog = DeltaLog.forTable(spark, path)
+      val df1 = Seq(1, 2, 3, 4, 5).toDF("id")
+      val values2 = Seq(6, 7, 8, 9, 10)
+      val df2 = values2.toDF("id")
+
+      // Write all values into version 0.
+      df1.union(df2).coalesce(1).write.format("delta").save(path) // version 0
+      checkAnswer(spark.read.format("delta").load(path), expectedAnswer = df1.union(df2))
+      val snapshotV0 = deltaLog.update()
+      assert(snapshotV0.version === 0)
+      assert(!DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.fromMetaData(snapshotV0.metadata))
+
+      // Upgrade the table to use DVs.
+      spark.sql(s"ALTER TABLE delta.`$path` SET TBLPROPERTIES " +
+        s"(${DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key} = true)")
+      val snapshotV1 = deltaLog.update()
+      assert(snapshotV1.version === 1)
+      assert(DeletionVectorUtils.deletionVectorsReadable(snapshotV1))
+      assert(DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.fromMetaData(snapshotV1.metadata))
+
+      // Delete values2 so that the table content is `df1` again (version 2).
+      spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (${values2.mkString(", ")})") // version 2
+      assert(getFilesWithDeletionVectors(deltaLog).size > 0)
+      checkAnswer(spark.read.format("delta").load(path), expectedAnswer = df1)
+      val snapshotV2 = deltaLog.update()
+      assert(snapshotV2.version === 2)
+
+      // Restore to before the version upgrade. Protocol version should be retained (to make the
+      // history readable), but DV creation should be disabled again.
+      restoreAndCheck(RestoreAndCheckArgs(versionToRestore = 0, expectedResult = df1.union(df2)))
+      val snapshotV3 = deltaLog.update()
+      assert(getFilesWithDeletionVectors(deltaLog).size === 0)
+      assert(DeletionVectorUtils.deletionVectorsReadable(snapshotV3))
+      assert(!DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.fromMetaData(snapshotV3.metadata))
+      // Check that we can still read versions that did have DVs.
+ checkAnswer( + spark.read.format("delta").option("versionAsOf", "2").load(path), + expectedAnswer = df1) + } + } + test("CDF + DV + RESTORE") { + withSQLConf(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true") { + withTempDir { tempDir => + val df0 = Seq(0, 1).toDF("id") // version 0 = [0, 1] + df0.write.format("delta").save(tempDir.getAbsolutePath) + + val df1 = Seq(2).toDF("id") // version 1: append to df0 = [0, 1, 2] + df1.write.mode("append").format("delta").save(tempDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tempDir.getAbsolutePath) + deltaTable.delete("id < 1") // version 2: delete (0) = [1, 2] + + deltaTable.updateExpr( + "id > 1", + Map("id" -> "4") + ) // version 3: update 2 --> 4 = [1, 4] + + // version 4: restore to version 2 (delete 4, insert 2) = [1, 2] + restoreTableToVersion(tempDir.getAbsolutePath, 2, false) + checkAnswer( + CDCReader.changesToBatchDF(DeltaLog.forTable(spark, tempDir), 4, 4, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(4, "delete", 4) :: Row(2, "insert", 4) :: Nil + ) + + // version 5: restore to version 1 (insert 0) = [0, 1, 2] + restoreTableToVersion(tempDir.getAbsolutePath, 1, false) + checkAnswer( + CDCReader.changesToBatchDF(DeltaLog.forTable(spark, tempDir), 5, 5, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(0, "insert", 5) :: Nil + ) + + // version 6: restore to version 0 (delete 2) = [0, 1] + restoreTableToVersion(tempDir.getAbsolutePath, 0, false) + checkAnswer( + CDCReader.changesToBatchDF(DeltaLog.forTable(spark, tempDir), 6, 6, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(2, "delete", 6) :: Nil + ) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/RestoreTableSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/RestoreTableSuiteBase.scala new file mode 100644 index 00000000000..54486b8e76d --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/RestoreTableSuiteBase.scala @@ -0,0 +1,419 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File + +import org.apache.spark.sql.delta.actions.{Protocol, TableFeatureProtocolUtils} +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils.{TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION} +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ + +/** Base suite containing the restore tests. 
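+ * Concrete suites plug in how the restore is actually issued; roughly (a sketch mirroring the
+ * SQL and Scala API sub-suites):
+ * {{{
+ *   // SQL flavour, for a path-based table
+ *   spark.sql(s"RESTORE TABLE delta.`$tblId` VERSION AS OF $version")
+ *   // Scala API flavour
+ *   io.delta.tables.DeltaTable.forPath(spark, tblId).restoreToVersion(version)
+ * }}}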
*/ +trait RestoreTableSuiteBase extends QueryTest with SharedSparkSession + with DeltaSQLCommandTest { + + import testImplicits._ + + // Will be overridden in sub-class + /** + * @param tblId - the table identifier either table name or path + * @param version - version to restore to + * @param isMetastoreTable - whether its a path based table or metastore table + * @param expectNoOp - whether the restore is no-op or not + */ + protected def restoreTableToVersion( + tblId: String, + version: Int, + isMetastoreTable: Boolean, + expectNoOp: Boolean = false): DataFrame + + /** + * @param tblId - the table identifier either table name or path + * @param timestamp - timestamp to restore to + * @param isMetastoreTable - whether its a path based table or a metastore table. + * @param expectNoOp - whether the restore is no-op or not + */ + protected def restoreTableToTimestamp( + tblId: String, + timestamp: String, + isMetastoreTable: Boolean, + expectNoOp: Boolean = false): DataFrame + + test("path based table") { + withTempDir { tempDir => + val path = tempDir.getAbsolutePath + + val df1 = Seq(1, 2, 3, 4, 5).toDF("id") + val df2 = Seq(6, 7).toDF("id") + val df3 = Seq(8, 9, 10).toDF("id") + + // write version 0 of the table + df1.write.format("delta").save(path) // version 0 + + val deltaLog = DeltaLog.forTable(spark, path) + require(deltaLog.snapshot.version == 0) + + // append df2 to the table + df2.write.format("delta").mode("append").save(path) // version 1 + + // append df3 to the table + df3.write.format("delta").mode("append").save(path) // version 2 + + // check if the table has all the three dataframes written + checkAnswer(spark.read.format("delta").load(path), df1.union(df2).union(df3)) + + // restore by version to version 1 + restoreTableToVersion(path, 1, false) + checkAnswer(spark.read.format("delta").load(path), df1.union(df2)) + + // Set a custom timestamp for the commit + val desiredDate = "1996-01-12" + setTimestampToCommitFileAtVersion(deltaLog, version = 0, date = desiredDate) + + // restore by timestamp to version 0 + restoreTableToTimestamp(path, desiredDate, false) + checkAnswer(spark.read.format("delta").load(path), df1) + } + } + + protected def dateStringToTimestamp(date: String): Long = { + val format = new java.text.SimpleDateFormat("yyyy-MM-dd") + format.parse(date).getTime + } + + protected def timeStringToTimestamp(time: String): Long = { + val format = new java.text.SimpleDateFormat("yyyy-MM-dd hh:mm:ss Z") + format.parse(time).getTime + } + + protected def setTimestampToCommitFileAtVersion( + deltaLog: DeltaLog, + version: Int, + date: String): Unit = { + val timestamp = dateStringToTimestamp(date) + setTimestampToCommitFileAtVersion(deltaLog, version, timestamp) + } + + protected def setTimestampToCommitFileAtVersion( + deltaLog: DeltaLog, + version: Int, + timestamp: Long): Unit = { + val file = new File(FileNames.deltaFile(deltaLog.logPath, version).toUri) + file.setLastModified(timestamp) + } + + test("metastore based table") { + val identifier = "tbl" + withTable(identifier) { + + val df1 = Seq(1, 2, 3, 4, 5).toDF("id") + val df2 = Seq(6, 7).toDF("id") + + // write first version of the table + df1.write.format("delta").saveAsTable(identifier) // version 0 + + val deltaLog = DeltaLog.forTable(spark, new TableIdentifier(identifier)) + require(deltaLog.snapshot.version == 0) + + // append df2 to the table + df2.write.format("delta").mode("append").saveAsTable(identifier) // version 1 + + // check if the table has all the three dataframes written + 
checkAnswer(spark.read.format("delta").table(identifier), df1.union(df2)) + + + // restore by version to version 0 + restoreTableToVersion(identifier, 0, true) + checkAnswer(spark.read.format("delta").table(identifier), df1) + } + } + + test("restore a restore back to pre-restore version") { + withTempDir { tempDir => + val df1 = Seq(1, 2, 3).toDF("id") + val df2 = Seq(4, 5, 6).toDF("id") + val df3 = Seq(7, 8, 9).toDF("id") + df1.write.format("delta").save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + require(deltaLog.snapshot.version == 0) + + df2.write.format("delta").mode("append").save(tempDir.getAbsolutePath) + assert(deltaLog.update().version == 1) + + df3.write.format("delta").mode("append").save(tempDir.getAbsolutePath) + assert(deltaLog.update().version == 2) + + // we have three versions now, let's restore to version 1 first + restoreTableToVersion(tempDir.getAbsolutePath, 1, false) + + checkAnswer(spark.read.format("delta").load(tempDir.getAbsolutePath), df1.union(df2)) + assert(deltaLog.update().version == 3) + + restoreTableToVersion(tempDir.getAbsolutePath, 2, false) + checkAnswer( + spark.read.format("delta").load(tempDir.getAbsolutePath), df1.union(df2).union(df3)) + + assert(deltaLog.update().version == 4) + } + } + + test("restore to a restored version") { + withTempDir { tempDir => + val df1 = Seq(1, 2, 3).toDF("id") + val df2 = Seq(4, 5, 6).toDF("id") + val df3 = Seq(7, 8, 9).toDF("id") + df1.write.format("delta").save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + require(deltaLog.update().version == 0) + + df2.write.format("delta").mode("append").save(tempDir.getAbsolutePath) + assert(deltaLog.update().version == 1) + + // we have two versions now, let's restore to version 0 first + restoreTableToVersion(tempDir.getAbsolutePath, 0, false) + + checkAnswer(spark.read.format("delta").load(tempDir.getAbsolutePath), df1) + assert(deltaLog.update().version == 2) + + df3.write.format("delta").mode("append").save(tempDir.getAbsolutePath) + assert(deltaLog.update().version == 3) + + // now we restore a restored version + restoreTableToVersion(tempDir.getAbsolutePath, 2, false) + checkAnswer(spark.read.format("delta").load(tempDir.getAbsolutePath), df1) + assert(deltaLog.update().version == 4) + } + } + + for (downgradeAllowed <- DeltaTestUtils.BOOLEAN_DOMAIN) + test(s"restore downgrade protocol (allowed=$downgradeAllowed)") { + withTempDir { tempDir => + val path = tempDir.getAbsolutePath + spark.range(5).write.format("delta").save(path) + val deltaLog = DeltaLog.forTable(spark, path) + val oldProtocolVersion = deltaLog.snapshot.protocol + // Update table to latest version. + deltaLog.upgradeProtocol(oldProtocolVersion.merge(Protocol())) + val newProtocolVersion = deltaLog.snapshot.protocol + assert(newProtocolVersion.minReaderVersion > oldProtocolVersion.minReaderVersion && + newProtocolVersion.minWriterVersion > oldProtocolVersion.minWriterVersion, + s"newProtocolVersion=$newProtocolVersion is not strictly greater than" + + s" oldProtocolVersion=$oldProtocolVersion") + + withSQLConf(DeltaSQLConf.RESTORE_TABLE_PROTOCOL_DOWNGRADE_ALLOWED.key -> + downgradeAllowed.toString) { + // Restore to before the upgrade. 
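As an aside before the restore below: the reader/writer versions asserted on in this test are also user-visible through DESCRIBE DETAIL. A hedged sketch of an equivalent check follows; the column names come from the documented DESCRIBE DETAIL output and the integer column types are an assumption.

// Illustrative cross-check, not part of this patch.
val detailRow = spark.sql(s"DESCRIBE DETAIL delta.`$path`")
  .select("minReaderVersion", "minWriterVersion")
  .head()
// Before the restore, the table still carries the upgraded protocol.
assert(detailRow.getAs[Int]("minReaderVersion") === newProtocolVersion.minReaderVersion)
assert(detailRow.getAs[Int]("minWriterVersion") === newProtocolVersion.minWriterVersion)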
+ restoreTableToVersion(path, version = 0, isMetastoreTable = false) + } + val restoredProtocolVersion = deltaLog.snapshot.protocol + if (downgradeAllowed) { + assert(restoredProtocolVersion === oldProtocolVersion) + } else { + assert(restoredProtocolVersion === newProtocolVersion.merge(oldProtocolVersion)) + } + } + } + + for (downgradeAllowed <- DeltaTestUtils.BOOLEAN_DOMAIN) + test( + s"restore downgrade protocol with table features (allowed=$downgradeAllowed)") { + withTempDir { tempDir => + val path = tempDir.getAbsolutePath + spark.range(5).write.format("delta").save(path) + val deltaLog = DeltaLog.forTable(spark, path) + val oldProtocolVersion = deltaLog.snapshot.protocol + // Update table to latest version. + deltaLog.upgradeProtocol( + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + .withFeatures(Seq(TestLegacyReaderWriterFeature)) + .withFeatures(oldProtocolVersion.implicitlySupportedFeatures)) + val newProtocolVersion = deltaLog.snapshot.protocol + assert( + newProtocolVersion.minReaderVersion > oldProtocolVersion.minReaderVersion && + newProtocolVersion.minWriterVersion > oldProtocolVersion.minWriterVersion, + s"newProtocolVersion=$newProtocolVersion is not strictly greater than" + + s" oldProtocolVersion=$oldProtocolVersion") + + withSQLConf( + DeltaSQLConf.RESTORE_TABLE_PROTOCOL_DOWNGRADE_ALLOWED.key -> + downgradeAllowed.toString) { + // Restore to before the upgrade. + restoreTableToVersion(path, version = 0, isMetastoreTable = false) + } + val restoredProtocolVersion = deltaLog.snapshot.protocol + if (downgradeAllowed) { + assert(restoredProtocolVersion === oldProtocolVersion) + } else { + assert(restoredProtocolVersion === + newProtocolVersion.merge(oldProtocolVersion)) + } + } + } + + test("RESTORE doesn't account for session defaults") { + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "1") { + withTempDir { dir => + spark.range(10).write.format("delta").save(dir.getAbsolutePath) + spark + .range(start = 10, end = 20) + .write + .format("delta") + .mode("append") + .save(dir.getAbsolutePath) + val log = DeltaLog.forTable(spark, dir.getAbsolutePath) + val oldProtocol = log.update().protocol + assert(oldProtocol === Protocol(1, 1)) + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "2", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "2", + TableFeatureProtocolUtils.defaultPropertyKey(TestWriterFeature) -> "enabled") { + restoreTableToVersion(dir.getAbsolutePath, 0, isMetastoreTable = false) + } + val newProtocol = log.update().protocol + assert(newProtocol === oldProtocol) + } + } + } + + test("restore operation metrics in Delta table history") { + withSQLConf( + DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + withTempDir { tempDir => + val df1 = Seq(1, 2, 3).toDF("id") + val df2 = Seq(4, 5, 6).toDF("id") + df1.write.format("delta").save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + df2.write.format("delta").mode("append").save(tempDir.getAbsolutePath) + assert(deltaLog.update().version == 1) + + // we have two versions now, let's restore to version 0 first + restoreTableToVersion(tempDir.getAbsolutePath, 0, false) + + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tempDir.getAbsolutePath) + + val actualOperationMetrics = deltaTable.history(1).select("operationMetrics") + .take(1) + .head + .getMap(0) + .asInstanceOf[Map[String, String]] + + 
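The same operation metrics can also be read back through SQL. The sketch below shows an equivalent check via DESCRIBE HISTORY; it is illustrative only and assumes the standard history output (an operation string column and an operationMetrics map column) and the "RESTORE" operation name.

// Illustrative SQL equivalent, not part of this patch.
val lastCommit = spark
  .sql(s"DESCRIBE HISTORY delta.`${tempDir.getAbsolutePath}` LIMIT 1")
  .select("operation", "operationMetrics")
  .head()
assert(lastCommit.getString(0) === "RESTORE")
val metricsViaSql = lastCommit.getMap[String, String](1)
assert(metricsViaSql("numRemovedFiles") === "2")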
// File sizes are flaky due to differences in order of data (=> encoding size differences) + assert(actualOperationMetrics.get("tableSizeAfterRestore").isDefined) + assert(actualOperationMetrics.get("numOfFilesAfterRestore").get == "2") + assert(actualOperationMetrics.get("numRemovedFiles").get == "2") + assert(actualOperationMetrics.get("numRestoredFiles").get == "0") + // File sizes are flaky due to differences in order of data (=> encoding size differences) + assert(actualOperationMetrics.get("removedFilesSize").isDefined) + assert(actualOperationMetrics.get("restoredFilesSize").get == "0") + } + } + } + + test("restore command output metrics") { + withSQLConf( + DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + withTempDir { tempDir => + val df1 = Seq(1, 2, 3).toDF("id") + val df2 = Seq(4, 5, 6).toDF("id") + df1.write.format("delta").save(tempDir.getAbsolutePath) + val deltaLog = DeltaLog.forTable(spark, tempDir.getAbsolutePath) + df2.write.format("delta").mode("append").save(tempDir.getAbsolutePath) + assert(deltaLog.update().version == 1) + + // we have two versions now, let's restore to version 0 first + val actualOutputMetrics = restoreTableToVersion(tempDir.getAbsolutePath, 0, false) + + // verify the schema + val expectedRestoreOutputSchema = StructType(Seq( + StructField("table_size_after_restore", LongType), + StructField("num_of_files_after_restore", LongType), + StructField("num_removed_files", LongType), + StructField("num_restored_files", LongType), + StructField("removed_files_size", LongType), + StructField("restored_files_size", LongType) + )) + assert(actualOutputMetrics.schema == expectedRestoreOutputSchema) + + val outputRow = actualOutputMetrics.take(1).head + // File sizes are flaky due to differences in order of data (=> encoding size differences) + assert(outputRow.getLong(0) > 0L) // table_size_after_restore + assert(outputRow.getLong(1) == 2L) // num_of_files_after_restore + assert(outputRow.getLong(2) == 2L) // num_removed_files + assert(outputRow.getLong(3) == 0L) // num_restored_files + // File sizes are flaky due to differences in order of data (=> encoding size differences) + assert(outputRow.getLong(4) > 0L) // removed_files_size + assert(outputRow.getLong(5) == 0L) // restored_files_size + } + } + } + + test("cdf + RESTORE") { + withSQLConf( + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> "true") { + withTempDir { tempDir => + val df0 = Seq(0, 1).toDF("id") // version 0 = [0, 1] + df0.write.format("delta").save(tempDir.getAbsolutePath) + + val df1 = Seq(2).toDF("id") // version 1: append to df0 = [0, 1, 2] + df1.write.mode("append").format("delta").save(tempDir.getAbsolutePath) + + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tempDir.getAbsolutePath) + deltaTable.delete("id < 1") // version 2: delete (0) = [1, 2] + + deltaTable.updateExpr( + "id > 1", + Map("id" -> "4") + ) // version 3: update 2 --> 4 = [1, 4] + + // version 4: restore to version 2 (delete 4, insert 2) = [1, 2] + restoreTableToVersion(tempDir.getAbsolutePath, 2, false) + checkAnswer( + CDCReader.changesToBatchDF(DeltaLog.forTable(spark, tempDir), 4, 4, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(4, "delete", 4) :: Row(2, "insert", 4) :: Nil + ) + + // version 5: restore to version 1 (insert 0) = [0, 1, 2] + restoreTableToVersion(tempDir.getAbsolutePath, 1, false) + checkAnswer( + CDCReader.changesToBatchDF(DeltaLog.forTable(spark, tempDir), 5, 5, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(0, "insert", 5) :: Nil + ) + + // version 
6: restore to version 0 (delete 2) = [0, 1] + restoreTableToVersion(tempDir.getAbsolutePath, 0, false) + checkAnswer( + CDCReader.changesToBatchDF(DeltaLog.forTable(spark, tempDir), 6, 6, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(2, "delete", 6) :: Nil + ) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/S3LikeLocalFileSystem.scala b/spark/src/test/scala/org/apache/spark/sql/delta/S3LikeLocalFileSystem.scala new file mode 100644 index 00000000000..a85679758fc --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/S3LikeLocalFileSystem.scala @@ -0,0 +1,43 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.net.URI + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.RawLocalFileSystem + +/** + * A local filesystem on scheme s3. Useful for testing paths on non-defualt schemes. + */ +class S3LikeLocalFileSystem extends RawLocalFileSystem { + private var uri: URI = _ + override def getScheme: String = "s3" + + override def initialize(name: URI, conf: Configuration): Unit = { + uri = URI.create(name.getScheme + ":///") + super.initialize(name, conf) + } + + override def getUri(): URI = if (uri == null) { + // RawLocalFileSystem's constructor will call this one before `initialize` is called. + // Just return the super's URI to avoid NPE. + super.getUri + } else { + uri + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/S3SingleDriverLogStoreSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/S3SingleDriverLogStoreSuite.scala new file mode 100644 index 00000000000..d093c4bcb60 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/S3SingleDriverLogStoreSuite.scala @@ -0,0 +1,158 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.storage.{HDFSLogStore, LogStore, S3SingleDriverLogStore} +import org.apache.spark.sql.delta.util.FileNames +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} + +trait S3SingleDriverLogStoreSuiteBase extends LogStoreSuiteBase { + + private def checkLogStoreList( + store: LogStore, + path: Path, + expectedVersions: Seq[Int], + hadoopConf: Configuration): Unit = { + assert(store.listFrom(path, hadoopConf).map(FileNames.deltaVersion).toSeq === expectedVersions) + } + + private def checkFileSystemList(fs: FileSystem, path: Path, expectedVersions: Seq[Int]): Unit = { + val fsList = fs.listStatus(path.getParent).filter(_.getPath.getName >= path.getName) + assert(fsList.map(FileNames.deltaVersion).sorted === expectedVersions) + } + + testHadoopConf( + ".*No FileSystem for scheme.*fake.*", + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true") + + test("file system has priority over cache") { + withTempDir { dir => + val store = createLogStore(spark) + val deltas = Seq(0, 1, 2).map(i => FileNames.deltaFile(new Path(dir.toURI), i)) + store.write(deltas(0), Iterator("zero"), overwrite = false, sessionHadoopConf) + store.write(deltas(1), Iterator("one"), overwrite = false, sessionHadoopConf) + store.write(deltas(2), Iterator("two"), overwrite = false, sessionHadoopConf) + + // delete delta file 2 and its checksum from file system + val fs = new Path(dir.getCanonicalPath).getFileSystem(sessionHadoopConf) + val delta2CRC = FileNames.checksumFile(new Path(dir.toURI), 2) + fs.delete(deltas(2), true) + fs.delete(delta2CRC, true) + + // magically create a different version of file 2 in the FileSystem only + val hackyStore = new HDFSLogStore(sparkConf, sessionHadoopConf) + hackyStore.write(deltas(2), Iterator("foo"), overwrite = true, sessionHadoopConf) + + // we should see "foo" (FileSystem value) instead of "two" (cache value) + assert(store.read(deltas(2), sessionHadoopConf).head == "foo") + } + } + + test("cache works") { + withTempDir { dir => + val store = createLogStore(spark) + val deltas = + Seq(0, 1, 2, 3, 4).map(i => FileNames.deltaFile(new Path(dir.toURI), i)) + store.write(deltas(0), Iterator("zero"), overwrite = false, sessionHadoopConf) + store.write(deltas(1), Iterator("one"), overwrite = false, sessionHadoopConf) + store.write(deltas(2), Iterator("two"), overwrite = false, sessionHadoopConf) + + // delete delta file 2 from file system + val fs = new Path(dir.getCanonicalPath).getFileSystem(sessionHadoopConf) + fs.delete(deltas(2), true) + + // file system listing doesn't see file 2 + checkFileSystemList(fs, deltas(0), Seq(0, 1)) + + // can't re-write because cache says it still exists + intercept[java.nio.file.FileAlreadyExistsException] { + store.write(deltas(2), Iterator("two"), overwrite = false, sessionHadoopConf) + } + + // log store list still sees file 2 as it's cached + checkLogStoreList(store, deltas(0), Seq(0, 1, 2), sessionHadoopConf) + + if (canInvalidateCache) { + // clear the cache + store.invalidateCache() + + // log store list doesn't see file 2 anymore + checkLogStoreList(store, deltas(0), Seq(0, 1), sessionHadoopConf) + + // write a new file 2 + store.write(deltas(2), Iterator("two"), overwrite = false, sessionHadoopConf) + } + + // add a file 3 to cache only + store.write(deltas(3), Iterator("three"), overwrite = false, sessionHadoopConf) + fs.delete(deltas(3), true) + + // log store listing returns a union of: + 
// 1) file system listing: 0, 1, 2 + // 2a) cache listing - canInvalidateCache=true: 2, 3 + // 2b) cache listing - canInvalidateCache=false: 0, 1, 2, 3 + checkLogStoreList(store, deltas(0), Seq(0, 1, 2, 3), sessionHadoopConf) + } + } + + test("cache works correctly when writing an initial log version") { + withTempDir { dir => + val store = createLogStore(spark) + val deltas = + Seq(0, 1, 2).map(i => FileNames.deltaFile(new Path(dir.toURI), i)) + store.write(deltas(0), Iterator("log version 0"), overwrite = false, sessionHadoopConf) + store.write(deltas(1), Iterator("log version 1"), overwrite = false, sessionHadoopConf) + store.write(deltas(2), Iterator("log version 2"), overwrite = false, sessionHadoopConf) + + val fs = new Path(dir.getCanonicalPath).getFileSystem(sessionHadoopConf) + // delete all log files + fs.delete(deltas(2), true) + fs.delete(deltas(1), true) + fs.delete(deltas(0), true) + + // can't write a new version 1 as it's in cache + intercept[java.nio.file.FileAlreadyExistsException] { + store.write(deltas(1), Iterator("new log version 1"), overwrite = false, sessionHadoopConf) + } + + // all three log files still in cache + checkLogStoreList(store, deltas(0), Seq(0, 1, 2), sessionHadoopConf) + + // can write a new version 0 as it's the initial version of the log + store.write(deltas(0), Iterator("new log version 0"), overwrite = false, sessionHadoopConf) + + // writing a new initial version invalidates all files in that log + checkLogStoreList(store, deltas(0), Seq(0), sessionHadoopConf) + } + } + + protected def shouldUseRenameToWriteCheckpoint: Boolean = false + + /** + * S3SingleDriverLogStore.scala can invalidate cache + * S3SingleDriverLogStore.java cannot invalidate cache + */ + protected def canInvalidateCache: Boolean +} + +class S3SingleDriverLogStoreSuite extends S3SingleDriverLogStoreSuiteBase { + override val logStoreClassName: String = classOf[S3SingleDriverLogStore].getName + + override protected def canInvalidateCache: Boolean = true +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/SchemaValidationSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/SchemaValidationSuite.scala new file mode 100644 index 00000000000..10997cc1139 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/SchemaValidationSuite.scala @@ -0,0 +1,412 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.util.concurrent.CountDownLatch + +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SparkSession} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SharedSparkSession + +trait SchemaValidationSuiteBase extends QueryTest with SharedSparkSession with DeltaSQLCommandTest { + + def checkMergeException(e: Exception, col: String): Unit = { + assert(e.isInstanceOf[MetadataChangedException]) + assert(e.getMessage.contains( + "The metadata of the Delta table has been changed by a concurrent update")) + } +} + +/** + * This Suite tests the behavior of Delta commands when a schema altering commit is run after the + * command completes analysis but before the command starts the transaction. We want to make sure + * That we do not corrupt tables. + */ +class SchemaValidationSuite extends SchemaValidationSuiteBase { + + class BlockingRule( + blockActionLatch: CountDownLatch, + startConcurrentUpdateLatch: CountDownLatch) extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { + startConcurrentUpdateLatch.countDown() + blockActionLatch.await() + plan + } + } + + /** + * Blocks the thread with the help of an optimizer rule until end of scope. + * We need two latches to ensure that the thread executing the query is blocked until + * the other thread concurrently updates the metadata. `blockActionLatch` blocks the action + * until it is counted down by the thread updating the metadata. `startConcurrentUpdateLatch` + * will block the concurrent update to happen until it is counted down by the action reaches the + * optimizer rule. + */ + private def withBlockedExecution( + t: Thread, + blockActionLatch: CountDownLatch, + startConcurrentUpdateLatch: CountDownLatch)(f: => Unit): Unit = { + t.start() + startConcurrentUpdateLatch.await() + try { + f + } finally { + blockActionLatch.countDown() + t.join() + } + } + + def cloneSession(spark: SparkSession): SparkSession = { + val cloneMethod = classOf[SparkSession].getDeclaredMethod("cloneSession") + cloneMethod.setAccessible(true) + val clonedSession = cloneMethod.invoke(spark).asInstanceOf[SparkSession] + clonedSession + } + + /** + * Common base method for both the path based and table name based tests. + */ + private def testConcurrentChangeBase(identifier: String)( + createTable: (SparkSession, String) => Unit, + actionToTest: (SparkSession, String) => Unit, + concurrentChange: (SparkSession, String) => Unit): Unit = { + createTable(spark, identifier) + + // Clone the session to run the query in a separate thread. 
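It may help to see the two-latch handshake implemented by withBlockedExecution in isolation. The following is a minimal, self-contained sketch of the same coordination pattern with plain threads; all names are illustrative and it is not part of this patch.

// Minimal sketch of the handshake used above, not part of this patch.
import java.util.concurrent.CountDownLatch

val reachedBlockingPoint = new CountDownLatch(1) // counted down by the worker thread
val resumeWorker = new CountDownLatch(1)         // counted down by the main thread

val worker = new Thread(() => {
  reachedBlockingPoint.countDown() // signal: the action has reached the blocking rule
  resumeWorker.await()             // park until the concurrent change has been applied
  // ... the blocked action would resume and try to commit here ...
})
worker.start()
reachedBlockingPoint.await() // main thread: wait until the worker is parked
// ... apply the concurrent schema change here ...
resumeWorker.countDown()     // unblock the worker
worker.join()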
+ val newSession = cloneSession(spark) + val blockActionLatch = new CountDownLatch(1) + val startConcurrentUpdateLatch = new CountDownLatch(1) + val rule = new BlockingRule(blockActionLatch, startConcurrentUpdateLatch) + newSession.experimental.extraOptimizations :+= rule + + var actionException: Exception = null + val actionToTestThread = new Thread() { + override def run(): Unit = { + try { + actionToTest(newSession, identifier) + } catch { + case e: Exception => + actionException = e + } + } + } + withBlockedExecution(actionToTestThread, blockActionLatch, startConcurrentUpdateLatch) { + concurrentChange(spark, identifier) + } + if (actionException != null) { + throw actionException + } + } + + /** + * tests the behavior of concurrent changes to schema on a blocked command. + * @param testName - name of the test + * @param createTable - method that creates a table given an identifier and spark session. + * @param actionToTest - the method we want to test. + * @param concurrentChange - the concurrent query that updates the schema of the table + * + * All the above methods take SparkSession and the table path as parameters + */ + def testConcurrentChange(testName: String, testTags: org.scalatest.Tag*)( + createTable: (SparkSession, String) => Unit, + actionToTest: (SparkSession, String) => Unit, + concurrentChange: (SparkSession, String) => Unit): Unit = { + + test(testName, testTags: _*) { + withTempDir { tempDir => + testConcurrentChangeBase(tempDir.getCanonicalPath)( + createTable, + actionToTest, + concurrentChange + ) + } + } + } + + /** + * tests the behavior of concurrent changes pf schema on a blocked command with metastore tables. + * @param testName - name of the test + * @param createTable - method that creates a table given an identifier and spark session. + * @param actionToTest - the method we want to test. + * @param concurrentChange - the concurrent query that updates the schema of the table + * + * All the above methods take SparkSession and the table name as parameters + */ + def testConcurrentChangeWithTable(testName: String)( + createTable: (SparkSession, String) => Unit, + actionToTest: (SparkSession, String) => Unit, + concurrentChange: (SparkSession, String) => Unit): Unit = { + + val tblName = "metastoreTable" + test(testName) { + withTable(tblName) { + testConcurrentChangeBase(tblName)( + createTable, + actionToTest, + concurrentChange + ) + } + } + } + + /** + * Creates a method to remove a column from the table by taking column as an argument. + */ + def dropColFromSampleTable(col: String): (SparkSession, String) => Unit = { + (spark: SparkSession, tblPath: String) => { + spark.read.format("delta").load(tblPath) + .drop(col) + .write + .format("delta") + .mode("overwrite") + .option("overwriteSchema", "true") + .save(tblPath) + } + } + + /** + * Adding a column to the schema will result in the blocked thread appending to the table + * with null values for the new column. 
+ */ + testConcurrentChange("write - add a column concurrently")( + createTable = (spark: SparkSession, tblPath: String) => { + spark.range(10).write.format("delta").save(tblPath) + }, + actionToTest = (spark: SparkSession, tblPath: String) => { + spark.range(11, 20).write.format("delta") + .mode("append") + .save(tblPath) + + val appendedCol2Values = spark.read.format("delta") + .load(tblPath) + .filter(col("id") <= 20) + .select("col2") + .distinct() + .collect() + .toList + assert(appendedCol2Values == List(Row(null))) + }, + concurrentChange = (spark: SparkSession, tblPath: String) => { + spark.range(21, 30).withColumn("col2", lit(2)).write + .format("delta") + .mode("append") + .option("mergeSchema", "true") + .save(tblPath) + } + ) + + /** + * Removing a column while a query is in running should throw an analysis + * exception + */ + testConcurrentChange("write - remove a column concurrently")( + createTable = (spark: SparkSession, tblPath: String) => { + spark.range(10).withColumn("col2", lit(1)) + .write + .format("delta") + .save(tblPath) + }, + actionToTest = (spark: SparkSession, tblPath: String) => { + val e = intercept[AnalysisException] { + spark.range(11, 20) + .withColumn("col2", lit(1)).write.format("delta") + .mode("append") + .save(tblPath) + } + assert(e.getMessage.contains( + "A schema mismatch detected when writing to the Delta table")) + }, + concurrentChange = dropColFromSampleTable("col2") + ) + + /** + * Removing a column while performing a delete should be caught while + * writing the deleted files(i.e files with rows that were not deleted). + */ + testConcurrentChange("delete - remove a column concurrently")( + createTable = (spark: SparkSession, tblPath: String) => { + spark.range(10).withColumn("col2", lit(1)) + .write + .format("delta") + .save(tblPath) + }, + actionToTest = (spark: SparkSession, tblPath: String) => { + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tblPath) + val e = intercept[Exception] { + deltaTable.delete(col("id") === 1) + } + assert(e.getMessage.contains(s"Can't resolve column col2")) + }, + concurrentChange = dropColFromSampleTable("col2") + ) + + /** + * Removing a column(referenced in condition) while performing a delete will + * result in a no-op. + */ + testConcurrentChange("test delete query against a concurrent query which removes the" + + " delete condition column" + )( + createTable = (spark: SparkSession, tblPath: String) => { + spark.range(10).withColumn("col2", lit(1)) + .repartition(2) + .write + .format("delta") + .save(tblPath) + }, + actionToTest = (spark: SparkSession, tblPath: String) => { + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tblPath) + deltaTable.delete(col("id") === 1) + // check if delete is no-op + checkAnswer( + sql(s"SELECT * FROM delta.`$tblPath`"), + Seq(Row(1), Row(1), Row(1), Row(1), Row(1), Row(1), Row(1), Row(1), Row(1), Row(1))) + }, + concurrentChange = dropColFromSampleTable("id") + ) + + /** + * An update command that has to rewrite files will have the old schema, + * we catch the outdated schema during the write. 
+ */ + testConcurrentChange("update - remove a column concurrently")( + createTable = (spark: SparkSession, tblPath: String) => { + spark.range(10).withColumn("col2", lit(1)) + .write + .format("delta") + .save(tblPath) + }, + actionToTest = (spark: SparkSession, tblPath: String) => { + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tblPath) + val e = intercept[AnalysisException] { + deltaTable.update(col("id") =!= 1, Map("col2" -> lit(-1))) + } + assert(e.getMessage.contains(s"Can't resolve column col2")) + }, + concurrentChange = dropColFromSampleTable("col2") + ) + + /** + * Removing a column(referenced in condition) while performing a update will + * result in a no-op. + */ + testConcurrentChange("test update query against a concurrent query which removes the" + + " update condition column" + )( + createTable = (spark: SparkSession, tblPath: String) => { + spark.range(10).withColumn("col2", lit(1)) + .repartition(2) + .write + .format("delta") + .save(tblPath) + }, + actionToTest = (spark: SparkSession, tblPath: String) => { + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tblPath) + deltaTable.update(col("id") === 1, Map("id" -> lit("2"))) + // check if update is no-op + checkAnswer( + sql(s"SELECT * FROM delta.`$tblPath`"), + Seq(Row(1), Row(1), Row(1), Row(1), Row(1), Row(1), Row(1), Row(1), Row(1), Row(1))) + }, + concurrentChange = dropColFromSampleTable("id") + ) + + /** + * Concurrently drop column in merge condition. Merge command detects the schema change while + * resolving the target and throws an AnalysisException + */ + testConcurrentChange("merge - remove a column in merge condition concurrently")( + createTable = (spark: SparkSession, tblPath: String) => { + spark.range(10).withColumn("col2", lit(1)) + .write + .format("delta") + .save(tblPath) + }, + actionToTest = (spark: SparkSession, tblPath: String) => { + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tblPath) + val sourceDf = spark.range(10).withColumn("col2", lit(2)) + val e = intercept[Exception] { + deltaTable.as("t1") + .merge(sourceDf.as("t2"), "t1.id == t2.id") + .whenNotMatched() + .insertAll() + .whenMatched() + .updateAll() + .execute() + } + checkMergeException(e, "id") + }, + concurrentChange = dropColFromSampleTable("id") + ) + + /** + * Concurrently drop column not in merge condition but in target. Merge command detects the schema + * change while resolving the target and throws an AnalysisException + */ + testConcurrentChange("merge - remove a column not in merge condition concurrently")( + createTable = (spark: SparkSession, tblPath: String) => { + spark.range(10).withColumn("col2", lit(1)) + .write + .format("delta") + .save(tblPath) + }, + actionToTest = (spark: SparkSession, tblPath: String) => { + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tblPath) + val sourceDf = spark.range(10).withColumn("col2", lit(2)) + val e = intercept[Exception] { + deltaTable.as("t1") + .merge(sourceDf.as("t2"), "t1.id == t2.id") + .whenNotMatched() + .insertAll() + .whenMatched() + .updateAll() + .execute() + } + checkMergeException(e, "col2") + }, + concurrentChange = dropColFromSampleTable("col2") + ) + + /** + * Alter table to add a column and at the same time add a column concurrently. 
+ */ + testConcurrentChangeWithTable("alter table add column - remove column and add same column")( + createTable = (spark: SparkSession, tblName: String) => { + spark.range(10).write.format("delta").saveAsTable(tblName) + }, + actionToTest = (spark: SparkSession, tblName: String) => { + val e = intercept[AnalysisException] { + spark.sql(s"ALTER TABLE `$tblName` ADD COLUMNS (col2 string)") + } + assert(e.getMessage.contains("Found duplicate column(s) in adding columns: col2")) + }, + concurrentChange = (spark: SparkSession, tblName: String) => { + spark.read.format("delta").table(tblName) + .withColumn("col2", lit(1)) + .write + .format("delta") + .option("overwriteSchema", "true") + .mode("overwrite") + .saveAsTable(tblName) + } + ) +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/ShowDeltaTableColumnsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/ShowDeltaTableColumnsSuite.scala new file mode 100644 index 00000000000..f291e616d38 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/ShowDeltaTableColumnsSuite.scala @@ -0,0 +1,194 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.File + +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.functions.struct +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +class ShowDeltaTableColumnsSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest + with DeltaTestUtilsForTempViews { + + import testImplicits._ + + private val outputColumnNames = Seq("col_name") + private val outputColumnValues = Seq(Seq("column1"), Seq("column2")) + + protected def checkResult( + result: DataFrame, + expected: Seq[Seq[Any]], + columns: Seq[String]): Unit = { + checkAnswer( + result.select(columns.head, columns.tail: _*), + expected.map { x => Row(x: _*)}) + assert(result.columns.toSeq == outputColumnNames) + } + + private def showDeltaColumnsTest( + fileToTableNameMapper: File => String, + schemaName: Option[String] = None): Unit = { + withDatabase("delta") { + val tempDir = Utils.createTempDir() + Seq(1 -> 1) + .toDF("column1", "column2") + .write + .format("delta") + .mode("overwrite") + .save(tempDir.toString) + + val finalSchema = if (schemaName.nonEmpty) s"FROM ${schemaName.get}" else "" + checkResult(sql(s"SHOW COLUMNS IN ${fileToTableNameMapper(tempDir)} $finalSchema"), + outputColumnValues, + outputColumnNames) + } + } + + test("delta table: table identifier") { + showDeltaColumnsTest(f => s"delta.`${f.toString}`") + } + + test("delta table: table name with separated schema name") { + showDeltaColumnsTest(f => s"`${f.toString}`", schemaName = Some("delta")) + } + + test("non-delta table: table identifier with catalog table") { + // Non-Delta table represent by catalog 
identifier (e.g.: sales.line_ite) is supported in + // SHOW COLUMNS command. + withTable("show_columns") { + sql(s""" + |CREATE TABLE show_columns(column1 INT, column2 INT) + |USING parquet + |COMMENT "describe a non delta table" + """.stripMargin) + checkResult(sql("SHOW COLUMNS IN show_columns"), outputColumnValues, outputColumnNames) + } + } + + test("delta table: table name not found") { + val fakeTableName = s"test_table" + val schemaName = s"delta" + showDeltaColumnsTest(f => s"$schemaName.`${f.toString}`") + val e = intercept[AnalysisException] { + sql(s"SHOW COLUMNS IN `$fakeTableName` IN $schemaName") + } + assert(e.getMessage().contains(s"Table or view not found: $schemaName.$fakeTableName") || + e.getMessage().contains(s"table or view `$schemaName`.`$fakeTableName` cannot be found")) + } + + test("delta table: check duplicated schema name") { + // When `schemaName` and `tableIdentity.database` both exists, we will throw error if they are + // not the same. + val schemaName = s"default" + val tableName = s"test_table" + val fakeSchemaName = s"epsilon" + withTable(tableName) { + sql(s""" + |CREATE TABLE $tableName(column1 INT, column2 INT) + |USING delta + """.stripMargin) + + // when no schema name provided, default schema name is `default`. + checkResult( + sql(s"SHOW COLUMNS IN $tableName"), + outputColumnValues, + outputColumnNames) + checkResult( + sql(s"SHOW COLUMNS IN $schemaName.$tableName"), + outputColumnValues, + outputColumnNames) + + var e = intercept[AnalysisException] { + sql(s"SHOW COLUMNS IN $tableName IN $fakeSchemaName") + } + assert(e + .getMessage() + .contains(s"Table or view not found: $fakeSchemaName.$tableName") || + e.getMessage() + .contains(s"table or view `$fakeSchemaName`.`$tableName` cannot be found")) + + e = intercept[AnalysisException] { + sql(s"SHOW COLUMNS IN $fakeSchemaName.$tableName IN $schemaName") + } + assert(e + .getMessage() + .contains(s"Table or view not found: $fakeSchemaName.$tableName") || + e.getMessage() + .contains(s"table or view `$fakeSchemaName`.`$tableName` cannot be found")) + + e = intercept[AnalysisException] { + sql(s"SHOW COLUMNS IN $schemaName.$tableName IN $fakeSchemaName") + } + assert(e + .getMessage() + .contains(s"SHOW COLUMNS with conflicting databases: '$fakeSchemaName' != '$schemaName'")) + } + } + + testWithTempView(s"show columns on temp view should fallback to Spark") { isSQLTempView => + val tableName = "test_table_2" + withTable(tableName) { + Seq(1 -> 1) + .toDF("column1", "column2") + .write + .format("delta") + .saveAsTable(tableName) + val viewName = "v" + createTempViewFromTable(tableName, isSQLTempView) + checkResult(sql(s"SHOW COLUMNS IN $viewName"), outputColumnValues, outputColumnNames) + } + } + + test(s"delta table: show columns on a nested column") { + withTempDir { tempDir => + (70.to(79).seq ++ 75.to(79).seq) + .toDF("id") + .withColumn("nested", struct(struct('id + 2 as "b", 'id + 3 as "c") as "sub")) + .write + .format("delta") + .save(tempDir.toString) + checkResult( + sql(s"SHOW COLUMNS IN delta.`${tempDir.toString}`"), + Seq(Seq("id"), Seq("nested")), + outputColumnNames) + } + } + + test("delta table: respect the Spark configuration on whether schema name is case sensitive") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + assert(intercept[AnalysisException] { + showDeltaColumnsTest(f => s"delta.`${f.toString}`", schemaName = Some("DELTA")) + }.getMessage().contains(s"SHOW COLUMNS with conflicting databases: 'DELTA' != 'delta'")) + + assert(intercept[AnalysisException] { + 
showDeltaColumnsTest(f => s"DELTA.`${f.toString}`", schemaName = Some("delta")) + }.getMessage().contains(s"SHOW COLUMNS with conflicting databases: 'delta' != 'DELTA'")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + showDeltaColumnsTest(f => s"delta.`${f.toString}`", schemaName = Some("DELTA")) + showDeltaColumnsTest(f => s"DELTA.`${f.toString}`", schemaName = Some("delta")) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/SnapshotManagementSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/SnapshotManagementSuite.scala new file mode 100644 index 00000000000..72fd69e078e --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/SnapshotManagementSuite.scala @@ -0,0 +1,474 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.io.{File, FileNotFoundException, RandomAccessFile} +import java.util.concurrent.ExecutionException + +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.storage.StorageLevel + +class SnapshotManagementSuite extends QueryTest with SQLTestUtils with SharedSparkSession + with DeltaSQLCommandTest { + + + /** + * Truncate an existing checkpoint file to create a corrupt file. 
+ * + * @param path the Delta table path + * @param checkpointVersion the checkpoint version to be updated + * @param shouldBeEmpty whether to create an empty checkpoint file + */ + private def makeCorruptCheckpointFile( + path: String, + checkpointVersion: Long, + shouldBeEmpty: Boolean, + multipart: Option[(Int, Int)] = None): Unit = { + if (multipart.isDefined) { + val (part, totalParts) = multipart.get + val checkpointFile = FileNames.checkpointFileWithParts(new Path(path, "_delta_log"), + checkpointVersion, totalParts)(part - 1).toString + assert(new File(checkpointFile).exists) + val cp = new RandomAccessFile(checkpointFile, "rw") + cp.setLength(if (shouldBeEmpty) 0 else 10) + cp.close() + } else { + val checkpointFile = + FileNames.checkpointFileSingular(new Path(path, "_delta_log"), checkpointVersion).toString + assert(new File(checkpointFile).exists) + val cp = new RandomAccessFile(checkpointFile, "rw") + cp.setLength(if (shouldBeEmpty) 0 else 10) + cp.close() + } + } + + private def deleteLogVersion(path: String, version: Long): Unit = { + val deltaFile = new File(FileNames.deltaFile(new Path(path, "_delta_log"), version).toString) + assert(deltaFile.exists(), s"Could not find $deltaFile") + assert(deltaFile.delete(), s"Failed to delete $deltaFile") + } + + private def deleteCheckpointVersion(path: String, version: Long): Unit = { + val deltaFile = new File( + FileNames.checkpointFileSingular(new Path(path, "_delta_log"), version).toString) + assert(deltaFile.exists(), s"Could not find $deltaFile") + assert(deltaFile.delete(), s"Failed to delete $deltaFile") + } + + private def testWithAndWithoutMultipartCheckpoint(name: String)(f: (Option[Int]) => Unit) = { + testQuietly(name) { + withSQLConf(DeltaSQLConf.DELTA_CHECKPOINT_PART_SIZE.key -> "1") { + f(Some(1)) + f(Some(2)) + } + f(None) + } + } + + testWithAndWithoutMultipartCheckpoint("recover from a corrupt checkpoint: previous checkpoint " + + "doesn't exist") { partToCorrupt => + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + spark.range(10).write.format("delta").save(path) + var deltaLog = DeltaLog.forTable(spark, path) + deltaLog.checkpoint() + + DeltaLog.clearCache() + deltaLog = DeltaLog.forTable(spark, path) + val checkpointParts = deltaLog.snapshot.logSegment.checkpointProvider.topLevelFiles.size + val multipart = partToCorrupt.map((_, checkpointParts)) + + // We have different code paths for empty and non-empty checkpoints + for (testEmptyCheckpoint <- Seq(true, false)) { + makeCorruptCheckpointFile(path, checkpointVersion = 0, + shouldBeEmpty = testEmptyCheckpoint, multipart = multipart) + DeltaLog.clearCache() + // Checkpoint 0 is corrupted. Verify that we can still create the snapshot using + // existing json files. 
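On top of the snapshot construction below, one could also assert that the recovered snapshot still serves the original rows. A small optional, purely illustrative check (the table was created from spark.range(10) above):

// Optional data-level check on the recovered snapshot, shown for illustration only.
checkAnswer(
  spark.read.format("delta").load(path),
  spark.range(10).toDF())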
+ DeltaLog.forTable(spark, path).snapshot + } + } + } + + testWithAndWithoutMultipartCheckpoint("recover from a corrupt checkpoint: previous checkpoint " + + "exists") { partToCorrupt => + withTempDir { tempDir => + // Create checkpoint 0 and 1 + val path = tempDir.getCanonicalPath + spark.range(10).write.format("delta").save(path) + var deltaLog = DeltaLog.forTable(spark, path) + deltaLog.checkpoint() + spark.range(10).write.format("delta").mode("append").save(path) + deltaLog.update() + deltaLog.checkpoint() + + DeltaLog.clearCache() + deltaLog = DeltaLog.forTable(spark, path) + val checkpointParts = deltaLog.snapshot.logSegment.checkpointProvider.topLevelFiles.size + val multipart = partToCorrupt.map((_, checkpointParts)) + + // We have different code paths for empty and non-empty checkpoints + for (testEmptyCheckpoint <- Seq(true, false)) { + makeCorruptCheckpointFile(path, checkpointVersion = 1, + shouldBeEmpty = testEmptyCheckpoint, multipart = multipart) + // Checkpoint 1 is corrupted. Verify that we can still create the snapshot using + // checkpoint 0. + DeltaLog.clearCache() + DeltaLog.forTable(spark, path).snapshot + } + } + } + + testWithAndWithoutMultipartCheckpoint("should not recover when the current checkpoint is " + + "broken but we don't have the entire history") { partToCorrupt => + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + spark.range(10).write.format("delta").save(path) + spark.range(10).write.format("delta").mode("append").save(path) + DeltaLog.forTable(spark, path).checkpoint() + deleteLogVersion(path, version = 0) + DeltaLog.clearCache() + + val deltaLog = DeltaLog.forTable(spark, path) + val checkpointParts = deltaLog.snapshot.logSegment.checkpointProvider.topLevelFiles.size + val multipart = partToCorrupt.map((_, checkpointParts)) + + DeltaLog.clearCache() + + // We have different code paths for empty and non-empty checkpoints, and also different + // code paths when listing with or without a checkpoint hint. 
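For orientation, the files manipulated throughout this suite follow Delta's zero-padded naming scheme produced by the FileNames helpers. The small check below is illustrative only; the 20-digit padding width is inferred from the assertions later in this suite.

// Illustrative only: log and single-file checkpoint naming used by FileNames.
val logPath = new Path(path, "_delta_log")
assert(FileNames.deltaFile(logPath, 1).getName === f"${1L}%020d.json")
assert(FileNames.checkpointFileSingular(logPath, 1).getName ===
  f"${1L}%020d.checkpoint.parquet")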
+ for (testEmptyCheckpoint <- Seq(true, false)) { + makeCorruptCheckpointFile(path, checkpointVersion = 1, + shouldBeEmpty = testEmptyCheckpoint, multipart = multipart) + // When finding a Delta log for the first time, we rely on _last_checkpoint hint + val e = intercept[Exception] { DeltaLog.forTable(spark, path).snapshot } + if (testEmptyCheckpoint) { + // - checkpoint 1 is NOT in the list result + // - try to get an alternative LogSegment in `getLogSegmentForVersion` + // - fail to get an alternative LogSegment + // - throw the below exception + assert(e.isInstanceOf[IllegalStateException] && e.getMessage.contains( + "Couldn't find all part files of the checkpoint version: 1")) + } else { + // - checkpoint 1 is in the list result + // - Snapshot creation triggers state reconstruction + // - fail to read protocol+metadata from checkpoint 1 + // - throw FileReadException + // - fail to get an alternative LogSegment + // - cannot find log file 0 so throw the above checkpoint 1 read failure + // Guava cache wraps the root cause + assert(e.isInstanceOf[SparkException] && + e.getMessage.contains("0001.checkpoint") && + e.getMessage.contains(".parquet is not a Parquet file")) + } + } + } + } + + testWithAndWithoutMultipartCheckpoint("should not recover when both the current and previous " + + "checkpoints are broken") { partToCorrupt => + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + val staleLog = DeltaLog.forTable(spark, path) + DeltaLog.clearCache() + + spark.range(10).write.format("delta").save(path) + val deltaLog = DeltaLog.forTable(spark, path) + deltaLog.checkpoint() + DeltaLog.clearCache() + val checkpointParts0 = + DeltaLog.forTable(spark, path).snapshot.logSegment.checkpointProvider.topLevelFiles.size + + spark.range(10).write.format("delta").mode("append").save(path) + deltaLog.update() + deltaLog.checkpoint() + deleteLogVersion(path, version = 0) + + DeltaLog.clearCache() + val checkpointParts1 = + DeltaLog.forTable(spark, path).snapshot.logSegment.checkpointProvider.topLevelFiles.size + + makeCorruptCheckpointFile(path, checkpointVersion = 0, shouldBeEmpty = false, + multipart = partToCorrupt.map((_, checkpointParts0))) + + val multipart = partToCorrupt.map((_, checkpointParts1)) + + // We have different code paths for empty and non-empty checkpoints + for (testEmptyCheckpoint <- Seq(true, false)) { + makeCorruptCheckpointFile(path, checkpointVersion = 1, + shouldBeEmpty = testEmptyCheckpoint, multipart = multipart) + + // The code paths are different, but the error and message end up being the same: + // + // testEmptyCheckpoint = true: + // - checkpoint 1 is NOT in the list result. + // - fallback to load version 0 using checkpoint 0 + // - fail to read checkpoint 0 + // - cannot find log file 0 so throw the above checkpoint 0 read failure + // + // testEmptyCheckpoint = false: + // - checkpoint 1 is in the list result. 
+ // - Snapshot creation triggers state reconstruction + // - fail to read protocol+metadata from checkpoint 1 + // - fallback to load version 0 using checkpoint 0 + // - fail to read checkpoint 0 + // - cannot find log file 0 so throw the original checkpoint 1 read failure + val e = intercept[SparkException] { staleLog.update() } + val version = if (testEmptyCheckpoint) 0 else 1 + assert(e.getMessage.contains(f"$version%020d.checkpoint") && + e.getMessage.contains(".parquet is not a Parquet file")) + } + } + } + + test("should throw a clear exception when checkpoint exists but its corresponding delta file " + + "doesn't exist") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + val staleLog = DeltaLog.forTable(spark, path) + DeltaLog.clearCache() + + spark.range(10).write.format("delta").save(path) + DeltaLog.forTable(spark, path).checkpoint() + // Delete delta files + new File(tempDir, "_delta_log").listFiles().filter(_.getName.endsWith(".json")) + .foreach(_.delete()) + val e = intercept[IllegalStateException] { + staleLog.update() + } + assert(e.getMessage.contains("Could not find any delta files for version 0")) + } + } + + test("should throw an exception when trying to load a non-existent version") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + val staleLog = DeltaLog.forTable(spark, path) + DeltaLog.clearCache() + + spark.range(10).write.format("delta").save(path) + DeltaLog.forTable(spark, path).checkpoint() + val e = intercept[IllegalStateException] { + staleLog.getSnapshotAt(2) + } + assert(e.getMessage.contains("Trying to load a non-existent version 2")) + } + } + + test("should throw a clear exception when the checkpoint is corrupt " + + "but could not find any delta files") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + val staleLog = DeltaLog.forTable(spark, path) + DeltaLog.clearCache() + + spark.range(10).write.format("delta").save(path) + DeltaLog.forTable(spark, path).checkpoint() + // Delete delta files + new File(tempDir, "_delta_log").listFiles().filter(_.getName.endsWith(".json")) + .foreach(_.delete()) + makeCorruptCheckpointFile(path, checkpointVersion = 0, shouldBeEmpty = false) + val e = intercept[IllegalStateException] { + staleLog.update() + } + assert(e.getMessage.contains("Could not find any delta files for version 0")) + } + } + + test("verifyDeltaVersions") { + import SnapshotManagement.verifyDeltaVersions + // empty array + verifyDeltaVersions( + spark, + versions = Array.empty, + expectedStartVersion = None, + expectedEndVersion = None) + // contiguous versions + verifyDeltaVersions( + spark, + versions = Array(1, 2, 3), + expectedStartVersion = None, + expectedEndVersion = None) + // contiguous versions with correct `expectedStartVersion` and `expectedStartVersion` + verifyDeltaVersions( + spark, + versions = Array(1, 2, 3), + expectedStartVersion = None, + expectedEndVersion = Some(3)) + verifyDeltaVersions( + spark, + versions = Array(1, 2, 3), + expectedStartVersion = Some(1), + expectedEndVersion = None) + verifyDeltaVersions( + spark, + versions = Array(1, 2, 3), + expectedStartVersion = Some(1), + expectedEndVersion = Some(3)) + // `expectedStartVersion` or `expectedEndVersion` doesn't match + intercept[IllegalArgumentException] { + verifyDeltaVersions( + spark, + versions = Array(1, 2), + expectedStartVersion = Some(0), + expectedEndVersion = None) + } + intercept[IllegalArgumentException] { + verifyDeltaVersions( + spark, + versions = Array(1, 2), + expectedStartVersion = None, + 
expectedEndVersion = Some(3)) + } + intercept[IllegalArgumentException] { + verifyDeltaVersions( + spark, + versions = Array.empty, + expectedStartVersion = Some(0), + expectedEndVersion = None) + } + intercept[IllegalArgumentException] { + verifyDeltaVersions( + spark, + versions = Array.empty, + expectedStartVersion = None, + expectedEndVersion = Some(3)) + } + // non contiguous versions + intercept[IllegalStateException] { + verifyDeltaVersions( + spark, + versions = Array(1, 3), + expectedStartVersion = None, + expectedEndVersion = None) + } + // duplicates in versions + intercept[IllegalStateException] { + verifyDeltaVersions( + spark, + versions = Array(1, 2, 2, 3), + expectedStartVersion = None, + expectedEndVersion = None) + } + // unsorted versions + intercept[IllegalStateException] { + verifyDeltaVersions( + spark, + versions = Array(3, 2, 1), + expectedStartVersion = None, + expectedEndVersion = None) + } + } + + test("configurable snapshot cache storage level") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + spark.range(10).write.format("delta").save(path) + DeltaLog.clearCache() + // Corrupted snapshot tests leave a cached snapshot not tracked by the DeltaLog cache + sparkContext.getPersistentRDDs.foreach(_._2.unpersist()) + assert(sparkContext.getPersistentRDDs.isEmpty) + + withSQLConf(DeltaSQLConf.DELTA_SNAPSHOT_CACHE_STORAGE_LEVEL.key -> "DISK_ONLY") { + DeltaLog.forTable(spark, path).snapshot.stateDS.collect() + val persistedRDDs = sparkContext.getPersistentRDDs + assert(persistedRDDs.size == 1) + assert(persistedRDDs.values.head.getStorageLevel == StorageLevel.DISK_ONLY) + } + + DeltaLog.clearCache() + assert(sparkContext.getPersistentRDDs.isEmpty) + + withSQLConf(DeltaSQLConf.DELTA_SNAPSHOT_CACHE_STORAGE_LEVEL.key -> "NONE") { + DeltaLog.forTable(spark, path).snapshot.stateDS.collect() + val persistedRDDs = sparkContext.getPersistentRDDs + assert(persistedRDDs.size == 1) + assert(persistedRDDs.values.head.getStorageLevel == StorageLevel.NONE) + } + + DeltaLog.clearCache() + assert(sparkContext.getPersistentRDDs.isEmpty) + + withSQLConf(DeltaSQLConf.DELTA_SNAPSHOT_CACHE_STORAGE_LEVEL.key -> "invalid") { + intercept[IllegalArgumentException] { + spark.read.format("delta").load(path).collect() + } + } + } + } + + test("SerializableFileStatus json serialization/deserialization") { + val testCases = Seq( + SerializableFileStatus(path = "xyz", length = -1, isDir = true, modificationTime = 0) + -> """{"path":"xyz","length":-1,"isDir":true,"modificationTime":0}""", + SerializableFileStatus( + path = "s3://a.b/pq", length = 123L, isDir = false, modificationTime = 246L) + -> """{"path":"s3://a.b/pq","length":123,"isDir":false,"modificationTime":246}""" + ) + for ((obj, json) <- testCases) { + assert(JsonUtils.toJson(obj) == json) + val status = JsonUtils.fromJson[SerializableFileStatus](json) + assert(status.modificationTime === obj.modificationTime) + assert(status.isDir === obj.isDir) + assert(status.length === obj.length) + assert(status.path === obj.path) + } + } + + test("getLogSegmentAfterCommit can find specified commit") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + val log = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + val oldLogSegment = log.snapshot.logSegment + spark.range(10).write.format("delta").save(path) + val newLogSegment = log.snapshot.logSegment + assert(log.getLogSegmentAfterCommit(oldLogSegment.checkpointProvider) === newLogSegment) + spark.range(10).write.format("delta").mode("append").save(path) 
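Stepping back: the verifyDeltaVersions cases above all reduce to one invariant, namely that the listed versions must be sorted, duplicate-free, and contiguous. The sketch below states that invariant conceptually; it is not the actual implementation, which also validates the optional start/end bounds and throws instead of returning a Boolean.

// Conceptual sketch of the contiguity invariant, not part of this patch.
def isContiguous(versions: Seq[Long]): Boolean =
  versions.isEmpty || versions.zip(versions.tail).forall { case (a, b) => b == a + 1 }

assert(isContiguous(Seq(1L, 2L, 3L)))      // accepted above
assert(!isContiguous(Seq(1L, 3L)))         // gap, rejected
assert(!isContiguous(Seq(1L, 2L, 2L, 3L))) // duplicate, rejected
assert(!isContiguous(Seq(3L, 2L, 1L)))     // unsorted, rejected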
+ assert(log.getLogSegmentAfterCommit(oldLogSegment.checkpointProvider) + === log.snapshot.logSegment) + } + } + + testQuietly("checkpoint/json not found when executor restart " + + "after expired checkpoints in the snapshot cache are cleaned up") { + withTempDir { tempDir => + // Create checkpoint 1 and 3 + val path = tempDir.getCanonicalPath + spark.range(10).write.format("delta").save(path) + spark.range(10).write.format("delta").mode("append").save(path) + val deltaLog = DeltaLog.forTable(spark, path) + deltaLog.checkpoint() + spark.range(10).write.format("delta").mode("append").save(path) + spark.range(10).write.format("delta").mode("append").save(path) + deltaLog.checkpoint() + // simulate checkpoint 1 expires and is cleaned up + deleteCheckpointVersion(path, 1) + // simulate executor hangs and restart, cache invalidation + deltaLog.snapshot.uncache() + + spark.read.format("delta").load(path).collect() + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/TightBoundsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/TightBoundsSuite.scala new file mode 100644 index 00000000000..7a9bca8d6b0 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/TightBoundsSuite.scala @@ -0,0 +1,246 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import scala.collection.mutable.ArrayBuffer + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.DeltaStatistics.{MIN, NULL_COUNT, NUM_RECORDS, TIGHT_BOUNDS} +import org.apache.spark.sql.delta.stats.StatisticsCollection +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.functions.{col, lit, map_values, when} +import org.apache.spark.sql.test.SharedSparkSession + +class TightBoundsSuite + extends QueryTest + with SharedSparkSession + with DeletionVectorsTestUtils + with DeltaSQLCommandTest { + + override def beforeAll(): Unit = { + super.beforeAll() + enableDeletionVectors(spark.conf) + } + + test("Validate TIGHT_BOUND column") { + val targetDF = createTestDF(0, 100, 2) + val sourceDF = targetDF + + def runDelete(target: io.delta.tables.DeltaTable): Int = { + target.delete("id >= 75") + 2 // Expected number of files. 
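// Illustrative aside (not part of this patch): the `operations` buffer declared just below is
// built to accept further deletion-vector-producing operations. Based on the expected file
// counts discussed later in this test, a hypothetical UPDATE variant might look like:
//
//   def runUpdate(target: io.delta.tables.DeltaTable): Int = {
//     target.updateExpr("id >= 75", Map("id" -> "id + 1000"))
//     3 // UPDATE also rewrites the updated rows into a new file, so one extra file is expected.
//   }
//
//   operations += runUpdate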
+ } + + val operations = ArrayBuffer[io.delta.tables.DeltaTable => Int](runDelete) + for { + // Make sure it works for all operations that add DVs + runOperation <- operations + // Make sure tightBounds update is backwards compatible + tightBoundDisabled <- BOOLEAN_DOMAIN + } { + val conf = Seq( + DeltaSQLConf.TIGHT_BOUND_COLUMN_ON_FILE_INIT_DISABLED.key -> tightBoundDisabled.toString) + + withSQLConf(conf: _*) { + withTempDeltaTable(targetDF) { (targetTable, targetLog) => + val snapshotBeforeOperation = targetLog.update() + val statsColumnName = snapshotBeforeOperation.getBaseStatsColumnName + val tightBoundsValuesBeforeOperation = snapshotBeforeOperation.withStatsDeduplicated + .select(col(s"${statsColumnName}.$TIGHT_BOUNDS")) + .collect() + + assert(tightBoundsValuesBeforeOperation.length === 2) + val expectedTightBoundsValue = if (tightBoundDisabled) "[null]" else "[true]" + tightBoundsValuesBeforeOperation + .foreach(r => assert(r.toString == expectedTightBoundsValue)) + + val expectedNumberOfFiles = runOperation(targetTable()) + // All operations only touch the second file. + assert(getFilesWithDeletionVectors(targetLog).size == 1) + + val snapshotAfterOperation = targetLog.update() + val tightBoundsValuesAfterOperation = snapshotAfterOperation.withStatsDeduplicated + // Order by returns non-null DVs last. Thus, the file with the wide bounds + // should be the last one. + .orderBy(col("deletionVector").asc_nulls_first) + .select(col(s"${statsColumnName}.$TIGHT_BOUNDS")) + .collect() + + // Make sure tightsBounds is generated even for files that initially + // did not contain the column. Note, we expect 2 files each from merge and delete + // operations and three from update. This is because update creates a new file for the + // updated rows. + assert(tightBoundsValuesAfterOperation.length === expectedNumberOfFiles) + assert(tightBoundsValuesAfterOperation.head.toString === expectedTightBoundsValue) + assert(tightBoundsValuesAfterOperation.last.toString === "[false]") + } + } + } + } + + test("Verify exception is thrown if we commit files with DVs and tight bounds") { + val targetDF = createTestDF(0, 100, 2) + withTempDeltaTable(targetDF, enableDVs = true) { (targetTable, targetLog) => + // Remove one record from each file. + targetTable().delete("id in (0, 50)") + verifyDVsExist(targetLog, 2) + + // Commit actions with DVs and tight bounds. + val txn = targetLog.startTransaction() + val addFiles = txn.snapshot.allFiles.collect().toSeq.map { action => + action.copy(stats = + s"""{"${NUM_RECORDS}":${action.numPhysicalRecords.get}, + | "${TIGHT_BOUNDS}":true}""".stripMargin) + } + + val exception = intercept[DeltaIllegalStateException] { + txn.commitManually(addFiles: _*) + } + assert(exception.getErrorClass === + "DELTA_ADDING_DELETION_VECTORS_WITH_TIGHT_BOUNDS_DISALLOWED") + } + } + + protected def getStatFromLastFile(snapshot: Snapshot, statName: String): Row = { + val statsColumnName = snapshot.getBaseStatsColumnName + snapshot + .withStatsDeduplicated + .select(s"$statsColumnName.$statName") + .orderBy(s"$statsColumnName.$MIN") + .collect() + .last + } + + protected def getStatFromLastFileWithDVs(snapshot: Snapshot, statName: String): Row = { + val statsColumnName = snapshot.getBaseStatsColumnName + snapshot + .withStatsDeduplicated + .filter("isNotNull(deletionVector)") + .select(s"$statsColumnName.$statName") + .collect() + .last + } + + /** + * Helper method that returns stats for every file in the snapshot as row objects. 
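+   * Files are returned in ascending partition-value order, so tests can refer to them
+   * deterministically as file 1, file 2, and so on.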
+ * + * Return value schema is { + * numRecords: Int, + * RminValues: Row(Int, Int, ...), // Min value for each column + * maxValues: Row(Int, Int, ...), // Max value for each column + * nullCount: Row(Int, Int, ...), // Null count for each column + * tightBounds: boolean + * } + */ + protected def getStatsInPartitionOrder(snapshot: Snapshot): Array[Row] = { + val statsColumnName = snapshot.getBaseStatsColumnName + snapshot + .withStatsDeduplicated + .orderBy(map_values(col("partitionValues"))) + .select(s"$statsColumnName.*") + .collect() + } + + protected def getNullCountFromFirstFileWithDVs(snapshot: Snapshot): Row = { + // Note, struct columns in Spark are returned with datatype Row. + getStatFromLastFile(snapshot, NULL_COUNT) + .getAs[Row](NULL_COUNT) + } + + test("NULL COUNT is updated correctly when all values are nulls" + ) { + val targetDF = spark.range(0, 100, 1, 2) + .withColumn("value", when(col("id") < 25, col("id")) + .otherwise(null)) + + withTempDeltaTable(targetDF, enableDVs = true) { (targetTable, targetLog) => + targetTable().delete("id >= 80") + assert(getNullCountFromFirstFileWithDVs(targetLog.update()) === Row(0, 50)) + + targetTable().delete("id >= 70") + assert(getNullCountFromFirstFileWithDVs(targetLog.update()) === Row(0, 50)) + } + } + + test("NULL COUNT is updated correctly where there are no nulls" + ) { + val targetDF = spark.range(0, 100, 1, 2) + .withColumn("value", col("id")) + + withTempDeltaTable(targetDF, enableDVs = true) { (targetTable, targetLog) => + val expectedResult = Row(0, 0) + targetTable().delete("id >= 80") + assert(getNullCountFromFirstFileWithDVs(targetLog.update()) === expectedResult) + + targetTable().delete("id >= 70") + assert(getNullCountFromFirstFileWithDVs(targetLog.update()) === expectedResult) + } + } + + test("NULL COUNT is updated correctly when some values are nulls" + ) { + val targetDF = spark.range(0, 100, 1, 2) + .withColumn("value", when(col("id") < 75, col("id")) + .otherwise(null)) + + withTempDeltaTable(targetDF, enableDVs = true) { (targetTable, targetLog) => + targetTable().delete("id >= 80") + assert(getNullCountFromFirstFileWithDVs(targetLog.update()) === Row(0, 25)) + + targetTable().delete("id >= 70") + assert(getNullCountFromFirstFileWithDVs(targetLog.update()) === Row(0, 25)) + } + } + + test("DML operations fetch stats on tables with partial stats") { + val targetDF = createTestDF(0, 200, 4) + .withColumn("v", col("id")) + .withColumn("partCol", (col("id") / lit(50)).cast("Int")) + + val conf = Seq(DeltaSQLConf.DELTA_COLLECT_STATS.key -> false.toString) + withTempDeltaTable(targetDF, Seq("partCol"), conf = conf) { (targetTable, targetLog) => + val statsBeforeFirstDelete = getStatsInPartitionOrder(targetLog.update()) + val expectedStatsBeforeFirstDelete = Seq( + Row(null, null, null, null, null), // File 1. + Row(null, null, null, null, null), // File 2. + Row(null, null, null, null, null), // File 3. + Row(null, null, null, null, null) // File 4. + ) + assert(statsBeforeFirstDelete === expectedStatsBeforeFirstDelete) + + // This operation touches files 2 and 3. Files 1 and 4 should still have not stats. + targetTable().delete("id in (50, 100)") + + // Expect the stats for every file that got a DV added to it with tightBounds = false + val statsAfterFirstDelete = getStatsInPartitionOrder(targetLog.update()) + val expectedStatsAfterFirstDelete = Seq( + Row(null, null, null, null, null), // File 1. + Row(50, Row(50, 50), Row(99, 99), Row(0, 0), false), // File 2. 
+ Row(50, Row(100, 100), Row(149, 149), Row(0, 0), false), // File 3. + Row(null, null, null, null, null) // File 4. + ) + assert(statsAfterFirstDelete === expectedStatsAfterFirstDelete) + } + } +} + +class TightBoundsColumnMappingSuite extends TightBoundsSuite with DeltaColumnMappingEnableIdMode diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/TimestampLocalFileSystem.scala b/spark/src/test/scala/org/apache/spark/sql/delta/TimestampLocalFileSystem.scala new file mode 100644 index 00000000000..59a066e3dbc --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/TimestampLocalFileSystem.scala @@ -0,0 +1,71 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import java.net.URI + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{DelegateToFileSystem, Path, RawLocalFileSystem} +import org.apache.hadoop.fs.FileStatus + +/** + * This custom fs implementation is used for testing the msync calling in HDFSLogStore writes. + * If `msync` is not called, `listStatus` will return stale results. + */ +class TimestampLocalFileSystem extends RawLocalFileSystem { + + private var uri: URI = _ + private var latestTimestamp: Long = 0 + + override def getScheme: String = TimestampLocalFileSystem.scheme + + override def initialize(name: URI, conf: Configuration): Unit = { + uri = URI.create(name.getScheme + ":///") + super.initialize(name, conf) + } + + override def getUri(): URI = if (uri == null) { + // RawLocalFileSystem's constructor will call this one before `initialize` is called. + // Just return the super's URI to avoid NPE. + super.getUri + } else { + uri + } + + override def listStatus(path: Path): Array[FileStatus] = { + super.listStatus(path).filter(_.getModificationTime <= latestTimestamp) + } + + override def msync(): Unit = { + latestTimestamp = System.currentTimeMillis() + } +} + +class TimestampAbstractFileSystem(uri: URI, conf: Configuration) + extends DelegateToFileSystem( + uri, + new TimestampLocalFileSystem, + conf, + TimestampLocalFileSystem.scheme, + false) + +/** + * Singleton for BlockWritesLocalFileSystem used to initialize the file system countdown latch. + */ +object TimestampLocalFileSystem { + val scheme = "ts" +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/UpdateMetricsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateMetricsSuite.scala new file mode 100644 index 00000000000..d8bc64bfa65 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateMetricsSuite.scala @@ -0,0 +1,353 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import com.databricks.spark.util.DatabricksLogging +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.{Dataset, QueryTest} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.expr +import org.apache.spark.sql.test.SharedSparkSession + +/** + * Tests for metrics of Delta UPDATE command. + */ +class UpdateMetricsSuite extends QueryTest + with SharedSparkSession + with DatabricksLogging + with DeltaSQLCommandTest { + + + /** + * Case class to parameterize tests. + */ + case class TestConfiguration( + partitioned: Boolean, + cdfEnabled: Boolean + ) + + /** + * Case class to parameterize metric results. + */ + case class TestMetricResults( + operationMetrics: Map[String, Long] + ) + + /** + * Helper to generate tests for all configuration parameters. + */ + protected def testUpdateMetrics(name: String)(testFn: TestConfiguration => Unit): Unit = { + for { + partitioned <- BOOLEAN_DOMAIN + cdfEnabled <- Seq(false) + } { + val testConfig = + TestConfiguration(partitioned = partitioned, + cdfEnabled = cdfEnabled + ) + var testName = + s"update-metrics: $name - Partitioned = $partitioned, cdfEnabled = $cdfEnabled" + test(testName) { + testFn(testConfig) + } + } + } + + + /** + * Create a table from the provided dataset. + * + * If an partitioned table is needed, then we create one data partition per Spark partition, + * i.e. every data partition will contain one file. + * + * Also an extra column is added to be used in non-partition filters. + */ + protected def createTempTable( + table: Dataset[_], + tableName: String, + testConfig: TestConfiguration): Unit = { + val numRows = table.count() + val numPartitions = table.rdd.getNumPartitions + val numRowsPerPart = if (numRows > 0 && numPartitions < numRows) { + numRows / numPartitions + } else { + 1 + } + val partitionBy = if (testConfig.partitioned) { + Seq("partCol") + } else { + Seq() + } + table.withColumn("partCol", expr(s"floor(id / $numRowsPerPart)")) + .withColumn("extraCol", expr(s"$numRows - id")) + .write + .partitionBy(partitionBy: _*) + .format("delta") + .saveAsTable(tableName) + } + + /** + * Run an update command and capture operation metrics from Delta log. 
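+   * Metrics are fetched via DeltaMetricsUtils.getLastOperationMetrics and additionally
+   * cross-checked against the actions recorded in the resulting commit.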
+ * + */ + private def runUpdateAndCaptureMetrics( + table: Dataset[_], + where: String, + testConfig: TestConfiguration): TestMetricResults = { + val tableName = "target" + val whereClause = if (where.nonEmpty) { + s"WHERE $where" + } else { + "" + } + var operationMetrics: Map[String, Long] = null + import testImplicits._ + withSQLConf( + DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true", + DeltaSQLConf.DELTA_SKIP_RECORDING_EMPTY_COMMITS.key -> "false", + DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> testConfig.cdfEnabled.toString) { + withTable(tableName) { + createTempTable(table, tableName, testConfig) + val resultDf = spark.sql(s"UPDATE $tableName SET id = -1 $whereClause") + operationMetrics = DeltaMetricsUtils.getLastOperationMetrics(tableName) + + // Check operation metrics against commit actions. + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tableName)) + DeltaMetricsUtils.checkOperationMetricsAgainstCommitActions( + deltaLog, deltaLog.update().version, operationMetrics) + } + } + TestMetricResults( + operationMetrics + ) + } + + /** + * Run an update command and check all available metrics. + * Metrics whose expected value is set to -1 are not checked. + */ + private def runUpdateAndCheckMetrics( + table: Dataset[_], + where: String, + expectedOperationMetrics: Map[String, Long], + testConfig: TestConfiguration): Unit = { + // Run the update and capture all metrics. + val results = runUpdateAndCaptureMetrics(table, where, testConfig) + + // Check operation metrics schema. + val unknownKeys = results.operationMetrics.keySet -- DeltaOperationMetrics.UPDATE -- + DeltaOperationMetrics.WRITE + assert(unknownKeys.isEmpty, + s"Unknown operation metrics for UPDATE command: ${unknownKeys.mkString(", ")}") + + // Check values of expected operation metrics. For all unspecified deterministic metrics, + // we implicitly expect a zero value. + val requiredMetrics = Set( + "numCopiedRows", + "numUpdatedRows", + "numAddedFiles", + "numRemovedFiles", + "numAddedChangeFiles") + val expectedMetricsWithDefaults = + requiredMetrics.map(k => k -> 0L).toMap ++ expectedOperationMetrics + val expectedMetricsFiltered = expectedMetricsWithDefaults.filter(_._2 >= 0) + DeltaMetricsUtils.checkOperationMetrics( + expectedMetrics = expectedMetricsFiltered, + operationMetrics = results.operationMetrics) + + + // Check time operation metrics.
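+    // A time metric whose expected value is explicitly set to -1 is excluded from this check;
+    // only the remaining time metrics are validated.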
+ val expectedTimeMetrics = + Set("scanTimeMs", "rewriteTimeMs", "executionTimeMs").filter( + k => expectedOperationMetrics.get(k).forall(_ >= 0) + ) + DeltaMetricsUtils.checkOperationTimeMetrics( + operationMetrics = results.operationMetrics, + expectedMetrics = expectedTimeMetrics) + } + + + for (whereClause <- Seq("", "1 = 1")) { + testUpdateMetrics(s"update all with where = '$whereClause'") { testConfig => + val numFiles = 5 + val numRows = 100 + val numAddedChangeFiles = if (testConfig.partitioned && testConfig.cdfEnabled) { + 5 + } else if (testConfig.cdfEnabled) { + 2 + } else { + 0 + } + runUpdateAndCheckMetrics( + table = spark.range(start = 0, end = numRows, step = 1, numPartitions = numFiles), + where = whereClause, + expectedOperationMetrics = Map( + "numCopiedRows" -> 0, + "numUpdatedRows" -> -1, + "numOutputRows" -> -1, + "numFiles" -> -1, + "numAddedFiles" -> -1, + "numRemovedFiles" -> numFiles, + "numAddedChangeFiles" -> numAddedChangeFiles + ), + testConfig = testConfig + ) + } + } + + testUpdateMetrics("update with false predicate") { testConfig => + runUpdateAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = 5), + where = "1 != 1", + expectedOperationMetrics = Map( + "numCopiedRows" -> 0, + "numUpdatedRows" -> 0, + "numAddedFiles" -> 0, + "numRemovedFiles" -> 0, + "numAddedChangeFiles" -> 0, + "scanTimeMs" -> -1, + "rewriteTimeMs" -> -1, + "executionTimeMs" -> -1 + ), + testConfig = testConfig + ) + } + + testUpdateMetrics("update with unsatisfied static predicate") { testConfig => + runUpdateAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = 5), + where = "id < 0 or id > 100", + expectedOperationMetrics = Map( + "numCopiedRows" -> 0, + "numUpdatedRows" -> 0, + "numAddedFiles" -> 0, + "numRemovedFiles" -> 0, + "numAddedChangeFiles" -> 0, + "scanTimeMs" -> -1, + "rewriteTimeMs" -> -1, + "executionTimeMs" -> -1 + ), + testConfig = testConfig + ) + } + + testUpdateMetrics("update with unsatisfied dynamic predicate") { testConfig => + runUpdateAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = 5), + where = "id / 200 > 1 ", + expectedOperationMetrics = Map( + "numCopiedRows" -> 0, + "numUpdatedRows" -> 0, + "numAddedFiles" -> 0, + "numRemovedFiles" -> 0, + "numAddedChangeFiles" -> 0, + "scanTimeMs" -> -1, + "rewriteTimeMs" -> -1, + "executionTimeMs" -> -1 + ), + testConfig = testConfig + ) + } + + for (whereClause <- Seq("id = 0", "id >= 49 and id < 50")) { + testUpdateMetrics(s"update one row with where = `$whereClause`") { testConfig => + var numCopiedRows = 19 + val numAddedFiles = 1 + var numRemovedFiles = 1 + runUpdateAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = 5), + where = whereClause, + expectedOperationMetrics = Map( + "numCopiedRows" -> numCopiedRows, + "numUpdatedRows" -> 1, + "numAddedFiles" -> numAddedFiles, + "numRemovedFiles" -> numRemovedFiles, + "numAddedChangeFiles" -> { + if (testConfig.cdfEnabled) { + 1 + } else { + 0 + } + } + ), + testConfig = testConfig + ) + } + } + + testUpdateMetrics("update one file") { testConfig => + runUpdateAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = 5), + where = "id < 20", + expectedOperationMetrics = Map( + "numCopiedRows" -> 0, + "numUpdatedRows" -> 20, + "numAddedFiles" -> 1, + "numRemovedFiles" -> 1, + "numAddedChangeFiles" -> { + if (testConfig.cdfEnabled) { + 1 + } else { + 0 + } + } + ), + testConfig = testConfig + ) + } + + 
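+  // One row is updated in each of the five files (ids 5, 25, 45, 65, 85 out of 100 rows,
+  // 20 per file), so all five files are rewritten: numUpdatedRows = 5 and
+  // numCopiedRows = 100 - 5 = 95. The unpartitioned rewrite produces 2 files in this setup.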
testUpdateMetrics("update one row per file") { testConfig => + val numPartitions = 5 + var numCopiedRows = 95 + val numAddedFiles = if (testConfig.partitioned) 5 else 2 + var numRemovedFiles = 5 + var unpartitionedNumAddFiles = 2 + runUpdateAndCheckMetrics( + table = spark.range(start = 0, end = 100, step = 1, numPartitions = numPartitions), + where = "id in (5, 25, 45, 65, 85)", + expectedOperationMetrics = Map( + "numCopiedRows" -> numCopiedRows, + "numUpdatedRows" -> 5, + "numAddedFiles" -> { + if (testConfig.partitioned) { + 5 + } else { + unpartitionedNumAddFiles + } + }, + "numRemovedFiles" -> numRemovedFiles, + "numAddedChangeFiles" -> { + if (testConfig.cdfEnabled) { + if (testConfig.partitioned) { + 5 + } else { + unpartitionedNumAddFiles + } + } else { + 0 + } + } + ), + testConfig = testConfig + ) + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSQLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSQLSuite.scala new file mode 100644 index 00000000000..8b1777b4a65 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSQLSuite.scala @@ -0,0 +1,336 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.actions.{AddFile, FileAction, RemoveFile} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.{DeltaExcludedTestMixin, DeltaSQLCommandTest} + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.errors.QueryExecutionErrors.toSQLType +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy + +class UpdateSQLSuite extends UpdateSuiteBase + with DeltaSQLCommandTest { + + import testImplicits._ + + test("explain") { + append(Seq((2, 2)).toDF("key", "value")) + val df = sql(s"EXPLAIN UPDATE delta.`$tempPath` SET key = 1, value = 2 WHERE key = 2") + val outputs = df.collect().map(_.mkString).mkString + assert(outputs.contains("Delta")) + assert(!outputs.contains("index") && !outputs.contains("ActionLog")) + // no change should be made by explain + checkAnswer(readDeltaTable(tempPath), Row(2, 2)) + } + + test("SC-11376: Update command should check target columns during analysis, same key") { + val targetDF = spark.read.json( + """ + {"a": {"c": {"d": 'random', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""" + .split("\n").toSeq.toDS()) + + testAnalysisException( + targetDF, + set = "z = 30" :: "z = 40" :: Nil, + errMsgs = "There is a conflict from these SET columns" :: Nil) + + testAnalysisException( + targetDF, + set = "a.c.d = 'rand'" :: "a.c.d = 'RANDOM2'" :: Nil, + errMsgs = "There is a conflict from these SET columns" :: Nil) + } + + test("update a 
dataset temp view") { + withTable("tab") { + withTempView("v") { + Seq((0, 3)).toDF("key", "value").write.format("delta").saveAsTable("tab") + spark.table("tab").as("name").createTempView("v") + sql("UPDATE v SET key = 1 WHERE key = 0 AND value = 3") + checkAnswer(spark.table("tab"), Row(1, 3)) + } + } + } + + test("update a SQL temp view") { + withTable("tab") { + withTempView("v") { + Seq((0, 3)).toDF("key", "value").write.format("delta").saveAsTable("tab") + sql("CREATE TEMP VIEW v AS SELECT * FROM tab") + QueryTest.checkAnswer(sql("UPDATE v SET key = 1 WHERE key = 0 AND value = 3"), Seq(Row(1))) + checkAnswer(spark.table("tab"), Row(1, 3)) + } + } + } + + Seq(true, false).foreach { partitioned => + test(s"User defined _change_type column doesn't get dropped - partitioned=$partitioned") { + withTable("tab") { + sql( + s"""CREATE TABLE tab USING DELTA + |${if (partitioned) "PARTITIONED BY (part) " else ""} + |TBLPROPERTIES (delta.enableChangeDataFeed = false) + |AS SELECT id, int(id / 10) AS part, 'foo' as _change_type + |FROM RANGE(1000) + |""".stripMargin) + val rowsToUpdate = (1 to 1000 by 42).mkString("(", ", ", ")") + executeUpdate("tab", "_change_type = 'bar'", s"id in $rowsToUpdate") + sql("SELECT id, _change_type FROM tab").collect().foreach { row => + val _change_type = row.getString(1) + assert(_change_type === "foo" || _change_type === "bar", + s"Invalid _change_type for id=${row.get(0)}") + } + } + } + } + + // The following two tests are run only against the SQL API because using the Scala API + // incorrectly triggers the analyzer rule [[ResolveRowLevelCommandAssignments]] which allows + // the casts without respecting the value of `storeAssignmentPolicy`. + + // Casts that are not valid upcasts (e.g. string -> boolean) are not allowed with + // storeAssignmentPolicy = STRICT. + test("invalid implicit cast string source type into boolean target, " + + s"storeAssignmentPolicy = ${StoreAssignmentPolicy.STRICT}") { + append(Seq((99, true), (100, false), (101, true)).toDF("key", "value")) + withSQLConf( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.STRICT.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false") { + checkError( + exception = intercept[AnalysisException] { + executeUpdate(target = s"delta.`$tempPath`", set = "value = 'false'") + }, + errorClass = "CANNOT_UP_CAST_DATATYPE", + parameters = Map( + "expression" -> "'false'", + "sourceType" -> toSQLType("STRING"), + "targetType" -> toSQLType("BOOLEAN"), + "details" -> ("The type path of the target object is:\n\nYou can either add an explicit " + + "cast to the input data or choose a higher precision type of the field in the target " + + "object"))) + } + } + + // Implicit casts that are not upcasts are not allowed with storeAssignmentPolicy = STRICT. 
+ test("valid implicit cast string source type into int target, " + + s"storeAssignmentPolicy = ${StoreAssignmentPolicy.STRICT}") { + append(Seq((99, 2), (100, 4), (101, 3)).toDF("key", "value")) + withSQLConf( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.STRICT.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false") { + checkError( + exception = intercept[AnalysisException] { + executeUpdate(target = s"delta.`$tempPath`", set = "value = '5'") + }, + errorClass = "CANNOT_UP_CAST_DATATYPE", + parameters = Map( + "expression" -> "'5'", + "sourceType" -> toSQLType("STRING"), + "targetType" -> toSQLType("INT"), + "details" -> ("The type path of the target object is:\n\nYou can either add an explicit " + + "cast to the input data or choose a higher precision type of the field in the target " + + "object"))) + } + } + + override protected def executeUpdate( + target: String, + set: String, + where: String = null): Unit = { + val whereClause = Option(where).map(c => s"WHERE $c").getOrElse("") + sql(s"UPDATE $target SET $set $whereClause") + } +} + +class UpdateSQLWithDeletionVectorsSuite extends UpdateSQLSuite + with DeltaExcludedTestMixin + with DeletionVectorsTestUtils { + override def beforeAll(): Unit = { + super.beforeAll() + enableDeletionVectors(spark, update = true) + } + + override def excluded: Seq[String] = super.excluded ++ + Seq( + // The following two tests must fail when DV is used. Covered by another test case: + // "throw error when non-pinned TahoeFileIndex snapshot is used". + "data and partition predicates - Partition=true Skipping=false", + "data and partition predicates - Partition=false Skipping=false", + // The scan schema contains additional row index filter columns. + "schema pruning on finding files to update", + "nested schema pruning on finding files to update" + ) + + test("repeated UPDATE produces deletion vectors") { + withTempDir { dir => + val path = dir.getCanonicalPath + val log = DeltaLog.forTable(spark, path) + spark.range(0, 10, 1, numPartitions = 2).write.format("delta").save(path) + + // scalastyle:off argcount + def updateAndCheckLog( + where: String, + expectedAnswer: Seq[Row], + + numAddFilesWithDVs: Int, + sumNumRowsInAddFileWithDV: Int, + sumNumRowsInAddFileWithoutDV: Int, + sumDvCardinalityInAddFile: Long, + + numRemoveFilesWithDVs: Int, + sumNumRowsInRemoveFileWithDV: Int, + sumNumRowsInRemoveFileWithoutDV: Int, + sumDvCardinalityInRemoveFile: Long): Unit = { + executeUpdate(s"delta.`$path`", "id = -1", where) + checkAnswer(sql(s"SELECT * FROM delta.`$path`"), expectedAnswer) + + val fileActions = log.getChanges(log.update().version).flatMap(_._2) + .collect { case f: FileAction => f } + .toSeq + val addFiles = fileActions.collect { case f: AddFile => f } + val removeFiles = fileActions.collect { case f: RemoveFile => f } + + val (addFilesWithDV, addFilesWithoutDV) = addFiles.partition(_.deletionVector != null) + assert(addFilesWithDV.size === numAddFilesWithDVs) + assert( + addFilesWithDV.map(_.numPhysicalRecords.getOrElse(0L)).sum === + sumNumRowsInAddFileWithDV) + assert( + addFilesWithDV.map(_.deletionVector.cardinality).sum === + sumDvCardinalityInAddFile) + assert( + addFilesWithoutDV.map(_.numPhysicalRecords.getOrElse(0L)).sum === + sumNumRowsInAddFileWithoutDV) + + val (removeFilesWithDV, removeFilesWithoutDV) = + removeFiles.partition(_.deletionVector != null) + assert(removeFilesWithDV.size === numRemoveFilesWithDVs) + assert( + 
removeFilesWithDV.map(_.numPhysicalRecords.getOrElse(0L)).sum === + sumNumRowsInRemoveFileWithDV) + assert( + removeFilesWithDV.map(_.deletionVector.cardinality).sum === + sumDvCardinalityInRemoveFile) + assert( + removeFilesWithoutDV.map(_.numPhysicalRecords.getOrElse(0L)).sum === + sumNumRowsInRemoveFileWithoutDV) + } + // scalastyle:on argcount + + def assertDVMetrics( + numUpdatedRows: Long = 0, + numCopiedRows: Long = 0, + numDeletionVectorsAdded: Long = 0, + numDeletionVectorsRemoved: Long = 0, + numDeletionVectorsUpdated: Long = 0): Unit = { + val table = io.delta.tables.DeltaTable.forPath(path) + val updateMetrics = DeltaMetricsUtils.getLastOperationMetrics(table) + assert(updateMetrics.getOrElse("numUpdatedRows", -1) === numUpdatedRows) + assert(updateMetrics.getOrElse("numCopiedRows", -1) === numCopiedRows) + assert(updateMetrics.getOrElse("numDeletionVectorsAdded", -1) === numDeletionVectorsAdded) + assert( + updateMetrics.getOrElse("numDeletionVectorsRemoved", -1) === numDeletionVectorsRemoved) + assert( + updateMetrics.getOrElse("numDeletionVectorsUpdated", -1) === numDeletionVectorsUpdated) + } + + // DV created. 4 rows updated. + updateAndCheckLog( + "id % 3 = 0", + Seq(-1, 1, 2, -1, 4, 5, -1, 7, 8, -1).map(Row(_)), + numAddFilesWithDVs = 2, + sumNumRowsInAddFileWithDV = 10, + sumNumRowsInAddFileWithoutDV = 4, + sumDvCardinalityInAddFile = 4, + + numRemoveFilesWithDVs = 0, + sumNumRowsInRemoveFileWithDV = 0, + sumNumRowsInRemoveFileWithoutDV = 10, + sumDvCardinalityInRemoveFile = 0) + + assertDVMetrics(numUpdatedRows = 4, numDeletionVectorsAdded = 2) + + // DV updated. 2 rows from the original file updated. + updateAndCheckLog( + "id % 4 = 0", + Seq(-1, 1, 2, -1, -1, 5, -1, 7, -1, -1).map(Row(_)), + numAddFilesWithDVs = 2, + sumNumRowsInAddFileWithDV = 10, + sumNumRowsInAddFileWithoutDV = 2, + sumDvCardinalityInAddFile = 6, + numRemoveFilesWithDVs = 2, + sumNumRowsInRemoveFileWithDV = 10, + sumNumRowsInRemoveFileWithoutDV = 0, + sumDvCardinalityInRemoveFile = 4) + + assertDVMetrics( + numUpdatedRows = 2, + numDeletionVectorsAdded = 2, + numDeletionVectorsRemoved = 2, + numDeletionVectorsUpdated = 2) + + // Original files DV removed, because all rows in the SECOND FILE are deleted. 
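+      // (At this point the second file still has 5 physical rows and a DV of cardinality 3;
+      // updating its last two live rows, ids 5 and 7, drops the file entirely instead of
+      // growing its DV.)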
+ updateAndCheckLog( + "id IN (5, 7)", + Seq(-1, 1, 2, -1, -1, -1, -1, -1, -1, -1).map(Row(_)), + numAddFilesWithDVs = 0, + sumNumRowsInAddFileWithDV = 0, + sumNumRowsInAddFileWithoutDV = 2, + sumDvCardinalityInAddFile = 0, + numRemoveFilesWithDVs = 1, + sumNumRowsInRemoveFileWithDV = 5, + sumNumRowsInRemoveFileWithoutDV = 0, + sumDvCardinalityInRemoveFile = 3) + + assertDVMetrics(numUpdatedRows = 2, numDeletionVectorsRemoved = 1) + } + } + + test("UPDATE a whole partition do not produce DVs") { + withTempDir { dir => + val path = dir.getCanonicalPath + val log = DeltaLog.forTable(spark, path) + spark.range(10).withColumn("part", col("id") % 2) + .write + .format("delta") + .partitionBy("part") + .save(path) + + executeUpdate(s"delta.`$path`", "id = -1", where = "part = 0") + checkAnswer( + sql(s"SELECT * FROM delta.`$path`"), + Row(-1, 0) :: Row(1, 1) :: Row(-1, 0) :: + Row(3, 1) :: Row(-1, 0) :: Row(5, 1) :: Row(-1, 0) :: + Row(7, 1) :: Row(-1, 0) :: Row(9, 1) :: Nil) + + val fileActions = log.getChanges(log.update().version).flatMap(_._2) + .collect { case f: FileAction => f } + .toSeq + val addFiles = fileActions.collect { case f: AddFile => f } + val removeFiles = fileActions.collect { case f: RemoveFile => f } + assert(addFiles.map(_.numPhysicalRecords.getOrElse(0L)).sum === 5) + assert(removeFiles.map(_.numPhysicalRecords.getOrElse(0L)).sum === 5) + for (a <- addFiles) assert(a.deletionVector === null) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/UpdateScalaSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateScalaSuite.scala new file mode 100644 index 00000000000..0cc31ea00a6 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateScalaSuite.scala @@ -0,0 +1,115 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.delta.test.{DeltaExcludedTestMixin, DeltaSQLCommandTest} + +import org.apache.spark.sql.{functions, Row} + +class UpdateScalaSuite extends UpdateSuiteBase + with DeltaSQLCommandTest + with DeltaExcludedTestMixin { + + import testImplicits._ + + override def excluded: Seq[String] = super.excluded ++ Seq( + // Exclude tempViews, because DeltaTable.forName does not resolve them correctly, so no one can + // use them anyway with the Scala API. 
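+    // (These temp-view cases remain exercised through UpdateSQLSuite, which also extends
+    // UpdateSuiteBase.)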
+ // scalastyle:off line.size.limit + "different variations of column references - TempView", + "test update on temp view - basic - Partition=true - SQL TempView", + "test update on temp view - basic - Partition=true - Dataset TempView", + "test update on temp view - basic - Partition=false - SQL TempView", + "test update on temp view - basic - Partition=false - Dataset TempView", + "test update on temp view - subset cols - SQL TempView", + "test update on temp view - subset cols - Dataset TempView", + "test update on temp view - superset cols - SQL TempView", + "test update on temp view - superset cols - Dataset TempView", + "test update on temp view - nontrivial projection - SQL TempView", + "test update on temp view - nontrivial projection - Dataset TempView", + "test update on temp view - view with too many internal aliases - SQL TempView", + "test update on temp view - view with too many internal aliases - Dataset TempView", + "test update on temp view - nontrivial projection with write amplification reduction - SQL TempView", + "test update on temp view - nontrivial projection with write amplification reduction - Dataset TempView", + "test update on temp view - view with too many internal aliases with write amplification reduction - SQL TempView", + "test update on temp view - view with too many internal aliases with write amplification reduction - Dataset TempView", + "test update on temp view - view with too many internal aliases with write amplification reduction - Dataset TempView" + // scalastyle:on line.size.limit + ) + + test("update usage test - without condition") { + append(Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value")) + val table = io.delta.tables.DeltaTable.forPath(tempPath) + table.updateExpr(Map("key" -> "100")) + checkAnswer(readDeltaTable(tempPath), + Row(100, 10) :: Row(100, 20) :: Row(100, 30) :: Row(100, 40) :: Nil) + } + + test("update usage test - without condition, using Column") { + append(Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value")) + val table = io.delta.tables.DeltaTable.forPath(tempPath) + table.update(Map("key" -> functions.expr("100"))) + checkAnswer(readDeltaTable(tempPath), + Row(100, 10) :: Row(100, 20) :: Row(100, 30) :: Row(100, 40) :: Nil) + } + + test("update usage test - with condition") { + append(Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value")) + val table = io.delta.tables.DeltaTable.forPath(tempPath) + table.updateExpr("key = 1 or key = 2", Map("key" -> "100")) + checkAnswer(readDeltaTable(tempPath), + Row(100, 10) :: Row(100, 20) :: Row(3, 30) :: Row(4, 40) :: Nil) + } + + test("update usage test - with condition, using Column") { + append(Seq((1, 10), (2, 20), (3, 30), (4, 40)).toDF("key", "value")) + val table = io.delta.tables.DeltaTable.forPath(tempPath) + table.update(functions.expr("key = 1 or key = 2"), + Map("key" -> functions.expr("100"), "value" -> functions.expr("101"))) + checkAnswer(readDeltaTable(tempPath), + Row(100, 101) :: Row(100, 101) :: Row(3, 30) :: Row(4, 40) :: Nil) + } + + override protected def executeUpdate( + target: String, + set: String, + where: String = null): Unit = { + executeUpdate(target, set.split(","), where) + } + + override protected def executeUpdate( + target: String, + set: Seq[String], + where: String): Unit = { + + val deltaTable = DeltaTestUtils.getDeltaTableForIdentifierOrPath( + spark, + DeltaTestUtils.getTableIdentifierOrPath(target)) + + val setColumns = set.map { assign => + val kv = assign.split("=") + require(kv.size == 2) + kv(0).trim -> 
kv(1).trim + }.toMap + + if (where == null) { + deltaTable.updateExpr(setColumns) + } else { + deltaTable.updateExpr(where, setColumns) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSuiteBase.scala new file mode 100644 index 00000000000..9e202a14841 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSuiteBase.scala @@ -0,0 +1,955 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta + +// scalastyle:off import.ordering.noEmptyLine +import java.util.Locale + +import scala.language.implicitConversions + +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.execution.datasources.FileFormat +import org.apache.spark.sql.functions.struct +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ + +abstract class UpdateSuiteBase + extends QueryTest + with SharedSparkSession + with DeltaDMLTestUtils + with SQLTestUtils + with DeltaTestUtilsForTempViews { + import testImplicits._ + + protected def executeUpdate(target: String, set: Seq[String], where: String): Unit = { + executeUpdate(target, set.mkString(", "), where) + } + + protected def executeUpdate(target: String, set: String, where: String = null): Unit + + implicit def jsonStringToSeq(json: String): Seq[String] = json.split("\n") + + val fileFormat: String = "parquet" + + protected def checkUpdate( + condition: Option[String], + setClauses: String, + expectedResults: Seq[Row], + tableName: Option[String] = None, + prefix: String = ""): Unit = { + executeUpdate(tableName.getOrElse(s"delta.`$tempPath`"), setClauses, where = condition.orNull) + checkAnswer( + tableName + .map(spark.read.format("delta").table(_)) + .getOrElse(readDeltaTable(tempPath)) + .select(s"${prefix}key", s"${prefix}value"), + expectedResults) + } + + test("basic case") { + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value")) + checkUpdate(condition = None, setClauses = "key = 1, value = 2", + expectedResults = Row(1, 2) :: Row(1, 2) :: Row(1, 2) :: Row(1, 2) :: Nil) + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic update - Delta table by path - Partition=$isPartitioned") { + withTable("deltaTable") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkUpdate( + condition = Some("key >= 1"), + setClauses = "value = key + value, key = key + 1", + expectedResults = Row(0, 3) :: Row(2, 5) :: Row(2, 2) :: Row(3, 4) :: Nil, + tableName = Some(s"delta.`$tempPath`")) + } + } + } + + Seq(true, 
false).foreach { isPartitioned => + test(s"basic update - Delta table by name - Partition=$isPartitioned") { + withTable("delta_table") { + val partitionByClause = if (isPartitioned) "PARTITIONED BY (key)" else "" + sql(s""" + |CREATE TABLE delta_table(key INT, value INT) + |USING delta + |OPTIONS('path'='$tempPath') + |$partitionByClause + """.stripMargin) + + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value")) + + checkUpdate( + condition = Some("key >= 1"), + setClauses = "value = key + value, key = key + 1", + expectedResults = Row(0, 3) :: Row(2, 5) :: Row(2, 2) :: Row(3, 4) :: Nil, + tableName = Some("delta_table")) + } + } + } + + Seq(true, false).foreach { skippingEnabled => + Seq(true, false).foreach { isPartitioned => + test(s"data and partition predicates - Partition=$isPartitioned Skipping=$skippingEnabled") { + withSQLConf(DeltaSQLConf.DELTA_STATS_SKIPPING.key -> skippingEnabled.toString) { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkUpdate(condition = Some("key >= 1 and value != 4"), + setClauses = "value = key + value, key = key + 5", + expectedResults = Row(0, 3) :: Row(7, 4) :: Row(1, 4) :: Row(6, 2) :: Nil) + } + } + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"SC-12276: table has null values - partitioned=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq(("a", 1), (null, 2), (null, 3), ("d", 4)).toDF("key", "value"), partitions) + + // predicate evaluates to null; no-op + checkUpdate(condition = Some("key = null"), + setClauses = "value = -1", + expectedResults = Row("a", 1) :: Row(null, 2) :: Row(null, 3) :: Row("d", 4) :: Nil) + + checkUpdate(condition = Some("key = 'a'"), + setClauses = "value = -1", + expectedResults = Row("a", -1) :: Row(null, 2) :: Row(null, 3) :: Row("d", 4) :: Nil) + + checkUpdate(condition = Some("key is null"), + setClauses = "value = -2", + expectedResults = Row("a", -1) :: Row(null, -2) :: Row(null, -2) :: Row("d", 4) :: Nil) + + checkUpdate(condition = Some("key is not null"), + setClauses = "value = -3", + expectedResults = Row("a", -3) :: Row(null, -2) :: Row(null, -2) :: Row("d", -3) :: Nil) + + checkUpdate(condition = Some("key <=> null"), + setClauses = "value = -4", + expectedResults = Row("a", -3) :: Row(null, -4) :: Row(null, -4) :: Row("d", -3) :: Nil) + } + } + + test("basic case - condition is false") { + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value")) + checkUpdate(condition = Some("1 != 1"), setClauses = "key = 1, value = 2", + expectedResults = Row(2, 2) :: Row(1, 4) :: Row(1, 1) :: Row(0, 3) :: Nil) + } + + test("basic case - condition is true") { + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value")) + checkUpdate(condition = Some("1 = 1"), setClauses = "key = 1, value = 2", + expectedResults = Row(1, 2) :: Row(1, 2) :: Row(1, 2) :: Row(1, 2) :: Nil) + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic update - without where - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkUpdate(condition = None, setClauses = "key = 1, value = 2", + expectedResults = Row(1, 2) :: Row(1, 2) :: Row(1, 2) :: Row(1, 2) :: Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic update - without where and partial columns - Partition=$isPartitioned") { + val partitions = if 
(isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkUpdate(condition = None, setClauses = "key = 1", + expectedResults = Row(1, 1) :: Row(1, 2) :: Row(1, 3) :: Row(1, 4) :: Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic update - without where and out-of-order columns - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkUpdate(condition = None, setClauses = "value = 3, key = 1", + expectedResults = Row(1, 3) :: Row(1, 3) :: Row(1, 3) :: Row(1, 3) :: Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic update - without where and complex input - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkUpdate(condition = None, setClauses = "value = key + 3, key = key + 1", + expectedResults = Row(1, 3) :: Row(2, 4) :: Row(2, 4) :: Row(3, 5) :: Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic update - with where - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkUpdate(condition = Some("key = 1"), setClauses = "value = 3, key = 1", + expectedResults = Row(1, 3) :: Row(2, 2) :: Row(0, 3) :: Row(1, 3) :: Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic update - with where and complex input - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkUpdate(condition = Some("key >= 1"), setClauses = "value = key + value, key = key + 1", + expectedResults = Row(0, 3) :: Row(2, 5) :: Row(2, 2) :: Row(3, 4) :: Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic update - with where and no row matched - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkUpdate(condition = Some("key >= 10"), setClauses = "value = key + value, key = key + 1", + expectedResults = Row(0, 3) :: Row(1, 1) :: Row(1, 4) :: Row(2, 2) :: Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"type mismatch - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkUpdate(condition = Some("key >= 1"), + setClauses = "value = key + cast(value as double), key = cast(key as double) + 1", + expectedResults = Row(0, 3) :: Row(2, 5) :: Row(3, 4) :: Row(2, 2) :: Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"set to null - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + + checkUpdate(condition = Some("key >= 1"), + setClauses = "value = key, key = null + 1D", + expectedResults = Row(0, 3) :: Row(null, 1) :: Row(null, 1) :: Row(null, 2) :: Nil) + } + } + + Seq(true, false).foreach { isPartitioned => + test(s"basic update - TypeCoercion twice - Partition=$isPartitioned") { + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((99, 2), (100, 
4), (101, 3)).toDF("key", "value"), partitions) + + checkUpdate( + condition = Some("cast(key as long) * cast('1.0' as decimal(38, 18)) > 100"), + setClauses = "value = -3", + expectedResults = Row(100, 4) :: Row(101, -3) :: Row(99, 2) :: Nil) + } + } + + for (storeAssignmentPolicy <- StoreAssignmentPolicy.values) + test("upcast int source type into long target, storeAssignmentPolicy = " + + s"$storeAssignmentPolicy") { + append(Seq((99, 2L), (100, 4L), (101, 3L)).toDF("key", "value")) + withSQLConf( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> storeAssignmentPolicy.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false") { + checkUpdate( + condition = None, + setClauses = "value = 4", + expectedResults = Row(100, 4) :: Row(101, 4) :: Row(99, 4) :: Nil) + } + } + + // Casts that are not valid implicit casts (e.g. string -> boolean) are allowed only when + // storeAssignmentPolicy is LEGACY or ANSI. STRICT is tested in [[UpdateSQLSuite]] only due to + // limitations when using the Scala API. + for (storeAssignmentPolicy <- StoreAssignmentPolicy.values - StoreAssignmentPolicy.STRICT) + test("invalid implicit cast string source type into boolean target, " + + s"storeAssignmentPolicy = $storeAssignmentPolicy") { + append(Seq((99, true), (100, false), (101, true)).toDF("key", "value")) + withSQLConf( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> storeAssignmentPolicy.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false") { + checkUpdate( + condition = None, + setClauses = "value = 'false'", + expectedResults = Row(100, false) :: Row(101, false) :: Row(99, false) :: Nil) + } + } + + // Valid implicit casts that are not upcasts (e.g. string -> int) are allowed only when + // storeAssignmentPolicy is LEGACY or ANSI. STRICT is tested in [[UpdateSQLSuite]] only due to + // limitations when using the Scala API. 
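+  // With LEGACY or ANSI store assignment the string literal '5' is implicitly cast to the
+  // integer 5, so the SET below succeeds on every row.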
+ for (storeAssignmentPolicy <- StoreAssignmentPolicy.values - StoreAssignmentPolicy.STRICT) + test("valid implicit cast string source type into int target, " + + s"storeAssignmentPolicy = ${storeAssignmentPolicy}") { + append(Seq((99, 2), (100, 4), (101, 3)).toDF("key", "value")) + withSQLConf( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> storeAssignmentPolicy.toString, + DeltaSQLConf.UPDATE_AND_MERGE_CASTING_FOLLOWS_ANSI_ENABLED_FLAG.key -> "false") { + checkUpdate( + condition = None, + setClauses = "value = '5'", + expectedResults = Row(100, 5) :: Row(101, 5) :: Row(99, 5) :: Nil) + } + } + + test("update cached table") { + Seq((2, 2), (1, 4)).toDF("key", "value") + .write.mode("overwrite").format("delta").save(tempPath) + + spark.read.format("delta").load(tempPath).cache() + spark.read.format("delta").load(tempPath).collect() + + executeUpdate(s"delta.`$tempPath`", set = "key = 3") + checkAnswer(spark.read.format("delta").load(tempPath), Row(3, 2) :: Row(3, 4) :: Nil) + } + + test("different variations of column references") { + append(Seq((99, 2), (100, 4), (101, 3), (102, 5)).toDF("key", "value")) + + spark.read.format("delta").load(tempPath).createOrReplaceTempView("tblName") + + checkUpdate( + condition = Some("key = 99"), + setClauses = "value = -1", + expectedResults = Row(99, -1) :: Row(100, 4) :: Row(101, 3) :: Row(102, 5) :: Nil) + checkUpdate( + condition = Some("`key` = 100"), + setClauses = "`value` = -1", + expectedResults = Row(99, -1) :: Row(100, -1) :: Row(101, 3) :: Row(102, 5) :: Nil) + } + + test("different variations of column references - TempView") { + append(Seq((99, 2), (100, 4), (101, 3), (102, 5)).toDF("key", "value")) + + spark.read.format("delta").load(tempPath).createOrReplaceTempView("tblName") + + checkUpdate( + condition = Some("tblName.key = 101"), + setClauses = "tblName.value = -1", + expectedResults = Row(99, 2) :: Row(100, 4) :: Row(101, -1) :: Row(102, 5) :: Nil, + tableName = Some("tblName")) + checkUpdate( + condition = Some("`tblName`.`key` = 102"), + setClauses = "`tblName`.`value` = -1", + expectedResults = Row(99, 2) :: Row(100, 4) :: Row(101, -1) :: Row(102, -1) :: Nil, + tableName = Some("tblName")) + } + + test("target columns can have db and table qualifiers") { + withTable("target") { + spark.read.json(""" + {"a": {"b.1": 1, "c.e": 'random'}, "d": 1} + {"a": {"b.1": 3, "c.e": 'string'}, "d": 2}""" + .split("\n").toSeq.toDS()).write.format("delta").saveAsTable("`target`") + + executeUpdate( + target = "target", + set = "`default`.`target`.a.`b.1` = -1, target.a.`c.e` = 'RANDOM'", + where = "d = 1") + + checkAnswer(spark.table("target"), + spark.read.json(""" + {"a": {"b.1": -1, "c.e": 'RANDOM'}, "d": 1} + {"a": {"b.1": 3, "c.e": 'string'}, "d": 2}""" + .split("\n").toSeq.toDS())) + } + } + + test("Negative case - non-delta target") { + Seq((1, 1), (0, 3), (1, 5)).toDF("key1", "value") + .write.mode("overwrite").format("parquet").save(tempPath) + val e = intercept[DeltaAnalysisException] { + executeUpdate(target = s"delta.`$tempPath`", set = "key1 = 3") + }.getMessage + assert(e.contains("UPDATE destination only supports Delta sources") || + e.contains("is not a Delta table") || e.contains("doesn't exist") || + e.contains("Incompatible format")) + } + + test("Negative case - check target columns during analysis") { + withTable("table") { + sql("CREATE TABLE table (s int, t string) USING delta PARTITIONED BY (s)") + var ae = intercept[AnalysisException] { + executeUpdate("table", set = "column_doesnt_exist = 'San Francisco'", where = "t = 
'a'") + } + // The error class is renamed from MISSING_COLUMN to UNRESOLVED_COLUMN in Spark 3.4 + assert(ae.getErrorClass == "UNRESOLVED_COLUMN.WITH_SUGGESTION" + || ae.getErrorClass == "MISSING_COLUMN" ) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + executeUpdate(target = "table", set = "S = 1, T = 'b'", where = "T = 'a'") + ae = intercept[AnalysisException] { + executeUpdate(target = "table", set = "S = 1, s = 'b'", where = "s = 1") + } + assert(ae.message.contains("There is a conflict from these SET columns")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + ae = intercept[AnalysisException] { + executeUpdate(target = "table", set = "S = 1", where = "t = 'a'") + } + // The error class is renamed from MISSING_COLUMN to UNRESOLVED_COLUMN in Spark 3.4 + assert(ae.getErrorClass == "UNRESOLVED_COLUMN.WITH_SUGGESTION" + || ae.getErrorClass == "MISSING_COLUMN" ) + + ae = intercept[AnalysisException] { + executeUpdate(target = "table", set = "S = 1, s = 'b'", where = "s = 1") + } + // The error class is renamed from MISSING_COLUMN to UNRESOLVED_COLUMN in Spark 3.4 + assert(ae.getErrorClass == "UNRESOLVED_COLUMN.WITH_SUGGESTION" + || ae.getErrorClass == "MISSING_COLUMN" ) + + // unresolved column in condition + ae = intercept[AnalysisException] { + executeUpdate(target = "table", set = "s = 1", where = "T = 'a'") + } + // The error class is renamed from MISSING_COLUMN to UNRESOLVED_COLUMN in Spark 3.4 + assert(ae.getErrorClass == "UNRESOLVED_COLUMN.WITH_SUGGESTION" + || ae.getErrorClass == "MISSING_COLUMN" ) + } + } + } + + test("Negative case - UPDATE the child directory") { + append(Seq((2, 2), (3, 2)).toDF("key", "value"), partitionBy = "key" :: Nil) + val e = intercept[AnalysisException] { + executeUpdate( + target = s"delta.`$tempPath/key=2`", + set = "key = 1, value = 2", + where = "value = 2") + }.getMessage + assert(e.contains("Expect a full scan of Delta sources, but found a partial scan")) + } + + test("Negative case - do not support subquery test") { + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value")) + Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("c", "d").createOrReplaceTempView("source") + + // basic subquery + val e0 = intercept[AnalysisException] { + executeUpdate(target = s"delta.`$tempPath`", + set = "key = 1", + where = "key < (SELECT max(c) FROM source)") + }.getMessage + assert(e0.contains("Subqueries are not supported")) + + // subquery with EXISTS + val e1 = intercept[AnalysisException] { + executeUpdate(target = s"delta.`$tempPath`", + set = "key = 1", + where = "EXISTS (SELECT max(c) FROM source)") + }.getMessage + assert(e1.contains("Subqueries are not supported")) + + // subquery with NOT EXISTS + val e2 = intercept[AnalysisException] { + executeUpdate(target = s"delta.`$tempPath`", + set = "key = 1", + where = "NOT EXISTS (SELECT max(c) FROM source)") + }.getMessage + assert(e2.contains("Subqueries are not supported")) + + // subquery with IN + val e3 = intercept[AnalysisException] { + executeUpdate(target = s"delta.`$tempPath`", + set = "key = 1", + where = "key IN (SELECT max(c) FROM source)") + }.getMessage + assert(e3.contains("Subqueries are not supported")) + + // subquery with NOT IN + val e4 = intercept[AnalysisException] { + executeUpdate(target = s"delta.`$tempPath`", + set = "key = 1", + where = "key NOT IN (SELECT max(c) FROM source)") + }.getMessage + assert(e4.contains("Subqueries are not supported")) + } + + test("nested data support") { + // set a nested field + checkUpdateJson(target = """ + {"a": {"c": {"d": 
'random', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""", + updateWhere = "z = 10", + set = "a.c.d = 'RANDOM'" :: Nil, + expected = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""") + + // do nothing as condition has no match + val unchanged = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""" + checkUpdateJson(target = unchanged, + updateWhere = "z = 30", + set = "a.c.d = 'RANDOMMMMM'" :: Nil, + expected = unchanged) + + // set multiple nested fields at different levels + checkUpdateJson( + target = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""", + updateWhere = "z = 20", + set = "a.c.d = 'RANDOM2'" :: "a.c.e = 'STR2'" :: "a.g = -2" :: "z = -20" :: Nil, + expected = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'RANDOM2', "e": 'STR2'}, "g": -2}, "z": -20}""") + + // set nested fields to null + checkUpdateJson( + target = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""", + updateWhere = "a.c.d = 'random2'", + set = "a.c = null" :: "a.g = null" :: Nil, + expected = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": null, "g": null}, "z": 20}""") + + // set a top struct type column to null + checkUpdateJson( + target = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""", + updateWhere = "a.c.d = 'random2'", + set = "a = null" :: Nil, + expected = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": null, "z": 20}""") + + // set a nested field using named_struct + checkUpdateJson( + target = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""", + updateWhere = "a.g = 2", + set = "a.c = named_struct('d', 'RANDOM2', 'e', 'STR2')" :: Nil, + expected = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'RANDOM2', "e": 'STR2'}, "g": 2}, "z": 20}""") + + // set an integer nested field with a string that can be casted into an integer + checkUpdateJson( + target = """ + {"a": {"c": {"d": 'random', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""", + updateWhere = "z = 10", + set = "a.g = '-1'" :: "z = '30'" :: Nil, + expected = """ + {"a": {"c": {"d": 'random', "e": 'str'}, "g": -1}, "z": 30} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""") + + // set the nested data that has an Array field + checkUpdateJson( + target = """ + {"a": {"c": {"d": 'random', "e": [1, 11]}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'RANDOM2', "e": [2, 22]}, "g": 2}, "z": 20}""", + updateWhere = "z = 20", + set = "a.c.d = 'RANDOM22'" :: "a.g = -2" :: Nil, + expected = """ + {"a": {"c": {"d": 'random', "e": [1, 11]}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'RANDOM22', "e": [2, 22]}, "g": -2}, "z": 20}""") + + // set an array field + checkUpdateJson( + target = """ + {"a": {"c": {"d": 'random', "e": [1, 11]}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'RANDOM22', "e": [2, 22]}, "g": -2}, "z": 20}""", + updateWhere = "z = 10", + set = "a.c.e = array(-1, -11)" :: "a.g = -1" :: Nil, + expected = """ + {"a": {"c": {"d": 'random', 
"e": [-1, -11]}, "g": -1}, "z": 10} + {"a": {"c": {"d": 'RANDOM22', "e": [2, 22]}, "g": -2}, "z": 20}""") + + // set an array field as a top-level attribute + checkUpdateJson( + target = """ + {"a": [1, 11], "b": 'Z'} + {"a": [2, 22], "b": 'Y'}""", + updateWhere = "b = 'Z'", + set = "a = array(-1, -11, -111)" :: Nil, + expected = """ + {"a": [-1, -11, -111], "b": 'Z'} + {"a": [2, 22], "b": 'Y'}""") + } + + test("nested data resolution order") { + // By default, resolve by name. + checkUpdateJson( + target = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""", + updateWhere = "a.g = 2", + set = "a = named_struct('g', 20, 'c', named_struct('e', 'str0', 'd', 'randomNew'))" :: Nil, + expected = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'randomNew', "e": 'str0'}, "g": 20}, "z": 20}""") + checkUpdateJson( + target = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""", + updateWhere = "a.g = 2", + set = "a.c = named_struct('e', 'str0', 'd', 'randomNew')" :: Nil, + expected = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'randomNew', "e": 'str0'}, "g": 2}, "z": 20}""") + + // With the legacy conf, resolve by position. + withSQLConf((DeltaSQLConf.DELTA_RESOLVE_MERGE_UPDATE_STRUCTS_BY_NAME.key, "false")) { + checkUpdateJson( + target = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""", + updateWhere = "a.g = 2", + set = "a.c = named_struct('e', 'str0', 'd', 'randomNew')" :: Nil, + expected = """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'str0', "e": 'randomNew'}, "g": 2}, "z": 20}""") + + val e = intercept[AnalysisException] { + checkUpdateJson( + target = + """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""", + updateWhere = "a.g = 2", + set = + "a = named_struct('g', 20, 'c', named_struct('e', 'str0', 'd', 'randomNew'))" :: Nil, + expected = + """ + {"a": {"c": {"d": 'RANDOM', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'randomNew', "e": 'str0'}, "g": 20}, "z": 20}""") + } + + assert(e.getMessage.contains("cannot cast")) + } + } + + testQuietly("nested data - negative case") { + val targetDF = spark.read.json(""" + {"a": {"c": {"d": 'random', "e": 'str'}, "g": 1}, "z": 10} + {"a": {"c": {"d": 'random2', "e": 'str2'}, "g": 2}, "z": 20}""" + .split("\n").toSeq.toDS()) + + testAnalysisException( + targetDF, + set = "a.c = 'RANDOM2'" :: Nil, + where = "z = 10", + errMsgs = "data type mismatch" :: Nil) + + testAnalysisException( + targetDF, + set = "a.c.z = 'RANDOM2'" :: Nil, + errMsgs = "No such struct field" :: Nil) + + testAnalysisException( + targetDF, + set = "a.c = named_struct('d', 'rand', 'e', 'str')" :: "a.c.d = 'RANDOM2'" :: Nil, + errMsgs = "There is a conflict from these SET columns" :: Nil) + + testAnalysisException( + targetDF, + set = Seq("a = named_struct('c', named_struct('d', 'rand', 'e', 'str'), 'g', 3)", + "a.c.d = 'RANDOM2'"), + errMsgs = "There is a conflict from these SET columns" :: Nil) + + val schema = new StructType().add("a", MapType(StringType, IntegerType)) + val mapData = spark.read.schema(schema).json(Seq("""{"a": {"b": 1}}""").toDS()) + testAnalysisException( + mapData, + set = "a.b = -1" :: Nil, + errMsgs = "Updating nested fields 
is only supported for StructType" :: Nil) + + // Updating an ArrayStruct is not supported + val arrayStructData = spark.read.json(Seq("""{"a": [{"b": 1}, {"b": 2}]}""").toDS()) + testAnalysisException( + arrayStructData, + set = "a.b = array(-1)" :: Nil, + errMsgs = "Updating nested fields is only supported for StructType" :: Nil) + } + + test("schema pruning on finding files to update") { + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value")) + + val executedPlans = DeltaTestUtils.withPhysicalPlansCaptured(spark) { + checkUpdate(condition = Some("key = 2"), setClauses = "key = 1, value = 3", + expectedResults = Row(1, 3) :: Row(1, 4) :: Row(1, 1) :: Row(0, 3) :: Nil) + } + + val scans = executedPlans.flatMap(_.collect { + case f: FileSourceScanExec => f + }) + // The first scan is for finding files to update. We only are matching against the key + // so that should be the only field in the schema. + assert(scans.head.schema == StructType( + Seq( + StructField("key", IntegerType) + ) + )) + } + + test("nested schema pruning on finding files to update") { + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value") + .select(struct("key", "value").alias("nested"))) + + val executedPlans = DeltaTestUtils.withPhysicalPlansCaptured(spark) { + checkUpdate(condition = Some("nested.key = 2"), + setClauses = "nested.key = 1, nested.value = 3", + expectedResults = Row(1, 3) :: Row(1, 4) :: Row(1, 1) :: Row(0, 3) :: Nil, + prefix = "nested.") + } + + val scans = executedPlans.flatMap(_.collect { + case f: FileSourceScanExec => f + }) + + assert(scans.head.schema == StructType.fromDDL("nested STRUCT")) + } + + /** + * @param function the unsupported function. + * @param functionType The type of the unsupported expression to be tested. + * @param data the data in the table. + * @param set the set action containing the unsupported expression. + * @param where the where clause containing the unsupported expression. + * @param expectException whether an exception is expected to be thrown + * @param customErrorRegex customized error regex. 
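+ * When an exception is expected, the test also verifies that the table contents are left + * unchanged after the failed update.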
+ */ + def testUnsupportedExpression( + function: String, + functionType: String, + data: => DataFrame, + set: String, + where: String, + expectException: Boolean, + customErrorRegex: Option[String] = None) { + test(s"$functionType functions in update - expect exception: $expectException") { + withTable("deltaTable") { + data.write.format("delta").saveAsTable("deltaTable") + + val expectedErrorRegex = "(?s).*(?i)unsupported.*(?i).*Invalid expressions.*" + + def checkExpression( + setOption: Option[String] = None, + whereOption: Option[String] = None) { + var catchException = if (functionType.equals("Generate") && setOption.nonEmpty) { + expectException + } else true + + var errorRegex = if (functionType.equals("Generate") && whereOption.nonEmpty) { + ".*Subqueries are not supported in the UPDATE.*" + } else customErrorRegex.getOrElse(expectedErrorRegex) + + + if (catchException) { + val dataBeforeException = spark.read.format("delta").table("deltaTable").collect() + val e = intercept[Exception] { + executeUpdate( + "deltaTable", + setOption.getOrElse("b = 4"), + whereOption.getOrElse("a = 1")) + } + val message = if (e.getCause != null) { + e.getCause.getMessage + } else e.getMessage + assert(message.matches(errorRegex)) + checkAnswer(spark.read.format("delta").table("deltaTable"), dataBeforeException) + } else { + executeUpdate( + "deltaTable", + setOption.getOrElse("b = 4"), + whereOption.getOrElse("a = 1")) + } + } + + // on set + checkExpression(setOption = Option(set)) + + // on condition + checkExpression(whereOption = Option(where)) + } + } + } + + testUnsupportedExpression( + function = "row_number", + functionType = "Window", + data = Seq((1, 2, 3)).toDF("a", "b", "c"), + set = "b = row_number() over (order by c)", + where = "row_number() over (order by c) > 1", + expectException = true + ) + + testUnsupportedExpression( + function = "max", + functionType = "Aggregate", + data = Seq((1, 2, 3)).toDF("a", "b", "c"), + set = "b = max(c)", + where = "b > max(c)", + expectException = true + ) + + // Explode functions are supported in set and where if there's only one row generated. + testUnsupportedExpression( + function = "explode", + functionType = "Generate", + data = Seq((1, 2, List(3))).toDF("a", "b", "c"), + set = "b = (select explode(c) from deltaTable)", + where = "b = (select explode(c) from deltaTable)", + expectException = false // only one row generated, no exception. + ) + + // Explode functions are supported in set and where but if there's more than one row generated, + // it will throw an exception. + testUnsupportedExpression( + function = "explode", + functionType = "Generate", + data = Seq((1, 2, List(3, 4))).toDF("a", "b", "c"), + set = "b = (select explode(c) from deltaTable)", + where = "b = (select explode(c) from deltaTable)", + expectException = true, // more than one generated, expect exception. 
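+ // The regex matches the message whether it is capitalized as "More than one row..." or + // "more than one row...".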
+ customErrorRegex = + Some(".*ore than one row returned by a subquery used as an expression(?s).*") + ) + + protected def checkUpdateJson( + target: Seq[String], + source: Seq[String] = Nil, + updateWhere: String, + set: Seq[String], + expected: Seq[String]): Unit = { + withTempDir { dir => + withTempView("source") { + def toDF(jsonStrs: Seq[String]) = spark.read.json(jsonStrs.toDS) + toDF(target).write.format("delta").mode("overwrite").save(dir.toString) + if (source.nonEmpty) { + toDF(source).createOrReplaceTempView("source") + } + executeUpdate(s"delta.`$dir`", set, updateWhere) + checkAnswer(readDeltaTable(dir.toString), toDF(expected)) + } + } + } + + protected def testAnalysisException( + targetDF: DataFrame, + set: Seq[String], + where: String = null, + errMsgs: Seq[String] = Nil) = { + withTempDir { dir => + targetDF.write.format("delta").save(dir.toString) + val e = intercept[AnalysisException] { + executeUpdate(target = s"delta.`$dir`", set, where) + } + errMsgs.foreach { msg => + assert(e.getMessage.toLowerCase(Locale.ROOT).contains(msg.toLowerCase(Locale.ROOT))) + } + } + } + + Seq(true, false).foreach { isPartitioned => + val testName = s"test update on temp view - basic - Partition=$isPartitioned" + testWithTempView(testName) { isSQLTempView => + val partitions = if (isPartitioned) "key" :: Nil else Nil + append(Seq((2, 2), (1, 4), (1, 1), (0, 3)).toDF("key", "value"), partitions) + createTempViewFromTable(s"delta.`$tempPath`", isSQLTempView) + checkUpdate( + condition = Some("key >= 1"), + setClauses = "value = key + value, key = key + 1", + expectedResults = Row(0, 3) :: Row(2, 5) :: Row(2, 2) :: Row(3, 4) :: Nil, + tableName = Some("v")) + } + } + + protected def testInvalidTempViews(name: String)( + text: String, + expectedErrorMsgForSQLTempView: String = null, + expectedErrorMsgForDataSetTempView: String = null, + expectedErrorClassForSQLTempView: String = null, + expectedErrorClassForDataSetTempView: String = null): Unit = { + testWithTempView(s"test update on temp view - $name") { isSQLTempView => + withTable("tab") { + Seq((0, 3), (1, 2)).toDF("key", "value").write.format("delta").saveAsTable("tab") + createTempViewFromSelect(text, isSQLTempView) + val ex = intercept[AnalysisException] { + executeUpdate( + "v", + where = "key >= 1 and value < 3", + set = "value = key + value, key = key + 1" + ) + } + testErrorMessageAndClass( + isSQLTempView, + ex, + expectedErrorMsgForSQLTempView, + expectedErrorMsgForDataSetTempView, + expectedErrorClassForSQLTempView, + expectedErrorClassForDataSetTempView) + } + } + } + + testInvalidTempViews("subset cols")( + text = "SELECT key FROM tab", + expectedErrorClassForSQLTempView = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + expectedErrorClassForDataSetTempView = "UNRESOLVED_COLUMN.WITH_SUGGESTION" + ) + + testInvalidTempViews("superset cols")( + text = "SELECT key, value, 1 FROM tab", + // The analyzer can't tell whether the table originally had the extra column or not. 
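+ // Either way, the update fails because the extra column "1" cannot be resolved against the + // underlying Delta table.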
+ expectedErrorMsgForSQLTempView = "Can't resolve column 1 in root", + expectedErrorMsgForDataSetTempView = "Can't resolve column 1 in root" + ) + + protected def testComplexTempViews(name: String)(text: String, expectedResult: Seq[Row]) = { + testWithTempView(s"test update on temp view - $name") { isSQLTempView => + withTable("tab") { + Seq((0, 3), (1, 2)).toDF("key", "value").write.format("delta").saveAsTable("tab") + createTempViewFromSelect(text, isSQLTempView) + executeUpdate( + "v", + where = "key >= 1 and value < 3", + set = "value = key + value, key = key + 1" + ) + checkAnswer(spark.read.format("delta").table("v"), expectedResult) + } + } + } + + testComplexTempViews("nontrivial projection")( + text = "SELECT value as key, key as value FROM tab", + expectedResult = Seq(Row(3, 0), Row(3, 3)) + ) + + testComplexTempViews("view with too many internal aliases")( + text = "SELECT * FROM (SELECT * FROM tab AS t1) AS t2", + expectedResult = Seq(Row(0, 3), Row(2, 3)) + ) + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/actions/DeletionVectorDescriptorSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/actions/DeletionVectorDescriptorSuite.scala new file mode 100644 index 00000000000..b89fe1d08f2 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/actions/DeletionVectorDescriptorSuite.scala @@ -0,0 +1,146 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.actions + +import java.util.UUID + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor._ +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkFunSuite +// scalastyle:on import.ordering.noEmptyLine + +/** + * Test: DV descriptor creation, created DV descriptor properties and utility methods are + * working as expected. + */ +class DeletionVectorDescriptorSuite extends SparkFunSuite { + test("Inline DV") { + val dv = inlineInLog(testDVData, cardinality = 3) + + // Make sure the metadata (type, size etc.) in the DV is as expected + assert(!dv.isOnDisk && dv.isInline, s"Incorrect DV storage type: $dv") + assertCardinality(dv, 3) + + val encodedDVData = "0rJua" + assert(dv.pathOrInlineDv === encodedDVData) + assert(dv.sizeInBytes === testDVData.size) + assert(dv.inlineData === testDVData) + assert(dv.estimatedSerializedSize === 18) + + assert(dv.offset.isEmpty) // There shouldn't be an offset for inline DV + + // Unique id to identify the DV + assert(dv.uniqueId === s"i$encodedDVData") + assert(dv.uniqueFileId === s"i$encodedDVData") + + // There is no on-disk file name for an inline DV + intercept[IllegalArgumentException] { dv.absolutePath(testTablePath) } + + // Copy as on-disk DV with absolute path and relative path - + // expect the returned DV is same as input, since this is inline + // so paths are irrelevant. 
+ assert(dv.copyWithAbsolutePath(testTablePath) === dv) + assert(dv.copyWithNewRelativePath(UUID.randomUUID(), "predix2") === dv) + } + + for (offset <- Seq(None, Some(25))) { + test(s"On disk DV with absolute path with offset=$offset") { + val dv = onDiskWithAbsolutePath(testDVAbsPath, sizeInBytes = 15, cardinality = 10, offset) + + // Make sure the metadata (type, size etc.) in the DV is as expected + assert(dv.isOnDisk && !dv.isInline, s"Incorrect DV storage type: $dv") + assertCardinality(dv, 10) + + assert(dv.pathOrInlineDv === testDVAbsPath) + assert(dv.sizeInBytes === 15) + intercept[Exception] { dv.inlineData } + assert(dv.estimatedSerializedSize === (if (offset.isDefined) 4 else 0) + 37) + assert(dv.offset === offset) + + // Unique id to identify the DV + val offsetSuffix = offset.map(o => s"@$o").getOrElse("") + assert(dv.uniqueId === s"p$testDVAbsPath$offsetSuffix") + assert(dv.uniqueFileId === s"p$testDVAbsPath") + + // Given the input already has an absolute path, it should return the path in DV + assert(dv.absolutePath(testTablePath) === new Path(testDVAbsPath)) + + // Given the input already has an absolute path, expect the output to be same as input + assert(dv.copyWithAbsolutePath(testTablePath) === dv) + + // Copy DV as a relative path DV + val uuid = UUID.randomUUID() + val dvCopyWithRelativePath = dv.copyWithNewRelativePath(uuid, "prefix") + assert(dvCopyWithRelativePath.isRelative) + assert(dvCopyWithRelativePath.isOnDisk) + assert(dvCopyWithRelativePath.pathOrInlineDv === encodeUUID(uuid, "prefix")) + } + } + + for (offset <- Seq(None, Some(25))) { + test(s"On-disk DV with relative path with offset=$offset") { + val uuid = UUID.randomUUID() + val dv = onDiskWithRelativePath( + uuid, randomPrefix = "prefix", sizeInBytes = 15, cardinality = 25, offset) + + // Make sure the metadata (type, size etc.) in the DV is as expected + assert(dv.isOnDisk && !dv.isInline, s"Incorrect DV storage type: $dv") + assertCardinality(dv, 25) + + assert(dv.pathOrInlineDv === encodeUUID(uuid, "prefix")) + assert(dv.sizeInBytes === 15) + intercept[Exception] { dv.inlineData } + assert(dv.estimatedSerializedSize === (if (offset.isDefined) 4 else 0) + 39) + assert(dv.offset === offset) + + // Unique id to identify the DV + val offsetSuffix = offset.map(o => s"@$o").getOrElse("") + val encodedUUID = encodeUUID(uuid, "prefix") + assert(dv.uniqueId === s"u$encodedUUID$offsetSuffix") + assert(dv.uniqueFileId === s"u$encodedUUID") + + // Expect the DV final path to be under the given table path + assert(dv.absolutePath(testTablePath) === + new Path(s"$testTablePath/prefix/deletion_vector_$uuid.bin")) + + // Copy DV with an absolute path location + val dvCopyWithAbsPath = dv.copyWithAbsolutePath(testTablePath) + assert(dvCopyWithAbsPath.isAbsolute) + assert(dvCopyWithAbsPath.isOnDisk) + assert( + dvCopyWithAbsPath.pathOrInlineDv === s"$testTablePath/prefix/deletion_vector_$uuid.bin") + + // Copy DV as a relative path DV - expect to return the same DV as the current + // DV already contains relative path. 
+ assert(dv.copyWithNewRelativePath(UUID.randomUUID(), "predix2") === dv) + } + } + + private def assertCardinality(dv: DeletionVectorDescriptor, expSize: Int): Unit = { + if (expSize == 0) { + assert(dv.isEmpty, s"Expected DV to be empty: $dv") + } else { + assert(!dv.isEmpty && dv.cardinality == expSize, s"Invalid size expected: $expSize, $dv") + } + } + + private val testTablePath = new Path("s3a://table/test") + private val testDVAbsPath = "s3a://table/test/dv1.bin" + private val testDVData: Array[Byte] = Array(1, 2, 3, 4) +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/cdc/CDCReaderSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/cdc/CDCReaderSuite.scala new file mode 100644 index 00000000000..a5bdce207d5 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/cdc/CDCReaderSuite.scala @@ -0,0 +1,453 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.cdc + +// scalastyle:off import.ordering.noEmptyLine +import java.io.File + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.DeltaOperations.Delete +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.actions.{Action, AddCDCFile, AddFile} +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.commands.cdc.CDCReader._ +import org.apache.spark.sql.delta.files.DelayedCommitProtocol +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.execution.{LogicalRDD, SQLExecution} +import org.apache.spark.sql.execution.datasources.FileFormatWriter +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SharedSparkSession + +class CDCReaderSuite + extends QueryTest + with CheckCDCAnswer + with SharedSparkSession + with DeltaSQLCommandTest + with DeltaColumnMappingTestUtils { + + override protected def sparkConf: SparkConf = super.sparkConf + .set(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true") + + /** + * Write a commit with just CDC data. Returns the committed version. 
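+ * The rows are written with FileFormatWriter and committed as AddCDCFile actions together + * with any actions passed in extraActions.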
+ */ + private def writeCdcData( + log: DeltaLog, + data: DataFrame, + extraActions: Seq[Action] = Seq.empty): Long = { + log.withNewTransaction { txn => + val qe = data.queryExecution + val basePath = log.dataPath.toString + + // column mapped mode forces to use random file prefix + val randomPrefixes = if (columnMappingEnabled) { + Some(DeltaConfigs.RANDOM_PREFIX_LENGTH.fromMetaData(log.snapshot.metadata)) + } else { + None + } + // we need to convert to physical name in column mapping mode + val mappedOutput = if (columnMappingEnabled) { + val metadata = log.snapshot.metadata + DeltaColumnMapping.createPhysicalAttributes( + qe.analyzed.output, metadata.schema, metadata.columnMappingMode + ) + } else { + qe.analyzed.output + } + + SQLExecution.withNewExecutionId(qe) { + var committer = new DelayedCommitProtocol("delta", basePath, randomPrefixes, None) + FileFormatWriter.write( + sparkSession = spark, + plan = qe.executedPlan, + fileFormat = log.fileFormat(log.snapshot.protocol, log.unsafeVolatileMetadata), + committer = committer, + outputSpec = FileFormatWriter.OutputSpec(basePath, Map.empty, mappedOutput), + hadoopConf = log.newDeltaHadoopConf(), + partitionColumns = Seq.empty, + bucketSpec = None, + statsTrackers = Seq.empty, + options = Map.empty) + + val cdc = committer.addedStatuses.map { a => + AddCDCFile(a.path, Map.empty, a.size) + } + txn.commit(extraActions ++ cdc, DeltaOperations.ManualUpdate) + } + } + } + + + def createCDFDF(start: Long, end: Long, commitVersion: Long, changeType: String): DataFrame = { + spark.range(start, end) + .withColumn(CDC_TYPE_COLUMN_NAME, lit(changeType)) + .withColumn(CDC_COMMIT_VERSION, lit(commitVersion)) + } + + test("simple CDC scan") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir.getAbsolutePath) + val data = spark.range(10) + val cdcData = spark.range(20, 25).withColumn(CDC_TYPE_COLUMN_NAME, lit("insert")) + + data.write.format("delta").save(dir.getAbsolutePath) + sql(s"DELETE FROM delta.`${dir.getAbsolutePath}`") + writeCdcData(log, cdcData) + + // For this basic test, we check each of the versions individually in addition to the full + // range to try and catch weird corner cases. 
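+ // Version 0 is the initial insert, version 1 is the DELETE, and version 2 is the manually + // written CDC data.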
+ checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 0, 0, spark), + data.withColumn(CDC_TYPE_COLUMN_NAME, lit("insert")) + .withColumn(CDC_COMMIT_VERSION, lit(0)) + ) + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 1, 1, spark), + data.withColumn(CDC_TYPE_COLUMN_NAME, lit("delete")) + .withColumn(CDC_COMMIT_VERSION, lit(1)) + ) + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 2, 2, spark), + cdcData.withColumn(CDC_COMMIT_VERSION, lit(2)) + ) + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 0, 2, spark), + data.withColumn(CDC_TYPE_COLUMN_NAME, lit("insert")) + .withColumn(CDC_COMMIT_VERSION, lit(0)) + .unionAll(data + .withColumn(CDC_TYPE_COLUMN_NAME, lit("delete")) + .withColumn(CDC_COMMIT_VERSION, lit(1))) + .unionAll(cdcData.withColumn(CDC_COMMIT_VERSION, lit(2))) + ) + } + } + + test("CDC has correct stats") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir.getAbsolutePath) + val data = spark.range(10) + val cdcData = spark.range(20, 25).withColumn(CDC_TYPE_COLUMN_NAME, lit("insert")) + + data.write.format("delta").save(dir.getAbsolutePath) + sql(s"DELETE FROM delta.`${dir.getAbsolutePath}`") + writeCdcData(log, cdcData) + + assert( + CDCReader + .changesToBatchDF(log, 0, 2, spark) + .queryExecution + .optimizedPlan + .collectLeaves() + .exists { + case l: LogicalRDD => l.stats.sizeInBytes == 0 && !l.isStreaming + case _ => false + } + ) + } + } + + test("cdc update ops") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir.getAbsolutePath) + val data = spark.range(10) + + data.write.format("delta").save(dir.getAbsolutePath) + writeCdcData( + log, + spark.range(20, 25).toDF().withColumn(CDC_TYPE_COLUMN_NAME, lit("update_pre"))) + writeCdcData( + log, + spark.range(30, 35).toDF().withColumn(CDC_TYPE_COLUMN_NAME, lit("update_post"))) + + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 0, 2, spark), + data.withColumn(CDC_TYPE_COLUMN_NAME, lit("insert")) + .withColumn(CDC_COMMIT_VERSION, lit(0)) + .unionAll(spark.range(20, 25).withColumn(CDC_TYPE_COLUMN_NAME, lit("update_pre")) + .withColumn(CDC_COMMIT_VERSION, lit(1)) + ) + .unionAll(spark.range(30, 35).withColumn(CDC_TYPE_COLUMN_NAME, lit("update_post")) + .withColumn(CDC_COMMIT_VERSION, lit(2)) + ) + ) + } + } + + test("dataChange = false operations ignored") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir.getAbsolutePath) + val data = spark.range(10) + + data.write.format("delta").save(dir.getAbsolutePath) + sql(s"OPTIMIZE delta.`${dir.getAbsolutePath}`") + + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 0, 1, spark), + data.withColumn(CDC_TYPE_COLUMN_NAME, lit("insert")) + .withColumn(CDC_COMMIT_VERSION, lit(0)) + ) + } + } + + test("range with start and end equal") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir.getAbsolutePath) + val data = spark.range(10) + val cdcData = spark.range(0, 5).withColumn(CDC_TYPE_COLUMN_NAME, lit("delete")) + .withColumn(CDC_COMMIT_VERSION, lit(1)) + + data.write.format("delta").save(dir.getAbsolutePath) + writeCdcData(log, cdcData) + + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 0, 0, spark), + data.withColumn(CDC_TYPE_COLUMN_NAME, lit("insert")) + .withColumn(CDC_COMMIT_VERSION, lit(0)) + ) + + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 1, 1, spark), + cdcData) + } + } + + test("range past the end of the log") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir.getAbsolutePath) + 
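// Only version 0 exists here, so asking for changes up to version 1 should simply return + // the version 0 changes instead of failing. +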
spark.range(10).write.format("delta").save(dir.getAbsolutePath) + + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 0, 1, spark), + spark.range(10).withColumn(CDC_TYPE_COLUMN_NAME, lit("insert")) + .withColumn(CDC_COMMIT_VERSION, lit(0)) + ) + } + } + + test("invalid range - end before start") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir.getAbsolutePath) + spark.range(10).write.format("delta").save(dir.getAbsolutePath) + spark.range(20).write.format("delta").mode("append").save(dir.getAbsolutePath) + + intercept[IllegalArgumentException] { + CDCReader.changesToBatchDF(log, 1, 0, spark) + } + } + } + + testQuietly("invalid range - start after last version of CDF") { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir.getAbsolutePath) + spark.range(10).write.format("delta").save(dir.getAbsolutePath) + spark.range(20).write.format("delta").mode("append").save(dir.getAbsolutePath) + + val e = intercept[IllegalArgumentException] { + spark.read.format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", Long.MaxValue) + .option("endingVersion", Long.MaxValue) + .load(dir.toString) + .count() + } + assert(e.getMessage == + DeltaErrors.startVersionAfterLatestVersion(Long.MaxValue, 1).getMessage) + } + } + + test("partition filtering of removes and cdc files") { + withTempDir { dir => + withSQLConf((DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true")) { + val path = dir.getAbsolutePath + val log = DeltaLog.forTable(spark, path) + spark.range(6).selectExpr("id", "'old' as text", "id % 2 as part") + .write.format("delta").partitionBy("part").save(path) + + // Generate some CDC files. + withTempView("source") { + spark.range(4).createOrReplaceTempView("source") + sql( + s"""MERGE INTO delta.`$path` t USING source s ON s.id = t.id + |WHEN MATCHED AND s.id = 1 THEN UPDATE SET text = 'new' + |WHEN MATCHED AND s.id = 3 THEN DELETE""".stripMargin) + } + + // This will generate just remove files due to the partition delete optimization. + sql(s"DELETE FROM delta.`$path` WHERE part = 0") + + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 0, 2, spark).filter("_change_type = 'insert'"), + Range(0, 6).map { i => Row(i, "old", i % 2, "insert", 0) }) + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 0, 2, spark).filter("_change_type = 'delete'"), + Seq(0, 2, 3, 4).map { i => Row(i, "old", i % 2, "delete", if (i % 2 == 0) 2 else 1) }) + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 0, 2, spark).filter("_change_type = 'update_preimage'"), + Row(1, "old", 1, "update_preimage", 1) :: Nil) + checkCDCAnswer( + log, + CDCReader.changesToBatchDF(log, 0, 2, spark).filter("_change_type = 'update_postimage'"), + Row(1, "new", 1, "update_postimage", 1) :: Nil) + } + } + } + + test("file layout - unpartitioned") { + withTempDir { dir => + withSQLConf((DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true")) { + val path = dir.getAbsolutePath + spark.range(10).repartition(1).write.format("delta").save(path) + sql(s"DELETE FROM delta.`$path` WHERE id < 5") + + val log = DeltaLog.forTable(spark, path) + // The data path should contain four files: the delta log, the CDC folder `__is_cdc=true`, + // and two data files with randomized names from before and after the DELETE command. The + // commit protocol should have stripped out __is_cdc=false. 
+ val baseDirFiles = + log.logPath.getFileSystem(log.newDeltaHadoopConf()).listStatus(log.dataPath) + assert(baseDirFiles.length == 4) + assert(baseDirFiles.exists { f => f.isDirectory && f.getPath.getName == "_delta_log"}) + assert(baseDirFiles.exists { f => f.isDirectory && f.getPath.getName == CDC_LOCATION}) + assert(!baseDirFiles.exists { f => f.getPath.getName.contains(CDC_PARTITION_COL) }) + } + } + } + + test("file layout - partitioned") { + withTempDir { dir => + withSQLConf((DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true")) { + val path = dir.getAbsolutePath + spark.range(10).withColumn("part", col("id") % 2) + .repartition(1).write.format("delta").partitionBy("part").save(path) + sql(s"DELETE FROM delta.`$path` WHERE id < 5") + + val log = DeltaLog.forTable(spark, path) + // The data path should contain four directories: the delta log, the CDC folder + // `__is_cdc=true`, and the two partition folders. The commit protocol + // should have stripped out __is_cdc=false. + val fs = log.logPath.getFileSystem(log.newDeltaHadoopConf()) + val baseDirFiles = fs.listStatus(log.dataPath) + baseDirFiles.foreach { f => assert(f.isDirectory) } + assert(baseDirFiles.map(_.getPath.getName).toSet == + Set("_delta_log", CDC_LOCATION, "part=0", "part=1")) + + // Each partition folder should contain only two data files from before and after the read. + // In particular, they should not contain any __is_cdc folder - that should always be the + // top level partition. + for (partitionFolder <- Seq("part=0", "part=1")) { + val files = fs.listStatus(new Path(log.dataPath, partitionFolder)) + assert(files.length === 2) + files.foreach { f => + assert(!f.isDirectory) + assert(!f.getPath.getName.startsWith(CDC_LOCATION)) + } + } + + // The CDC folder should also contain the two partitions. + val cdcPartitions = fs.listStatus(new Path(log.dataPath, CDC_LOCATION)) + cdcPartitions.foreach { f => assert(f.isDirectory, s"$f was not a directory") } + assert(cdcPartitions.map(_.getPath.getName).toSet == Set("part=0", "part=1")) + } + } + } + + test("for CDC add backtick in column name with dot [.] 
") { + import testImplicits._ + + withTempDir { dir => + withSQLConf((DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true")) { + val path = dir.getAbsolutePath + // 0th commit + Seq(2, 4).toDF("id.num") + .withColumn("id.num`s", lit(10)) + .withColumn("struct_col", struct(lit(1).as("field"), lit(2).as("field.one"))) + .write.format("delta").save(path) + // 1st commit + Seq(1, 3, 5).toDF("id.num") + .withColumn("id.num`s", lit(10)) + .withColumn("struct_col", struct(lit(1).as("field"), lit(2).as("field.one"))) + .write.format("delta").mode(SaveMode.Append).save(path) + // Reading from 0th version + val actual = spark.read.format("delta") + .option("readChangeFeed", "true").option("startingVersion", 0) + .load(path).drop(CDCReader.CDC_COMMIT_TIMESTAMP) + + val expected = spark.range(1, 6).toDF("id.num").withColumn("id.num`s", lit(10)) + .withColumn("struct_col", struct(lit(1).as("field"), lit(2).as("field.one"))) + .withColumn(CDCReader.CDC_TYPE_COLUMN_NAME, lit("insert")) + .withColumn(CDCReader.CDC_COMMIT_VERSION, col("`id.num`") % 2) + checkAnswer(actual, expected) + } + } + } + + for (cdfEnabled <- BOOLEAN_DOMAIN) + test(s"Coarse-grained CDF, cdfEnabled=$cdfEnabled") { + withSQLConf(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey -> cdfEnabled.toString) { + withTempDir { dir => + val log = DeltaLog.forTable(spark, dir.getAbsolutePath) + + // commit 0: 2 inserts + spark.range(start = 0, end = 2, step = 1, numPartitions = 1) + .write.format("delta").save(dir.getAbsolutePath) + var df = CDCReader.changesToBatchDF(log, 0, 1, spark, useCoarseGrainedCDC = true) + checkAnswer(df.drop(CDC_COMMIT_TIMESTAMP), + createCDFDF(start = 0, end = 2, commitVersion = 0, changeType = "insert")) + + // commit 1: 2 inserts + spark.range(start = 2, end = 4) + .write.mode("append").format("delta").save(dir.getAbsolutePath) + df = CDCReader.changesToBatchDF(log, 1, 2, spark, useCoarseGrainedCDC = true) + checkAnswer(df.drop(CDC_COMMIT_TIMESTAMP), + createCDFDF(start = 2, end = 4, commitVersion = 1, changeType = "insert")) + + // commit 2 + sql(s"DELETE FROM delta.`$dir` WHERE id = 0") + df = CDCReader.changesToBatchDF(log, 2, 3, spark, useCoarseGrainedCDC = true) + .drop(CDC_COMMIT_TIMESTAMP) + + // Using only Add and RemoveFiles should generate 2 deletes and 1 insert. Even when CDF + // is enabled, we want to use only Add and RemoveFiles. + val dfWithDeletesFirst = df.sort(CDC_TYPE_COLUMN_NAME) + val expectedAnswer = + createCDFDF(start = 0, end = 2, commitVersion = 2, changeType = "delete") + .union( + createCDFDF(start = 1, end = 2, commitVersion = 2, changeType = "insert")) + checkAnswer(dfWithDeletesFirst, expectedAnswer) + } + } + } +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/cdc/CDCWorkloadSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/cdc/CDCWorkloadSuite.scala new file mode 100644 index 00000000000..f867e610326 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/cdc/CDCWorkloadSuite.scala @@ -0,0 +1,88 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.cdc + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSparkSession + +/** + * Small end to end tests of workloads using CDC from Delta. + */ +class CDCWorkloadSuite extends QueryTest with SharedSparkSession + with DeltaSQLCommandTest { + + test("replication workload") { + withSQLConf((DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true")) { + withTempPaths(2) { paths => + // Create an empty table at `path` we're going to replicate from, and a replication + // destination at `replicatedPath`. The destination contains a subset of the final keys, + // but with out-of-date enrichment data. + val path = paths.head.getAbsolutePath + val replicatedPath = paths(1).getAbsolutePath + spark.range(0).selectExpr("id", "'none' as text").write.format("delta").save(path) + spark.range(50) + .selectExpr("id", "'oldEnrichment' as text") + .filter("id % 4 = 0") + .write.format("delta").save(replicatedPath) + + // Add data to the replication source in overlapping batches, so we produce both insert and + // update events. + for (i <- 0 to 8) { + withTempView("source") { + spark.range(i * 5, i * 5 + 10) + .selectExpr("id", "'newEnrichment' as text") + .createOrReplaceTempView("source") + sql( + s"""MERGE INTO delta.`$path` t USING source s ON s.id = t.id + |WHEN MATCHED THEN UPDATE SET * + |WHEN NOT MATCHED THEN INSERT *""".stripMargin) + } + } + + // Delete some data too. + sql(s"DELETE FROM delta.`$path` WHERE id < 5") + + for (v <- 0 to 10) { + withTempView("cdcSource") { + val changes = spark.read.format("delta") + .option("readChangeFeed", "true") + .option("startingVersion", v) + .option("endingVersion", v) + .load(path) + // Filter out the preimage so the update events only have the final row, as required by + // our merge API. + changes.filter("_change_type != 'update_preimage'").createOrReplaceTempView("cdcSource") + sql( + s"""MERGE INTO delta.`$replicatedPath` t USING cdcSource s ON s.id = t.id + |WHEN MATCHED AND s._change_type = 'update_postimage' OR s._change_type = 'insert' + | THEN UPDATE SET * + |WHEN MATCHED AND s._change_type = 'delete' THEN DELETE + |WHEN NOT MATCHED THEN INSERT *""".stripMargin) + } + } + + // We should have all the rows, all with the new enrichment data from the replication + // source, except for 0 to 5 which were deleted. + val expected = spark.range(5, 50).selectExpr("id", "'newEnrichment' as text") + checkAnswer(spark.read.format("delta").load(replicatedPath), expected) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/cdc/DeleteCDCSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/cdc/DeleteCDCSuite.scala new file mode 100644 index 00000000000..a49c83040d8 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/cdc/DeleteCDCSuite.scala @@ -0,0 +1,127 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.cdc + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.commands.cdc.CDCReader._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf._ +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.Dataset +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.lit + +class DeleteCDCSuite extends DeleteSQLSuite { + import testImplicits._ + + override protected def sparkConf: SparkConf = super.sparkConf + .set(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true") + + protected def testCDCDelete(name: String)( + initialData: => Dataset[_], + partitionColumns: Seq[String] = Seq.empty, + deleteCondition: String, + expectedData: => Dataset[_], + expectedChangeDataWithoutVersion: => Dataset[_] + ): Unit = { + test(s"CDC - $name") { + withSQLConf( + (DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true")) { + withTempDir { dir => + val path = dir.getAbsolutePath + initialData.write.format("delta").partitionBy(partitionColumns: _*) + .save(path) + + executeDelete(s"delta.`$path`", deleteCondition) + + checkAnswer( + spark.read.format("delta").load(path), + expectedData.toDF()) + + checkAnswer( + getCDCForLatestOperation( + deltaLog = DeltaLog.forTable(spark, dir), + operation = "DELETE"), + expectedChangeDataWithoutVersion.toDF()) + } + } + } + } + + testCDCDelete("unconditional")( + initialData = spark.range(0, 10, step = 1, numPartitions = 3), + deleteCondition = "", + expectedData = spark.range(0), + expectedChangeDataWithoutVersion = spark.range(10) + .withColumn(CDC_TYPE_COLUMN_NAME, lit("delete")) + ) + + testCDCDelete("conditional covering all rows")( + initialData = spark.range(0, 10, step = 1, numPartitions = 3), + deleteCondition = "id < 100", + expectedData = spark.range(0), + expectedChangeDataWithoutVersion = spark.range(10) + .withColumn(CDC_TYPE_COLUMN_NAME, lit("delete")) + ) + + testCDCDelete("two random rows")( + initialData = spark.range(0, 10, step = 1, numPartitions = 3), + deleteCondition = "id = 2 OR id = 8", + expectedData = Seq(0, 1, 3, 4, 5, 6, 7, 9).toDF(), + expectedChangeDataWithoutVersion = Seq(2, 8).toDF() + .withColumn(CDC_TYPE_COLUMN_NAME, lit("delete")) + ) + + testCDCDelete("delete unconditionally - partitioned table")( + initialData = spark.range(0, 100, step = 1, numPartitions = 10) + .selectExpr("id % 10 as part", "id"), + partitionColumns = Seq("part"), + deleteCondition = "", + expectedData = Seq.empty[(Long, Long)].toDF("part", "id"), + expectedChangeDataWithoutVersion = + spark.range(100) + .selectExpr("id % 10 as part", "id", "'delete' as _change_type") + ) + + testCDCDelete("delete all rows by condition - partitioned table")( + initialData = spark.range(0, 100, step = 1, numPartitions = 10) + .selectExpr("id % 10 as part", "id"), + partitionColumns = Seq("part"), + deleteCondition = "id < 1000", + expectedData = Seq.empty[(Long, Long)].toDF("part", 
"id"), + expectedChangeDataWithoutVersion = + spark.range(100) + .selectExpr("id % 10 as part", "id", "'delete' as _change_type") + ) + + + testCDCDelete("partition-optimized delete")( + initialData = spark.range(0, 100, step = 1, numPartitions = 10) + .selectExpr("id % 10 as part", "id"), + partitionColumns = Seq("part"), + deleteCondition = "part = 3", + expectedData = + spark.range(100).selectExpr("id % 10 as part", "id").where("part != 3"), + expectedChangeDataWithoutVersion = + Range(0, 10).map(x => x * 10 + 3).toDF("id") + .selectExpr("3 as part", "id", "'delete' as _change_type")) + +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/cdc/MergeCDCSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/cdc/MergeCDCSuite.scala new file mode 100644 index 00000000000..788dcbee5d0 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/cdc/MergeCDCSuite.scala @@ -0,0 +1,410 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.cdc + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} + +/** + * The MergeCDCCoreSuite suite only includes CDC tests defined in this file while MergeCDCSuite + * runs exhaustive tests from MergeIntoSQLSuite to verify that CDC writing mode doesn't break + * existing functionality. + */ +class MergeCDCCoreSuite extends MergeCDCTests +class MergeCDCSuite extends MergeIntoSQLSuite with MergeCDCTests + +/** + * Tests for MERGE INTO in CDC output mode. + * + */ +trait MergeCDCTests extends QueryTest + with MergeIntoSQLTestUtils + with DeltaColumnMappingTestUtils + with DeltaSQLCommandTest { + import testImplicits._ + + override protected def sparkConf: SparkConf = super.sparkConf + .set(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true") + .set(DeltaSQLConf.MERGE_USE_PERSISTENT_DELETION_VECTORS.key, "false") + + // scalastyle:off argcount + /** + * Utility method for simpler test writing when there's at most clause of each type. 
+ */ + private def testMergeCdc(name: String)( + target: => DataFrame, + source: => DataFrame, + deleteWhen: String = null, + update: String = null, + insert: String = null, + expectedTableData: => DataFrame = null, + expectedCdcDataWithoutVersion: => DataFrame = null, + expectErrorContains: String = null, + confs: Seq[(String, String)] = Seq()): Unit = { + val updateClauses = Option(update).map(u => this.update(set = u)).toSeq + val insertClauses = Option(insert).map(i => this.insert(values = i)).toSeq + val deleteClauses = Option(deleteWhen).map(d => this.delete(condition = d)).toSeq + testMergeCdcUnlimitedClauses(name)( + target = target, + source = source, + clauses = deleteClauses ++ updateClauses ++ insertClauses, + expectedTableData = expectedTableData, + expectedCdcDataWithoutVersion = expectedCdcDataWithoutVersion, + expectErrorContains = expectErrorContains, + confs = confs) + } + // scalastyle:on argcount + + private def testMergeCdcUnlimitedClauses(name: String)( + target: => DataFrame, + source: => DataFrame, + mergeCondition: String = "s.key = t.key", + clauses: Seq[MergeClause], + expectedTableData: => DataFrame = null, + expectedCdcDataWithoutVersion: => DataFrame = null, + expectErrorContains: String = null, + confs: Seq[(String, String)] = Seq(), + targetTableSchema: Option[StructType] = None): Unit = { + test(s"merge CDC - $name") { + withSQLConf(confs: _*) { + targetTableSchema.foreach { schema => + io.delta.tables.DeltaTable.create(spark).location(tempPath).addColumns(schema).execute() + } + append(target) + withTempView("source") { + source.createOrReplaceTempView("source") + + if (expectErrorContains != null) { + val ex = intercept[Exception] { + executeMerge(s"delta.`$tempPath` t", s"source s", mergeCondition, + clauses.toSeq: _*) + } + assert(ex.getMessage.contains(expectErrorContains)) + } else { + executeMerge(s"delta.`$tempPath` t", s"source s", mergeCondition, + clauses.toSeq: _*) + checkAnswer( + spark.read.format("delta").load(tempPath), + expectedTableData) + + // Craft expected CDC data + val latestVersion = DeltaLog.forTable(spark, tempPath).snapshot.version + val expectedCdcData = expectedCdcDataWithoutVersion + .withColumn(CDCReader.CDC_COMMIT_VERSION, lit(latestVersion)) + + // The timestamp is nondeterministic so we drop it when comparing results. 
+ checkAnswer( + CDCReader.changesToBatchDF( + DeltaLog.forTable(spark, tempPath), latestVersion, latestVersion, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + expectedCdcData) + } + } + } + } + } + + testMergeCdc("insert only")( + target = ((0, 0) :: (1, 10) :: (3, 30) :: Nil).toDF("key", "n"), + source = ((1, 1) :: (2, 2) :: Nil).toDF("key", "n"), + insert = "*", + expectedTableData = ((0, 0) :: (1, 10) :: (2, 2) :: (3, 30) :: Nil).toDF(), + expectedCdcDataWithoutVersion = ((2, 2, "insert") :: Nil).toDF() + ) + + testMergeCdc("update only")( + target = ((0, 0) :: (1, 10) :: (3, 30) :: Nil).toDF("key", "n"), + source = ((1, 1) :: (2, 2) :: Nil).toDF("key", "n"), + update = "*", + expectedTableData = ((0, 0) :: (1, 1) :: (3, 30) :: Nil).toDF(), + expectedCdcDataWithoutVersion = ( + (1, 10, "update_preimage") :: (1, 1, "update_postimage") :: Nil).toDF() + ) + + testMergeCdc("delete only")( + target = ((0, 0) :: (1, 10) :: (3, 30) :: Nil).toDF("key", "n"), + source = ((1, 1) :: (2, 2) :: Nil).toDF("key", "n"), + deleteWhen = "true", + expectedTableData = ((0, 0) :: (3, 30) :: Nil).toDF(), + expectedCdcDataWithoutVersion = ((1, 10, "delete") :: Nil).toDF() + ) + + testMergeCdc("delete only with duplicate matches")( + target = ((0, 0) :: (1, 10) :: (3, 30) :: Nil).toDF("key", "n"), + source = ((1, 1) :: (1, 2) :: (2, 3) :: Nil).toDF("key", "n"), + deleteWhen = "true", + expectErrorContains = "attempted to modify the same\ntarget row" + ) + + testMergeCdc("update + delete + insert together")( + target = ((0, 0) :: (1, 10) :: (3, 30) :: Nil).toDF("key", "n"), + source = ((1, 1) :: (2, 2) :: (3, -1) :: Nil).toDF("key", "n"), + insert = "*", + update = "*", + deleteWhen = "s.key = 3", + expectedTableData = ((0, 0) :: (1, 1) :: (2, 2) :: Nil).toDF(), + expectedCdcDataWithoutVersion = ( + (2, 2, "insert") :: + (1, 10, "update_preimage") :: (1, 1, "update_postimage") :: + (3, 30, "delete") :: Nil).toDF() + ) + + testMergeCdcUnlimitedClauses("unlimited clauses - conditional final branch")( + target = ((0, 0) :: (1, 10) :: (3, 30) :: (4, 40) :: (6, 60) :: Nil).toDF("key", "n"), + source = ((1, 1) :: (2, 2) :: (3, -1) :: (4, 4) :: (5, 0) :: (6, 0) :: Nil).toDF("key", "n"), + clauses = + update("*", "s.key = 1") :: update("n = 400", "s.key = 4") :: + delete("s.key = 3") :: delete("s.key = 6") :: + insert("*", "s.key = 2") :: insert("(key, n) VALUES (50, 50)", "s.key = 5") :: Nil, + expectedTableData = ((0, 0) :: (1, 1) :: (2, 2) :: (4, 400) :: (50, 50) :: Nil).toDF(), + expectedCdcDataWithoutVersion = ( + (2, 2, "insert") :: (50, 50, "insert") :: + (1, 10, "update_preimage") :: (1, 1, "update_postimage") :: + (4, 40, "update_preimage") :: (4, 400, "update_postimage") :: + (3, 30, "delete") :: (6, 60, "delete") :: Nil).toDF() + ) + + testMergeCdcUnlimitedClauses("unlimited clauses - unconditional final branch")( + target = ((0, 0) :: (1, 10) :: (3, 30) :: (4, 40) :: (6, 60) :: Nil).toDF("key", "n"), + source = ((1, 1) :: (2, 2) :: (3, -1) :: (4, 4) :: (5, 0) :: (6, 0) :: Nil).toDF("key", "n"), + clauses = + update("*", "s.key = 1") :: update("n = 400", "s.key = 4") :: + delete("s.key = 3") :: delete(condition = null) :: + insert("*", "s.key = 2") :: insert("(key, n) VALUES (50, 50)", condition = null) :: Nil, + expectedTableData = ((0, 0) :: (1, 1) :: (2, 2) :: (4, 400) :: (50, 50) :: Nil).toDF(), + expectedCdcDataWithoutVersion = ( + (2, 2, "insert") :: (50, 50, "insert") :: + (1, 10, "update_preimage") :: (1, 1, "update_postimage") :: + (4, 40, "update_preimage") :: (4, 400, "update_postimage") :: + 
(3, 30, "delete") :: (6, 60, "delete") :: Nil).toDF() + ) + + testMergeCdc("basic schema evolution")( + target = ((0, 0) :: (1, 10) :: (3, 30) :: Nil).toDF("key", "n"), + source = ((1, 1, "a") :: (2, 2, "b") :: (3, -1, "c") :: Nil).toDF("key", "n", "text"), + insert = "*", + update = "*", + deleteWhen = "s.key = 3", + expectedTableData = ((0, 0, null) :: (1, 1, "a") :: (2, 2, "b") :: Nil) + .asInstanceOf[Seq[(Int, Int, String)]].toDF(), + expectedCdcDataWithoutVersion = ( + (1, 10, null, "update_preimage") :: + (1, 1, "a", "update_postimage") :: + (2, 2, "b", "insert") :: + (3, 30, null, "delete") :: Nil) + .asInstanceOf[List[(Integer, Integer, String, String)]] + .toDF("key", "targetVal", "srcVal", "_change_type"), + confs = (DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key, "true") :: Nil + ) + + testMergeCdcUnlimitedClauses("schema evolution with non-nullable schema")( + target = ((0, 0) :: (1, 10) :: (3, 30) :: Nil).toDF("key", "n"), + source = ((1, 1, "a") :: (2, 2, "b") :: (3, -1, "c") :: Nil).toDF("key", "n", "text"), + mergeCondition = "t.key = s.key", + clauses = delete(condition = "s.key = 3") :: update("*") :: insert("*") :: Nil, + expectedTableData = ((0, 0, null) :: (1, 1, "a") :: (2, 2, "b") :: Nil) + .asInstanceOf[Seq[(Int, Int, String)]].toDF(), + expectedCdcDataWithoutVersion = ( + (1, 10, null, "update_preimage") :: + (1, 1, "a", "update_postimage") :: + (2, 2, "b", "insert") :: + (3, 30, null, "delete") :: Nil) + .asInstanceOf[List[(Integer, Integer, String, String)]] + .toDF("key", "targetVal", "srcVal", "_change_type"), + confs = (DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key, "true") :: Nil, + targetTableSchema = Some(StructType(Seq( + StructField("key", IntegerType, nullable = false), + StructField("n", IntegerType, nullable = false)))) + ) + + testMergeCdcUnlimitedClauses("schema evolution with non-nullable schema - matched only")( + target = ((0, 0) :: (1, 10) :: (3, 30) :: Nil).toDF("key", "n"), + source = ((1, 1, "a") :: (2, 2, "b") :: (3, -1, "c") :: Nil).toDF("key", "n", "text"), + mergeCondition = "t.key = s.key", + clauses = delete(condition = "s.key = 3") :: update("*") :: Nil, + expectedTableData = ((0, 0, null) :: (1, 1, "a") :: Nil) + .asInstanceOf[Seq[(Int, Int, String)]].toDF(), + expectedCdcDataWithoutVersion = ( + (1, 10, null, "update_preimage") :: + (1, 1, "a", "update_postimage") :: + (3, 30, null, "delete") :: Nil) + .asInstanceOf[List[(Integer, Integer, String, String)]] + .toDF("key", "targetVal", "srcVal", "_change_type"), + confs = (DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key, "true") :: Nil, + targetTableSchema = Some(StructType(Seq( + StructField("key", IntegerType, nullable = false), + StructField("n", IntegerType, nullable = false)))) + ) + + testMergeCdcUnlimitedClauses("unconditional delete only with duplicate matches")( + target = Seq(0, 1).toDF("value"), + source = Seq(1, 1).toDF("value"), + mergeCondition = "t.value = s.value", + clauses = delete() :: Nil, + expectedTableData = Seq(0).toDF(), + expectedCdcDataWithoutVersion = ((1, "delete") :: Nil).toDF() + ) + + testMergeCdcUnlimitedClauses( + "unconditional delete only with duplicate matches without duplicates rows in the source")( + target = Seq(0).toDF("value"), + source = ((0, 0) :: (0, 1) :: Nil).toDF("col1", "col2"), + mergeCondition = "t.value = s.col1", + clauses = delete() :: Nil, + expectedTableData = + Nil.asInstanceOf[List[Integer]] + .toDF("value"), + expectedCdcDataWithoutVersion = ((0, "delete") :: Nil).toDF() + ) + + testMergeCdcUnlimitedClauses( + "unconditional delete only 
with duplicate matches with duplicates in the target")( + target = Seq(0, 1, 1).toDF("value"), + source = Seq(1, 1).toDF("value"), + mergeCondition = "t.value = s.value", + clauses = delete() :: Nil, + expectedTableData = Seq(0).toDF(), + expectedCdcDataWithoutVersion = ((1, "delete") :: (1, "delete") :: Nil).toDF() + ) + + testMergeCdcUnlimitedClauses("unconditional delete only with target-only merge condition")( + target = Seq(0, 1).toDF("value"), + source = Seq(0, 1).toDF("value"), + mergeCondition = "t.value > 0", + clauses = delete() :: Nil, + expectedTableData = Seq(0).toDF(), + expectedCdcDataWithoutVersion = ((1, "delete") :: Nil).toDF() + ) + + testMergeCdcUnlimitedClauses( + "unconditional delete only with target-only merge condition with duplicates in the target")( + target = Seq(0, 1, 1).toDF("value"), + source = Seq(0, 1).toDF("value"), + mergeCondition = "t.value > 0", + clauses = delete() :: Nil, + expectedTableData = Seq(0).toDF(), + expectedCdcDataWithoutVersion = ((1, "delete") :: (1, "delete") :: Nil).toDF() + ) + + testMergeCdcUnlimitedClauses("unconditional delete only with source-only merge condition")( + target = Seq(0, 1).toDF("value"), + source = Seq(0, 1).toDF("value"), + mergeCondition = "s.value < 2", + clauses = delete() :: Nil, + expectedTableData = + Nil.asInstanceOf[List[Integer]] + .toDF("value"), + expectedCdcDataWithoutVersion = ((0, "delete") :: (1, "delete") :: Nil).toDF() + ) + + testMergeCdcUnlimitedClauses( + "unconditional delete only with source-only merge condition with duplicates in the target")( + target = Seq(0, 1, 1).toDF("value"), + source = Seq(0, 1).toDF("value"), + mergeCondition = "s.value < 2", + clauses = delete() :: Nil, + expectedTableData = + Nil.asInstanceOf[List[Integer]] + .toDF("value"), + expectedCdcDataWithoutVersion = ((0, "delete") :: (1, "delete") :: (1, "delete") :: Nil).toDF() + ) + + testMergeCdcUnlimitedClauses("unconditional delete with duplicate matches + insert")( + target = ((1, 1) :: (2, 2) :: Nil).toDF("key", "value"), + source = ((1, 10) :: (1, 100) :: (3, 30) :: (3, 300) :: Nil).toDF("key", "value"), + mergeCondition = "s.key = t.key", + clauses = delete() :: + insert(values = "(key, value) VALUES (s.key, s.value)") :: Nil, + expectedTableData = ((2, 2) :: (3, 30) :: (3, 300) :: Nil).toDF("key", "value"), + expectedCdcDataWithoutVersion = + ((1, 1, "delete") :: (3, 30, "insert") :: (3, 300, "insert") :: Nil).toDF() + ) + + testMergeCdcUnlimitedClauses( + "unconditional delete with duplicate matches + insert with duplicate rows")( + target = ((1, 1) :: (2, 2) :: Nil).toDF("key", "value"), + source = ((1, 10) :: (1, 100) :: (3, 30) :: (3, 300) :: (3, 300) :: Nil).toDF("key", "value"), + mergeCondition = "s.key = t.key", + clauses = delete() :: + insert(values = "(key, value) VALUES (s.key, s.value)") :: Nil, + expectedTableData = ((2, 2) :: (3, 30) :: (3, 300) :: (3, 300) :: Nil).toDF("key", "value"), + expectedCdcDataWithoutVersion = + ((1, 1, "delete") :: (3, 30, "insert") :: (3, 300, "insert") :: + (3, 300, "insert") :: Nil).toDF() + ) + + testMergeCdcUnlimitedClauses("unconditional delete with duplicate matches " + + "+ insert a duplicate of the unmatched target rows")( + target = Seq(1, 2).toDF("value"), + source = ((1, 10) :: (1, 100) :: (3, 2) :: Nil).toDF("col1", "col2"), + mergeCondition = "s.col1 = t.value", + clauses = delete() :: + insert(values = "(value) VALUES (col2)") :: Nil, + expectedTableData = Seq(2, 2).toDF(), + expectedCdcDataWithoutVersion = + ((1, "delete") :: (2, "insert") :: Nil).toDF() + 
) + + testMergeCdcUnlimitedClauses("all conditions failed for all rows")( + target = Seq((1, "a"), (2, "b")).toDF("key", "val"), + source = Seq((1, "t"), (2, "u")).toDF("key", "val"), + clauses = + update("t.val = s.val", "s.key = 10") :: insert("*", "s.key = 11") :: Nil, + expectedTableData = + Seq((1, "a"), (2, "b")).asInstanceOf[List[(Integer, String)]].toDF("key", "targetVal"), + expectedCdcDataWithoutVersion = + Nil.asInstanceOf[List[(Integer, String, String)]] + .toDF("key", "targetVal", "_change_type") + ) + + testMergeCdcUnlimitedClauses("unlimited clauses schema evolution")( + // 1 and 2 should be updated from the source, 3 and 4 should be deleted. Only 5 is unchanged + target = Seq((1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")).toDF("key", "targetVal"), + // 1 and 2 should be updated into the target, 6 and 7 should be inserted. 8 should be ignored + source = Seq((1, "t"), (2, "u"), (3, "v"), (4, "w"), (6, "x"), (7, "y"), (8, "z")) + .toDF("key", "srcVal"), + clauses = + update("targetVal = srcVal", "s.key = 1") :: update("*", "s.key = 2") :: + delete("s.key = 3") :: delete("s.key = 4") :: + insert("(key) VALUES (s.key)", "s.key = 6") :: insert("*", "s.key = 7") :: Nil, + expectedTableData = + ((1, "t", null) :: (2, "b", "u") :: (5, "e", null) :: + (6, null, null) :: (7, null, "y") :: Nil) + .asInstanceOf[List[(Integer, String, String)]].toDF("key", "targetVal", "srcVal"), + expectedCdcDataWithoutVersion = ( + (1, "a", null, "update_preimage") :: + (1, "t", null, "update_postimage") :: + (2, "b", null, "update_preimage") :: + (2, "b", "u", "update_postimage") :: + (3, "c", null, "delete") :: + (4, "d", null, "delete") :: + (6, null, null, "insert") :: + (7, null, "y", "insert") :: Nil) + .asInstanceOf[List[(Integer, String, String, String)]] + .toDF("key", "targetVal", "srcVal", "_change_type"), + confs = (DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key, "true") :: Nil + ) +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/cdc/UpdateCDCSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/cdc/UpdateCDCSuite.scala new file mode 100644 index 00000000000..d2767e2a0f4 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/cdc/UpdateCDCSuite.scala @@ -0,0 +1,239 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.cdc + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile, RemoveFile} +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.test.DeltaExcludedTestMixin +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.TableIdentifier + +class UpdateCDCSuite extends UpdateSQLSuite with DeltaColumnMappingTestUtils { + import testImplicits._ + + override protected def sparkConf: SparkConf = super.sparkConf + .set(DeltaConfigs.CHANGE_DATA_FEED.defaultTablePropertyKey, "true") + + test("CDC for unconditional update") { + append(Seq((1, 1), (2, 2), (3, 3), (4, 4)).toDF("key", "value")) + + checkUpdate( + condition = None, + setClauses = "value = -1", + expectedResults = Row(1, -1) :: Row(2, -1) :: Row(3, -1) :: Row(4, -1) :: Nil) + + val log = DeltaLog.forTable(spark, tempPath) + val latestVersion = log.unsafeVolatileSnapshot.version + checkAnswer( + CDCReader + .changesToBatchDF(log, latestVersion, latestVersion, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(1, 1, "update_preimage", latestVersion) :: + Row(1, -1, "update_postimage", latestVersion) :: + Row(2, 2, "update_preimage", latestVersion) :: + Row(2, -1, "update_postimage", latestVersion) :: + Row(3, 3, "update_preimage", latestVersion) :: + Row(3, -1, "update_postimage", latestVersion) :: + Row(4, 4, "update_preimage", latestVersion) :: + Row(4, -1, "update_postimage", latestVersion) :: + Nil) + } + + test("CDC for conditional update on all rows") { + append(Seq((1, 1), (2, 2), (3, 3), (4, 4)).toDF("key", "value")) + + checkUpdate( + condition = Some("key < 10"), + setClauses = "value = -1", + expectedResults = Row(1, -1) :: Row(2, -1) :: Row(3, -1) :: Row(4, -1) :: Nil) + + val log = DeltaLog.forTable(spark, tempPath) + val latestVersion = log.unsafeVolatileSnapshot.version + checkAnswer( + CDCReader + .changesToBatchDF(log, latestVersion, latestVersion, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(1, 1, "update_preimage", latestVersion) :: + Row(1, -1, "update_postimage", latestVersion) :: + Row(2, 2, "update_preimage", latestVersion) :: + Row(2, -1, "update_postimage", latestVersion) :: + Row(3, 3, "update_preimage", latestVersion) :: + Row(3, -1, "update_postimage", latestVersion) :: + Row(4, 4, "update_preimage", latestVersion) :: + Row(4, -1, "update_postimage", latestVersion) :: + Nil) + } + + test("CDC for point update") { + append(Seq((1, 1), (2, 2), (3, 3), (4, 4)).toDF("key", "value")) + + checkUpdate( + condition = Some("key = 1"), + setClauses = "value = -1", + expectedResults = Row(1, -1) :: Row(2, 2) :: Row(3, 3) :: Row(4, 4) :: Nil) + + val log = DeltaLog.forTable(spark, tempPath) + val latestVersion = log.unsafeVolatileSnapshot.version + checkAnswer( + CDCReader + .changesToBatchDF(log, latestVersion, latestVersion, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(1, 1, "update_preimage", latestVersion) :: + Row(1, -1, "update_postimage", latestVersion) :: + Nil) + } + + test("CDC for repeated point update") { + append(Seq((1, 1), (2, 2), (3, 3), (4, 4)).toDF("key", "value")) + + checkUpdate( + condition = Some("key = 1"), + setClauses = "value = -1", + expectedResults = Row(1, -1) :: Row(2, 2) :: Row(3, 3) :: Row(4, 4) :: Nil) + + val log = DeltaLog.forTable(spark, tempPath) + val latestVersion1 = 
log.unsafeVolatileSnapshot.version + checkAnswer( + CDCReader + .changesToBatchDF(log, latestVersion1, latestVersion1, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(1, 1, "update_preimage", latestVersion1) :: + Row(1, -1, "update_postimage", latestVersion1) :: + Nil) + + checkUpdate( + condition = Some("key = 3"), + setClauses = "value = -3", + expectedResults = Row(1, -1) :: Row(2, 2) :: Row(3, -3) :: Row(4, 4) :: Nil) + + val latestVersion2 = log.unsafeVolatileSnapshot.version + checkAnswer( + CDCReader + .changesToBatchDF(log, latestVersion1, latestVersion2, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(1, 1, "update_preimage", latestVersion1) :: + Row(1, -1, "update_postimage", latestVersion1) :: + Row(3, 3, "update_preimage", latestVersion2) :: + Row(3, -3, "update_postimage", latestVersion2) :: + Nil) + } + + test("CDC for partition-optimized update") { + append( + Seq((1, 1, 1), (2, 2, 0), (3, 3, 1), (4, 4, 0)).toDF("key", "value", "part"), + partitionBy = Seq("part")) + + checkUpdate( + condition = Some("part = 1"), + setClauses = "value = -1", + expectedResults = Row(1, -1) :: Row(2, 2) :: Row(3, -1) :: Row(4, 4) :: Nil) + + val log = DeltaLog.forTable(spark, tempPath) + val latestVersion = log.unsafeVolatileSnapshot.version + checkAnswer( + CDCReader + .changesToBatchDF(log, latestVersion, latestVersion, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(1, 1, 1, "update_preimage", latestVersion) :: + Row(1, -1, 1, "update_postimage", latestVersion) :: + Row(3, 3, 1, "update_preimage", latestVersion) :: + Row(3, -1, 1, "update_postimage", latestVersion) :: + Nil) + } + + + test("update a partitioned CDC enabled table to set the partition column to null") { + val tableName = "part_table_test" + withTable(tableName) { + Seq((0, 0, 0), (1, 1, 1), (2, 2, 2)) + .toDF("key", "partition_column", "value") + .write + .partitionBy("partition_column") + .format("delta") + .saveAsTable(tableName) + sql(s"INSERT INTO $tableName VALUES (4, 4, 4)") + sql(s"UPDATE $tableName SET partition_column = null WHERE partition_column = 4") + checkAnswer( + CDCReader.changesToBatchDF( + DeltaLog.forTable( + spark, + spark.sessionState.sqlParser.parseTableIdentifier(tableName) + ), 1, 3, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(4, 4, 4, "insert", 1) :: + Row(4, 4, 4, "update_preimage", 2) :: + Row(4, null, 4, "update_postimage", 2) :: Nil) + } + } +} + +class UpdateCDCWithDeletionVectorsSuite extends UpdateCDCSuite + with DeltaExcludedTestMixin + with DeletionVectorsTestUtils { + override def beforeAll(): Unit = { + super.beforeAll() + enableDeletionVectors(spark, update = true) + } + + override def excluded: Seq[String] = super.excluded ++ + Seq( + // The following two tests must fail when DV is used. Covered by another test case: + // "throw error when non-pinned TahoeFileIndex snapshot is used". + "data and partition predicates - Partition=true Skipping=false", + "data and partition predicates - Partition=false Skipping=false", + // The scan schema contains additional row index filter columns. 
+ "schema pruning on finding files to update", + "nested schema pruning on finding files to update" + ) + + test("UPDATE with DV write CDC files explicitly") { + withTempDir { dir => + val path = dir.getCanonicalPath + val log = DeltaLog.forTable(spark, path) + spark.range(0, 10, 1, numPartitions = 2).write.format("delta").save(path) + executeUpdate(s"delta.`$path`", "id = -1", "id % 4 = 0") + + val latestVersion = log.update().version + checkAnswer( + CDCReader + .changesToBatchDF(log, latestVersion, latestVersion, spark) + .drop(CDCReader.CDC_COMMIT_TIMESTAMP), + Row(0, "update_preimage", latestVersion) :: + Row(-1, "update_postimage", latestVersion) :: + Row(4, "update_preimage", latestVersion) :: + Row(-1, "update_postimage", latestVersion) :: + Row(8, "update_preimage", latestVersion) :: + Row(-1, "update_postimage", latestVersion) :: + Nil) + + val allActions = log.getChanges(latestVersion).flatMap(_._2).toSeq + val addActions = allActions.collect { case f: AddFile => f } + val removeActions = allActions.collect { case f: RemoveFile => f } + val cdcActions = allActions.collect { case f: AddCDCFile => f } + + assert(addActions.count(_.deletionVector != null) === 2) + assert(removeActions.size === 2) + assert(cdcActions.nonEmpty) + } + } +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/clustering/ClusteredTableClusteringSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/clustering/ClusteredTableClusteringSuite.scala new file mode 100644 index 00000000000..d1b2e0da346 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/clustering/ClusteredTableClusteringSuite.scala @@ -0,0 +1,82 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.clustering + +import org.apache.spark.sql.delta.skipping.ClusteredTableTestUtils +import org.apache.spark.sql.delta.skipping.clustering.ClusteredTableUtils +import org.apache.spark.sql.delta.DeltaLog +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +class ClusteredTableClusteringSuite extends SparkFunSuite + with SharedSparkSession + with ClusteredTableTestUtils + with DeltaSQLCommandTest { + import testImplicits._ + + private val table: String = "test_table" + + // Ingest data to create numFiles files with one row in each file. 
+ private def addFiles(table: String, numFiles: Int): Unit = { + val df = (1 to numFiles).map(i => (i, i)).toDF("col1", "col2") + withSQLConf(SQLConf.MAX_RECORDS_PER_FILE.key -> "1") { + df.write.format("delta").mode("append").saveAsTable(table) + } + } + + private def getFiles(table: String): Set[AddFile] = { + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(table)) + deltaLog.update().allFiles.collect().toSet + } + + private def assertClustered(files: Set[AddFile]): Unit = { + assert(files.forall(_.clusteringProvider.contains(ClusteredTableUtils.clusteringProvider))) + } + + private def assertNotClustered(files: Set[AddFile]): Unit = { + assert(files.forall(_.clusteringProvider.isEmpty)) + } + + test("optimize clustered table") { + withSQLConf(SQLConf.MAX_RECORDS_PER_FILE.key -> "2") { + withClusteredTable( + table = table, + schema = "col1 int, col2 int", + clusterBy = "col1, col2") { + addFiles(table, numFiles = 4) + val files0 = getFiles(table) + assert(files0.size === 4) + assertNotClustered(files0) + + // Optimize should cluster the data into two 2 files since MAX_RECORDS_PER_FILE is 2. + runOptimize(table) { metrics => + assert(metrics.numFilesRemoved == 4) + assert(metrics.numFilesAdded == 2) + } + + val files1 = getFiles(table) + assert(files1.size == 2) + assertClustered(files1) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/clustering/ClusteringMetadataDomainSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/clustering/ClusteringMetadataDomainSuite.scala new file mode 100644 index 00000000000..ce7027a25ba --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/clustering/ClusteringMetadataDomainSuite.scala @@ -0,0 +1,33 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.clustering + +import org.apache.spark.sql.delta.skipping.clustering.ClusteringColumn + +import org.apache.spark.SparkFunSuite + +class ClusteringMetadataDomainSuite extends SparkFunSuite { + test("serialized string follows the spec") { + val clusteringColumns = Seq(ClusteringColumn(Seq("col1", "`col2,col3`", "`col4.col5`,col6"))) + val clusteringMetadataDomain = ClusteringMetadataDomain.fromClusteringColumns(clusteringColumns) + val serializedString = clusteringMetadataDomain.toDomainMetadata.json + assert(serializedString === + """|{"domainMetadata":{"domain":"delta.clustering","configuration": + |"{\"clusteringColumns\":[[\"col1\",\"`col2,col3`\",\"`col4.col5`,col6\"]], + |\"domainName\":\"delta.clustering\"}","removed":false}}""".stripMargin.replace("\n", "")) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/clustering/ClusteringTableFeatureSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/clustering/ClusteringTableFeatureSuite.scala new file mode 100644 index 00000000000..0bda0997833 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/clustering/ClusteringTableFeatureSuite.scala @@ -0,0 +1,51 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.clustering + +import org.apache.spark.sql.delta.DeltaAnalysisException +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.SparkFunSuite + +class ClusteringTableFeatureSuite extends SparkFunSuite with DeltaSQLCommandTest { + + test("create table without cluster by clause cannot set clustering table properties") { + withTable("tbl") { + val e = intercept[DeltaAnalysisException] { + sql("CREATE TABLE tbl(a INT, b STRING) USING DELTA " + + "TBLPROPERTIES('delta.feature.clustering' = 'supported')") + } + checkError( + e, + "DELTA_CREATE_TABLE_SET_CLUSTERING_TABLE_FEATURE_NOT_ALLOWED", + parameters = Map("tableFeature" -> "clustering")) + } + } + + test("use alter table set table properties to enable clustering is not allowed.") { + withTable("tbl") { + sql("CREATE TABLE tbl(a INT, b STRING) USING DELTA") + val e = intercept[DeltaAnalysisException] { + sql("ALTER TABLE tbl SET TBLPROPERTIES ('delta.feature.clustering' = 'supported')") + } + checkError( + e, + "DELTA_ALTER_TABLE_SET_CLUSTERING_TABLE_FEATURE_NOT_ALLOWED", + parameters = Map("tableFeature" -> "clustering")) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala new file mode 100644 index 00000000000..23798151994 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala @@ -0,0 +1,794 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.deletionvectors + +import java.io.{File, FileNotFoundException} + +import org.apache.spark.sql.delta.{DeletionVectorsTableFeature, DeletionVectorsTestUtils, DeltaChecksumException, DeltaConfigs, DeltaLog, DeltaMetricsUtils, DeltaTestUtilsForTempViews} +import org.apache.spark.sql.delta.DeltaTestUtils.{createTestAddFile, BOOLEAN_DOMAIN} +import org.apache.spark.sql.delta.actions.{AddFile, DeletionVectorDescriptor, RemoveFile} +import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor.{inlineInLog, EMPTY} +import org.apache.spark.sql.delta.deletionvectors.DeletionVectorsSuite._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.JsonUtils +import com.fasterxml.jackson.databind.node.ObjectNode +import io.delta.tables.DeltaTable +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, Subquery} +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.test.SharedSparkSession + +class DeletionVectorsSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest + with DeletionVectorsTestUtils + with DeltaTestUtilsForTempViews { + import testImplicits._ + + test(s"read Delta table with deletion vectors") { + def verifyVersion(version: Int, expectedData: Seq[Int]): Unit = { + checkAnswer( + spark.read.format("delta").option("versionAsOf", version.toString).load(table1Path), + expectedData.toDF()) + } + // Verify all versions of the table + verifyVersion(0, expectedTable1DataV0) + verifyVersion(1, expectedTable1DataV1) + verifyVersion(2, expectedTable1DataV2) + verifyVersion(3, expectedTable1DataV3) + verifyVersion(4, expectedTable1DataV4) + } + + test(s"read partitioned Delta table with deletion vectors") { + def verify(version: Int, expectedData: Seq[Int], filterExp: String = "true"): Unit = { + val query = spark.read.format("delta") + .option("versionAsOf", version.toString) + .load(table3Path) + .filter(filterExp) + val expected = expectedData.toDF("id") + .withColumn("partCol", col("id") % 10) + .filter(filterExp) + + checkAnswer(query, expected) + } + // Verify all versions of the table + verify(0, expectedTable3DataV0) + verify(1, expectedTable3DataV1) + verify(2, expectedTable3DataV2) + verify(3, expectedTable3DataV3) + verify(4, expectedTable3DataV4) + + verify(4, expectedTable3DataV4, filterExp = "partCol = 3") + verify(3, expectedTable3DataV3, filterExp = "partCol = 3 and id > 25") + verify(1, expectedTable3DataV1, filterExp = "id > 25") + } + + test("select metadata columns from a Delta table with deletion vectors") { + assert(spark.read.format("delta").load(table1Path) + 
.select("_metadata.file_path").distinct().count() == 22) + } + + test("throw error when non-pinned TahoeFileIndex snapshot is used") { + // Corner case where we still have non-pinned TahoeFileIndex when data skipping is disabled + withSQLConf(DeltaSQLConf.DELTA_STATS_SKIPPING.key -> "false") { + def assertError(dataFrame: DataFrame): Unit = { + val ex = intercept[IllegalArgumentException] { + dataFrame.collect() + } + assert(ex.getMessage contains + "Cannot work with a non-pinned table snapshot of the TahoeFileIndex") + } + assertError(spark.read.format("delta").load(table1Path)) + assertError(spark.read.format("delta").option("versionAsOf", "2").load(table1Path)) + } + } + + test("read Delta table with deletion vectors with a filter") { + checkAnswer( + spark.read.format("delta").load(table1Path).where("value in (300, 787, 239)"), + // 300 is removed in the final table + Seq(787, 239).toDF()) + } + + test("read Delta table with DV for a select files") { + val deltaLog = DeltaLog.forTable(spark, table1Path) + val snapshot = deltaLog.unsafeVolatileSnapshot + + // Select a subset of files with DVs and specific value range, this is just to test + // that reading these files will respect the DVs + var rowCount = 0L + var deletedRowCount = 0L + val selectFiles = snapshot.allFiles.collect().filter( + addFile => { + val stats = JsonUtils.mapper.readTree(addFile.stats).asInstanceOf[ObjectNode] + // rowCount += stats.get("rowCount") + val min = stats.get("minValues").get("value").toString + val max = stats.get("maxValues").get("value").toString + val selected = (min == "18" && max == "1988") || + (min == "33" && max == "1995") || (min == "13" && max == "1897") + // TODO: these steps will be easier and also change (depending upon tightBounds value) once + // we expose more methods on AddFile as part of the data skipping changes with DVs + if (selected) { + rowCount += stats.get("numRecords").asInt(0) + deletedRowCount += Option(addFile.deletionVector).getOrElse(EMPTY).cardinality + } + selected + } + ).toSeq + assert(selectFiles.filter(_.deletionVector != null).size > 1) // make at least one file has DV + + assert(deltaLog.createDataFrame(snapshot, selectFiles).count() == rowCount - deletedRowCount) + } + + for (optimizeMetadataQuery <- BOOLEAN_DOMAIN) + test("read Delta tables with DVs in subqueries: " + + s"metadataQueryOptimizationEnabled=$optimizeMetadataQuery") { + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> + optimizeMetadataQuery.toString) { + val table1 = s"delta.`${new File(table1Path).getAbsolutePath}`" + val table2 = s"delta.`${new File(table2Path).getAbsolutePath}`" + + def assertQueryResult(query: String, expected1: Int, expected2: Int): Unit = { + val df = spark.sql(query) + assertPlanContains(df, Subquery.getClass.getSimpleName.stripSuffix("$")) + val actual = df.collect()(0) // fetch only row in the result + assert(actual === Row(expected1, expected2)) + } + + // same table used twice in the query + val query1 = s"SELECT (SELECT COUNT(*) FROM $table1), (SELECT COUNT(*) FROM $table1)" + assertQueryResult(query1, expectedTable1DataV4.size, expectedTable1DataV4.size) + + // two tables used in the query + val query2 = s"SELECT (SELECT COUNT(*) FROM $table1), (SELECT COUNT(*) FROM $table2)" + assertQueryResult(query2, expectedTable1DataV4.size, expectedTable2DataV1.size) + } + } + + test("insert into Delta table with DVs") { + withTempDir { tempDir => + val source1 = new File(table1Path) + val source2 = new File(table2Path) + val target = new File(tempDir, 
"insertTest") + + // Copy the source2 DV table to a temporary directory + FileUtils.copyDirectory(source1, target) + + // Insert data from source2 into source1 (copied to target) + // This blind append generates a plan with `V2WriteCommand` which is a corner + // case in `PrepareDeltaScan` rule + val insertDf = spark.sql(s"INSERT INTO TABLE delta.`${target.getAbsolutePath}` " + + s"SELECT * FROM delta.`${source2.getAbsolutePath}`") + // [[AppendData]] is one of the [[V2WriteCommand]] subtypes + assertPlanContains(insertDf, AppendData.getClass.getSimpleName.stripSuffix("$")) + + val dataInTarget = spark.sql(s"SELECT * FROM delta.`${target.getAbsolutePath}`") + + // Make sure the number of rows is correct. + for (metadataQueryOptimization <- BOOLEAN_DOMAIN) { + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> + metadataQueryOptimization.toString) { + assert(dataInTarget.count() == expectedTable2DataV1.size + expectedTable1DataV4.size) + } + } + + // Make sure the contents are the same + checkAnswer( + dataInTarget, + spark.sql( + s"SELECT * FROM delta.`${source1.getAbsolutePath}` UNION ALL " + + s"SELECT * FROM delta.`${source2.getAbsolutePath}`") + ) + } + } + + test("DELETE with DVs - on a table with no prior DVs") { + withDeletionVectorsEnabled() { + withTempDir { dirName => + // Create table with 500 files of 2 rows each. + val numFiles = 500 + val path = dirName.getAbsolutePath + spark.range(0, 1000, step = 1, numPartitions = numFiles).write.format("delta").save(path) + val tableName = s"delta.`$path`" + + val log = DeltaLog.forTable(spark, path) + val beforeDeleteFilesWithStats = log.update().allFiles.collect() + val beforeDeleteFiles = beforeDeleteFilesWithStats.map(_.path) + + val numFilesWithDVs = 100 + val numDeletedRows = numFilesWithDVs * 1 + spark.sql(s"DELETE FROM $tableName WHERE id % 2 = 0 AND id < 200") + + val snapshotAfterDelete = log.update() + val afterDeleteFilesWithStats = snapshotAfterDelete.allFiles.collect() + val afterDeleteFilesWithDVs = afterDeleteFilesWithStats.filter(_.deletionVector != null) + val afterDeleteFiles = afterDeleteFilesWithStats.map(_.path) + + // Verify the expected no. of deletion vectors and deleted rows according to DV cardinality + assert(afterDeleteFiles.length === numFiles) + assert(afterDeleteFilesWithDVs.length === numFilesWithDVs) + assert(afterDeleteFilesWithDVs.map(_.deletionVector.cardinality).sum == numDeletedRows) + + // Expect all DVs are written in one file + assert( + afterDeleteFilesWithDVs + .map(_.deletionVector.absolutePath(new Path(path))) + .toSet + .size === 1) + + // Verify "tightBounds" is false for files that have DVs + for (f <- afterDeleteFilesWithDVs) { + assert(f.tightBounds.get === false) + } + + // Verify all stats are the same except "tightBounds". + // Drop "tightBounds" and convert the rest to JSON. 
+ val dropTightBounds: (AddFile => String) = + _.stats.replaceAll("\"tightBounds\":(false|true)", "") + val beforeStats = beforeDeleteFilesWithStats.map(dropTightBounds).sorted + val afterStats = afterDeleteFilesWithStats.map(dropTightBounds).sorted + assert(beforeStats === afterStats) + + // make sure the data file list is the same + assert(beforeDeleteFiles === afterDeleteFiles) + + // Contents after the DELETE are as expected + checkAnswer( + spark.sql(s"SELECT * FROM $tableName"), + Seq.range(0, 1000).filterNot(Seq.range(start = 0, end = 200, step = 2).contains(_)).toDF() + ) + } + } + } + + Seq("name", "id").foreach(mode => + test(s"DELETE with DVs with column mapping mode=$mode") { + withSQLConf("spark.databricks.delta.properties.defaults.columnMapping.mode" -> mode) { + withTempDir { dirName => + val path = dirName.getAbsolutePath + val data = (0 until 50).map(x => (x % 10, x, s"foo${x % 5}")) + data.toDF("part", "col1", "col2").write.format("delta").partitionBy( + "part").save(path) + val tableLog = DeltaLog.forTable(spark, path) + enableDeletionVectorsInTable(tableLog, true) + spark.sql(s"DELETE FROM delta.`$path` WHERE col1 = 2") + checkAnswer(spark.sql(s"select * from delta.`$path` WHERE col1 = 2"), Seq()) + verifyDVsExist(tableLog, 1) + } + } + } + ) + + test("DELETE with DVs - existing table already has DVs") { + withSQLConf(DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS.key -> "true") { + withTempDir { tempDir => + val source = new File(table1Path) + val target = new File(tempDir, "deleteTest") + + // Copy the source DV table to a temporary directory + FileUtils.copyDirectory(source, target) + + val targetPath = s"delta.`${target.getAbsolutePath}`" + val dataToRemove = Seq(1999, 299, 7, 87, 867, 456) + val existingDVs = getFilesWithDeletionVectors(DeltaLog.forTable(spark, target)) + + spark.sql(s"DELETE FROM $targetPath WHERE value in (${dataToRemove.mkString(",")})") + + // Check new DVs are created + val newDVs = getFilesWithDeletionVectors(DeltaLog.forTable(spark, target)) + // expect the new DVs contain extra entries for the deleted rows. + assert( + existingDVs.map(_.deletionVector.cardinality).sum + dataToRemove.size === + newDVs.map(_.deletionVector.cardinality).sum + ) + for (f <- newDVs) { + assert(f.tightBounds.get === false) + } + + // Check the data is valid + val expectedTable1DataV5 = expectedTable1DataV4.filterNot(e => dataToRemove.contains(e)) + checkAnswer(spark.sql(s"SELECT * FROM $targetPath"), expectedTable1DataV5.toDF()) + } + } + } + + test("Metrics when deleting with DV") { + withDeletionVectorsEnabled() { + val tableName = "tbl" + withTable(tableName) { + spark.range(0, 10, 1, numPartitions = 2) + .write.format("delta").saveAsTable(tableName) + + { + // Delete one row from the first file, and the whole second file. + val result = sql(s"DELETE FROM $tableName WHERE id >= 4") + assert(result.collect() === Array(Row(6))) + val opMetrics = DeltaMetricsUtils.getLastOperationMetrics(tableName) + assert(opMetrics.getOrElse("numDeletedRows", -1) === 6) + assert(opMetrics.getOrElse("numRemovedFiles", -1) === 1) + assert(opMetrics.getOrElse("numDeletionVectorsAdded", -1) === 1) + assert(opMetrics.getOrElse("numDeletionVectorsRemoved", -1) === 0) + assert(opMetrics.getOrElse("numDeletionVectorsUpdated", -1) === 0) + } + + { + // Delete one row again. 
+ sql(s"DELETE FROM $tableName WHERE id = 3") + val opMetrics = DeltaMetricsUtils.getLastOperationMetrics(tableName) + assert(opMetrics.getOrElse("numDeletedRows", -1) === 1) + assert(opMetrics.getOrElse("numRemovedFiles", -1) === 0) + val initialNumDVs = 0 + val numDVUpdated = 1 + // An "updated" DV is "deleted" then "added" again. + // We increment the count for "updated", "added", and "deleted". + assert( + opMetrics.getOrElse("numDeletionVectorsAdded", -1) === + initialNumDVs + numDVUpdated) + assert( + opMetrics.getOrElse("numDeletionVectorsRemoved", -1) === + initialNumDVs + numDVUpdated) + assert( + opMetrics.getOrElse("numDeletionVectorsUpdated", -1) === + numDVUpdated) + } + + { + // Delete all renaming rows. + sql(s"DELETE FROM $tableName WHERE id IN (0, 1, 2)") + val opMetrics = DeltaMetricsUtils.getLastOperationMetrics(tableName) + assert(opMetrics.getOrElse("numDeletedRows", -1) === 3) + assert(opMetrics.getOrElse("numRemovedFiles", -1) === 1) + assert(opMetrics.getOrElse("numDeletionVectorsAdded", -1) === 0) + assert(opMetrics.getOrElse("numDeletionVectorsRemoved", -1) === 1) + assert(opMetrics.getOrElse("numDeletionVectorsUpdated", -1) === 0) + } + } + } + } + + for(targetDVFileSize <- Seq(2, 200, 2000000)) { + test(s"DELETE with DVs - packing multiple DVs into one file: target max DV file " + + s"size=$targetDVFileSize") { + withSQLConf( + DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.defaultTablePropertyKey -> "true", + DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS.key -> "true", + DeltaSQLConf.DELETION_VECTOR_PACKING_TARGET_SIZE.key -> targetDVFileSize.toString) { + withTempDir { dirName => + // Create table with 100 files of 2 rows each. + val numFiles = 100 + val path = dirName.getAbsolutePath + spark.range(0, 200, step = 1, numPartitions = numFiles) + .write.format("delta").save(path) + val tableName = s"delta.`$path`" + + val beforeDeleteFiles = DeltaLog.forTable(spark, path) + .unsafeVolatileSnapshot.allFiles.collect().map(_.path) + + val numFilesWithDVs = 10 + val numDeletedRows = numFilesWithDVs * 1 + spark.sql(s"DELETE FROM $tableName WHERE id % 2 = 0 AND id < 20") + + // Verify the expected number of AddFiles with DVs + val allFiles = DeltaLog.forTable(spark, path).unsafeVolatileSnapshot.allFiles.collect() + assert(allFiles.size === numFiles) + val addFilesWithDV = allFiles.filter(_.deletionVector != null) + assert(addFilesWithDV.size === numFilesWithDVs) + assert(addFilesWithDV.map(_.deletionVector.cardinality).sum == numDeletedRows) + + val expectedDVFileCount = targetDVFileSize match { + // Each AddFile will have its own DV file + case 2 => numFilesWithDVs + // Each DV size is about 34bytes according the latest format. 
+ case 200 => numFilesWithDVs / (200 / 34).floor.toInt + // Expect all DVs in one file + case 2000000 => 1 + case default => + throw new IllegalStateException(s"Unknown target DV file size: $default") + } + // Expect all DVs are written in one file + assert( + addFilesWithDV.map(_.deletionVector.absolutePath(new Path(path))).toSet.size === + expectedDVFileCount) + + val afterDeleteFiles = allFiles.map(_.path) + // make sure the data file list is the same + assert(beforeDeleteFiles === afterDeleteFiles) + + // Contents after the DELETE are as expected + checkAnswer( + spark.sql(s"SELECT * FROM $tableName"), + Seq.range(0, 200).filterNot( + Seq.range(start = 0, end = 20, step = 2).contains(_)).toDF()) + } + } + } + } + + test("JOIN with DVs - self-join a table with DVs") { + val tableDf = spark.read.format("delta").load(table2Path) + val leftDf = tableDf.withColumn("key", col("value") % 2) + val rightDf = tableDf.withColumn("key", col("value") % 2 + 1) + + checkAnswer( + leftDf.as("left").join(rightDf.as("right"), "key").drop("key"), + Seq(1, 3, 5, 7).flatMap(l => Seq(2, 4, 6, 8).map(r => (l, r))).toDF() + ) + } + + test("JOIN with DVs - non-DV table joins DV table") { + val tableDf = spark.read.format("delta").load(table2Path) + val tableDfV0 = spark.read.format("delta").option("versionAsOf", "0").load(table2Path) + val leftDf = tableDf.withColumn("key", col("value") % 2) + val rightDf = tableDfV0.withColumn("key", col("value") % 2 + 1) + + // Right has two more rows 0 and 9. 0 will be left in the join result. + checkAnswer( + leftDf.as("left").join(rightDf.as("right"), "key").drop("key"), + Seq(1, 3, 5, 7).flatMap(l => Seq(0, 2, 4, 6, 8).map(r => (l, r))).toDF() + ) + } + + test("MERGE with DVs - merge into DV table") { + withTempDir { tempDir => + val source = new File(table1Path) + val target = new File(tempDir, "mergeTest") + FileUtils.copyDirectory(new File(table2Path), target) + + DeltaTable.forPath(spark, target.getAbsolutePath).as("target") + .merge( + spark.read.format("delta").load(source.getAbsolutePath).as("source"), + "source.value = target.value") + .whenMatched() + .updateExpr(Map("value" -> "source.value + 10000")) + .whenNotMatched() + .insertExpr(Map("value" -> "source.value")) + .execute() + + val snapshot = DeltaLog.forTable(spark, target).update() + val allFiles = snapshot.allFiles.collect() + val tombstones = snapshot.tombstones.collect() + // DVs are removed + for (ts <- tombstones) { + assert(ts.deletionVector != null) + } + // target log should not contain DVs + for (f <- allFiles) { + assert(f.deletionVector == null) + assert(f.tightBounds.get) + } + + // Target table should contain "table2 records + 10000" and "table1 records \ table2 records". 
+ checkAnswer( + spark.read.format("delta").load(target.getAbsolutePath), + (expectedTable2DataV1.map(_ + 10000) ++ + expectedTable1DataV4.filterNot(expectedTable2DataV1.contains)).toDF() + ) + } + } + + test("UPDATE with DVs - update rewrite files with DVs") { + withTempDir { tempDir => + FileUtils.copyDirectory(new File(table2Path), tempDir) + val deltaLog = DeltaLog.forTable(spark, tempDir) + + DeltaTable.forPath(spark, tempDir.getAbsolutePath) + .update(col("value") === 1, Map("value" -> (col("value") + 1))) + + val snapshot = deltaLog.update() + val allFiles = snapshot.allFiles.collect() + val tombstones = snapshot.tombstones.collect() + // DVs are removed + for (ts <- tombstones) { + assert(ts.deletionVector != null) + } + // target log should contain two files, one with and one without DV + assert(allFiles.count(_.deletionVector != null) === 1) + assert(allFiles.count(_.deletionVector == null) === 1) + } + } + + test("UPDATE with DVs - update deleted rows updates nothing") { + withTempDir { tempDir => + FileUtils.copyDirectory(new File(table2Path), tempDir) + val deltaLog = DeltaLog.forTable(spark, tempDir) + + val snapshotBeforeUpdate = deltaLog.update() + val allFilesBeforeUpdate = snapshotBeforeUpdate.allFiles.collect() + + DeltaTable.forPath(spark, tempDir.getAbsolutePath) + .update(col("value") === 0, Map("value" -> (col("value") + 1))) + + val snapshot = deltaLog.update() + val allFiles = snapshot.allFiles.collect() + val tombstones = snapshot.tombstones.collect() + // nothing changed + assert(tombstones.length === 0) + assert(allFiles === allFilesBeforeUpdate) + + checkAnswer( + spark.read.format("delta").load(tempDir.getAbsolutePath), + expectedTable2DataV1.toDF() + ) + } + } + + test("INSERT + DELETE + MERGE + UPDATE with DVs") { + withTempDir { tempDir => + val path = tempDir.getAbsolutePath + val deltaLog = DeltaLog.forTable(spark, path) + + def checkTableContents(rows: DataFrame): Unit = + checkAnswer(sql(s"SELECT * FROM delta.`$path`"), rows) + + // Version 0: DV is enabled on table + { + withSQLConf( + DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.defaultTablePropertyKey -> "true") { + spark.range(0, 10, 1, numPartitions = 2).write.format("delta").save(path) + } + val snapshot = deltaLog.update() + assert(snapshot.protocol.isFeatureSupported(DeletionVectorsTableFeature)) + for (f <- snapshot.allFiles.collect()) { + assert(f.tightBounds.get) + } + } + // Version 1: DELETE one row from each file + { + withSQLConf(DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS.key -> "true") { + sql(s"DELETE FROM delta.`$path` WHERE id IN (1, 8)") + } + val (add, _) = getFileActionsInLastVersion(deltaLog) + for (a <- add) { + assert(a.deletionVector !== null) + assert(a.deletionVector.cardinality === 1) + assert(a.numPhysicalRecords.get === a.numLogicalRecords.get + 1) + assert(a.tightBounds.get === false) + } + + checkTableContents(Seq(0, 2, 3, 4, 5, 6, 7, 9).toDF()) + } + // Version 2: UPDATE one row in the first file + { + sql(s"UPDATE delta.`$path` SET id = -1 WHERE id = 0") + val (added, removed) = getFileActionsInLastVersion(deltaLog) + assert(added.length === 2) + assert(removed.length === 1) + // Added files must be two, one containing DV and one not + assert(added.count(_.deletionVector != null) === 1) + assert(added.count(_.deletionVector == null) === 1) + // Removed files must contain DV + for (r <- removed) { + assert(r.deletionVector !== null) + } + + checkTableContents(Seq(-1, 2, 3, 4, 5, 6, 7, 9).toDF()) + } + // Version 3: MERGE into the table using table2 + { + 
DeltaTable.forPath(spark, path).as("target") + .merge( + spark.read.format("delta").load(table2Path).as("source"), + "source.value = target.id") + .whenMatched() + .updateExpr(Map("id" -> "source.value")) + .whenNotMatchedBySource().delete().execute() + val (added, removed) = getFileActionsInLastVersion(deltaLog) + assert(removed.length === 3) + for (a <- added) { + assert(a.deletionVector === null) + assert(a.tightBounds.get) + } + // Two of three removed files have DV + assert(removed.count(_.deletionVector != null) === 2) + + // -1 and 9 are deleted by "when not matched by source" + checkTableContents(Seq(2, 3, 4, 5, 6, 7).toDF()) + } + // Version 4: DELETE one row again + { + withSQLConf(DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS.key -> "true") { + sql(s"DELETE FROM delta.`$path` WHERE id IN (4)") + } + val (add, _) = getFileActionsInLastVersion(deltaLog) + for (a <- add) { + assert(a.deletionVector !== null) + assert(a.deletionVector.cardinality === 1) + assert(a.numPhysicalRecords.get === a.numLogicalRecords.get + 1) + assert(a.tightBounds.get === false) + } + + checkTableContents(Seq(2, 3, 5, 6, 7).toDF()) + } + } + } + test("huge table: read from tables of 2B rows with existing DV of many zeros") { + val canonicalTable5Path = new File(table5Path).getCanonicalPath + checkCountAndSum("value", table5Count, table5Sum, canonicalTable5Path) + } + + test("sanity check for non-incremental DV update") { + val addFile = createTestAddFile() + def bitmapToDvDescriptor(bitmap: RoaringBitmapArray): DeletionVectorDescriptor = { + DeletionVectorDescriptor.inlineInLog( + bitmap.serializeAsByteArray(RoaringBitmapArrayFormat.Portable), + bitmap.cardinality) + } + val dv0 = bitmapToDvDescriptor(RoaringBitmapArray()) + val dv1 = bitmapToDvDescriptor(RoaringBitmapArray(0L, 1L)) + val dv2 = bitmapToDvDescriptor(RoaringBitmapArray(0L, 2L)) + val dv3 = bitmapToDvDescriptor(RoaringBitmapArray(3L)) + + def removeRows(a: AddFile, dv: DeletionVectorDescriptor): (AddFile, RemoveFile) = { + a.removeRows( + deletionVector = dv, + updateStats = true + ) + } + + // Adding an empty DV to a file is allowed. + removeRows(addFile, dv0) + // Updating with the same DV is allowed. + val (addFileWithDV1, _) = removeRows(addFile, dv1) + removeRows(addFileWithDV1, dv1) + // Updating with a different DV with the same cardinality and different rows should not be + // allowed, but is expensive to detect it. + removeRows(addFileWithDV1, dv2) + + // Updating with a DV with lower cardinality should throw. 
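// Concretely: addFileWithDV1 currently records dv1 (cardinality 2). Replacing it with dv0
// (cardinality 0) or dv3 (cardinality 1) would shrink the deleted-row count, which the
// checksum validation treats as corruption and rejects with DeltaChecksumException.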
+ for (dv <- Seq(dv0, dv3)) { + assertThrows[DeltaChecksumException] { + removeRows(addFileWithDV1, dv) + } + } + } + + test("Check no resource leak when DV files are missing (table corrupted)") { + withTempDir { tempDir => + val source = new File(table2Path) + val target = new File(tempDir, "resourceLeakTest") + val targetPath = target.getAbsolutePath + + // Copy the source DV table to a temporary directory + FileUtils.copyDirectory(source, target) + + val filesWithDvs = getFilesWithDeletionVectors(DeltaLog.forTable(spark, target)) + assert(filesWithDvs.size > 0) + deleteDVFile(targetPath, filesWithDvs(0)) + + val se = intercept[SparkException] { + spark.sql(s"SELECT * FROM delta.`$targetPath`").collect() + } + assert(findIfResponsible[FileNotFoundException](se).nonEmpty, + s"Expected a file not found exception as the cause, but got: [${se}]") + } + } + + private sealed case class DeleteUsingDVWithResults( + scale: String, + sqlRule: String, + count: Long, + sum: Long) + private val deleteUsingDvSmallScale = DeleteUsingDVWithResults( + "small", + "value = 1", + table5CountByValues.filterKeys(_ != 1).values.sum, + table5SumByValues.filterKeys(_ != 1).values.sum) + private val deleteUsingDvMediumScale = DeleteUsingDVWithResults( + "medium", + "value > 10", + table5CountByValues.filterKeys(_ <= 10).values.sum, + table5SumByValues.filterKeys(_ <= 10).values.sum) + private val deleteUsingDvLargeScale = DeleteUsingDVWithResults( + "large", + "value != 21", + table5CountByValues(21), + table5SumByValues(21)) + + // deleteUsingDvMediumScale and deleteUsingDvLargeScale runs too slow thus disabled. + for (deleteSpec <- Seq(deleteUsingDvSmallScale)) { + test( + s"huge table: delete a ${deleteSpec.scale} number of rows from tables of 2B rows with DVs") { + withTempDir { dir => + FileUtils.copyDirectory(new File(table5Path), dir) + val log = DeltaLog.forTable(spark, dir) + + withDeletionVectorsEnabled() { + sql(s"DELETE FROM delta.`${dir.getCanonicalPath}` WHERE ${deleteSpec.sqlRule}") + } + val (added, _) = getFileActionsInLastVersion(log) + assert(added.forall(_.deletionVector != null)) + checkCountAndSum("value", deleteSpec.count, deleteSpec.sum, dir.getCanonicalPath) + } + } + } + + private def checkCountAndSum(column: String, count: Long, sum: Long, tableDir: String): Unit = { + checkAnswer( + sql(s"SELECT count($column), sum($column) FROM delta.`$tableDir`"), + Seq((count, sum)).toDF()) + } + + private def assertPlanContains(queryDf: DataFrame, expected: String): Unit = { + val optimizedPlan = queryDf.queryExecution.analyzed.toString() + assert(optimizedPlan.contains(expected), s"Plan is missing `$expected`: $optimizedPlan") + } +} + +object DeletionVectorsSuite { + val table1Path = "src/test/resources/delta/table-with-dv-large" + // Table at version 0: contains [0, 2000) + val expectedTable1DataV0 = Seq.range(0, 2000) + // Table at version 1: removes rows with id = 0, 180, 300, 700, 1800 + val v1Removed = Set(0, 180, 300, 700, 1800) + val expectedTable1DataV1 = expectedTable1DataV0.filterNot(e => v1Removed.contains(e)) + // Table at version 2: inserts rows with id = 300, 700 + val v2Added = Set(300, 700) + val expectedTable1DataV2 = expectedTable1DataV1 ++ v2Added + // Table at version 3: removes rows with id = 300, 250, 350, 900, 1353, 1567, 1800 + val v3Removed = Set(300, 250, 350, 900, 1353, 1567, 1800) + val expectedTable1DataV3 = expectedTable1DataV2.filterNot(e => v3Removed.contains(e)) + // Table at version 4: inserts rows with id = 900, 1567 + val v4Added = Set(900, 1567) + val 
expectedTable1DataV4 = expectedTable1DataV3 ++ v4Added + + val table2Path = "src/test/resources/delta/table-with-dv-small" + // Table at version 0: contains 0 - 9 + val expectedTable2DataV0 = Seq(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) + // Table at version 1: removes rows 0 and 9 + val expectedTable2DataV1 = Seq(1, 2, 3, 4, 5, 6, 7, 8) + + val table3Path = "src/test/resources/delta/partitioned-table-with-dv-large" + // Table at version 0: contains [0, 2000) + val expectedTable3DataV0 = Seq.range(0, 2000) + // Table at version 1: removes rows with id = (0, 180, 308, 225, 756, 1007, 1503) + val table3V1Removed = Set(0, 180, 308, 225, 756, 1007, 1503) + val expectedTable3DataV1 = expectedTable3DataV0.filterNot(e => table3V1Removed.contains(e)) + // Table at version 2: inserts rows with id = 308, 756 + val table3V2Added = Set(308, 756) + val expectedTable3DataV2 = expectedTable3DataV1 ++ table3V2Added + // Table at version 3: removes rows with id = (300, 257, 399, 786, 1353, 1567, 1800) + val table3V3Removed = Set(300, 257, 399, 786, 1353, 1567, 1800) + val expectedTable3DataV3 = expectedTable3DataV2.filterNot(e => table3V3Removed.contains(e)) + // Table at version 4: inserts rows with id = 1353, 1567 + val table3V4Added = Set(1353, 1567) + val expectedTable3DataV4 = expectedTable3DataV3 ++ table3V4Added + + // Table with DV table feature as supported but no DVs + val table4Path = "src/test/resources/delta/table-with-dv-feature-enabled" + val expectedTable4DataV0 = Seq(1L) + + // Table with DV, (1<<31)+10=2147483658 rows in total including 2147484 rows deleted. Parquet is + // generated by: + // spark.range(0, (1L << 31) + 10, 1, numPartitions = 1) + // .withColumn( + // "value", + // when($"id" % 1000 === 0, 1).otherwise(($"id" / 100000000).cast(IntegerType))) + // All "id % 1000 = 0" rows are marked as deleted. + // Column "value" ranges from 0 to 21. + // 99900000 rows with values 0 to 20 each, and 47436174 rows with value 21. + val table5Path = "src/test/resources/delta/table-with-dv-gigantic" + val table5Count = 2145336174L + val table5Sum = 21975159654L + val table5CountByValues = (0 to 20).map(_ -> 99900000L).toMap + (21 -> 47436174L) + val table5SumByValues = (0 to 20).map(v => v -> v * 99900000L).toMap + (21 -> 21 * 47436174L) +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/RoaringBitmapArraySuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/RoaringBitmapArraySuite.scala new file mode 100644 index 00000000000..7f083090473 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/RoaringBitmapArraySuite.scala @@ -0,0 +1,533 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.deletionvectors + +import java.nio.{ByteBuffer, ByteOrder} + +import scala.collection.immutable.TreeSet + +import com.google.common.primitives.Ints + +import org.apache.spark.SparkFunSuite + +class RoaringBitmapArraySuite extends SparkFunSuite { + + final val BITMAP2_NUMBER = Int.MaxValue.toLong * 3L + /** RoaringBitmap containers mostly use `Char` constants internally, so this is consistent. */ + final val CONTAINER_BOUNDARY = Char.MaxValue.toLong + 1L + final val BITMAP_BOUNDARY = 0xFFFFFFFFL + 1L + + private def testEquality(referenceResult: Seq[Long])( + testOps: (RoaringBitmapArray => Unit)*): Unit = { + val referenceBitmap = RoaringBitmapArray(referenceResult: _*) + val testBitmap = RoaringBitmapArray() + testOps.foreach(op => op(testBitmap)) + assert(testBitmap === referenceBitmap) + assert(testBitmap.## === referenceBitmap.##) + assert(testBitmap.toArray === referenceBitmap.toArray) + } + + test("equality") { + testEquality(Seq(1))(_.add(1)) + testEquality(Nil)(_.add(1), _.remove(1)) + testEquality(Seq(1))(_.add(1), _.add(1)) + testEquality(Nil)(_.add(1), _.add(1), _.remove(1)) + testEquality(Nil)(_.add(1), _.remove(1), _.remove(1)) + testEquality(Nil)(_.add(1), _.add(1), _.remove(1), _.remove(1)) + testEquality(Seq(1))(_.add(1), _.remove(1), _.add(1)) + testEquality(Nil)(_.add(1), _.remove(1), _.add(1), _.remove(1)) + + testEquality(Seq(BITMAP2_NUMBER))(_.add(BITMAP2_NUMBER)) + testEquality(Nil)(_.add(BITMAP2_NUMBER), _.remove(BITMAP2_NUMBER)) + testEquality(Seq(BITMAP2_NUMBER))(_.add(BITMAP2_NUMBER), _.add(BITMAP2_NUMBER)) + testEquality(Nil)(_.add(BITMAP2_NUMBER), _.add(BITMAP2_NUMBER), _.remove(BITMAP2_NUMBER)) + testEquality(Nil)(_.add(BITMAP2_NUMBER), _.remove(BITMAP2_NUMBER), _.remove(BITMAP2_NUMBER)) + testEquality(Nil)( + _.add(BITMAP2_NUMBER), + _.add(BITMAP2_NUMBER), + _.remove(BITMAP2_NUMBER), + _.remove(BITMAP2_NUMBER)) + testEquality(Seq(BITMAP2_NUMBER))( + _.add(BITMAP2_NUMBER), + _.remove(BITMAP2_NUMBER), + _.add(BITMAP2_NUMBER)) + testEquality(Nil)( + _.add(BITMAP2_NUMBER), + _.remove(BITMAP2_NUMBER), + _.add(BITMAP2_NUMBER), + _.remove(BITMAP2_NUMBER)) + + testEquality(Seq(1, BITMAP2_NUMBER))(_.add(1), _.add(BITMAP2_NUMBER)) + testEquality(Seq(BITMAP2_NUMBER))(_.add(1), _.add(BITMAP2_NUMBER), _.remove(1)) + testEquality(Seq(1, BITMAP2_NUMBER))(_.add(BITMAP2_NUMBER), _.add(1)) + testEquality(Seq(BITMAP2_NUMBER))(_.add(BITMAP2_NUMBER), _.add(1), _.remove(1)) + testEquality(Seq(BITMAP2_NUMBER))(_.add(1), _.remove(1), _.add(BITMAP2_NUMBER)) + testEquality(Nil)(_.add(1), _.remove(1), _.add(BITMAP2_NUMBER), _.remove(BITMAP2_NUMBER)) + testEquality(Nil)(_.add(1), _.add(BITMAP2_NUMBER), _.remove(1), _.remove(BITMAP2_NUMBER)) + testEquality(Nil)(_.add(BITMAP2_NUMBER), _.add(1), _.remove(1), _.remove(BITMAP2_NUMBER)) + testEquality(Nil)(_.add(BITMAP2_NUMBER), _.add(1), _.remove(BITMAP2_NUMBER), _.remove(1)) + + val denseSequence = 1L to (3L * CONTAINER_BOUNDARY) + def addAll(v: Long): RoaringBitmapArray => Unit = rb => rb.add(v) + testEquality(denseSequence)(denseSequence.map(addAll): _*) + testEquality(denseSequence)(denseSequence.reverse.map(addAll): _*) + + val sparseSequence = 1L to BITMAP2_NUMBER by CONTAINER_BOUNDARY + testEquality(sparseSequence)(sparseSequence.map(addAll): _*) + testEquality(sparseSequence)(sparseSequence.reverse.map(addAll): _*) + } + + /** + * A [[RoaringBitmapArray]] that contains all 3 container types + * in two [[org.roaringbitmap.RoaringBitmap]] instances. 
+ */ + lazy val allContainerTypesBitmap: RoaringBitmapArray = { + val bitmap = RoaringBitmapArray() + // RoaringBitmap 1 Container 1 (Array) + bitmap.addAll(1L, 17L, 63000L, CONTAINER_BOUNDARY - 1) + // RoaringBitmap 1 Container 2 (RLE) + bitmap.addRange((CONTAINER_BOUNDARY + 500L) until (CONTAINER_BOUNDARY + 1200L)) + // RoaringBitmap 1 Container 3 (Bitset) + bitmap.addRange((2L * CONTAINER_BOUNDARY) until (3L * CONTAINER_BOUNDARY - 1L) by 3L) + + // RoaringBitmap 2 Container 1 (Array) + bitmap.addAll( + BITMAP_BOUNDARY, BITMAP_BOUNDARY + 17L, + BITMAP_BOUNDARY + 63000L, + BITMAP_BOUNDARY + CONTAINER_BOUNDARY - 1) + // RoaringBitmap 2 Container 2 (RLE) + bitmap.addRange((BITMAP_BOUNDARY + CONTAINER_BOUNDARY + 500L) until + (BITMAP_BOUNDARY + CONTAINER_BOUNDARY + 1200L)) + // RoaringBitmap 2 Container 3 (Bitset) + bitmap.addRange((BITMAP_BOUNDARY + 2L * CONTAINER_BOUNDARY) until + (BITMAP_BOUNDARY + 3L * CONTAINER_BOUNDARY - 1L) by 3L) + + // Check that RLE containers are actually created. + assert(bitmap.runOptimize()) + + bitmap + } + + for (serializationFormat <- RoaringBitmapArrayFormat.values) { + test(s"serialization - $serializationFormat") { + checkSerializeDeserialize(RoaringBitmapArray(), serializationFormat) + checkSerializeDeserialize(RoaringBitmapArray(1L), serializationFormat) + checkSerializeDeserialize(RoaringBitmapArray(BITMAP2_NUMBER), serializationFormat) + checkSerializeDeserialize(RoaringBitmapArray(1L, BITMAP2_NUMBER), serializationFormat) + checkSerializeDeserialize(allContainerTypesBitmap, serializationFormat) + } + } + + private def checkSerializeDeserialize( + input: RoaringBitmapArray, + format: RoaringBitmapArrayFormat.Value): Unit = { + val serializedSize = Ints.checkedCast(input.serializedSizeInBytes(format)) + val buffer = ByteBuffer.allocate(serializedSize).order(ByteOrder.LITTLE_ENDIAN) + input.serialize(buffer, format) + val output = RoaringBitmapArray() + buffer.flip() + output.deserialize(buffer) + assert(input === output) + } + + for (serializationFormat <- RoaringBitmapArrayFormat.values) { + test( + s"serialization and deserialization with big endian buffers throws - $serializationFormat") { + val roaringBitmapArray = RoaringBitmapArray(1L) + val bigEndianBuffer = ByteBuffer + .allocate(roaringBitmapArray.serializedSizeInBytes(serializationFormat).toInt) + .order(ByteOrder.BIG_ENDIAN) + + assertThrows[IllegalArgumentException] { + roaringBitmapArray.serialize(bigEndianBuffer, serializationFormat) + } + + assertThrows[IllegalArgumentException] { + roaringBitmapArray.deserialize(bigEndianBuffer) + } + } + } + + test("empty") { + val bitmap = RoaringBitmapArray() + assert(bitmap.isEmpty) + assert(bitmap.cardinality === 0L) + assert(!bitmap.contains(0L)) + assert(bitmap.toArray === Array.empty[Long]) + var hadValue = false + bitmap.forEach(_ => hadValue = true) + assert(!hadValue) + } + + test("special values") { + testSpecialValue(0L) + testSpecialValue(Int.MaxValue.toLong) + testSpecialValue(CONTAINER_BOUNDARY - 1L) + testSpecialValue(CONTAINER_BOUNDARY) + testSpecialValue(BITMAP_BOUNDARY - 1L) + testSpecialValue(BITMAP_BOUNDARY) + testSpecialValue(3L * BITMAP_BOUNDARY + 42L) + } + + private def testSpecialValue(value: Long): Unit = { + val bitmap = RoaringBitmapArray(value) + assert(bitmap.cardinality === 1L) + assert(bitmap.contains(value)) + assert(bitmap.toArray === Array(value)) + var valueCount = 0 + bitmap.forEach { v => + valueCount += 1 + assert(v === value) + } + assert(valueCount === 1) + bitmap.remove(value) + assert(!bitmap.contains(value)) 
+ assert(bitmap.cardinality === 0L) + } + + test("negative numbers") { + assertThrows[IllegalArgumentException] { + val bitmap = RoaringBitmapArray() + bitmap.add(-1L) + } + assertThrows[IllegalArgumentException] { + RoaringBitmapArray(-1L) + } + assertThrows[IllegalArgumentException] { + val bitmap = RoaringBitmapArray(1L) + bitmap.remove(-1L) + } + assertThrows[IllegalArgumentException] { + val bitmap = RoaringBitmapArray() + bitmap.add(Long.MaxValue) + } + assertThrows[IllegalArgumentException] { + RoaringBitmapArray(Long.MaxValue) + } + assertThrows[IllegalArgumentException] { + val bitmap = RoaringBitmapArray(1L) + bitmap.remove(Long.MaxValue) + } + assertThrows[IllegalArgumentException] { + val bitmap = RoaringBitmapArray() + bitmap.addAll(-1L, 1L) + } + assertThrows[IllegalArgumentException] { + val bitmap = RoaringBitmapArray() + bitmap.addRange(-3 to 1) + } + assertThrows[IllegalArgumentException] { + val bitmap = RoaringBitmapArray() + bitmap.addRange(-3L to 1L) + } + } + + private def testContainsButNoSimilarValues(value: Long, bitmap: RoaringBitmapArray): Unit = { + assert(bitmap.contains(value)) + for (i <- 1 to 3) { + assert(!bitmap.contains(value + i * CONTAINER_BOUNDARY)) + assert(!bitmap.contains(value + i * BITMAP_BOUNDARY)) + } + } + + test("small integers") { + val bitmap = RoaringBitmapArray( + 3L, 4L, CONTAINER_BOUNDARY - 1L, CONTAINER_BOUNDARY, Int.MaxValue.toLong) + assert(bitmap.cardinality === 5L) + testContainsButNoSimilarValues(3L, bitmap) + testContainsButNoSimilarValues(4L, bitmap) + testContainsButNoSimilarValues(CONTAINER_BOUNDARY - 1L, bitmap) + testContainsButNoSimilarValues(CONTAINER_BOUNDARY, bitmap) + testContainsButNoSimilarValues(Int.MaxValue.toLong, bitmap) + assert(bitmap.toArray === + Array(3L, 4L, CONTAINER_BOUNDARY - 1L, CONTAINER_BOUNDARY, Int.MaxValue.toLong)) + var values: List[Long] = Nil + bitmap.forEach { value => + values ::= value + } + assert(values.reverse === + List(3L, 4L, CONTAINER_BOUNDARY - 1L, CONTAINER_BOUNDARY, Int.MaxValue.toLong)) + bitmap.remove(CONTAINER_BOUNDARY) + assert(!bitmap.contains(CONTAINER_BOUNDARY)) + assert(bitmap.cardinality === 4L) + testContainsButNoSimilarValues(3L, bitmap) + testContainsButNoSimilarValues(4L, bitmap) + testContainsButNoSimilarValues(CONTAINER_BOUNDARY - 1L, bitmap) + testContainsButNoSimilarValues(Int.MaxValue.toLong, bitmap) + } + + test("large integers") { + val container1Number = Int.MaxValue.toLong + 1L + val container3Number = 2 * BITMAP_BOUNDARY + 1L + val bitmap = RoaringBitmapArray( + 3L, 4L, container1Number, BITMAP_BOUNDARY, BITMAP2_NUMBER, container3Number) + assert(bitmap.cardinality === 6L) + testContainsButNoSimilarValues(3L, bitmap) + testContainsButNoSimilarValues(4L, bitmap) + testContainsButNoSimilarValues(container1Number, bitmap) + testContainsButNoSimilarValues(BITMAP_BOUNDARY, bitmap) + testContainsButNoSimilarValues(BITMAP2_NUMBER, bitmap) + testContainsButNoSimilarValues(container3Number, bitmap) + assert(bitmap.toArray === + Array(3L, 4L, container1Number, BITMAP_BOUNDARY, BITMAP2_NUMBER, container3Number)) + var values: List[Long] = Nil + bitmap.forEach { value => + values ::= value + } + assert(values.reverse === + List(3L, 4L, container1Number, BITMAP_BOUNDARY, BITMAP2_NUMBER, container3Number)) + bitmap.remove(BITMAP_BOUNDARY) + assert(!bitmap.contains(BITMAP_BOUNDARY)) + assert(bitmap.cardinality === 5L) + testContainsButNoSimilarValues(3L, bitmap) + testContainsButNoSimilarValues(4L, bitmap) + testContainsButNoSimilarValues(container1Number, bitmap) + 
testContainsButNoSimilarValues(BITMAP2_NUMBER, bitmap) + testContainsButNoSimilarValues(container3Number, bitmap) + } + + test("add/remove round-trip") { + // Single value in the second bitmap + val bitmap = RoaringBitmapArray(BITMAP2_NUMBER) + assert(bitmap.contains(BITMAP2_NUMBER)) + bitmap.remove(BITMAP2_NUMBER) + assert(!bitmap.contains(BITMAP2_NUMBER)) + bitmap.add(BITMAP2_NUMBER) + assert(bitmap.contains(BITMAP2_NUMBER)) + + // Two values in two bitmaps + bitmap.add(CONTAINER_BOUNDARY) + assert(bitmap.contains(CONTAINER_BOUNDARY)) + assert(bitmap.contains(BITMAP2_NUMBER)) + bitmap.remove(CONTAINER_BOUNDARY) + assert(!bitmap.contains(CONTAINER_BOUNDARY)) + assert(bitmap.contains(BITMAP2_NUMBER)) + bitmap.add(CONTAINER_BOUNDARY) + assert(bitmap.contains(CONTAINER_BOUNDARY)) + assert(bitmap.contains(BITMAP2_NUMBER)) + } + + test("or") { + testOr(left = TreeSet.empty, right = TreeSet.empty) + testOr(left = TreeSet(1L), right = TreeSet.empty) + testOr(left = TreeSet.empty, right = TreeSet(1L)) + testOr(left = TreeSet(0L, CONTAINER_BOUNDARY), right = TreeSet(1L, BITMAP_BOUNDARY - 1L)) + testOr( + left = TreeSet(0L, CONTAINER_BOUNDARY, BITMAP2_NUMBER), + right = TreeSet(1L, BITMAP_BOUNDARY - 1L)) + testOr( + left = TreeSet(0L, CONTAINER_BOUNDARY), + right = TreeSet(1L, BITMAP_BOUNDARY - 1L, BITMAP2_NUMBER)) + } + + private def testOr(left: TreeSet[Long], right: TreeSet[Long]): Unit = { + val leftBitmap = RoaringBitmapArray(left.toSeq: _*) + val rightBitmap = RoaringBitmapArray(right.toSeq: _*) + + val expected = left.union(right).toSeq + + leftBitmap.or(rightBitmap) + + assert(leftBitmap.toArray.toSeq === expected) + } + + test("andNot") { + testAndNot(left = TreeSet.empty, right = TreeSet.empty) + testAndNot(left = TreeSet(1L), right = TreeSet.empty) + testAndNot(left = TreeSet.empty, right = TreeSet(1L)) + testAndNot(left = TreeSet(0L, CONTAINER_BOUNDARY), right = TreeSet(1L, BITMAP_BOUNDARY - 1L)) + testAndNot( + left = TreeSet(0L, CONTAINER_BOUNDARY, BITMAP2_NUMBER), + right = TreeSet(1L, BITMAP_BOUNDARY - 1L)) + testAndNot( + left = TreeSet(0L, CONTAINER_BOUNDARY), + right = TreeSet(1L, BITMAP_BOUNDARY - 1L, BITMAP2_NUMBER)) + } + + private def testAndNot(left: TreeSet[Long], right: TreeSet[Long]): Unit = { + val leftBitmap = RoaringBitmapArray() + left.foreach(leftBitmap.add) + val rightBitmap = RoaringBitmapArray() + right.foreach(rightBitmap.add) + + val expected = left.diff(right).toArray + + leftBitmap.andNot(rightBitmap) + + assert(leftBitmap.toArray === expected) + } + + test("and") { + // Empty result + testAnd(left = TreeSet.empty, right = TreeSet.empty) + testAnd(left = TreeSet.empty, right = TreeSet(1L)) + testAnd(left = TreeSet.empty, right = TreeSet(1L, BITMAP_BOUNDARY - 1L)) + testAnd(left = TreeSet.empty, right = TreeSet(0L, CONTAINER_BOUNDARY, BITMAP2_NUMBER)) + testAnd(left = TreeSet(1L), right = TreeSet.empty) + testAnd(left = TreeSet(1L), right = TreeSet(BITMAP_BOUNDARY)) + testAnd(left = TreeSet(1L), right = TreeSet(CONTAINER_BOUNDARY)) + testAnd(left = TreeSet(1L, BITMAP_BOUNDARY - 1L), right = TreeSet.empty) + testAnd(left = TreeSet(0L, CONTAINER_BOUNDARY, BITMAP2_NUMBER), right = TreeSet.empty) + testAnd( + left = TreeSet(0L, CONTAINER_BOUNDARY, BITMAP2_NUMBER), + right = TreeSet(1L, BITMAP_BOUNDARY - 1L)) + testAnd( + left = TreeSet(0L, CONTAINER_BOUNDARY), + right = TreeSet(1L, BITMAP_BOUNDARY - 1L, BITMAP2_NUMBER)) + + // Non empty result + testAnd(left = TreeSet(0L, 5L, 10L), right = TreeSet(5L, 15L)) + testAnd( + left = TreeSet(0L, CONTAINER_BOUNDARY, 
BITMAP2_NUMBER), + right = TreeSet(1L, BITMAP2_NUMBER)) + testAnd( + left = TreeSet(1L, BITMAP_BOUNDARY, CONTAINER_BOUNDARY), + right = TreeSet(1L, BITMAP_BOUNDARY, CONTAINER_BOUNDARY)) + } + + private def testAnd(left: TreeSet[Long], right: TreeSet[Long]): Unit = { + val leftBitmap = RoaringBitmapArray() + leftBitmap.addAll(left.toSeq: _*) + val rightBitmap = RoaringBitmapArray() + rightBitmap.addAll(right.toSeq: _*) + + leftBitmap.and(rightBitmap) + val expected = left.intersect(right) + assert(leftBitmap.toArray === expected.toArray) + } + + test("clear") { + testEquality(Nil)(_.add(1), _.clear()) + testEquality(Nil)(_.add(1), _.add(1), _.clear()) + testEquality(Nil)(_.add(1), _.clear(), _.clear()) + testEquality(Nil)(_.add(1), _.add(1), _.clear(), _.clear()) + testEquality(Seq(1))(_.add(1), _.clear(), _.add(1)) + testEquality(Nil)(_.add(1), _.clear(), _.add(1), _.clear()) + + testEquality(Nil)(_.add(BITMAP2_NUMBER), _.clear()) + testEquality(Nil)(_.add(BITMAP2_NUMBER), _.add(BITMAP2_NUMBER), _.clear()) + testEquality(Nil)(_.add(BITMAP2_NUMBER), _.clear(), _.clear()) + testEquality(Nil)(_.add(BITMAP2_NUMBER), _.add(BITMAP2_NUMBER), _.clear(), _.clear()) + testEquality(Seq(BITMAP2_NUMBER))(_.add(BITMAP2_NUMBER), _.clear(), _.add(BITMAP2_NUMBER)) + testEquality(Nil)(_.add(BITMAP2_NUMBER), _.clear(), _.add(BITMAP2_NUMBER), _.clear()) + + testEquality(Nil)(_.add(1), _.add(BITMAP2_NUMBER), _.clear()) + testEquality(Nil)(_.add(BITMAP2_NUMBER), _.add(1), _.clear()) + testEquality(Seq(BITMAP2_NUMBER))(_.add(1), _.clear(), _.add(BITMAP2_NUMBER)) + testEquality(Nil)(_.add(1), _.clear(), _.add(BITMAP2_NUMBER), _.clear()) + testEquality(Nil)(_.add(1), _.add(BITMAP2_NUMBER), _.clear(), _.clear()) + + val denseSequence = 1L to (3L * CONTAINER_BOUNDARY) + testEquality(Nil)(_.addAll(denseSequence: _*), _.clear()) + + val sparseSequence = 1L to BITMAP2_NUMBER by CONTAINER_BOUNDARY + testEquality(Nil)(_.addAll(sparseSequence: _*), _.clear()) + } + + test("bulk adds") { + + def testArrayEquality(referenceResult: Seq[Long], command: RoaringBitmapArray => Unit): Unit = { + val testBitmap = RoaringBitmapArray() + command(testBitmap) + assert(testBitmap.toArray.toSeq === referenceResult) + } + + val bitmap = RoaringBitmapArray(1L, 5L, CONTAINER_BOUNDARY, BITMAP_BOUNDARY) + assert(bitmap.toArray.toSeq === Seq(1L, 5L, CONTAINER_BOUNDARY, BITMAP_BOUNDARY)) + + testArrayEquality( + referenceResult = Seq(1L, 5L, CONTAINER_BOUNDARY, BITMAP_BOUNDARY), + command = _.addAll(1L, 5L, CONTAINER_BOUNDARY, BITMAP_BOUNDARY)) + + testArrayEquality( + referenceResult = (CONTAINER_BOUNDARY - 5L) to (CONTAINER_BOUNDARY + 5L), + command = _.addRange((CONTAINER_BOUNDARY - 5L) to (CONTAINER_BOUNDARY + 5L))) + + testArrayEquality( + referenceResult = (CONTAINER_BOUNDARY - 5L) to (CONTAINER_BOUNDARY + 5L) by 3L, + command = _.addRange((CONTAINER_BOUNDARY - 5L) to (CONTAINER_BOUNDARY + 5L) by 3L)) + + // Int ranges call a different method. 
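    // A hedged note on why: (x.toInt to y.toInt) is a scala Range of Int, while the Long
    // ranges above are NumericRange[Long], so RoaringBitmapArray is assumed to expose a
    // separate addRange overload for each; the expected results are identical either way.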
+ testArrayEquality( + referenceResult = (CONTAINER_BOUNDARY - 5L) to (CONTAINER_BOUNDARY + 5L), + command = _.addRange((CONTAINER_BOUNDARY - 5L).toInt to (CONTAINER_BOUNDARY + 5L).toInt)) + + testArrayEquality( + referenceResult = (CONTAINER_BOUNDARY - 5L) to (CONTAINER_BOUNDARY + 5L) by 3L, + command = _.addRange((CONTAINER_BOUNDARY - 5L).toInt to (CONTAINER_BOUNDARY + 5L).toInt by 3)) + + testArrayEquality( + referenceResult = (BITMAP_BOUNDARY - 5L) to BITMAP_BOUNDARY, + command = _.addRange((BITMAP_BOUNDARY - 5L) to BITMAP_BOUNDARY)) + + testArrayEquality( + referenceResult = (BITMAP_BOUNDARY - 5L) to (BITMAP_BOUNDARY + 5L), + command = _.addRange((BITMAP_BOUNDARY - 5L) to (BITMAP_BOUNDARY + 5L))) + + testArrayEquality( + referenceResult = BITMAP_BOUNDARY to (BITMAP_BOUNDARY + 5L), + command = _.addRange(BITMAP_BOUNDARY to (BITMAP_BOUNDARY + 5L))) + } + + test("large cardinality") { + val bitmap = RoaringBitmapArray() + // We can't produce ranges in Scala whose lengths would be greater than Int.MaxValue + // so we add them in stages of Int.MaxValue / 2 instead. + for (index <- 0 until 6) { + val start = index.toLong * Int.MaxValue.toLong / 2L + val end = (index.toLong + 1L) * Int.MaxValue.toLong / 2L + bitmap.addRange(start until end) + } + assert(bitmap.cardinality === (3L * Int.MaxValue.toLong)) + for (index <- 0 until 6) { + val start = index.toLong * Int.MaxValue.toLong / 2L + val end = (index.toLong + 1L) * Int.MaxValue.toLong / 2L + val stride = 1023 + for (pos <- start until end by stride) { + assert(bitmap.contains(pos)) + } + } + assert(!bitmap.contains(3L * Int.MaxValue.toLong)) + assert(!bitmap.contains(3L * Int.MaxValue.toLong + 42L)) + } + + test("first/last") { + { + val bitmap = RoaringBitmapArray() + assert(bitmap.first.isEmpty) + assert(bitmap.last.isEmpty) + } + // Single value bitmaps. + val valuesOfInterest = Seq(0L, 1L, 64L, CONTAINER_BOUNDARY, BITMAP_BOUNDARY, BITMAP2_NUMBER) + for (v <- valuesOfInterest) { + val bitmap = RoaringBitmapArray(v) + assert(bitmap.first === Some(v)) + assert(bitmap.last === Some(v)) + } + // Two value bitmaps. + for { + start <- valuesOfInterest + end <- valuesOfInterest + if start < end + } { + val bitmap = RoaringBitmapArray(start, end) + assert(bitmap.first === Some(start)) + assert(bitmap.last === Some(end)) + } + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/RowIndexMarkingFiltersSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/RowIndexMarkingFiltersSuite.scala new file mode 100644 index 00000000000..fad1b12d254 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/RowIndexMarkingFiltersSuite.scala @@ -0,0 +1,160 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.deletionvectors + +import org.apache.spark.sql.delta.RowIndexFilter +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor +import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor._ +import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore +import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore._ +import org.apache.spark.sql.delta.util.PathWithFileSystem +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.execution.vectorized.{OnHeapColumnVector, WritableColumnVector} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.ByteType +import org.apache.spark.util.Utils + +class RowIndexMarkingFiltersSuite extends QueryTest with SharedSparkSession { + + test("empty deletion vector (drop filter)") { + val rowIndexFilter = DropMarkedRowsFilter.createInstance( + DeletionVectorDescriptor.EMPTY, + newHadoopConf, + tablePath = None) + + assert(getMarked(rowIndexFilter, start = 0, end = 20) === Seq.empty) + assert(getMarked(rowIndexFilter, start = 20, end = 200) === Seq.empty) + assert(getMarked(rowIndexFilter, start = 200, end = 2000) === Seq.empty) + } + + test("empty deletion vector (keep filter)") { + val rowIndexFilter = KeepMarkedRowsFilter.createInstance( + DeletionVectorDescriptor.EMPTY, + newHadoopConf, + tablePath = None) + + assert(getMarked(rowIndexFilter, start = 0, end = 20) === 0.until(20)) + assert(getMarked(rowIndexFilter, start = 20, end = 200) === 20.until(200)) + assert(getMarked(rowIndexFilter, start = 200, end = 2000) === 200.until(2000)) + } + + private val filtersToBeTested = + Seq((DropMarkedRowsFilter, "drop"), (KeepMarkedRowsFilter, "keep")) + + for { + (filterType, filterName) <- filtersToBeTested + isInline <- BOOLEAN_DOMAIN + } { + test(s"deletion vector single row marked (isInline=$isInline) ($filterName filter)") { + withTempDir { tableDir => + val tablePath = unescapedStringToPath(tableDir.toString) + val dv = createDV(isInline, tablePath, 25) + + val rowIndexFilter = filterType.createInstance(dv, newHadoopConf, Some(tablePath)) + + def correctValues(range: Seq[Long]): Seq[Long] = filterName match { + case "drop" => range.filter(_ == 25) + case "keep" => range.filterNot(_ == 25) + case _ => throw new RuntimeException("unreachable code reached") + } + + for ((start, end) <- Seq((0, 20), (20, 35), (35, 325))) { + val actual = getMarked(rowIndexFilter, start, end) + val correct = correctValues(start.toLong.until(end)) + assert(actual === correct) + } + } + } + } + + for { + (filterType, filterName) <- filtersToBeTested + isInline <- BOOLEAN_DOMAIN + } { + test(s"deletion vector with multiple rows marked (isInline=$isInline) ($filterName filter)") { + withTempDir { tableDir => + val tablePath = unescapedStringToPath(tableDir.toString) + val markedRows = Seq[Long](0, 25, 35, 2000, 50000) + val dv = createDV(isInline, tablePath, markedRows: _*) + + val rowIndexFilter = filterType.createInstance(dv, newHadoopConf, Some(tablePath)) + + def correctValues(range: Seq[Long]): Seq[Long] = filterName match { + case "drop" => range.filter(markedRows.contains(_)) + case "keep" => range.filterNot(markedRows.contains(_)) + case _ => throw new RuntimeException("unreachable code reached") + } + + for ((start, end) <- Seq( + (0, 20), (20, 35), (35, 325), (325, 1000), (1000, 60000), (60000, 800000))) { + val actual 
= getMarked(rowIndexFilter, start, end) + val correct = correctValues(start.toLong.until(end)) + assert(actual === correct) + } + } + } + } + + private def newBatch(capacity: Int): WritableColumnVector = + new OnHeapColumnVector(capacity, ByteType) + + protected def newHadoopConf: Configuration = { + // scalastyle:off deltahadoopconfiguration + spark.sessionState.newHadoopConf() + // scalastyle:on deltahadoopconfiguration + } + + /** + * Helper method that creates DV with the given deleted row ids and returns + * a [[DeletionVectorDescriptor]]. DV created can be an in-line or on disk + */ + protected def createDV( + isInline: Boolean, tablePath: Path, markedRows: Long*): DeletionVectorDescriptor = { + val bitmap = RoaringBitmapArray(markedRows: _*) + val serializedBitmap = bitmap.serializeAsByteArray(RoaringBitmapArrayFormat.Portable) + val cardinality = markedRows.size + if (isInline) { + inlineInLog(serializedBitmap, cardinality) + } else { + val tableWithFS = PathWithFileSystem.withConf(tablePath, newHadoopConf).makeQualified() + val dvPath = dvStore.generateUniqueNameInTable(tableWithFS) + val dvRange = Utils.tryWithResource(dvStore.createWriter(dvPath)) { writer => + writer.write(serializedBitmap) + } + onDiskWithAbsolutePath( + pathToEscapedString(dvPath.path), dvRange.length, cardinality, Some(dvRange.offset)) + } + } + + /** Evaluate the given row index filter instance and return sequence of marked rows indexes */ + protected def getMarked(rowIndexFilter: RowIndexFilter, start: Long, end: Long): Seq[Long] = { + val batchSize = (end - start + 1).toInt + val batch = newBatch(batchSize) + rowIndexFilter.materializeIntoVector(start, end, batch) + batch.getBytes(0, batchSize).toSeq + .zip(Seq.range(start, end)) + .filter(_._1 == RowIndexFilter.DROP_ROW_VALUE) // filter out marked rows + .map(_._2) // select only the row id + .toSeq + } + + lazy val dvStore: DeletionVectorStore = DeletionVectorStore.createInstance(newHadoopConf) +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/expressions/HilbertIndexSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/HilbertIndexSuite.scala new file mode 100644 index 00000000000..a377301e5b3 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/HilbertIndexSuite.scala @@ -0,0 +1,200 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.expressions + +import java.util + +import org.scalatest.Tag +import org.apache.spark.SparkFunSuite + +class HilbertIndexSuite extends SparkFunSuite { + + /** + * Represents a test case. Each n-k pair will verify the continuity of the mapping, + * and the reversibility of it. 
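 * Continuity here means consecutive d-keys map to n-points at Manhattan distance 1, and
 * reversibility means the point-to-key mapping inverts the key-to-point one. Each pair
 * enumerates all 2^(n*k) keys, so the values below are presumably chosen to keep that
 * count small: TestCase(2, 10), TestCase(4, 5) and TestCase(5, 4) each walk
 * 2^20 = 1,048,576 keys, while TestCase(3, 6) and TestCase(6, 3) walk 2^18 = 262,144.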
+ * @param n The number of dimensions + * @param k The number of bits in each dimension + */ + case class TestCase(n: Int, k: Int) + val testCases = Seq( + TestCase(2, 10), + TestCase(3, 6), + TestCase(4, 5), + TestCase(5, 4), + TestCase(6, 3) + ) + + def gridTest[A](testNamePrefix: String, testTags: Tag*)(params: Seq[A])( + testFun: A => Unit): Unit = { + for (param <- params) { + test(testNamePrefix + s" ($param)", testTags: _*)(testFun(param)) + } + } + + gridTest("HilbertStates caches states")(2 to 9) { n => + val start = System.nanoTime() + HilbertStates.getStateList(n) + val end = System.nanoTime() + + HilbertStates.getStateList(n) + val end2 = System.nanoTime() + assert(end2 - end < end - start) + } + + gridTest("Hilbert Mapping is continuous (long keys)")(testCases) { case TestCase(n, k) => + val generator = HilbertIndex.getStateGenerator(n) + + val stateList = generator.generateStateList() + + val states = stateList.getDKeyToNPointStateMap + + val maxDKeys = 1L << (k * n) + var d = 0 + var lastPoint = new Array[Int](n) + while (d < maxDKeys) { + val point = states.translateDKeyToNPoint(d, k) + if (d != 0) { + assert(HilbertUtils.manhattanDist(lastPoint, point) == 1) + } + + lastPoint = point + d += 1 + } + + } + + gridTest("Hilbert Mapping is 1 to 1 (long keys)")(testCases) { case TestCase(n, k) => + val generator = HilbertIndex.getStateGenerator(n) + val stateList = generator.generateStateList() + + val d2p = stateList.getDKeyToNPointStateMap + val p2d = stateList.getNPointToDKeyStateMap + + val maxDKeys = 1L << (k * n) + var d = 0 + while (d < maxDKeys) { + val point = d2p.translateDKeyToNPoint(d, k) + val d2 = p2d.translateNPointToDKey(point, k) + assert(d == d2) + d += 1 + } + } + + gridTest("Hilbert Mapping is continuous (array keys)")(testCases) { case TestCase(n, k) => + val generator = HilbertIndex.getStateGenerator(n) + + val stateList = generator.generateStateList() + + val states = stateList.getDKeyToNPointStateMap + + val maxDKeys = 1L << (k * n) + val d = new Array[Byte](((k * n) / 8) + 1) + var lastPoint = new Array[Int](n) + var i = 0 + while (i < maxDKeys) { + val point = states.translateDKeyArrayToNPoint(d, k) + if (i != 0) { + assert(HilbertUtils.manhattanDist(lastPoint, point) == 1, + s"$i ${d.toSeq.map(_.toBinaryString.takeRight(8))} ${lastPoint.toSeq} to ${point.toSeq}") + } + + lastPoint = point + i += 1 + HilbertUtils.addOne(d) + } + + } + + gridTest("Hilbert Mapping is 1 to 1 (array keys)")(testCases) { case TestCase(n, k) => + val generator = HilbertIndex.getStateGenerator(n) + val stateList = generator.generateStateList() + + val d2p = stateList.getDKeyToNPointStateMap + val p2d = stateList.getNPointToDKeyStateMap + + val maxDKeys = 1L << (k * n) + val d = new Array[Byte](((k * n) / 8) + 1) + var i = 0 + while (i < maxDKeys) { + val point = d2p.translateDKeyArrayToNPoint(d, k) + val d2 = p2d.translateNPointToDKeyArray(point, k) + assert(util.Arrays.equals(d, d2), s"$i ${d.toSeq}, ${d2.toSeq}") + i += 1 + HilbertUtils.addOne(d) + } + } + + gridTest("continuous and 1 to 1 for all spaces")((2 to 9).map(n => TestCase(n, 15 - n))) { + case TestCase(n, k) => + val generator = HilbertIndex.getStateGenerator(n) + val stateList = generator.generateStateList() + + val d2p = stateList.getDKeyToNPointStateMap + val p2d = stateList.getNPointToDKeyStateMap + + val numBits = k * n + val numBytes = (numBits + 7) / 8 + + // test 1000 contiguous 1000 point blocks to make sure the mapping is continuous and one to one + + val maxDKeys = 1L << (k * n) + val step = maxDKeys / 
1000 + var x = 0L + for (_ <- 0 until 1000) { + var dLong = x + val bigIntArray = BigInt(dLong).toByteArray + val dArray = new Array[Byte](numBytes) + + System.arraycopy( + bigIntArray, + math.max(0, bigIntArray.length - dArray.length), + dArray, + math.max(0, dArray.length - bigIntArray.length), + math.min(bigIntArray.length, dArray.length) + ) + + var lastPoint: Array[Int] = null + + for (_ <- 0 until 1000) { + val pArray = d2p.translateDKeyArrayToNPoint(dArray, k) + val pLong = d2p.translateDKeyToNPoint(dLong, k) + assert(util.Arrays.equals(pArray, pLong), s"points should be the same at $dLong") + + if (lastPoint != null) { + assert(HilbertUtils.manhattanDist(lastPoint, pLong) == 1, + s"distance between point and last point should be the same at $dLong") + } + + val dArray2 = p2d.translateNPointToDKeyArray(pArray, k) + val dLong2 = p2d.translateNPointToDKey(pLong, k) + + assert(dLong == dLong2, s"reversing the points should map correctly at $dLong != $dLong2") + + assert(util.Arrays.equals(dArray, dArray2), + s"reversing the points should map correctly at $dLong") + + lastPoint = pLong + + dLong += 1 + HilbertUtils.addOne(dArray) + } + + x += step + } + + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/expressions/HilbertUtilsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/HilbertUtilsSuite.scala new file mode 100644 index 00000000000..342af67d338 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/HilbertUtilsSuite.scala @@ -0,0 +1,129 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.expressions + +import java.util + +import org.apache.spark.sql.delta.expressions.HilbertUtils.HilbertMatrix + +import org.apache.spark.SparkFunSuite + +class HilbertUtilsSuite extends SparkFunSuite { + + test("circularLeftShift") { + assert( + (0 until (1 << 10) by 7).forall(i => HilbertUtils.circularLeftShift(10, i, 0) == i), + "Shift by 0 should be a no op" + ) + assert( + (0 until (1 << 10) by 7).forall(i => HilbertUtils.circularLeftShift(10, i, 10) == i), + "Shift by n should be a no op" + ) + // 0111 (<< 2) => 1101 + assert( + HilbertUtils.circularLeftShift(4, 7, 2) == 13, + "handle wrapping" + ) + assert( + (0 until (1 << 5)).forall(HilbertUtils.circularLeftShift(5, _, 5) <= (1 << 5)), + "always mask values based on n" + ) + } + + test("circularRightShift") { + assert( + (0 until (1 << 10) by 7).forall(i => HilbertUtils.circularRightShift(10, i, 0) == i), + "Shift by 0 should be a no op" + ) + assert( + (0 until (1 << 10) by 7).forall(i => HilbertUtils.circularRightShift(10, i, 10) == i), + "Shift by n should be a no op" + ) + // 0111 (>> 2) => 1101 + assert( + HilbertUtils.circularRightShift(4, 7, 2) == 13, + "handle wrapping" + ) + assert( + (0 until (1 << 5)).forall(HilbertUtils.circularRightShift(5, _, 5) <= (1 << 5)), + "always mask values based on n" + ) + } + + test("getSetColumn should return the column that is set") { + (0 until 16) foreach { i => + assert(HilbertUtils.getSetColumn(16, 1 << i) == 16 - 1 - i) + } + } + + test("HilbertMatrix makes sense") { + val identityMatrix = HilbertMatrix.identity(10) + (0 until (1 << 10) by 7) foreach { i => + assert(identityMatrix.transform(i) == i, s"$i transformed by the identity should be $i") + } + + identityMatrix.multiply(HilbertMatrix.identity(10)) == identityMatrix + + val shift5 = HilbertMatrix(10, 0, 5) + assert(shift5.multiply(shift5) == identityMatrix, "shift by 5 twice should equal identity") + } + + test("HilbertUtils.getBits") { + assert(HilbertUtils.getBits(Array(0, 0, 1), 22, 2) == 1) + val array = Array[Byte](0, 0, -1, 0) + assert(HilbertUtils.getBits(array, 16, 4) == 15) + assert(HilbertUtils.getBits(array, 18, 3) == 7) + assert(HilbertUtils.getBits(array, 23, 1) == 1) + assert(HilbertUtils.getBits(array, 23, 2) == 2) + assert(HilbertUtils.getBits(array, 23, 8) == 128) + assert(HilbertUtils.getBits(array, 16, 3) == 7) + assert(HilbertUtils.getBits(array, 16, 2) == 3) + assert(HilbertUtils.getBits(array, 16, 1) == 1) + assert(HilbertUtils.getBits(array, 15, 2) == 1) + assert(HilbertUtils.getBits(array, 15, 1) == 0) + assert(HilbertUtils.getBits(array, 12, 8) == 15) + assert(HilbertUtils.getBits(array, 12, 12) == 255) + assert(HilbertUtils.getBits(array, 12, 13) == (255 << 1)) + + assert(HilbertUtils.getBits(Array(0, 1, 0), 6, 6) == 0) + assert(HilbertUtils.getBits(Array(0, 1, 0), 12, 6) == 4) + assert(HilbertUtils.getBits(Array(0, 1, 0), 18, 6) == 0) + } + + def check(received: Array[Byte], expected: Array[Byte]): Unit = { + assert(util.Arrays.equals(expected, received), + s"${expected.toSeq.map(_.toBinaryString.takeRight(8))} " + + s"${received.toSeq.map(_.toBinaryString.takeRight(8))}") + } + + test("HilbertUtils.setBits") { + check(HilbertUtils.setBits(Array(0, 0, 0), 7, 8, 4), Array(1, 0, 0)) + check(HilbertUtils.setBits(Array(0, 0, 0), 7, 12, 4), Array(1, (1.toByte << 7).toByte, 0)) + check(HilbertUtils.setBits(Array(8, 0, 5), 7, 12, 4), Array(9, (1.toByte << 7).toByte, 5)) + check(HilbertUtils.setBits(Array(8, 0, 2), 7, -1, 12), + Array(9, -1, ((7.toByte << 5).toByte | 
2).toByte)) + check(HilbertUtils.setBits(Array(8, 14, 2), 15, 1, 1), Array(8, 15, 2)) + } + + test("addOne") { + check(HilbertUtils.addOne(Array(0, 0, 0)), Array(0, 0, 1)) + check(HilbertUtils.addOne(Array(0, 0, -1)), Array(0, 1, 0)) + check(HilbertUtils.addOne(Array(0, 0, -2)), Array(0, 0, -1)) + check(HilbertUtils.addOne(Array(0, -1, -1)), Array(1, 0, 0)) + check(HilbertUtils.addOne(Array(-1, -1, -1)), Array(0, 0, 0)) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/expressions/InterleaveBitsBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/InterleaveBitsBenchmark.scala new file mode 100644 index 00000000000..7861bc03321 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/InterleaveBitsBenchmark.scala @@ -0,0 +1,122 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.expressions + +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.Expression + +/** + * Benchmark to measure performance for interleave bits. 
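 * Interleaving takes one bit from each input column in turn, most significant bit first,
 * with the first column's bit in the more significant slot (as the two-column case in
 * InterleaveBitsSuite suggests), so k int columns yield a 4 * k byte key. A small worked
 * example on 4-bit values:
 * {{{
 *   a = 1100, b = 1010  =>  interleave(a, b) = 11 10 01 00 = 0xE4
 * }}}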
+ * To run this benchmark: + * {{{ + * build/sbt "core/test:runMain org.apache.spark.sql.delta.expressions.InterleaveBitsBenchmark" + * }}} + */ +object InterleaveBitsBenchmark extends BenchmarkBase { + + private val numRows = 1 * 1000 * 1000 + + private def seqInt(numColumns: Int): Seq[Array[Int]] = { + (1 to numRows).map { l => + val arr = new Array[Int](numColumns) + (0 until numColumns).foreach(col => arr(col) = l) + arr + } + } + + private def randomInt(numColumns: Int): Seq[Array[Int]] = { + (1 to numRows).map { l => + val arr = new Array[Int](numColumns) + (0 until numColumns).foreach(col => arr(col) = scala.util.Random.nextInt()) + arr + } + } + + private def createExpression(numColumns: Int): Expression = { + val inputs = (0 until numColumns).map { i => + $"c_$i".int.at(i) + } + InterleaveBits(inputs) + } + + protected def create_row(values: Any*): InternalRow = { + InternalRow.fromSeq(values.map(CatalystTypeConverters.convertToCatalyst)) + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val benchmark = + new Benchmark(s"$numRows rows interleave bits benchmark", numRows, output = output) + benchmark.addCase("sequence - 1 int columns benchmark", 3) { _ => + val interleaveBits = createExpression(1) + seqInt(1).foreach { input => + interleaveBits.eval(create_row(input: _*)) + } + } + + benchmark.addCase("sequence - 2 int columns benchmark", 3) { _ => + val interleaveBits = createExpression(2) + seqInt(2).foreach { input => + interleaveBits.eval(create_row(input: _*)) + } + } + + benchmark.addCase("sequence - 3 int columns benchmark", 3) { _ => + val interleaveBits = createExpression(3) + seqInt(3).foreach { input => + interleaveBits.eval(create_row(input: _*)) + } + } + + benchmark.addCase("sequence - 4 int columns benchmark", 3) { _ => + val interleaveBits = createExpression(4) + seqInt(4).foreach { input => + interleaveBits.eval(create_row(input: _*)) + } + } + + benchmark.addCase("random - 1 int columns benchmark", 3) { _ => + val interleaveBits = createExpression(1) + randomInt(1).foreach { input => + interleaveBits.eval(create_row(input: _*)) + } + } + + benchmark.addCase("random - 2 int columns benchmark", 3) { _ => + val interleaveBits = createExpression(2) + randomInt(2).foreach { input => + interleaveBits.eval(create_row(input: _*)) + } + } + + benchmark.addCase("random - 3 int columns benchmark", 3) { _ => + val interleaveBits = createExpression(3) + randomInt(3).foreach { input => + interleaveBits.eval(create_row(input: _*)) + } + } + + benchmark.addCase(" random - 4 int columns benchmark", 3) { _ => + val interleaveBits = createExpression(4) + randomInt(4).foreach { input => + interleaveBits.eval(create_row(input: _*)) + } + } + benchmark.run() + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/expressions/InterleaveBitsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/InterleaveBitsSuite.scala new file mode 100644 index 00000000000..40d4abdd2ec --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/InterleaveBitsSuite.scala @@ -0,0 +1,184 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.expressions + +import java.nio.ByteBuffer + +import scala.util.Random + +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckSuccess +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionEvalHelper, Literal} +import org.apache.spark.sql.types.IntegerType + + +class InterleaveBitsSuite extends SparkFunSuite with ExpressionEvalHelper { + + def intToBinary(x: Int): Array[Byte] = { + ByteBuffer.allocate(4).putInt(x).array() + } + + def checkInterleaving(input: Seq[Expression], expectedOutput: Any): Unit = { + Seq("true", "false").foreach { flag => + withSQLConf(DeltaSQLConf.FAST_INTERLEAVE_BITS_ENABLED.key -> flag) { + checkEvaluation(InterleaveBits(input), expectedOutput) + } + } + } + + test("0 inputs") { + checkInterleaving(Seq.empty[Expression], Array.empty[Byte]) + } + + test("1 input") { + for { i <- 1.to(10) } { + val r = Random.nextInt() + checkInterleaving(Seq(Literal(r)), intToBinary(r)) + } + } + + test("2 inputs") { + checkInterleaving( + input = Seq( + 0x000ff0ff, + 0xfff00f00 + ).map(Literal(_)), + expectedOutput = + Array(0x55, 0x55, 0x55, 0xaa, 0xaa, 0x55, 0xaa, 0xaa) + .map(_.toByte)) + } + + test("3 inputs") { + checkInterleaving( + input = Seq( + 0xff00, + 0x00ff, + 0x0000 + ).map(Literal(_)), + expectedOutput = + Array(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x92, 0x49, 0x24, 0x49, 0x24, 0x92) + .map(_.toByte)) + } + + test("9 inputs") { + val result = Array( + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xffffff92, + 0x00000049, + 0x00000024, + 0xffffff92, + 0x00000049, + 0x00000024, + 0xffffff92, + 0x00000049, + 0x00000024, + 0x00000049, + 0x00000024, + 0xffffff92, + 0x00000049, + 0x00000024, + 0xffffff92, + 0x00000049, + 0x00000024, + 0xffffff92 + ) + checkInterleaving( + input = Seq( + 0xff00, + 0x00ff, + 0x0000, + 0xff00, + 0x00ff, + 0x0000, + 0xff00, + 0x00ff, + 0x0000 + ).map(Literal(_)), + expectedOutput = result.map(_.toByte) + ) + } + + test("nulls") { + val ones = 0xffffffff + checkInterleaving( + Seq(Literal(ones), Literal.create(null, IntegerType)), Array.fill(8)(0xaa.toByte)) + checkInterleaving( + Seq(Literal.create(null, IntegerType), Literal(ones)), Array.fill(8)(0x55.toByte)) + + for { i <- 0.to(6) } { + checkInterleaving( + Seq.fill(i)(Literal.create(null, IntegerType)), Array.fill(i * 4)(0x00.toByte)) + } + } + + test("consistency") { + for { num_inputs <- 1 to 10 } { + checkConsistencyBetweenInterpretedAndCodegen(InterleaveBits(_), IntegerType, num_inputs) + } + } + + test("supported types") { + // only int for now + InterleaveBits(Seq(Literal(0))).checkInputDataTypes() == TypeCheckSuccess + // nothing else + InterleaveBits(Seq(Literal(false))).checkInputDataTypes() != TypeCheckSuccess + InterleaveBits(Seq(Literal(0.toLong))).checkInputDataTypes() != 
TypeCheckSuccess + InterleaveBits(Seq(Literal(0.toDouble))).checkInputDataTypes() != TypeCheckSuccess + InterleaveBits(Seq(Literal(0.toString))).checkInputDataTypes() != TypeCheckSuccess + } + + test("randomization interleave bits") { + val numIters = sys.env + .get("NUMBER_OF_ITERATIONS_TO_INTERLEAVE_BITS") + .map(_.toInt) + .getOrElse(1000000) + var i = 0 + while (i < numIters) { + // generate n columns where 1 <= n <= 8 + val numCols = Random.nextInt(8) + 1 + val input = new Array[Int](numCols) + var j = 0 + while (j < numCols) { + input(j) = Random.nextInt() + j += 1 + } + val r1 = InterleaveBits.interleaveBits(input, true) + val r2 = InterleaveBits.interleaveBits(input, false) + assert(java.util.Arrays.equals(r1, r2), s"input: ${input.mkString(",")}") + i += 1 + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/expressions/RangePartitionIdSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/RangePartitionIdSuite.scala new file mode 100644 index 00000000000..d40c4cd2d93 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/RangePartitionIdSuite.scala @@ -0,0 +1,83 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.expressions + +import scala.reflect.ClassTag + +import org.apache.spark.{Partitioner, RangePartitioner, SparkFunSuite, SparkThrowable} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.test.SharedSparkSession + + +class RangePartitionIdSuite + extends SparkFunSuite with ExpressionEvalHelper with SharedSparkSession { + + def getPartitioner[T : Ordering : ClassTag](data: Seq[T], partitions: Int): Partitioner = { + implicit val ordering = new Ordering[GenericInternalRow] { + override def compare(x: GenericInternalRow, y: GenericInternalRow): Int = { + def getValue0AsT(row: GenericInternalRow): T = row.values.head.asInstanceOf[T] + val orderingT = implicitly[Ordering[T]] + orderingT.compare(getValue0AsT(x), getValue0AsT(y)) + } + } + + val rdd = + spark.sparkContext.parallelize(data).filter(_ != null) + .map(key => (new GenericInternalRow(Array[Any](key)), null)) + + new RangePartitioner(partitions, rdd) + } + + def testRangePartitionerExpr[T : Ordering : ClassTag]( + data: Seq[T], partitions: Int, childExpr: Expression, expected: Any): Unit = { + val rangePartitioner = getPartitioner(data, partitions) + checkEvaluation(PartitionerExpr(childExpr, rangePartitioner), expected) + } + + test("RangePartitionerExpr: test basic") { + val data = 0.until(12) + for { numPartitions <- Seq(2, 3, 4, 6) } { + val rangePartitioner = getPartitioner(data, numPartitions) + data.foreach { i => + val expected = i / (data.size / numPartitions) + checkEvaluation(PartitionerExpr(Literal(i), rangePartitioner), expected) + } + } + } + + test("RangePartitionerExpr: null values") { + testRangePartitionerExpr( + data = 0.until(10), + partitions = 2, + childExpr = Literal(null), + expected = 0) + } + + 
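A minimal standalone sketch (hedged) of the relationship the basic test above encodes: with 12 evenly spread keys and p partitions, the RangePartitioner's bounds fall roughly every 12 / p keys, so key i is expected to land in partition i / (12 / p). Only RangePartitioner and getPartition are the real Spark API here; the helper name and its use are illustrative.

  import org.apache.spark.{RangePartitioner, SparkContext}

  // Assumes an active SparkContext; bounds are derived by sampling, so with this tiny,
  // uniform key set they are expected to fall on 3-key boundaries.
  def rangePartitionSketch(sc: SparkContext): Seq[Int] = {
    val rdd = sc.parallelize(0 until 12).map(i => (i, ()))
    val partitioner = new RangePartitioner(4, rdd)
    (0 until 12).map(partitioner.getPartition) // expected: 0,0,0, 1,1,1, 2,2,2, 3,3,3
  }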
test("RangePartitionerExpr: null data") { + testRangePartitionerExpr( + data = 0.until(10).map(_ => null), + partitions = 2, + childExpr = Literal("asd"), + expected = 0) + } + + test("RangePartitionId: unevaluable") { + intercept[Exception with SparkThrowable] { + evaluateWithoutCodegen(RangePartitionId(Literal(2), 10)) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/expressions/aggregation/BitmapAggregatorSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/aggregation/BitmapAggregatorSuite.scala new file mode 100644 index 00000000000..5dedd3f53c3 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/expressions/aggregation/BitmapAggregatorSuite.scala @@ -0,0 +1,159 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.expressions.aggregation + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.expressions.aggregation.BitmapAggregator +import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArray, RoaringBitmapArrayFormat} + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.BoundReference +import org.apache.spark.sql.types.LongType + +class BitmapAggregatorSuite extends SparkFunSuite { + + import BitmapAggregatorSuite._ + + private val childExpression = BoundReference(0, LongType, nullable = true) + + /** Creates a bitmap aggregate expression, using the child expression defined above. 
*/ + private def newBitmapAgg(format: RoaringBitmapArrayFormat.Value): BitmapAggregator = + new BitmapAggregator(childExpression, format) + + for (serializationFormat <- RoaringBitmapArrayFormat.values) + test(s"Bitmap serialization - $serializationFormat") { + val bitmapSet = fillSetWithAggregator(newBitmapAgg(serializationFormat), Array(1L, 2L, 3L, 4L)) + val serialized = bitmapSet.serializeAsByteArray(serializationFormat) + val deserialized = RoaringBitmapArray.readFrom(serialized) + assert(bitmapSet === deserialized) + assert(bitmapSet.## === deserialized.##) + } + + for (serializationFormat <- RoaringBitmapArrayFormat.values) + test(s"Aggregator serialization - $serializationFormat") { + val aggregator = newBitmapAgg(serializationFormat) + val bitmapSet = fillSetWithAggregator(aggregator, Array(1L, 2L, 3L, 4L)) + val deserialized = aggregator.deserialize(aggregator.serialize(bitmapSet)) + assert(bitmapSet === deserialized) + assert(bitmapSet.## === deserialized.##) + } + + for (serializationFormat <- RoaringBitmapArrayFormat.values) + test(s"Bitmap Aggregator merge no duplicates - $serializationFormat") { + val (dataset1, dataset2) = createDatasetsNoDuplicates + + val finalResult = + fillSetWithAggregatorAndMerge( + newBitmapAgg(serializationFormat), + dataset1, + dataset2) + + verifyContainsAll(finalResult, dataset1) + verifyContainsAll(finalResult, dataset2) + } + + for (serializationFormat <- RoaringBitmapArrayFormat.values) + test(s"Bitmap Aggregator with duplicates - $serializationFormat") { + val (dataset1, dataset2) = createDatasetsWithDuplicates + + val finalResult = + fillSetWithAggregatorAndMerge( + newBitmapAgg(serializationFormat), + dataset1, + dataset2) + + verifyContainsAll(finalResult, dataset1) + verifyContainsAll(finalResult, dataset2) + } + + private lazy val createDatasetsNoDuplicates: (List[Long], List[Long]) = { + val primeSet = primes(DATASET_SIZE).toSet + val notPrime = (0 until DATASET_SIZE).filterNot(primeSet.contains).toList + (primeSet.map(_.toLong).toList, notPrime.map(_.toLong)) + } + + private def createDatasetsWithDuplicates: (List[Long], List[Long]) = { + var (primes, notPrimes) = createDatasetsNoDuplicates + // duplicate all powers of 3 (powers of 2 might align with container boundaries) + notPrimes ::= 3L + var value = 3L + while (value < DATASET_SIZE.toLong) { + value *= 3L + primes ::= value + } + (primes, notPrimes) + } + + // List the first primes smaller than `end` + private def primes(end: Int): List[Int] = { + // scalastyle:off + // Basically https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes#Algorithm_and_variants + // but concretely the implementation is adapted from: + // https://medium.com/coding-with-clarity/functional-vs-iterative-prime-numbers-in-scala-7e22447146f0 + // scalastyle:on + val primeIndices = mutable.ArrayBuffer.fill((end + 1) / 2)(true) + + val intSqrt = Math.sqrt(end).toInt + for { + i <- 3 to end by 2 if i <= intSqrt + nonPrime <- i * i to end by 2 * i + } primeIndices.update(nonPrime / 2, false) + + + (for (i <- primeIndices.indices if primeIndices(i)) yield 2 * i + 1).tail.toList + } + + private def fillSetWithAggregatorAndMerge( + aggregator: BitmapAggregator, + dataset1: Seq[Long], + dataset2: Seq[Long]): RoaringBitmapArray = { + val buffer1 = fillSetWithAggregator(aggregator, dataset1) + val buffer2 = fillSetWithAggregator(aggregator, dataset2) + val merged = aggregator.merge(buffer1, buffer2) + val fieldIndex = aggregator.dataType.fieldIndex("bitmap") + val result = aggregator.eval(merged).getBinary(fieldIndex) + 
RoaringBitmapArray.readFrom(result) + } + + private def fillSetWithAggregator( + aggregator: BitmapAggregator, + dataset: Seq[Long]): RoaringBitmapArray = { + val buffer = aggregator.createAggregationBuffer() + for (entry <- dataset) { + val row = InternalRow(entry) + aggregator.update(buffer, row) + } + buffer + } + + private def verifyContainsAll( + aggregator: RoaringBitmapArray, + dataset: Seq[Long]): Unit = { + for (entry <- dataset) { + assert(aggregator.contains(entry), + s"Aggregator did not contain file $entry") + } + } +} + +object BitmapAggregatorSuite { + // Pick something over 64k to make sure we fill a few different bitmap containers + val DATASET_SIZE: Int = 100000 +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/files/TransactionalWriteSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/files/TransactionalWriteSuite.scala new file mode 100644 index 00000000000..6182ff25eb3 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/files/TransactionalWriteSuite.scala @@ -0,0 +1,72 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.files + +import org.apache.spark.sql.delta.DeltaLog +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.functions.column +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{StringType, StructType} + +class TransactionalWriteSuite extends QueryTest with SharedSparkSession with DeltaSQLCommandTest { + + test("writing out an empty dataframe produces no AddFiles") { + withTempDir { dir => + spark.range(100).write.format("delta").save(dir.getCanonicalPath) + + val log = DeltaLog.forTable(spark, dir.getCanonicalPath) + val schema = new StructType().add("id", StringType) + val emptyDf = spark.createDataFrame(spark.sparkContext.emptyRDD[Row], schema) + assert(log.startTransaction().writeFiles(emptyDf).isEmpty) + } + } + + test("write data files to the data subdir") { + withSQLConf(DeltaSQLConf.WRITE_DATA_FILES_TO_SUBDIR.key -> "true") { + def validateDataSubdir(tablePath: String): Unit = { + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, tablePath) + snapshot.allFiles.collect().foreach { f => + assert(f.path.startsWith("data/")) + } + } + + withTempDir { dir => + spark.range(100).toDF("id").write.format("delta").save(dir.getCanonicalPath) + validateDataSubdir(dir.getCanonicalPath) + } + + withTempDir { dir => + spark.range(100).toDF("id").withColumn("id1", column("id")).write.format("delta") + .partitionBy("id").save(dir.getCanonicalPath) + validateDataSubdir(dir.getCanonicalPath) + } + } + + withSQLConf(DeltaSQLConf.WRITE_DATA_FILES_TO_SUBDIR.key -> "false") { + withTempDir { dir => + spark.range(100).toDF("id").write.format("delta").save(dir.getCanonicalPath) + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, 
dir.getCanonicalPath) + snapshot.allFiles.collect().foreach { f => + assert(!f.path.startsWith("data/")) + } + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/metric/IncrementMetricSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/metric/IncrementMetricSuite.scala new file mode 100644 index 00000000000..2d3eb85a969 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/metric/IncrementMetricSuite.scala @@ -0,0 +1,92 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.metric + + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Column, DataFrame, QueryTest} +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.{Expression, GreaterThan, If, Literal} +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.metric.SQLMetrics +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SharedSparkSession + +abstract class IncrementMetricSuiteBase extends QueryTest with SharedSparkSession { + import testImplicits._ + import SQLMetrics._ + + val ROWS_IN_DF = 1000 + + protected override def beforeAll(): Unit = { + super.beforeAll() + spark.range(ROWS_IN_DF).toDF("a") + .withColumn("gb", rand(0).multiply(10).cast("integer")) + .write + .format("parquet") + .mode("overwrite") + .save("test-df") + } + + def testDf: DataFrame = spark.read.format("parquet").load("test-df") + + test("Increment the same metric") { + val metric = createMetric(sparkContext, "metric") + val increment = IncrementMetric(Literal(true), metric) + val groupByKey = IncrementMetric(UnresolvedAttribute("gb"), metric) + val havingIncrement = IncrementMetric( + GreaterThan(UnresolvedAttribute("s"), Literal(10)), metric) + val df = testDf + .filter(new Column(increment)) + .groupBy(new Column(groupByKey).as("gby")) + .agg(sum("a").as("s")) + .filter(new Column(havingIncrement)) + val numGroups = df.collect().size + validatePlan(df.queryExecution.executedPlan) + + assert(metric.value === 2 * ROWS_IN_DF + numGroups) + } + + test("Increment with filter and conditional") { + val trueBranchCount = createMetric(sparkContext, "true") + val falseBranchCount = createMetric(sparkContext, "false") + val incrementTrueBranch = IncrementMetric(Literal(true), trueBranchCount) + val incrementFalseBranch = IncrementMetric(Literal(false), falseBranchCount) + val incrementMetric = createMetric(sparkContext, "increment") + val increment = IncrementMetric(Literal(true), incrementMetric) + val incrementPreFilterMetric = createMetric(sparkContext, "incrementPreFilter") + val incrementPreFilter = IncrementMetric(Literal(true), incrementPreFilterMetric) + val ifCondition: Expression = ('a < Literal(20)).expr + val conditional = If(ifCondition, incrementTrueBranch, incrementFalseBranch) + val df = testDf + .filter(new Column(incrementPreFilter)) + .filter('a < 25) + .filter(new 
Column(increment)) + .filter(new Column(conditional)) + val numRows = df.collect().size + validatePlan(df.queryExecution.executedPlan) + + assert(incrementPreFilterMetric.value === ROWS_IN_DF) + assert(trueBranchCount.value === numRows) + assert(falseBranchCount.value + numRows === incrementMetric.value) + } + + protected def validatePlan(plan: SparkPlan): Unit = {} + +} + +class IncrementMetricSuite extends IncrementMetricSuiteBase {} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/optimize/CompactionTestHelper.scala b/spark/src/test/scala/org/apache/spark/sql/delta/optimize/CompactionTestHelper.scala new file mode 100644 index 00000000000..bf1458ff0bc --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/optimize/CompactionTestHelper.scala @@ -0,0 +1,109 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.optimize + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.DeltaLog +import org.apache.spark.sql.delta.commands.optimize.OptimizeMetrics +import org.apache.spark.sql.delta.hooks.AutoCompact +import org.apache.spark.sql.delta.sources.DeltaSQLConf._ +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.test.SQLTestUtils + +/** + * A trait used by unit tests to trigger compaction over a table. + */ +private[delta] trait CompactionTestHelper extends QueryTest with SQLTestUtils { + + /** + * Compact files under the given `tablePath` using AutoCompaction/OPTIMIZE and + * returns the [[OptimizeMetrics]] + */ + def compactAndGetMetrics(tablePath: String, where: String = ""): OptimizeMetrics + + /** config controlling the min file size required for compaction */ + val minFileSizeConf: String + + /** config controlling the target file size for compaction */ + val maxFileSizeConf: String + + /** Create `numFilePartitions` partitions and each partition has `numFilesPerPartition` files. */ + def createFilesToPartitions( + numFilePartitions: Int, numFilesPerPartition: Int, dir: String) + (implicit spark: SparkSession): Unit = { + val totalNumFiles = numFilePartitions * numFilesPerPartition + spark.range(start = 0, end = totalNumFiles, step = 1, numPartitions = totalNumFiles) + .selectExpr(s"id % $numFilePartitions as c0", "id as c1") + .write + .format("delta") + .partitionBy("c0") + .mode("append") + .save(dir) + } + + /** Create `numFiles` files without any partition. 
*/ + def createFilesWithoutPartitions( + numFiles: Int, dir: String)(implicit spark: SparkSession): Unit = { + spark.range(start = 0, end = numFiles, step = 1, numPartitions = numFiles) + .selectExpr("id as c0", "id as c1", "id as c2") + .write + .format("delta") + .mode("append") + .save(dir) + } +} + +private[delta] trait CompactionTestHelperForOptimize extends CompactionTestHelper { + + override def compactAndGetMetrics(tablePath: String, where: String = ""): OptimizeMetrics = { + import testImplicits._ + val whereClause = if (where != "") s"WHERE $where" else "" + val res = spark.sql(s"OPTIMIZE tahoe.`$tablePath` $whereClause") + val metrics: OptimizeMetrics = res.select($"metrics.*").as[OptimizeMetrics].head() + metrics + } + + override val minFileSizeConf: String = DELTA_OPTIMIZE_MIN_FILE_SIZE.key + + override val maxFileSizeConf: String = DELTA_OPTIMIZE_MAX_FILE_SIZE.key +} + +private[delta] trait CompactionTestHelperForAutoCompaction extends CompactionTestHelper { + + override def compactAndGetMetrics(tablePath: String, where: String = ""): OptimizeMetrics = { + // Set min num files to 2 - so that even if two small files are present in a partition, then + // also they are compacted. + var metrics: Option[OptimizeMetrics] = None + withSQLConf(DELTA_AUTO_COMPACT_MIN_NUM_FILES.key -> "2") { + metrics = + Some( + AutoCompact.compact( + spark, + DeltaLog.forTable(spark, tablePath) + ).head + ) + } + metrics.get + } + + override val minFileSizeConf: String = DELTA_AUTO_COMPACT_MIN_FILE_SIZE.key + + override val maxFileSizeConf: String = DELTA_AUTO_COMPACT_MAX_FILE_SIZE.key +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/optimize/DeltaReorgSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/optimize/DeltaReorgSuite.scala new file mode 100644 index 00000000000..116397cf702 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/optimize/DeltaReorgSuite.scala @@ -0,0 +1,153 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.optimize + +import org.apache.spark.sql.delta.DeletionVectorsTestUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import io.delta.tables.DeltaTable + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.test.SharedSparkSession + +class DeltaReorgSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest + with DeletionVectorsTestUtils { + + import testImplicits._ + + def executePurge(table: String, condition: Option[String] = None): Unit = { + condition match { + case Some(cond) => sql(s"REORG TABLE delta.`$table` WHERE $cond APPLY (PURGE)") + case None => sql(s"REORG TABLE delta.`$table` APPLY (PURGE)") + } + } + + test("Purge DVs will combine small files") { + val targetDf = spark.range(0, 100, 1, numPartitions = 5).toDF + withTempDeltaTable(targetDf) { (_, log) => + val path = log.dataPath.toString + + sql(s"DELETE FROM delta.`$path` WHERE id IN (0, 99)") + assert(log.update().allFiles.filter(_.deletionVector != null).count() === 2) + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_MAX_FILE_SIZE.key -> "1073741824") { // 1gb + executePurge(path) + } + val (addFiles, _) = getFileActionsInLastVersion(log) + assert(addFiles.size === 1, "files should be combined") + assert(addFiles.forall(_.deletionVector === null)) + checkAnswer( + sql(s"SELECT * FROM delta.`$path`"), + (1 to 98).toDF()) + + // Verify commit history and operation metrics + checkOpHistory( + tablePath = path, + expOpParams = Map("applyPurge" -> "true", "predicate" -> "[]"), + numFilesRemoved = 2, + numFilesAdded = 1) + } + } + + test("Purge DVs") { + val targetDf = spark.range(0, 100, 1, numPartitions = 5).toDF() + withTempDeltaTable(targetDf) { (_, log) => + val path = log.dataPath.toString + + sql(s"DELETE FROM delta.`$path` WHERE id IN (0, 99)") + assert(log.update().allFiles.filter(_.deletionVector != null).count() === 2) + + // First purge + executePurge(path) + val (addFiles, _) = getFileActionsInLastVersion(log) + assert(addFiles.size === 1) // two files are combined + assert(addFiles.forall(_.deletionVector === null)) + checkAnswer( + sql(s"SELECT * FROM delta.`$path`"), + (1 to 98).toDF()) + + // Verify commit history and operation metrics + checkOpHistory( + tablePath = path, + expOpParams = Map("applyPurge" -> "true", "predicate" -> "[]"), + numFilesRemoved = 2, + numFilesAdded = 1) + + // Second purge is a noop + val versionBefore = log.update().version + executePurge(path) + val versionAfter = log.update().version + assert(versionBefore === versionAfter) + } + } + + test("Purge a non-DV table is a noop") { + val targetDf = spark.range(0, 100, 1, numPartitions = 5).toDF() + withTempDeltaTable(targetDf, enableDVs = false) { (_, log) => + val versionBefore = log.update().version + executePurge(log.dataPath.toString) + val versionAfter = log.update().version + assert(versionBefore === versionAfter) + } + } + + test("Purge some partitions of a table with DV") { + val targetDf = spark.range(0, 100, 1, numPartitions = 1) + .withColumn("part", col("id") % 4) + .toDF() + withTempDeltaTable(targetDf, partitionBy = Seq("part")) { (_, log) => + val path = log.dataPath + // Delete one row from each partition + sql(s"DELETE FROM delta.`$path` WHERE id IN (48, 49, 50, 51)") + val (addFiles1, _) = getFileActionsInLastVersion(log) + assert(addFiles1.size === 4) + assert(addFiles1.forall(_.deletionVector !== null)) + // PURGE two partitions + 
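// Only the files in partitions 0 and 2 should be rewritten (dropping their DVs); partitions 1 and 3 keep their deletion vectors. +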
sql(s"REORG TABLE delta.`$path` WHERE part IN (0, 2) APPLY (PURGE)") + val (addFiles2, _) = getFileActionsInLastVersion(log) + assert(addFiles2.size === 2) + assert(addFiles2.forall(_.deletionVector === null)) + + // Verify commit history and operation metrics + checkOpHistory( + tablePath = path.toString, + expOpParams = Map("applyPurge" -> "true", "predicate" -> "[\"'part IN (0,2)\"]"), + numFilesRemoved = 2, + numFilesAdded = 2) + } + } + + private def checkOpHistory( + tablePath: String, + expOpParams: Map[String, String], + numFilesRemoved: Long, + numFilesAdded: Long): Unit = { + val (opName, opParams, opMetrics) = DeltaTable.forPath(tablePath) + .history(1) + .select("operation", "operationParameters", "operationMetrics") + .as[(String, Map[String, String], Map[String, String])] + .head() + assert(opName === "REORG") + assert(opParams === expOpParams) + assert(opMetrics("numAddedFiles").toLong === numFilesAdded) + assert(opMetrics("numRemovedFiles").toLong === numFilesRemoved) + // Because each deleted file has a DV associated it which gets rewritten as part of PURGE + assert(opMetrics("numDeletionVectorsRemoved").toLong === numFilesRemoved) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/optimize/OptimizeCompactionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/optimize/OptimizeCompactionSuite.scala new file mode 100644 index 00000000000..63c551326c9 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/optimize/OptimizeCompactionSuite.scala @@ -0,0 +1,663 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.optimize + +import java.io.File + +import scala.collection.JavaConverters._ + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.{DeltaColumnMappingSelectedTestMixin, DeltaSQLCommandTest} +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import io.delta.tables.DeltaTable + +import org.scalatest.concurrent.TimeLimits.failAfter +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.test.SharedSparkSession + +/** + * Base class containing tests for Delta table Optimize (file compaction) + */ +trait OptimizeCompactionSuiteBase extends QueryTest + with SharedSparkSession + with DeletionVectorsTestUtils + with DeltaColumnMappingTestUtils { + + import testImplicits._ + + def executeOptimizeTable(table: String, condition: Option[String] = None) + def executeOptimizePath(path: String, condition: Option[String] = None) + + test("optimize command: with database and table name") { + withTempDir { tempDir => + val dbName = "delta_db" + val tableName = s"$dbName.delta_optimize" + withDatabase(dbName) { + spark.sql(s"create database $dbName") + withTable(tableName) { + appendToDeltaTable(Seq(1, 2, 3).toDF(), tempDir.toString, partitionColumns = None) + appendToDeltaTable(Seq(4, 5, 6).toDF(), tempDir.toString, partitionColumns = None) + + spark.sql(s"create table $tableName using delta location '$tempDir'") + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val versionBeforeOptimize = deltaLog.snapshot.version + executeOptimizeTable(tableName) + + deltaLog.update() + assert(deltaLog.snapshot.version === versionBeforeOptimize + 1) + checkDatasetUnorderly(spark.table(tableName).as[Int], 1, 2, 3, 4, 5, 6) + } + } + } + } + + test("optimize command") { + withTempDir { tempDir => + appendToDeltaTable(Seq(1, 2, 3).toDF(), tempDir.toString, partitionColumns = None) + appendToDeltaTable(Seq(4, 5, 6).toDF(), tempDir.toString, partitionColumns = None) + + def data: DataFrame = spark.read.format("delta").load(tempDir.toString) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val versionBeforeOptimize = deltaLog.snapshot.version + executeOptimizePath(tempDir.getCanonicalPath) + deltaLog.update() + assert(deltaLog.snapshot.version === versionBeforeOptimize + 1) + checkDatasetUnorderly(data.toDF().as[Int], 1, 2, 3, 4, 5, 6) + + // Make sure thread pool is shut down + assert(Thread.getAllStackTraces.keySet.asScala + .filter(_.getName.startsWith("OptimizeJob")).isEmpty) + } + } + + test("optimize command: predicate on non-partition column") { + withTempDir { tempDir => + val path = new File(tempDir, "testTable").getCanonicalPath + val partitionColumns = Some(Seq("id")) + appendToDeltaTable( + Seq(1, 2, 3).toDF("value").withColumn("id", 'value % 2), + path, + partitionColumns) + + val e = intercept[AnalysisException] { + // Should fail when predicate is on a non-partition column + executeOptimizePath(path, Some("value < 4")) + } + assert(e.getMessage.contains("Predicate references non-partition column 'value'. 
" + + "Only the partition columns may be referenced: [id]")) + } + } + + test("optimize command: on partitioned table - all partitions") { + withTempDir { tempDir => + val path = new File(tempDir, "testTable").getCanonicalPath + val partitionColumns = Some(Seq("id")) + appendToDeltaTable( + Seq(1, 2, 3).toDF("value").withColumn("id", 'value % 2), + path, + partitionColumns) + + appendToDeltaTable( + Seq(4, 5, 6).toDF("value").withColumn("id", 'value % 2), + path, + partitionColumns) + + val deltaLogBefore = DeltaLog.forTable(spark, path) + val txnBefore = deltaLogBefore.startTransaction(); + val fileListBefore = txnBefore.filterFiles(); + val versionBefore = deltaLogBefore.snapshot.version + + val id = "id".phy(deltaLogBefore) + + // Expect each partition have more than one file + (0 to 1).foreach(partId => + assert(fileListBefore.count(_.partitionValues === Map(id -> partId.toString)) > 1)) + + executeOptimizePath(path) + + val deltaLogAfter = DeltaLog.forTable(spark, path) + val txnAfter = deltaLogAfter.startTransaction(); + val fileListAfter = txnAfter.filterFiles(); + + (0 to 1).foreach(partId => + assert(fileListAfter.count(_.partitionValues === Map(id -> partId.toString)) === 1)) + + // version is incremented + assert(deltaLogAfter.snapshot.version === versionBefore + 1) + + // data should remain the same after the OPTIMIZE + checkDatasetUnorderly( + spark.read.format("delta").load(path).select("value").as[Long], + (1L to 6L): _*) + } + } + + test( + s"optimize command with DVs") { + withTempDir { tempDir => + val path = tempDir.getAbsolutePath + withSQLConf( + DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.defaultTablePropertyKey -> "true") { + // Create 10 files each with 1000 records + spark.range(start = 0, end = 10000, step = 1, numPartitions = 10) + .toDF("id") + .withColumn(colName = "extra", lit("just a random text to fill up the space.....")) + .write.format("delta").mode("append").save(path) // v0 + + val deltaLog = DeltaLog.forTable(spark, path) + val filesV0 = deltaLog.unsafeVolatileSnapshot.allFiles.collect() + assert(filesV0.size == 10) + + // Default `optimize.maxDeletedRowsRatio` is 0.05. + // Delete slightly more than threshold ration in two files, less in one of the file + val file0 = filesV0(1) + val file1 = filesV0(4) + val file2 = filesV0(8) + deleteRows(deltaLog, file0, approxPhyRows = 1000, ratioOfRowsToDelete = 0.06d) // v1 + deleteRows(deltaLog, file1, approxPhyRows = 1000, ratioOfRowsToDelete = 0.06d) // v2 + deleteRows(deltaLog, file2, approxPhyRows = 1000, ratioOfRowsToDelete = 0.01d) // v3 + + // Add a one small file, so that the file selection is based on both the file size and + // deleted rows ratio + spark.range(start = 1, end = 2, step = 1, numPartitions = 1) + .toDF("id").withColumn(colName = "extra", lit("")) + .write.format("delta").mode("append").save(path) // v4 + val smallFiles = addedFiles(deltaLog.getChanges(startVersion = 4).next()._2) + assert(smallFiles.size == 1) + + // Save the data before optimize for comparing it later with optimize + val data = spark.read.format("delta").load(path) + + // Set a low value for minFileSize so that the criteria for file selection is based on DVs + // and not based on the file size. 
+ val targetSmallSize = smallFiles(0).size + 10 // A number just higher than the `smallFile` + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_MIN_FILE_SIZE.key -> targetSmallSize.toString) { + executeOptimizePath(path) // v5 + } + val changes = deltaLog.getChanges(startVersion = 5).next()._2 + + // We expect the two files containing more than the threshold rows to be compacted. + var expectedRemoveFiles = Set(file0.path, file1.path) + // Expect the small file also to be compacted always + expectedRemoveFiles += smallFiles(0).path + + assert(removedFiles(changes).map(_.path).toSet === expectedRemoveFiles) + + assert(addedFiles(changes).size == 1) // Expect one new file added + + // Verify the final data after optimization hasn't changed. + checkAnswer(spark.read.format("delta").load(path), data) + } + } + } + + private def removedFiles(actions: Seq[Action]): Seq[RemoveFile] = { + actions.filter(_.isInstanceOf[RemoveFile]).map(_.asInstanceOf[RemoveFile]) + } + + private def addedFiles(actions: Seq[Action]): Seq[AddFile] = { + actions.filter(_.isInstanceOf[AddFile]).map(_.asInstanceOf[AddFile]) + } + + def appendRowsToDeltaTable( + path: String, + numFiles: Int, + numRowsPerFiles: Int, + partitionColumns: Option[Seq[String]], + partitionValues: Seq[Int]): Unit = { + partitionValues.foreach { partition => + (0 until numFiles).foreach { _ => + appendToDeltaTable( + (0 until numRowsPerFiles).toDF("value").withColumn("id", lit(partition)), + path, + partitionColumns) + } + } + } + + def testOptimizeCompactWithLargeFile( + name: String, unCompactablePartitions: Seq[Int], compactablePartitions: Seq[Int]) { + test(name) { + withTempDir { tempDir => + val path = new File(tempDir, "testTable").getCanonicalPath + val partitionColumns = Some(Seq("id")) + // Create un-compactable partitions. + appendRowsToDeltaTable( + path, numFiles = 1, numRowsPerFiles = 200, partitionColumns, unCompactablePartitions) + // Create compactable partitions with 5 files + appendRowsToDeltaTable( + path, numFiles = 5, numRowsPerFiles = 10, partitionColumns, compactablePartitions) + + val deltaLogBefore = DeltaLog.forTable(spark, path) + val txnBefore = deltaLogBefore.startTransaction() + val fileListBefore = txnBefore.filterFiles() + val versionBefore = deltaLogBefore.snapshot.version + + val id = "id".phy(deltaLogBefore) + unCompactablePartitions.foreach(partId => + assert(fileListBefore.count(_.partitionValues === Map(id -> partId.toString)) == 1)) + compactablePartitions.foreach(partId => + assert(fileListBefore.count(_.partitionValues === Map(id -> partId.toString)) == 5)) + // Optimize compact all partitions + spark.sql(s"OPTIMIZE '$path'") + + val deltaLogAfter = DeltaLog.forTable(spark, path) + val txnAfter = deltaLogAfter.startTransaction(); + val fileListAfter = txnAfter.filterFiles(); + // All partitions should only contains single file. 
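+ // The 5-file partitions are compacted down to a single file each, while the partitions that already held one large file are left untouched.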
+ (unCompactablePartitions ++ compactablePartitions).foreach(partId => + assert(fileListAfter.count(_.partitionValues === Map(id -> partId.toString)) === 1)) + // version is incremented + assert(deltaLogAfter.snapshot.version === versionBefore + 1) + } + } + } + testOptimizeCompactWithLargeFile( + "optimize command: interleaves compactable/un-compactable partitions", + unCompactablePartitions = Seq(1, 3, 5), + compactablePartitions = Seq(2, 4, 6)) + + testOptimizeCompactWithLargeFile( + "optimize command: first two and last two partitions are un-compactable", + unCompactablePartitions = Seq(1, 2, 5, 6), + compactablePartitions = Seq(3, 4)) + + testOptimizeCompactWithLargeFile( + "optimize command: only first and last partition are compactable", + unCompactablePartitions = Seq(2, 3, 4, 5), + compactablePartitions = Seq(1, 6)) + + testOptimizeCompactWithLargeFile( + "optimize command: only first partition is un-compactable", + unCompactablePartitions = Seq(1), + compactablePartitions = Seq(2, 3, 4, 5, 6)) + + testOptimizeCompactWithLargeFile( + "optimize command: only first partition is compactable", + unCompactablePartitions = Seq(2, 3, 4, 5, 6), + compactablePartitions = Seq(1)) + + test("optimize command: on partitioned table - selected partitions") { + withTempDir { tempDir => + val path = new File(tempDir, "testTable").getCanonicalPath + val partitionColumns = Some(Seq("id")) + appendToDeltaTable( + Seq(1, 2, 3).toDF("value").withColumn("id", 'value % 2), + path, + partitionColumns) + + appendToDeltaTable( + Seq(4, 5, 6).toDF("value").withColumn("id", 'value % 2), + path, + partitionColumns) + + val deltaLogBefore = DeltaLog.forTable(spark, path) + val txnBefore = deltaLogBefore.startTransaction(); + val fileListBefore = txnBefore.filterFiles() + + val id = "id".phy(deltaLogBefore) + + assert(fileListBefore.length >= 3) + assert(fileListBefore.count(_.partitionValues === Map(id -> "0")) > 1) + + val versionBefore = deltaLogBefore.snapshot.version + executeOptimizePath(path, Some("id = 0")) + + val deltaLogAfter = DeltaLog.forTable(spark, path) + val txnAfter = deltaLogBefore.startTransaction(); + val fileListAfter = txnAfter.filterFiles() + + assert(fileListBefore.length > fileListAfter.length) + // Optimized partition should contain only one file + assert(fileListAfter.count(_.partitionValues === Map(id -> "0")) === 1) + + // File counts in partitions that are not part of the OPTIMIZE should remain the same + assert(fileListAfter.count(_.partitionValues === Map(id -> "1")) === + fileListAfter.count(_.partitionValues === Map(id -> "1"))) + + // version is incremented + assert(deltaLogAfter.snapshot.version === versionBefore + 1) + + // data should remain the same after the OPTIMIZE + checkDatasetUnorderly( + spark.read.format("delta").load(path).select("value").as[Long], + (1L to 6L): _*) + } + } + + test("optimize command: on null partition columns") { + withTempDir { tempDir => + val path = new File(tempDir, "testTable").getCanonicalPath + val partitionColumn = "part" + + (1 to 5).foreach { _ => + appendToDeltaTable( + Seq(("a", 1), ("b", 2), (null.asInstanceOf[String], 3), ("", 4)) + .toDF(partitionColumn, "value"), + path, + Some(Seq(partitionColumn))) + } + + val deltaLogBefore = DeltaLog.forTable(spark, path) + val txnBefore = deltaLogBefore.startTransaction(); + val fileListBefore = txnBefore.filterFiles() + val versionBefore = deltaLogBefore.snapshot.version + + val partitionColumnPhysicalName = partitionColumn.phy(deltaLogBefore) + + // we have only 1 partition here + val 
filesInEachPartitionBefore = groupInputFilesByPartition( + fileListBefore.map(_.path).toArray, deltaLogBefore) + + // There exist at least one file in each partition + assert(filesInEachPartitionBefore.forall(_._2.length > 1)) + + // And there is a partition for null values + assert(filesInEachPartitionBefore.keys.exists( + _ === (partitionColumnPhysicalName, nullPartitionValue))) + + executeOptimizePath(path) + + val deltaLogAfter = DeltaLog.forTable(spark, path) + val txnAfter = deltaLogBefore.startTransaction(); + val fileListAfter = txnAfter.filterFiles() + + // Number of files is less than before optimize + assert(fileListBefore.length > fileListAfter.length) + + // Optimized partition should contain only one file in null partition + assert(fileListAfter.count( + _.partitionValues === Map[String, String](partitionColumnPhysicalName -> null)) === 1) + + // version is incremented + assert(deltaLogAfter.snapshot.version === versionBefore + 1) + + // data should remain the same after the OPTIMIZE + checkAnswer( + spark.read.format("delta").load(path).groupBy(partitionColumn).count(), + Seq(Row("a", 5), Row("b", 5), Row(null, 10))) + } + } + + test("optimize command: on table with multiple partition columns") { + withTempDir { tempDir => + val path = new File(tempDir, "testTable").getCanonicalPath + val partitionColumns = Seq("date", "part") + + Seq(10, 100).foreach { count => + appendToDeltaTable( + spark.range(count) + .select('id, lit("2017-10-10").cast("date") as "date", 'id % 5 as "part"), + path, + Some(partitionColumns)) + } + + val deltaLogBefore = DeltaLog.forTable(spark, path) + val txnBefore = deltaLogBefore.startTransaction(); + val fileListBefore = txnBefore.filterFiles() + val versionBefore = deltaLogBefore.snapshot.version + + val date = "date".phy(deltaLogBefore) + val part = "part".phy(deltaLogBefore) + + val fileCountInTestPartitionBefore = fileListBefore + .count(_.partitionValues === Map[String, String](date -> "2017-10-10", part -> "3")) + + executeOptimizePath(path, Some("date = '2017-10-10' and part = 3")) + + val deltaLogAfter = DeltaLog.forTable(spark, path) + val txnAfter = deltaLogBefore.startTransaction(); + val fileListAfter = txnAfter.filterFiles() + + // Number of files is less than before optimize + assert(fileListBefore.length > fileListAfter.length) + + // Optimized partition should contain only one file in null partition and less number + // of files than before optimize + val fileCountInTestPartitionAfter = fileListAfter + .count(_.partitionValues === Map[String, String](date -> "2017-10-10", part -> "3")) + assert(fileCountInTestPartitionAfter === 1L) + assert(fileCountInTestPartitionBefore > fileCountInTestPartitionAfter, + "Expected the partition to count less number of files after optimzie.") + + // version is incremented + assert(deltaLogAfter.snapshot.version === versionBefore + 1) + } + } + + test("optimize - multiple jobs start executing at once ") { + // The idea here is to make sure multiple optimize jobs execute concurrently. We can + // block the writes of one batch with a countdown latch that will unblock only + // after the second batch also tries to write. + + val numPartitions = 2 + withTempDir { tempDir => + spark.range(100) + .withColumn("pCol", 'id % numPartitions) + .repartition(10) + .write + .format("delta") + .partitionBy("pCol") + .save(tempDir.getAbsolutePath) + + // We have two partitions so we would have two tasks. 
We can make sure we have two batches + withSQLConf( + ("fs.AbstractFileSystem.block.impl", + classOf[BlockWritesAbstractFileSystem].getCanonicalName), + ("fs.block.impl", classOf[BlockWritesLocalFileSystem].getCanonicalName)) { + + val path = s"block://${tempDir.getAbsolutePath}" + val deltaLog = DeltaLog.forTable(spark, path) + require(deltaLog.snapshot.numOfFiles === 20) // 10 files in each partition + // block the first write until the second batch can attempt to write. + BlockWritesLocalFileSystem.blockUntilConcurrentWrites(numPartitions) + failAfter(60.seconds) { + executeOptimizePath(path) + } + assert(deltaLog.snapshot.numOfFiles === numPartitions) // 1 file per partition + } + } + } + + test("optimize command with multiple partition predicates") { + withTempDir { tempDir => + def writeData(count: Int): Unit = { + spark.range(count).select('id, lit("2017-10-10").cast("date") as "date", 'id % 5 as "part") + .write + .partitionBy("date", "part") + .format("delta") + .mode("append") + .save(tempDir.getAbsolutePath) + } + + writeData(10) + writeData(100) + + executeOptimizePath(tempDir.getAbsolutePath, Some("date = '2017-10-10' and part = 3")) + + val df = spark.read.format("delta").load(tempDir.getAbsolutePath) + val deltaLog = loadDeltaLog(tempDir.getAbsolutePath) + val part = "part".phy(deltaLog) + val files = groupInputFilesByPartition(df.inputFiles, deltaLog) + assert(files.filter(_._1._1 == part).minBy(_._2.length)._1 === (part, "3"), + "part 3 should have been optimized and have least amount of files") + } + } + + test("optimize command with multiple partition predicates with multiple where") { + withTempDir { tempDir => + def writeData(count: Int): Unit = { + spark.range(count).select('id, lit("2017-10-10").cast("date") as "date", 'id % 5 as "part") + .write + .partitionBy("date", "part") + .format("delta") + .mode("append") + .save(tempDir.getAbsolutePath) + } + + writeData(10) + writeData(100) + + DeltaTable.forPath(tempDir.getAbsolutePath).optimize() + .where("part = 3") + .where("date = '2017-10-10'") + .executeCompaction() + + val df = spark.read.format("delta").load(tempDir.getAbsolutePath) + val deltaLog = loadDeltaLog(tempDir.getAbsolutePath) + val part = "part".phy(deltaLog) + val files = groupInputFilesByPartition(df.inputFiles, deltaLog) + assert(files.filter(_._1._1 == part).minBy(_._2.length)._1 === (part, "3"), + "part 3 should have been optimized and have least amount of files") + } + } + + /** + * Utility method to append the given data to the Delta table located at the given path. + * Optionally partitions the data. 
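+ * @param data the dataset to append + * @param tablePath path of the target Delta table + * @param partitionColumns optional columns to partition the appended data by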
+ */ + protected def appendToDeltaTable[T]( + data: Dataset[T], tablePath: String, partitionColumns: Option[Seq[String]] = None): Unit = { + var df = data.repartition(1).write; + partitionColumns.map(columns => { + df = df.partitionBy(columns: _*) + }) + df.format("delta").mode("append").save(tablePath) + } +} + +/** + * Runs optimize compaction tests using OPTIMIZE SQL + */ +class OptimizeCompactionSQLSuite extends OptimizeCompactionSuiteBase + with DeltaSQLCommandTest { + import testImplicits._ + + def executeOptimizeTable(table: String, condition: Option[String] = None): Unit = { + val conditionClause = condition.map(c => s"WHERE $c").getOrElse("") + spark.sql(s"OPTIMIZE $table $conditionClause") + } + + def executeOptimizePath(path: String, condition: Option[String] = None): Unit = { + executeOptimizeTable(s"'$path'", condition) + } + + test("optimize command: missing path") { + val e = intercept[ParseException] { + spark.sql(s"OPTIMIZE") + } + assert(e.getMessage.contains("OPTIMIZE")) + } + + test("optimize command: missing predicate on path") { + val e = intercept[ParseException] { + spark.sql(s"OPTIMIZE /doesnt/exist WHERE") + } + assert(e.getMessage.contains("OPTIMIZE")) + } + + test("optimize command: non boolean expression") { + val e = intercept[ParseException] { + spark.sql(s"OPTIMIZE /doesnt/exist WHERE 1+1") + } + assert(e.getMessage.contains("OPTIMIZE")) + } + + test("optimize with partition value containing space") { + withTempDir { tempDir => + val baseDf = Seq(("a space", 1), ("other", 2)).toDF("name", "value") + + def write(): Unit = { + baseDf.write + .format("delta") + .partitionBy("name") + .mode("append") + .save(tempDir.getAbsolutePath) + } + + write() + write() + + sql(s"optimize '${tempDir.getAbsolutePath}'") + val df = spark.read.format("delta").load(tempDir.getAbsolutePath) + assert(df.inputFiles.length === 2, "2 files for 2 partitions") + checkAnswer( + df, + baseDf.union(baseDf)) + } + } + + test("optimize command: subquery predicate") { + val tableName = "myTable" + withTable(tableName) { + spark.sql(s"create table $tableName (p int, id int) using delta partitioned by(p)") + val e = intercept[DeltaAnalysisException] { + spark.sql(s"optimize $tableName where p >= (select p from $tableName where id > 5)") + } + checkError(e, "DELTA_UNSUPPORTED_SUBQUERY_IN_PARTITION_PREDICATES", + "0AKDC", Map.empty[String, String]) + } + } +} + +/** + * Runs optimize compaction tests using OPTIMIZE Scala APIs + */ +class OptimizeCompactionScalaSuite extends OptimizeCompactionSuiteBase + with DeltaSQLCommandTest { + def executeOptimizeTable(table: String, condition: Option[String] = None): Unit = { + if (condition.isDefined) { + DeltaTable.forName(table).optimize().where(condition.get).executeCompaction() + } else { + DeltaTable.forName(table).optimize().executeCompaction() + } + } + + def executeOptimizePath(path: String, condition: Option[String] = None): Unit = { + if (condition.isDefined) { + DeltaTable.forPath(path).optimize().where(condition.get).executeCompaction() + } else { + DeltaTable.forPath(path).optimize().executeCompaction() + } + } +} + +trait OptimizeCompactionColumnMappingSuiteBase extends DeltaColumnMappingSelectedTestMixin { + override protected def runOnlyTests = Seq( + "optimize command: on table with multiple partition columns", + "optimize command: on null partition columns" + ) +} + +class OptimizeCompactionIdColumnMappingSuite extends OptimizeCompactionSQLSuite + with DeltaColumnMappingEnableIdMode + with OptimizeCompactionColumnMappingSuiteBase { +} + 
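+// Like the id-mapping suite above, this suite reruns only the partition-column tests selected in OptimizeCompactionColumnMappingSuiteBase.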
+class OptimizeCompactionNameColumnMappingSuite extends OptimizeCompactionSQLSuite + with DeltaColumnMappingEnableNameMode + with OptimizeCompactionColumnMappingSuiteBase diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/optimize/OptimizeMetricsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/optimize/OptimizeMetricsSuite.scala new file mode 100644 index 00000000000..3f5df26578d --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/optimize/OptimizeMetricsSuite.scala @@ -0,0 +1,429 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.optimize + +// scalastyle:off import.ordering.noEmptyLine +import com.databricks.spark.util.Log4jUsageLogger +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.commands.optimize.{FileSizeStats, OptimizeMetrics, ZOrderStats} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.JsonUtils +import io.delta.tables.DeltaTable + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.functions.floor +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ + +/** Tests that run optimize and verify the returned output (metrics) is expected. 
*/ +trait OptimizeMetricsSuiteBase extends QueryTest + with SharedSparkSession + with DeletionVectorsTestUtils { + + import testImplicits._ + + test("optimize metrics") { + withTempDir { tempDir => + val skewedRightSeq = + 0.to(79).seq ++ 40.to(79).seq ++ 60.to(79).seq ++ 70.to(79).seq ++ 75.to(79).seq + skewedRightSeq.toDF().withColumn("p", floor('value / 10)).repartition(4) + .write.partitionBy("p").format("delta").save(tempDir.toString) + val deltaLog = DeltaLog.forTable(spark, tempDir) + val startCount = deltaLog.unsafeVolatileSnapshot.numOfFiles + val startSizes = deltaLog.unsafeVolatileSnapshot.allFiles.select('size).as[Long].collect() + val res = spark.sql(s"OPTIMIZE delta.`${tempDir.toString}`") + val metrics: OptimizeMetrics = res.select($"metrics.*").as[OptimizeMetrics].head() + val finalSizes = deltaLog.unsafeVolatileSnapshot.allFiles + .select('size).collect().map(_.getLong(0)) + val finalNumFiles = deltaLog.unsafeVolatileSnapshot.numOfFiles + assert(metrics.numFilesAdded == finalNumFiles) + assert(metrics.numFilesRemoved == startCount) + assert(metrics.filesAdded.min.get == finalSizes.min) + assert(metrics.filesAdded.max.get == finalSizes.max) + assert(metrics.filesAdded.totalSize == finalSizes.sum) + assert(metrics.filesAdded.totalFiles == finalSizes.length) + assert(metrics.filesRemoved.max.get == startSizes.max) + assert(metrics.filesRemoved.min.get == startSizes.min) + assert(metrics.filesRemoved.totalSize == startSizes.sum) + assert(metrics.filesRemoved.totalFiles == startSizes.length) + assert(metrics.totalConsideredFiles == startCount) + assert(metrics.totalFilesSkipped == 0) + assert(metrics.numTableColumns == 2) + assert(metrics.numTableColumnsWithStats == 2) + } + } + + + /** + * Ensure public API for metrics persists + */ + test("optimize command output schema") { + + val zOrderFileStatsSchema = StructType(Seq( + StructField("num", LongType, nullable = false), + StructField("size", LongType, nullable = false) + )) + + val zOrderStatsSchema = StructType(Seq( + StructField("strategyName", StringType, nullable = true), + StructField("inputCubeFiles", zOrderFileStatsSchema, nullable = true), + StructField("inputOtherFiles", zOrderFileStatsSchema, nullable = true), + StructField("inputNumCubes", LongType, nullable = false), + StructField("mergedFiles", zOrderFileStatsSchema, nullable = true), + StructField("numOutputCubes", LongType, nullable = false), + StructField("mergedNumCubes", LongType, nullable = true) + )) + val fileSizeMetricsSchema = StructType(Seq( + StructField("min", LongType, nullable = true), + StructField("max", LongType, nullable = true), + StructField("avg", DoubleType, nullable = false), + StructField("totalFiles", LongType, nullable = false), + StructField("totalSize", LongType, nullable = false) + )) + + val parallelismMetricsSchema = StructType(Seq( + StructField("maxClusterActiveParallelism", LongType, nullable = true), + StructField("minClusterActiveParallelism", LongType, nullable = true), + StructField("maxSessionActiveParallelism", LongType, nullable = true), + StructField("minSessionActiveParallelism", LongType, nullable = true) + )) + val dvMetricsSchema = StructType(Seq( + StructField("numDeletionVectorsRemoved", LongType, nullable = false), + StructField("numDeletionVectorRowsRemoved", LongType, nullable = false) + )) + + val optimizeMetricsSchema = StructType(Seq( + StructField("numFilesAdded", LongType, nullable = false), + StructField("numFilesRemoved", LongType, nullable = false), + StructField("filesAdded", fileSizeMetricsSchema, 
nullable = true), + StructField("filesRemoved", fileSizeMetricsSchema, nullable = true), + StructField("partitionsOptimized", LongType, nullable = false), + StructField("zOrderStats", zOrderStatsSchema, nullable = true), + StructField("numBatches", LongType, nullable = false), + StructField("totalConsideredFiles", LongType, nullable = false), + StructField("totalFilesSkipped", LongType, nullable = false), + StructField("preserveInsertionOrder", BooleanType, nullable = false), + StructField("numFilesSkippedToReduceWriteAmplification", LongType, nullable = false), + StructField("numBytesSkippedToReduceWriteAmplification", LongType, nullable = false), + StructField("startTimeMs", LongType, nullable = false), + StructField("endTimeMs", LongType, nullable = false), + StructField("totalClusterParallelism", LongType, nullable = false), + StructField("totalScheduledTasks", LongType, nullable = false), + StructField("autoCompactParallelismStats", parallelismMetricsSchema, nullable = true), + StructField("deletionVectorStats", dvMetricsSchema, nullable = true), + StructField("numTableColumns", LongType, nullable = false), + StructField("numTableColumnsWithStats", LongType, nullable = false) + )) + val optimizeSchema = StructType(Seq( + StructField("path", StringType, nullable = true), + StructField("metrics", optimizeMetricsSchema, nullable = true) + )) + withTempDir { tempDir => + spark.range(0, 10).write.format("delta").save(tempDir.toString) + val res = sql(s"OPTIMIZE delta.`${tempDir.toString}`") + assert(res.schema == optimizeSchema) + } + } + + test("optimize operation metrics in Delta table history") { + withSQLConf(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + withTempDir { tempDir => + val sampleData = + 0.to(79).seq ++ 40.to(79).seq ++ 60.to(79).seq ++ 70.to(79).seq ++ 75.to(79).seq + + // partition the data and write to test table + sampleData.toDF().withColumn("p", floor('value / 10)).repartition(4) + .write.partitionBy("p").format("delta").save(tempDir.toString) + + spark.sql(s"OPTIMIZE delta.`${tempDir.toString}`") // run optimize on the table + + val actualOperationMetricsAndName = DeltaTable.forPath(spark, tempDir.getAbsolutePath) + .history(1) + .select("operationMetrics", "operation") + .head + + val actualOperationMetrics = actualOperationMetricsAndName + .getMap(0) + .asInstanceOf[Map[String, String]] + + // File sizes depend on the order of how they are merged (=> compression). In order to avoid + // flaky test, just test that the metric exists. 
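+ // Keys recorded in operationMetrics for an OPTIMIZE commit; only their presence is asserted here.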
+ Seq( + "numAddedFiles", + "numAddedBytes", + "numRemovedBytes", + "numRemovedFiles", + "numRemovedBytes", + "minFileSize", + "maxFileSize", + "p25FileSize", + "p50FileSize", + "p75FileSize", + "numDeletionVectorsRemoved" + ).foreach(metric => assert(actualOperationMetrics.get(metric).isDefined)) + + val operationName = actualOperationMetricsAndName(1).asInstanceOf[String] + assert(operationName === DeltaOperations.OPTIMIZE_OPERATION_NAME) + } + } + } + + test("optimize metrics on idempotent operations") { + val tblName = "tblName" + withTable(tblName) { + // Create Delta table + spark.range(10).write.format("delta").saveAsTable(tblName) + + // First Optimize + spark.sql(s"OPTIMIZE $tblName") + + // Second Optimize + val res = spark.sql(s"OPTIMIZE $tblName") + val actMetrics: OptimizeMetrics = res.select($"metrics.*").as[OptimizeMetrics].head() + var preserveInsertionOrder = false + + val expMetrics = OptimizeMetrics( + numFilesAdded = 0, + numFilesRemoved = 0, + filesAdded = FileSizeStats().toFileSizeMetrics, + filesRemoved = FileSizeStats().toFileSizeMetrics, + partitionsOptimized = 0, + zOrderStats = None, + numBatches = 0, + totalConsideredFiles = 1, + totalFilesSkipped = 1, + preserveInsertionOrder = preserveInsertionOrder, + startTimeMs = actMetrics.startTimeMs, + endTimeMs = actMetrics.endTimeMs, + totalClusterParallelism = 2, + totalScheduledTasks = 0, + numTableColumns = 1, + numTableColumnsWithStats = 1) + + assert(actMetrics === expMetrics) + } + } + + test("optimize metrics when certain table columns have no stats") { + val tblName = "tblName" + withTable(tblName) { + // Create Delta table with 5 columns + spark.range(10) + .withColumn("col2", 'id * 2) + .withColumn("col3", 'id * 3) + .withColumn("col4", 'id * 4) + .withColumn("col5", 'id * 5) + .write.format("delta").saveAsTable(tblName) + + // Set to only collect data skipping stats on 3 columns + spark.sql(s""" + |ALTER TABLE $tblName + |SET TBLPROPERTIES ( + | 'delta.dataSkippingNumIndexedCols' = '3' + |)""".stripMargin) + + // Optimize + val res = spark.sql(s"OPTIMIZE $tblName") + val actMetrics: OptimizeMetrics = res.select($"metrics.*").as[OptimizeMetrics].head() + + // The table has 5 columns + assert(actMetrics.numTableColumns == 5) + // There are only 3 columns to collect stats because of the dataSkippingNumIndexedCols config + assert(actMetrics.numTableColumnsWithStats == 3) + } + } + + + test("optimize ZOrderBy operation metrics in Delta table history") { + withSQLConf( + DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED.key -> "true") { + withTempDir { tempDir => + // create a partitioned table with each partition containing multiple files + 0.to(100).seq.toDF() + .withColumn("col1", floor('value % 7)) + .withColumn("col2", floor('value % 27)) + .withColumn("p", floor('value % 10)) + .repartition(4).write.partitionBy("p").format("delta").save(tempDir.toString) + + val startSizes = DeltaLog.forTable(spark, tempDir) + .unsafeVolatileSnapshot.allFiles.select('size).as[Long].collect().sorted + + spark.sql(s"OPTIMIZE delta.`${tempDir.toString}` ZORDER BY (col1, col2)").show() + + val finalSizes = DeltaLog.forTable(spark, tempDir) + .unsafeVolatileSnapshot.allFiles.select('size).collect().map(_.getLong(0)).sorted + + val actualOperation = DeltaTable.forPath(spark, tempDir.getAbsolutePath).history(1) + .select( + "operationParameters.zOrderBy", + "operationMetrics", + "operation") + .head + + // Verify ZOrder operation parameters + val actualOpParameters = actualOperation.getString(0) + assert(actualOpParameters === 
"[\"col1\",\"col2\"]") + + // Verify metrics records in commit log. + val actualMetrics = actualOperation + .getMap(1) + .asInstanceOf[Map[String, String]] + + val expMetricsJson = + s"""{ + | "numRemovedFiles" : "37", + | "numAddedFiles" : "10", + | "numAddedBytes" : "${finalSizes.sum}", + | "numRemovedBytes" : "${startSizes.sum}", + | "minFileSize" : "${finalSizes.min}", + | "maxFileSize" : "${finalSizes.max}", + | "p25FileSize" : "${finalSizes(finalSizes.length / 4)}", + | "p50FileSize" : "${finalSizes(finalSizes.length / 2)}", + | "p75FileSize" : "${finalSizes(3 * finalSizes.length / 4)}", + | "numDeletionVectorsRemoved" : "0" + |}""".stripMargin.trim + + val expMetrics = JsonUtils.fromJson[Map[String, String]](expMetricsJson) + assert(actualMetrics === expMetrics) + + val operationName = actualOperation(2).asInstanceOf[String] + assert(operationName === DeltaOperations.OPTIMIZE_OPERATION_NAME) + } + } + } + + test("optimize ZOrderBy operation metrics in command output") { + withSQLConf( + DeltaSQLConf.DELTA_OPTIMIZE_MAX_FILE_SIZE.key -> "1000000") { + withTempDir { tempDir => + // create a partitioned table with each partition containing multiple files + 0.to(100).seq.toDF() + .withColumn("col1", floor('value % 7)) + .withColumn("col2", floor('value % 27)) + .withColumn("p", floor('value % 10)) + .repartition(4).write.partitionBy("p").format("delta").save(tempDir.toString) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val startCount = deltaLog.unsafeVolatileSnapshot.allFiles.count() + val startSizes = deltaLog.unsafeVolatileSnapshot.allFiles.select('size).as[Long].collect() + + val result = spark.sql(s"OPTIMIZE delta.`${tempDir.toString}` ZORDER BY (col1, col2)") + val metrics: OptimizeMetrics = result.select($"metrics.*").as[OptimizeMetrics].head() + + val finalSizes = deltaLog.unsafeVolatileSnapshot.allFiles + .select('size).collect().map(_.getLong(0)) + val finalNumFiles = deltaLog.unsafeVolatileSnapshot.allFiles.collect().length + + assert(metrics.filesAdded.totalFiles === finalNumFiles) + assert(metrics.filesRemoved.totalFiles === startCount) + assert(metrics.filesAdded.min.get === finalSizes.min) + assert(metrics.filesAdded.max.get === finalSizes.max) + assert(metrics.filesRemoved.max.get === startSizes.max) + assert(metrics.filesRemoved.min.get === startSizes.min) + assert(metrics.totalFilesSkipped === 0) + assert(metrics.totalConsideredFiles === metrics.numFilesRemoved) + + val expZOrderMetrics = s"""{ + | "strategyName" : "all", + | "inputCubeFiles" : { + | "num" : 0, + | "size" : 0 + | }, + | "inputOtherFiles" : { + | "num" : $startCount, + | "size" : ${startSizes.sum} + | }, + | "inputNumCubes" : 0, + | "mergedFiles" : { + | "num" : $startCount, + | "size" : ${startSizes.sum} + | }, + | "numOutputCubes" : 10 + |}""".stripMargin + + assert(metrics.zOrderStats === Some(JsonUtils.fromJson[ZOrderStats](expZOrderMetrics))) + } + } + } + + val optimizeCommands = Seq("optimize", "zorder", "purge") + for (cmd <- optimizeCommands) { + testWithDVs(s"deletion vector metrics - $cmd") { + withTempDir { dirName => + // Create table with 100 files of 10 rows each. 
+ val numFiles = 100 + val path = dirName.getAbsolutePath + spark.range(0, 1000, step = 1, numPartitions = numFiles) + .write.format("delta").save(path) + val tableName = s"delta.`$path`" + val deltaTable = DeltaTable.forPath(spark, path) + val deltaLog = DeltaLog.forTable(spark, path) + + var allFiles = deltaLog.unsafeVolatileSnapshot.allFiles.collect().toSeq + // Delete two rows each from 5 files to create Deletion Vectors. + val numFilesWithDVs = 5 + val numDeletedRows = numFilesWithDVs * 2 + allFiles.take(numFilesWithDVs).foreach( + file => removeRowsFromFile(deltaLog, file, Seq(1, 5))) + + allFiles = deltaLog.unsafeVolatileSnapshot.allFiles.collect().toSeq + assert(allFiles.size === numFiles) + assert(allFiles.filter(_.deletionVector != null).size === numFilesWithDVs) + + var expOpName = DeltaOperations.OPTIMIZE_OPERATION_NAME + val metrics: Seq[OptimizeMetrics] = cmd match { + case "optimize" => + spark.sql(s"OPTIMIZE $tableName") + .select("metrics.*").as[OptimizeMetrics].collect().toSeq + case "zorder" => + spark.sql(s"OPTIMIZE $tableName ZORDER BY (id)") + .select("metrics.*").as[OptimizeMetrics].collect().toSeq + case "purge" => + expOpName = DeltaOperations.REORG_OPERATION_NAME + spark.sql(s"REORG TABLE $tableName APPLY (PURGE)") + .select("metrics.*").as[OptimizeMetrics].collect().toSeq + case unknown => throw new IllegalArgumentException(s"Unknown command: $unknown") + } + + // Check DV metrics in the result. + assert(metrics.length === 1) + val dvStats = metrics.head.deletionVectorStats + assert(dvStats.get.numDeletionVectorsRemoved === numFilesWithDVs) + assert(dvStats.get.numDeletionVectorRowsRemoved === numDeletedRows) + + // Check DV metrics in the Delta history. + val opMetricsAndName = deltaTable.history.select("operationMetrics", "operation") + .head + + val opMetrics = opMetricsAndName + .getMap(0) + .asInstanceOf[Map[String, String]] + val dvMetrics = opMetrics.keys.filter(_.contains("DeletionVector")) + assert(dvMetrics === Set("numDeletionVectorsRemoved")) + assert(opMetrics("numDeletionVectorsRemoved") === numFilesWithDVs.toString) + + val operationName = opMetricsAndName(1).asInstanceOf[String] + assert(operationName === expOpName) + } + } + } +} + +class OptimizeMetricsSuite extends OptimizeMetricsSuiteBase + with DeltaSQLCommandTest diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/optimize/OptimizeZOrderSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/optimize/OptimizeZOrderSuite.scala new file mode 100644 index 00000000000..2482eccc69a --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/optimize/OptimizeZOrderSuite.scala @@ -0,0 +1,316 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.optimize + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.commands.optimize.OptimizeMetrics +import org.apache.spark.sql.delta.sources.DeltaSQLConf._ +import org.apache.spark.sql.delta.test.{DeltaSQLCommandTest, TestsStatistics} +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import io.delta.tables.DeltaTable +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.functions.{col, floor, lit, max, struct} +import org.apache.spark.sql.test.SharedSparkSession + +trait OptimizePartitionTableHelper extends QueryTest { + def testPartition(str: String)(testFun: => Any): Unit = { + test("partitioned table - " + str) { + testFun + } + } +} + +/** Tests for Optimize Z-Order by */ +trait OptimizeZOrderSuiteBase extends OptimizePartitionTableHelper + with TestsStatistics + with SharedSparkSession + with DeltaColumnMappingTestUtils { + import testImplicits._ + + + def executeOptimizeTable(table: String, zOrderBy: Seq[String], + condition: Option[String] = None): DataFrame + def executeOptimizePath(path: String, zOrderBy: Seq[String], + condition: Option[String] = None): DataFrame + + test("optimize command: checks existence of interleaving columns") { + withTempDir { tempDir => + Seq(1, 2, 3).toDF("value") + .select('value, 'value % 2 as "id", 'value % 3 as "id2") + .write + .format("delta") + .save(tempDir.toString) + val e = intercept[IllegalArgumentException] { + executeOptimizePath(tempDir.getCanonicalPath, Seq("id", "id3")) + } + assert(Seq("id3", "data schema").forall(e.getMessage.contains)) + } + } + + test("optimize command: interleaving columns can't be partitioning columns") { + withTempDir { tempDir => + Seq(1, 2, 3).toDF("value") + .select('value, 'value % 2 as "id", 'value % 3 as "id2") + .write + .format("delta") + .partitionBy("id") + .save(tempDir.toString) + val e = intercept[IllegalArgumentException] { + executeOptimizePath(tempDir.getCanonicalPath, Seq("id", "id2")) + } + assert(e.getMessage === DeltaErrors.zOrderingOnPartitionColumnException("id").getMessage) + } + } + + test("optimize command: interleaving with nested columns") { + withTempDir { tempDir => + val df = spark.read.json(Seq("""{"a":1,"b":{"c":2,"d":3}}""").toDS()) + df.write.format("delta").save(tempDir.toString) + executeOptimizePath(tempDir.getCanonicalPath, Seq("a", "b.c")) + } + } + + testPartition("optimize on null partition column") { + withTempDir { tempDir => + (1 to 5).foreach { _ => + Seq(("a", 1), ("b", 2), (null.asInstanceOf[String], 3), ("", 4)).toDF("part", "value") + .write + .partitionBy("part") + .format("delta") + .mode("append") + .save(tempDir.getAbsolutePath) + } + + var df = spark.read.format("delta").load(tempDir.getAbsolutePath) + val deltaLog = loadDeltaLog(tempDir.getAbsolutePath) + val part = "part".phy(deltaLog) + var preOptInputFiles = groupInputFilesByPartition(df.inputFiles, deltaLog) + assert(preOptInputFiles.forall(_._2.length > 1)) + assert(preOptInputFiles.keys.exists(_ == (part, nullPartitionValue))) + + executeOptimizePath(tempDir.getAbsolutePath, Seq("value")) + + df = spark.read.format("delta").load(tempDir.getAbsolutePath) + preOptInputFiles = groupInputFilesByPartition(df.inputFiles, deltaLog) + assert(preOptInputFiles.forall(_._2.length == 1)) + assert(preOptInputFiles.keys.exists(_ == (part, nullPartitionValue))) + + checkAnswer( + 
df.groupBy('part).count(), + Seq(Row("a", 5), Row("b", 5), Row(null, 10)) + ) + } + } + + test("optimize: Zorder on col name containing dot") { + withTempDir { tempDir => + (0.to(79).seq ++ 40.to(79).seq ++ 60.to(79).seq ++ 70.to(79).seq ++ 75.to(79).seq) + .toDF("id") + .withColumn("flat.a", $"id" + 1) + .write + .format("delta") + .save(tempDir.toString) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val numFilesBefore = deltaLog.snapshot.numOfFiles + val res = executeOptimizePath(tempDir.getCanonicalPath, Seq("`flat.a`")) + val metrics = res.select($"metrics.*").as[OptimizeMetrics].head() + val numFilesAfter = deltaLog.snapshot.numOfFiles + assert(metrics.numFilesAdded === numFilesAfter) + assert(metrics.numFilesRemoved === numFilesBefore) + } + } + + test("optimize: Zorder on a nested column") { + withTempDir { tempDir => + (0.to(79).seq ++ 40.to(79).seq ++ 60.to(79).seq ++ 70.to(79).seq ++ 75.to(79).seq) + .toDF("id") + .withColumn("nested", struct(struct('id + 2 as "b", 'id + 3 as "c") as "sub")) + .write + .format("delta") + .save(tempDir.toString) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val numFilesBefore = deltaLog.snapshot.numOfFiles + val res = executeOptimizePath(tempDir.getCanonicalPath, Seq("nested.sub.c")) + val metrics = res.select($"metrics.*").as[OptimizeMetrics].head() + val numFilesAfter = deltaLog.snapshot.numOfFiles + assert(metrics.numFilesAdded === numFilesAfter) + assert(metrics.numFilesRemoved === numFilesBefore) + } + } + + test("optimize: ZOrder on a column without stats") { + withTempDir { tempDir => + withSQLConf("spark.databricks.delta.properties.defaults.dataSkippingNumIndexedCols" -> + "1", DELTA_OPTIMIZE_ZORDER_COL_STAT_CHECK.key -> "true") { + val data = Seq(1, 2, 3).toDF("id") + data.withColumn("nested", + struct(struct('id + 1 as "p1", 'id + 2 as "p2") as "a", 'id + 3 as "b")) + .write + .format("delta") + .save(tempDir.getAbsolutePath) + val e1 = intercept[AnalysisException] { + executeOptimizeTable(s"delta.`${tempDir.getPath}`", Seq("nested.b")) + } + assert(e1.getMessage == DeltaErrors + .zOrderingOnColumnWithNoStatsException(Seq[String]("nested.b"), spark) + .getMessage) + val e2 = intercept[AnalysisException] { + executeOptimizeTable(s"delta.`${tempDir.getPath}`", Seq("nested.a.p1")) + } + assert(e2.getMessage == DeltaErrors + .zOrderingOnColumnWithNoStatsException(Seq[String]("nested.a.p1"), spark) + .getMessage) + val e3 = intercept[AnalysisException] { + executeOptimizeTable(s"delta.`${tempDir.getPath}`", + Seq("nested.a.p1", "nested.b")) + } + assert(e3.getMessage == DeltaErrors + .zOrderingOnColumnWithNoStatsException( + Seq[String]("nested.a.p1", "nested.b"), spark) + .getMessage) + } + } + } + + statsTest("optimize command: interleaving") { + def statsDF(deltaLog: DeltaLog): DataFrame = { + val (c1, c2, c3) = ("c1".phy(deltaLog), "c2".phy(deltaLog), "c3".phy(deltaLog)) + getStatsDf(deltaLog, Seq( + $"numRecords", + struct($"minValues.`$c1`", $"minValues.`$c2`", $"minValues.`$c3`"), + struct($"maxValues.`$c1`", $"maxValues.`$c2`", $"maxValues.`$c3`"))) + } + + withTempDir { tempDir => + val deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + { + val df = spark.range(100) + .map(i => (i, 99 - i, (i + 50) % 100)) + .toDF("c1", "c2", "c3") + + df.repartitionByRange(4, $"c1", $"c2", $"c3") + .write + .format("delta") + .save(tempDir.toString) + } + assert(deltaLog.snapshot.allFiles.count() == 4) + checkAnswer(statsDF(deltaLog), Seq( + Row(25, Row(0, 75, 50), Row(24, 99, 74)), + Row(25, Row(25, 50, 
75), Row(49, 74, 99)), + Row(25, Row(50, 25, 0), Row(74, 49, 24)), + Row(25, Row(75, 0, 25), Row(99, 24, 49)))) + + withSQLConf( + DELTA_OPTIMIZE_MAX_FILE_SIZE.key -> "1000000" + ) { + val res = executeOptimizePath(tempDir.getCanonicalPath, Seq("c1", "c2", "c3")) + val metrics = res.select($"metrics.*").as[OptimizeMetrics].head() + assert(metrics.zOrderStats.get.mergedFiles.num == 4) + assert(deltaLog.snapshot.allFiles.count() == 1) + checkAnswer(statsDF(deltaLog), + Row(100, Row(0, 0, 0), Row(99, 99, 99))) + } + + // I want to get 4 files again, in order for this to be comparable to the initial scenario + val maxFileSize = deltaLog.snapshot.allFiles.head().size / 4 + withSQLConf( + DELTA_OPTIMIZE_MAX_FILE_SIZE.key -> maxFileSize.toString + ) { + val res = executeOptimizePath(tempDir.getCanonicalPath, Seq("c1", "c2", "c3")) + val metrics = res.select($"metrics.*").as[OptimizeMetrics].head() + val expectedFileCount = 4 + val expectedStats: Seq[Row] = Seq( + Row(25, Row(0, 50, 50), Row(49, 99, 99)), + Row(25, Row(16, 20, 18), Row(79, 83, 85)), + Row(25, Row(36, 36, 0), Row(63, 63, 96)), + Row(25, Row(64, 0, 14), Row(99, 35, 49))) + assert(metrics.zOrderStats.get.mergedFiles.num == 1) + assert(deltaLog.snapshot.allFiles.count() == expectedFileCount) + checkAnswer(statsDF(deltaLog), expectedStats) + } + } + } +} + +/** + * Runs optimize compaction tests using OPTIMIZE SQL + */ +class OptimizeZOrderSQLSuite extends OptimizeZOrderSuiteBase + with DeltaSQLCommandTest { + import testImplicits._ + + def executeOptimizeTable(table: String, zOrderBy: Seq[String], + condition: Option[String] = None): DataFrame = { + val conditionClause = condition.map(c => s"WHERE $c").getOrElse("") + val zOrderClause = s"ZORDER BY (${zOrderBy.mkString(", ")})" + spark.sql(s"OPTIMIZE $table $conditionClause $zOrderClause") + } + + def executeOptimizePath(path: String, zOrderBy: Seq[String], + condition: Option[String] = None): DataFrame = { + executeOptimizeTable(s"'$path'", zOrderBy, condition) + } + + test("optimize command: no need for parenthesis") { + withTempDir { tempDir => + val df = spark.read.json(Seq("""{"a":1,"b":{"c":2,"d":3}}""").toDS()) + df.write.format("delta").save(tempDir.toString) + spark.sql(s"OPTIMIZE '${tempDir.getCanonicalPath}' ZORDER BY a, b.c") + } + } +} + +/** + * Runs optimize compaction tests using OPTIMIZE Scala APIs + */ +class OptimizeZOrderScalaSuite extends OptimizeZOrderSuiteBase + with DeltaSQLCommandTest { + + + def executeOptimizeTable(table: String, zOrderBy: Seq[String], + condition: Option[String] = None): DataFrame = { + if (condition.isDefined) { + DeltaTable.forName(table).optimize().where(condition.get).executeZOrderBy(zOrderBy: _*) + } else { + DeltaTable.forName(table).optimize().executeZOrderBy(zOrderBy: _*) + } + } + + def executeOptimizePath(path: String, zOrderBy: Seq[String], + condition: Option[String] = None): DataFrame = { + if (condition.isDefined) { + DeltaTable.forPath(path).optimize().where(condition.get).executeZOrderBy(zOrderBy: _*) + } else { + DeltaTable.forPath(path).optimize().executeZOrderBy(zOrderBy: _*) + } + } +} + +class OptimizeZOrderNameColumnMappingSuite extends OptimizeZOrderSQLSuite + with DeltaColumnMappingEnableNameMode + with DeltaColumnMappingTestUtils + +class OptimizeZOrderIdColumnMappingSuite extends OptimizeZOrderSQLSuite + with DeltaColumnMappingEnableIdMode + with DeltaColumnMappingTestUtils diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/perf/OptimizeGeneratedColumnSuite.scala 
b/spark/src/test/scala/org/apache/spark/sql/delta/perf/OptimizeGeneratedColumnSuite.scala new file mode 100644 index 00000000000..414c0c31015 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/perf/OptimizeGeneratedColumnSuite.scala @@ -0,0 +1,1483 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.perf + +import java.sql.Timestamp +import java.util.Locale +import java.util.concurrent.{CountDownLatch, TimeUnit} + +import scala.util.matching.Regex + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta._ + +import org.apache.spark.sql.delta.sources.DeltaSQLConf.{DELTA_COLLECT_STATS, GENERATED_COLUMN_PARTITION_FILTER_OPTIMIZATION_ENABLED} +import org.apache.spark.sql.delta.stats.PrepareDeltaScanBase +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.execution.{FileSourceScanExec, QueryExecution} +import org.apache.spark.sql.types.TimestampType +import org.apache.spark.util.ThreadUtils +import org.apache.spark.util.Utils + +class OptimizeGeneratedColumnSuite extends GeneratedColumnTest { + import testImplicits._ + + private val regex = new Regex(s"(\\S+)\\s(\\S+)\\sGENERATED\\sALWAYS\\sAS\\s\\((.*)\\s?\\)", + "col_name", "data_type", "generated_as") + + private def getPushedPartitionFilters(queryExecution: QueryExecution): Seq[Expression] = { + queryExecution.executedPlan.collectFirst { + case scan: FileSourceScanExec => scan.partitionFilters + }.getOrElse(Nil) + } + + protected def insertInto(path: String, df: DataFrame) = { + df.write.format("delta").mode("append").save(path) + } + + /** + * Verify we can recognize an `OptimizablePartitionExpression` and generate corresponding + * partition filters correctly. + * + * @param dataSchema DDL of the data columns + * @param partitionSchema DDL of the partition columns + * @param generatedColumns a map of generated partition columns defined using the above data + * columns + * @param expectedPartitionExpr the expected `OptimizablePartitionExpression` to be recognized + * @param auxiliaryTestName string to append to the generated test name + * @param expressionKey key to check for the optmizable expression if not the default first + * word in the data schema + * @param skipNested Whether to skip the nested variant of the test + * @param filterTestCases test cases for partition filters. The key is the data filter, and the + * value is the partition filters we should generate. 
+ */ + private def testOptimizablePartitionExpression( + dataSchema: String, + partitionSchema: String, + generatedColumns: Map[String, String], + expectedPartitionExpr: OptimizablePartitionExpression, + auxiliaryTestName: Option[String] = None, + expressionKey: Option[String] = None, + skipNested: Boolean = false, + filterTestCases: Seq[(String, Seq[String])]): Unit = { + test(expectedPartitionExpr.toString + auxiliaryTestName.getOrElse("")) { + val normalCol = dataSchema.split(" ")(0) + + withTableName("optimizable_partition_expression") { table => + createTable( + table, + None, + s"$dataSchema, $partitionSchema", + generatedColumns, + generatedColumns.keys.toSeq + ) + + val metadata = DeltaLog.forTableWithSnapshot(spark, TableIdentifier(table))._2.metadata + assert(metadata.optimizablePartitionExpressions(expressionKey.getOrElse( + normalCol).toLowerCase(Locale.ROOT)) == expectedPartitionExpr :: Nil) + filterTestCases.foreach { filterTestCase => + val partitionFilters = getPushedPartitionFilters( + sql(s"SELECT * from $table where ${filterTestCase._1}").queryExecution) + assert(partitionFilters.map(_.sql) == filterTestCase._2) + } + } + } + + if (!skipNested) { + test(expectedPartitionExpr.toString + auxiliaryTestName.getOrElse("") + " nested") { + val normalCol = dataSchema.split(" ")(0) + val nestedSchema = s"nested struct<${dataSchema.replace(" ", ": ")}>" + val updatedGeneratedColumns = + generatedColumns.mapValues(v => v.replaceAll(s"(?i)($normalCol)", "nested.$1")).toMap + + withTableName("optimizable_partition_expression") { table => + createTable( + table, + None, + s"$nestedSchema, $partitionSchema", + updatedGeneratedColumns, + updatedGeneratedColumns.keys.toSeq + ) + + val metadata = DeltaLog.forTableWithSnapshot(spark, TableIdentifier(table))._2.metadata + val nestedColPath = + s"nested.${expressionKey.getOrElse(normalCol).toLowerCase(Locale.ROOT)}" + assert(metadata.optimizablePartitionExpressions(nestedColPath) + == expectedPartitionExpr :: Nil) + filterTestCases.foreach { filterTestCase => + val updatedFilter = filterTestCase._1.replaceAll(s"(?i)($normalCol)", "nested.$1") + val partitionFilters = getPushedPartitionFilters( + sql(s"SELECT * from $table where $updatedFilter").queryExecution) + assert(partitionFilters.map(_.sql) == filterTestCase._2) + } + } + } + } + } + + /** Format a human readable SQL filter into Spark's compact SQL format */ + private def compactFilter(filter: String): String = { + filter.replaceAllLiterally("\n", "") + .replaceAll("(?<=\\)) +(?=\\))", "") + .replaceAll("(?<=\\() +(?=\\()", "") + .replaceAll("\\) +OR +\\(", ") OR (") + .replaceAll("\\) +AND +\\(", ") AND (") + } + + testOptimizablePartitionExpression( + "eventTime TIMESTAMP", + "date DATE", + Map("date" -> "CAST(eventTime AS DATE)"), + expectedPartitionExpr = DatePartitionExpr("date"), + auxiliaryTestName = Option(" from cast(timestamp)"), + filterTestCases = Seq( + "eventTime < '2021-01-01 18:00:00'" -> + Seq("((date <= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) " + + "OR ((date <= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) IS NULL))"), + "eventTime <= '2021-01-01 18:00:00'" -> + Seq("((date <= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) " + + "OR ((date <= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) IS NULL))"), + "eventTime = '2021-01-01 18:00:00'" -> + Seq("((date = CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) " + + "OR ((date = CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) IS NULL))"), + "eventTime > '2021-01-01 18:00:00'" -> + Seq("((date >= CAST(TIMESTAMP 
'2021-01-01 18:00:00' AS DATE)) " + + "OR ((date >= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) IS NULL))"), + "eventTime >= '2021-01-01 18:00:00'" -> + Seq("((date >= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) " + + "OR ((date >= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) IS NULL))"), + "eventTime is null" -> Seq("(date IS NULL)"), + // Verify we can reverse the order + "'2021-01-01 18:00:00' > eventTime" -> + Seq("((date <= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) " + + "OR ((date <= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) IS NULL))"), + "'2021-01-01 18:00:00' >= eventTime" -> + Seq("((date <= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) " + + "OR ((date <= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) IS NULL))"), + "'2021-01-01 18:00:00' = eventTime" -> + Seq("((date = CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) " + + "OR ((date = CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) IS NULL))"), + "'2021-01-01 18:00:00' < eventTime" -> + Seq("((date >= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) " + + "OR ((date >= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) IS NULL))"), + "'2021-01-01 18:00:00' <= eventTime" -> + Seq("((date >= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) " + + "OR ((date >= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) IS NULL))"), + // Verify date type literal. In theory, the best filter should be date < DATE '2021-01-01'. + // But Spark's analyzer converts eventTime < '2021-01-01' to + // `eventTime` < TIMESTAMP '2021-01-01 00:00:00'. So it's the same as + // eventTime < '2021-01-01 18:00:00' for `OptimizeGeneratedColumn`. + "eventTime < '2021-01-01'" -> + Seq("((date <= CAST(TIMESTAMP '2021-01-01 00:00:00' AS DATE)) " + + "OR ((date <= CAST(TIMESTAMP '2021-01-01 00:00:00' AS DATE)) IS NULL))") + ) + ) + + testOptimizablePartitionExpression( + "eventDate DATE", + "date DATE", + Map("date" -> "CAST(eventDate AS DATE)"), + expectedPartitionExpr = DatePartitionExpr("date"), + auxiliaryTestName = Option(" from cast(date)"), + filterTestCases = Seq( + "eventDate < '2021-01-01 18:00:00'" -> + Seq("((date <= DATE '2021-01-01') " + + "OR ((date <= DATE '2021-01-01') IS NULL))"), + "eventDate <= '2021-01-01 18:00:00'" -> + Seq("((date <= DATE '2021-01-01') " + + "OR ((date <= DATE '2021-01-01') IS NULL))"), + "eventDate = '2021-01-01 18:00:00'" -> + Seq("((date = DATE '2021-01-01') " + + "OR ((date = DATE '2021-01-01') IS NULL))"), + "eventDate > '2021-01-01 18:00:00'" -> + Seq("((date >= DATE '2021-01-01') " + + "OR ((date >= DATE '2021-01-01') IS NULL))"), + "eventDate >= '2021-01-01 18:00:00'" -> + Seq("((date >= DATE '2021-01-01') " + + "OR ((date >= DATE '2021-01-01') IS NULL))"), + "eventDate is null" -> Seq("(date IS NULL)"), + // Verify we can reverse the order + "'2021-01-01 18:00:00' > eventDate" -> + Seq("((date <= DATE '2021-01-01') " + + "OR ((date <= DATE '2021-01-01') IS NULL))"), + "'2021-01-01 18:00:00' >= eventDate" -> + Seq("((date <= DATE '2021-01-01') " + + "OR ((date <= DATE '2021-01-01') IS NULL))"), + "'2021-01-01 18:00:00' = eventDate" -> + Seq("((date = DATE '2021-01-01') " + + "OR ((date = DATE '2021-01-01') IS NULL))"), + "'2021-01-01 18:00:00' < eventDate" -> + Seq("((date >= DATE '2021-01-01') " + + "OR ((date >= DATE '2021-01-01') IS NULL))"), + "'2021-01-01 18:00:00' <= eventDate" -> + Seq("((date >= DATE '2021-01-01') " + + "OR ((date >= DATE '2021-01-01') IS NULL))"), + // Verify date type literal. In theory, the best filter should be date < DATE '2021-01-01'. 
+ // But Spark's analyzer converts eventTime < '2021-01-01' to + // `eventTime` < TIMESTAMP '2021-01-01 00:00:00'. So it's the same as + // eventTime < '2021-01-01 18:00:00' for `OptimizeGeneratedColumn`. + "eventDate < '2021-01-01'" -> + Seq("((date <= DATE '2021-01-01') " + + "OR ((date <= DATE '2021-01-01') IS NULL))") + ) + ) + + testOptimizablePartitionExpression( + "eventTime TIMESTAMP", + "year INT, month INT, day INT, hour INT", + Map( + "year" -> "YEAR(eventTime)", + "month" -> "MONTH(eventTime)", + "day" -> "DAY(eventTime)", + "hour" -> "HOUR(eventTime)" + ), + expectedPartitionExpr = YearMonthDayHourPartitionExpr("year", "month", "day", "hour"), + filterTestCases = Seq( + "eventTime < '2021-01-01 18:00:00'" -> Seq( + compactFilter( + """( + | ( + | ( + | (year < year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month < month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (day < dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (day = dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (hour <= hour(TIMESTAMP '2021-01-01 18:00:00')) + | ) + |) + |""".stripMargin)), + "eventTime <= '2021-01-01 18:00:00'" -> Seq( + compactFilter( + """( + | ( + | ( + | (year < year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month < month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (day < dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (day = dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (hour <= hour(TIMESTAMP '2021-01-01 18:00:00')) + | ) + |) + |""".stripMargin)), + "eventTime = '2021-01-01 18:00:00'" -> Seq( + "(year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)))", + "(month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)))", + "(day = dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)))", + "(hour = hour(TIMESTAMP '2021-01-01 18:00:00'))" + ), + "eventTime > '2021-01-01 18:00:00'" -> Seq( + compactFilter( + """( + | ( + | ( + | (year > year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month > month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (day > dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS 
DATE))) + | ) + | AND + | (day = dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (hour >= hour(TIMESTAMP '2021-01-01 18:00:00')) + | ) + |) + |""".stripMargin)), + "eventTime >= '2021-01-01 18:00:00'" ->Seq( + compactFilter( + """( + | ( + | ( + | (year > year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month > month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (day > dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (day = dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (hour >= hour(TIMESTAMP '2021-01-01 18:00:00')) + | ) + |) + |""".stripMargin)), + "eventTime is null" -> Seq( + "(year IS NULL)", + "(month IS NULL)", + "(day IS NULL)", + "(hour IS NULL)" + ) + ) + ) + + testOptimizablePartitionExpression( + "eventTime TIMESTAMP", + "year INT, month INT, day INT", + Map( + "year" -> "YEAR(eventTime)", + "month" -> "MONTH(eventTime)", + "day" -> "DAY(eventTime)" + ), + expectedPartitionExpr = YearMonthDayPartitionExpr("year", "month", "day"), + filterTestCases = Seq( + "eventTime < '2021-01-01 18:00:00'" -> Seq( + compactFilter( + """( + | ( + | (year < year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month < month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (day <= dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + |) + |""".stripMargin)), + "eventTime <= '2021-01-01 18:00:00'" -> Seq( + compactFilter( + """( + | ( + | (year < year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month < month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (day <= dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + |) + |""".stripMargin)), + "eventTime = '2021-01-01 18:00:00'" -> Seq( + "(year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)))", + "(month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)))", + "(day = dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)))" + ), + "eventTime > '2021-01-01 18:00:00'" -> Seq( + compactFilter( + """( + | ( + | (year > year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month > month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (day >= dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + |) + |""".stripMargin)), + "eventTime >= 
'2021-01-01 18:00:00'" -> Seq( + compactFilter( + """( + | ( + | (year > year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month > month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | ) + | OR + | ( + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + | AND + | (day >= dayofmonth(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + |) + |""".stripMargin)), + "eventTime is null" -> Seq( + "(year IS NULL)", + "(month IS NULL)", + "(day IS NULL)" + ) + ) + ) + + testOptimizablePartitionExpression( + "eventTime TIMESTAMP", + "year INT, month INT", + // Use different cases to verify we can recognize the same column using different cases in + // generation expressions. + Map( + "year" -> "YEAR(EVENTTIME)", + "month" -> "MONTH(eventTime)" + ), + expectedPartitionExpr = YearMonthPartitionExpr("year", "month"), + filterTestCases = Seq( + "eventTime < '2021-01-01 18:00:00'" -> Seq( + compactFilter( + """( + | (year < year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month <= month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + |) + |""".stripMargin)), + "eventTime <= '2021-01-01 18:00:00'" -> Seq( + compactFilter( + """( + | (year < year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month <= month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + |) + |""".stripMargin)), + "eventTime = '2021-01-01 18:00:00'" -> Seq( + "(year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)))", + "(month = month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)))" + ), + "eventTime > '2021-01-01 18:00:00'" -> Seq( + compactFilter( + """( + | (year > year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month >= month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + |) + |""".stripMargin)), + "eventTime >= '2021-01-01 18:00:00'" -> Seq( + compactFilter( + """( + | (year > year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | OR + | ( + | (year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | AND + | (month >= month(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) + | ) + |) + |""".stripMargin)), + "eventTime is null" -> Seq("(year IS NULL)", "(month IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "eventTime TIMESTAMP", + "year INT", + Map("year" -> "YEAR(eventTime)"), + expectedPartitionExpr = YearPartitionExpr("year"), + filterTestCases = Seq( + "eventTime < '2021-01-01 18:00:00'" -> + Seq("((year <= year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) " + + "OR ((year <= year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) IS NULL))"), + "eventTime <= '2021-01-01 18:00:00'" -> + Seq("((year <= year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) " + + "OR ((year <= year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) IS NULL))"), + "eventTime = '2021-01-01 18:00:00'" -> + Seq("((year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) " + + "OR ((year = year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) IS NULL))"), + "eventTime > '2021-01-01 18:00:00'" -> + Seq("((year >= year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) " + + "OR ((year >= year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS 
DATE))) IS NULL))"), + "eventTime >= '2021-01-01 18:00:00'" -> + Seq("((year >= year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) " + + "OR ((year >= year(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE))) IS NULL))"), + "eventTime is null" -> Seq("(year IS NULL)") + ) + ) + + Seq(("YEAR(eventDate)", " from year(date)"), + ("YEAR(CAST(eventDate AS DATE))", " from year(cast(date))")) + .foreach { case (partCol, auxTestName) => + testOptimizablePartitionExpression( + "eventDate DATE", + "year INT", + Map("year" -> partCol), + expectedPartitionExpr = YearPartitionExpr("year"), + auxiliaryTestName = Option(auxTestName), + filterTestCases = Seq( + "eventDate < '2021-01-01'" -> + Seq("((year <= year(DATE '2021-01-01')) " + + "OR ((year <= year(DATE '2021-01-01')) IS NULL))"), + "eventDate <= '2021-01-01'" -> + Seq("((year <= year(DATE '2021-01-01')) " + + "OR ((year <= year(DATE '2021-01-01')) IS NULL))"), + "eventDate = '2021-01-01'" -> + Seq("((year = year(DATE '2021-01-01')) " + + "OR ((year = year(DATE '2021-01-01')) IS NULL))"), + "eventDate > '2021-01-01'" -> + Seq("((year >= year(DATE '2021-01-01')) " + + "OR ((year >= year(DATE '2021-01-01')) IS NULL))"), + "eventDate >= '2021-01-01'" -> + Seq("((year >= year(DATE '2021-01-01')) " + + "OR ((year >= year(DATE '2021-01-01')) IS NULL))"), + "eventDate is null" -> Seq("(year IS NULL)") + ) + ) + } + + testOptimizablePartitionExpression( + "value STRING", + "substr STRING", + Map("substr" -> "SUBSTRING(value, 2, 3)"), + expectedPartitionExpr = SubstringPartitionExpr("substr", 2, 3), + filterTestCases = Seq( + "value < 'foo'" -> Nil, + "value <= 'foo'" -> Nil, + "value = 'foo'" -> Seq("((substr IS NULL) OR (substr = substring('foo', 2, 3)))"), + "value > 'foo'" -> Nil, + "value >= 'foo'" -> Nil, + "value is null" -> Seq("(substr IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "value STRING", + "substr STRING", + Map("substr" -> "SUBSTRING(value, 0, 3)"), + expectedPartitionExpr = SubstringPartitionExpr("substr", 0, 3), + filterTestCases = Seq( + "value < 'foo'" -> Seq("((substr IS NULL) OR (substr <= substring('foo', 0, 3)))"), + "value <= 'foo'" -> Seq("((substr IS NULL) OR (substr <= substring('foo', 0, 3)))"), + "value = 'foo'" -> Seq("((substr IS NULL) OR (substr = substring('foo', 0, 3)))"), + "value > 'foo'" -> Seq("((substr IS NULL) OR (substr >= substring('foo', 0, 3)))"), + "value >= 'foo'" -> Seq("((substr IS NULL) OR (substr >= substring('foo', 0, 3)))"), + "value is null" -> Seq("(substr IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "value STRING", + "substr STRING", + Map("substr" -> "SUBSTRING(value, 1, 3)"), + expectedPartitionExpr = SubstringPartitionExpr("substr", 1, 3), + filterTestCases = Seq( + "value < 'foo'" -> Seq("((substr IS NULL) OR (substr <= substring('foo', 1, 3)))"), + "value <= 'foo'" -> Seq("((substr IS NULL) OR (substr <= substring('foo', 1, 3)))"), + "value = 'foo'" -> Seq("((substr IS NULL) OR (substr = substring('foo', 1, 3)))"), + "value > 'foo'" -> Seq("((substr IS NULL) OR (substr >= substring('foo', 1, 3)))"), + "value >= 'foo'" -> Seq("((substr IS NULL) OR (substr >= substring('foo', 1, 3)))"), + "value is null" -> Seq("(substr IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "value STRING", + "`my.substr` STRING", + Map("my.substr" -> "SUBSTRING(value, 1, 3)"), + expectedPartitionExpr = SubstringPartitionExpr("my.substr", 1, 3), + filterTestCases = Seq( + "value < 'foo'" -> Seq("((`my.substr` IS NULL) OR (`my.substr` <= substring('foo', 1, 3)))"), + "value <= 
'foo'" -> Seq("((`my.substr` IS NULL) OR (`my.substr` <= substring('foo', 1, 3)))"), + "value = 'foo'" -> Seq("((`my.substr` IS NULL) OR (`my.substr` = substring('foo', 1, 3)))"), + "value > 'foo'" -> Seq("((`my.substr` IS NULL) OR (`my.substr` >= substring('foo', 1, 3)))"), + "value >= 'foo'" -> Seq("((`my.substr` IS NULL) OR (`my.substr` >= substring('foo', 1, 3)))"), + "value is null" -> Seq("(`my.substr` IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "outer struct>>", + "substr STRING", + Map("substr" -> "SUBSTRING(outer.inner.nested.value, 1, 3)"), + expectedPartitionExpr = SubstringPartitionExpr("substr", 1, 3), + auxiliaryTestName = Some(" deeply nested"), + expressionKey = Some("outer.inner.nested.value"), + skipNested = true, + filterTestCases = Seq( + "outer.inner.nested.value < 'foo'" -> + Seq("((substr IS NULL) OR (substr <= substring('foo', 1, 3)))"), + "outer.inner.nested.value <= 'foo'" -> + Seq("((substr IS NULL) OR (substr <= substring('foo', 1, 3)))"), + "outer.inner.nested.value = 'foo'" -> + Seq("((substr IS NULL) OR (substr = substring('foo', 1, 3)))"), + "outer.inner.nested.value > 'foo'" -> + Seq("((substr IS NULL) OR (substr >= substring('foo', 1, 3)))"), + "outer.inner.nested.value >= 'foo'" -> + Seq("((substr IS NULL) OR (substr >= substring('foo', 1, 3)))"), + "outer.inner.nested.value is null" -> Seq("(substr IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "eventTime TIMESTAMP", + "eventTimeTrunc TIMESTAMP", + Map("eventTimeTrunc" -> "date_trunc('YEAR', eventTime)"), + expectedPartitionExpr = TimestampTruncPartitionExpr("YEAR", "eventTimeTrunc"), + auxiliaryTestName = Option(" from date_trunc(timestamp)"), + filterTestCases = Seq( + "eventTime < '2021-01-01 18:00:00'" -> + Seq("((eventTimeTrunc <= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) " + + "OR ((eventTimeTrunc <= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) IS NULL))"), + "eventTime <= '2021-01-01 18:00:00'" -> + Seq("((eventTimeTrunc <= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) " + + "OR ((eventTimeTrunc <= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) IS NULL))"), + "eventTime = '2021-01-01 18:00:00'" -> + Seq("((eventTimeTrunc = date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) " + + "OR ((eventTimeTrunc = date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) IS NULL))"), + "eventTime > '2021-01-01 18:00:00'" -> + Seq("((eventTimeTrunc >= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) " + + "OR ((eventTimeTrunc >= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) IS NULL))"), + "eventTime >= '2021-01-01 18:00:00'" -> + Seq("((eventTimeTrunc >= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) " + + "OR ((eventTimeTrunc >= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) IS NULL))"), + "eventTime is null" -> Seq("(eventTimeTrunc IS NULL)"), + // Verify we can reverse the order + "'2021-01-01 18:00:00' > eventTime" -> + Seq("((eventTimeTrunc <= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) " + + "OR ((eventTimeTrunc <= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) IS NULL))"), + "'2021-01-01 18:00:00' >= eventTime" -> + Seq("((eventTimeTrunc <= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) " + + "OR ((eventTimeTrunc <= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) IS NULL))"), + "'2021-01-01 18:00:00' = eventTime" -> + Seq("((eventTimeTrunc = date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) " + + "OR ((eventTimeTrunc = date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) IS NULL))"), + 
"'2021-01-01 18:00:00' < eventTime" -> + Seq("((eventTimeTrunc >= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) " + + "OR ((eventTimeTrunc >= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) IS NULL))"), + "'2021-01-01 18:00:00' <= eventTime" -> + Seq("((eventTimeTrunc >= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) " + + "OR ((eventTimeTrunc >= date_trunc('YEAR', TIMESTAMP '2021-01-01 18:00:00')) IS NULL))") + ) + ) + + testOptimizablePartitionExpression( + "eventDate DATE", + "eventTimeTrunc TIMESTAMP", + Map("eventTimeTrunc" -> "date_trunc('DD', eventDate)"), + expectedPartitionExpr = TimestampTruncPartitionExpr("DD", "eventTimeTrunc"), + auxiliaryTestName = Option(" from date_trunc(cast(date))"), + filterTestCases = Seq( + "eventDate < '2021-01-01'" -> + Seq("((eventTimeTrunc <= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) " + + "OR ((eventTimeTrunc <= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) IS NULL))"), + "eventDate <= '2021-01-01'" -> + Seq("((eventTimeTrunc <= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) " + + "OR ((eventTimeTrunc <= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) IS NULL))"), + "eventDate = '2021-01-01'" -> + Seq("((eventTimeTrunc = date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) " + + "OR ((eventTimeTrunc = date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) IS NULL))"), + "eventDate > '2021-01-01'" -> + Seq("((eventTimeTrunc >= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) " + + "OR ((eventTimeTrunc >= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) IS NULL))"), + "eventDate >= '2021-01-01'" -> + Seq("((eventTimeTrunc >= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) " + + "OR ((eventTimeTrunc >= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) IS NULL))"), + "eventDate is null" -> Seq("(eventTimeTrunc IS NULL)"), + // Verify we can reverse the order + "'2021-01-01' > eventDate" -> + Seq("((eventTimeTrunc <= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) " + + "OR ((eventTimeTrunc <= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) IS NULL))"), + "'2021-01-01' >= eventDate" -> + Seq("((eventTimeTrunc <= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) " + + "OR ((eventTimeTrunc <= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) IS NULL))"), + "'2021-01-01' = eventDate" -> + Seq("((eventTimeTrunc = date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) " + + "OR ((eventTimeTrunc = date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) IS NULL))"), + "'2021-01-01' < eventDate" -> + Seq("((eventTimeTrunc >= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) " + + "OR ((eventTimeTrunc >= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) IS NULL))"), + "'2021-01-01' <= eventDate" -> + Seq("((eventTimeTrunc >= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) " + + "OR ((eventTimeTrunc >= date_trunc('DD', CAST(DATE '2021-01-01' AS TIMESTAMP))) IS NULL))") + ) + ) + + testOptimizablePartitionExpression( + "value STRING", + "part STRING", + Map("part" -> "value"), + expectedPartitionExpr = IdentityPartitionExpr("part"), + expressionKey = Some("value"), + filterTestCases = Seq( + "value < 'foo'" -> Seq("((part IS NULL) OR (part < 'foo'))"), + "value <= 'foo'" -> Seq("((part IS NULL) OR (part <= 'foo'))"), + "value = 'foo'" -> Seq("((part IS NULL) OR (part = 'foo'))"), + "value > 'foo'" -> Seq("((part IS NULL) OR (part > 'foo'))"), + "value >= 'foo'" -> Seq("((part IS NULL) OR (part >= 'foo'))"), + "value is null" 
-> Seq("(part IS NULL)") + ) + ) + + /** + * In order to distinguish between field names with periods and nested field names, + * fields with periods must be escaped. Otherwise in the example below, there's + * no way to tell whether a filter on nested.value should be applied to part1 or part2. + */ + testOptimizablePartitionExpression( + "`nested.value` STRING, nested struct", + "part1 STRING, part2 STRING", + Map("part1" -> "`nested.value`", "part2" -> "nested.value"), + auxiliaryTestName = Some(" escaped field names"), + expectedPartitionExpr = IdentityPartitionExpr("part1"), + expressionKey = Some("`nested.value`"), + skipNested = true, + filterTestCases = Seq( + "`nested.value` < 'foo'" -> Seq("((part1 IS NULL) OR (part1 < 'foo'))"), + "`nested.value` <= 'foo'" -> Seq("((part1 IS NULL) OR (part1 <= 'foo'))"), + "`nested.value` = 'foo'" -> Seq("((part1 IS NULL) OR (part1 = 'foo'))"), + "`nested.value` > 'foo'" -> Seq("((part1 IS NULL) OR (part1 > 'foo'))"), + "`nested.value` >= 'foo'" -> Seq("((part1 IS NULL) OR (part1 >= 'foo'))"), + "`nested.value` is null" -> Seq("(part1 IS NULL)") + ) + ) + + test("end-to-end optimizable partition expression") { + withTempDir { tempDir => + withTableName("optimizable_partition_expression") { table => + + createTable( + table, + Some(tempDir.getCanonicalPath), + "c1 INT, c2 TIMESTAMP, c3 DATE", + Map("c3" -> "CAST(c2 AS DATE)"), + Seq("c3") + ) + + Seq( + Tuple2(1, "2020-12-31 11:00:00"), + Tuple2(2, "2021-01-01 12:00:00"), + Tuple2(3, "2021-01-02 13:00:00") + ).foreach { values => + insertInto( + tempDir.getCanonicalPath, + Seq(values).toDF("c1", "c2") + .withColumn("c2", $"c2".cast(TimestampType)) + ) + } + assert(tempDir.listFiles().map(_.getName).toSet == + Set("c3=2021-01-01", "c3=2021-01-02", "c3=2020-12-31", "_delta_log")) + // Delete folders which should not be read if we generate the partition filters correctly + tempDir.listFiles().foreach { f => + if (f.getName != "c3=2021-01-01" && f.getName != "_delta_log") { + Utils.deleteRecursively(f) + } + } + assert(tempDir.listFiles().map(_.getName).toSet == Set("c3=2021-01-01", "_delta_log")) + checkAnswer( + sql(s"select * from $table where " + + s"c2 >= '2021-01-01 12:00:00' AND c2 <= '2021-01-01 18:00:00'"), + Row(2, sqlTimestamp("2021-01-01 12:00:00"), sqlDate("2021-01-01"))) + // Verify `OptimizeGeneratedColumn` doesn't mess up Projects. + checkAnswer( + sql(s"select c1 from $table where " + + s"c2 >= '2021-01-01 12:00:00' AND c2 <= '2021-01-01 18:00:00'"), + Row(2)) + + // Check both projection orders to make sure projection orders are handled correctly + checkAnswer( + sql(s"select c1, c2 from $table where " + + s"c2 >= '2021-01-01 12:00:00' AND c2 <= '2021-01-01 18:00:00'"), + Row(2, Timestamp.valueOf("2021-01-01 12:00:00"))) + checkAnswer( + sql(s"select c2, c1 from $table where " + + s"c2 >= '2021-01-01 12:00:00' AND c2 <= '2021-01-01 18:00:00'"), + Row(Timestamp.valueOf("2021-01-01 12:00:00"), 2)) + + // Verify the optimization works for limit. 
+ val limitQuery = sql( + s"""select * from $table + |where c2 >= '2021-01-01 12:00:00' AND c2 <= '2021-01-01 18:00:00' + |limit 10""".stripMargin) + val expectedPartitionFilters = Seq( + "((c3 >= CAST(TIMESTAMP '2021-01-01 12:00:00' AS DATE)) " + + "OR ((c3 >= CAST(TIMESTAMP '2021-01-01 12:00:00' AS DATE)) IS NULL))", + "((c3 <= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) " + + "OR ((c3 <= CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE)) IS NULL))" + ) + assert(expectedPartitionFilters == + getPushedPartitionFilters(limitQuery.queryExecution).map(_.sql)) + checkAnswer(limitQuery, Row(2, sqlTimestamp("2021-01-01 12:00:00"), sqlDate("2021-01-01"))) + } + } + } + + test("empty string and null ambiguity in a partition column") { + withTempDir { tempDir => + withTableName("optimizable_partition_expression") { table => + createTable( + table, + Some(tempDir.getCanonicalPath), + "c1 STRING, c2 STRING", + Map("c2" -> "SUBSTRING(c1, 1, 4)"), + Seq("c2") + ) + insertInto( + tempDir.getCanonicalPath, + Seq(Tuple1("")).toDF("c1") + ) + checkAnswer( + sql(s"select * from $table where c1 = ''"), + Row("", null)) + // The following check shows the weird behavior of SPARK-24438 and confirms the generated + // partition filter doesn't impact the answer. + withSQLConf(GENERATED_COLUMN_PARTITION_FILTER_OPTIMIZATION_ENABLED.key -> "false") { + checkAnswer( + sql(s"select * from $table where c1 = ''"), + Row("", null)) + } + } + } + } + + test("substring on multibyte characters") { + withTempDir { tempDir => + withTableName("multibyte_characters") { table => + createTable( + table, + Some(tempDir.getCanonicalPath), + "c1 STRING, c2 STRING", + Map("c2" -> "SUBSTRING(c1, 1, 2)"), + Seq("c2") + ) + // scalastyle:off nonascii + insertInto( + tempDir.getCanonicalPath, + Seq(Tuple1("一二三四")).toDF("c1") + ) + val testQuery = s"select * from $table where c1 > 'abcd'" + assert("((c2 IS NULL) OR (c2 >= substring('abcd', 1, 2)))" :: Nil == + getPushedPartitionFilters(sql(testQuery).queryExecution).map(_.sql)) + checkAnswer( + sql(testQuery), + Row("一二三四", "一二")) + // scalastyle:on nonascii + } + } + } + + testOptimizablePartitionExpression( + "eventTime TIMESTAMP", + "month STRING", + Map("month" -> "DATE_FORMAT(eventTime, 'yyyy-MM')"), + expectedPartitionExpr = DateFormatPartitionExpr("month", "yyyy-MM"), + auxiliaryTestName = Option(" from timestamp"), + filterTestCases = Seq( + "eventTime < '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(month, 'yyyy-MM') <= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM'), 'yyyy-MM')) " + + "OR ((unix_timestamp(month, 'yyyy-MM') <= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM'), " + + "'yyyy-MM')) IS NULL))"), + "eventTime <= '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(month, 'yyyy-MM') <= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM'), 'yyyy-MM')) " + + "OR ((unix_timestamp(month, 'yyyy-MM') <= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM'), " + + "'yyyy-MM')) IS NULL))"), + "eventTime = '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(month, 'yyyy-MM') = " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM'), 'yyyy-MM')) " + + "OR ((unix_timestamp(month, 'yyyy-MM') = " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM'), " + + "'yyyy-MM')) IS NULL))"), + "eventTime > '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(month, 'yyyy-MM') >= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', 
'yyyy-MM'), 'yyyy-MM')) " + + "OR ((unix_timestamp(month, 'yyyy-MM') >= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM'), " + + "'yyyy-MM')) IS NULL))"), + "eventTime >= '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(month, 'yyyy-MM') >= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM'), 'yyyy-MM')) " + + "OR ((unix_timestamp(month, 'yyyy-MM') >= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM'), " + + "'yyyy-MM')) IS NULL))"), + "eventTime is null" -> Seq("(month IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "eventDate DATE", + "month STRING", + Map("month" -> "DATE_FORMAT(eventDate, 'yyyy-MM')"), + expectedPartitionExpr = DateFormatPartitionExpr("month", "yyyy-MM"), + auxiliaryTestName = Option(" from cast(date)"), + filterTestCases = Seq( + "eventDate < '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(month, 'yyyy-MM') <= unix_timestamp(date_format(CAST(" + + "DATE '2021-06-28' AS TIMESTAMP), 'yyyy-MM'), 'yyyy-MM')) " + + "OR ((unix_timestamp(month, 'yyyy-MM') <= " + + "unix_timestamp(date_format(CAST(DATE '2021-06-28' AS TIMESTAMP), " + + "'yyyy-MM'), 'yyyy-MM')) IS NULL))"), + "eventDate <= '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(month, 'yyyy-MM') <= unix_timestamp(date_format(CAST(" + + "DATE '2021-06-28' AS TIMESTAMP), 'yyyy-MM'), 'yyyy-MM')) " + + "OR ((unix_timestamp(month, 'yyyy-MM') <= " + + "unix_timestamp(date_format(CAST(DATE '2021-06-28' AS TIMESTAMP), " + + "'yyyy-MM'), 'yyyy-MM')) IS NULL))"), + "eventDate = '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(month, 'yyyy-MM') = unix_timestamp(date_format(CAST(" + + "DATE '2021-06-28' AS TIMESTAMP), 'yyyy-MM'), 'yyyy-MM')) " + + "OR ((unix_timestamp(month, 'yyyy-MM') = " + + "unix_timestamp(date_format(CAST(DATE '2021-06-28' AS TIMESTAMP), " + + "'yyyy-MM'), 'yyyy-MM')) IS NULL))"), + "eventDate > '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(month, 'yyyy-MM') >= unix_timestamp(date_format(CAST(" + + "DATE '2021-06-28' AS TIMESTAMP), 'yyyy-MM'), 'yyyy-MM')) " + + "OR ((unix_timestamp(month, 'yyyy-MM') >= " + + "unix_timestamp(date_format(CAST(DATE '2021-06-28' AS TIMESTAMP), " + + "'yyyy-MM'), 'yyyy-MM')) IS NULL))"), + "eventDate >= '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(month, 'yyyy-MM') >= unix_timestamp(date_format(CAST(" + + "DATE '2021-06-28' AS TIMESTAMP), 'yyyy-MM'), 'yyyy-MM')) " + + "OR ((unix_timestamp(month, 'yyyy-MM') >= " + + "unix_timestamp(date_format(CAST(DATE '2021-06-28' AS TIMESTAMP), " + + "'yyyy-MM'), 'yyyy-MM')) IS NULL))"), + "eventDate is null" -> Seq("(month IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "eventTime TIMESTAMP", + "day STRING", + Map("day" -> "DATE_FORMAT(eventTime, 'yyyy-MM-dd')"), + expectedPartitionExpr = DateFormatPartitionExpr("day", "yyyy-MM-dd"), + auxiliaryTestName = Option(" from timestamp"), + filterTestCases = Seq( + "eventTime < '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(day, 'yyyy-MM-dd') <= unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd'), 'yyyy-MM-dd')) " + + "OR ((unix_timestamp(day, 'yyyy-MM-dd') <= unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd'), 'yyyy-MM-dd')) IS NULL))"), + "eventTime <= '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(day, 'yyyy-MM-dd') <= unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd'), 'yyyy-MM-dd')) " + + "OR ((unix_timestamp(day, 'yyyy-MM-dd') <= unix_timestamp(" + + "date_format(TIMESTAMP 
'2021-06-28 18:00:00', 'yyyy-MM-dd'), 'yyyy-MM-dd')) IS NULL))"), + "eventTime = '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(day, 'yyyy-MM-dd') = unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd'), 'yyyy-MM-dd')) " + + "OR ((unix_timestamp(day, 'yyyy-MM-dd') = unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd'), 'yyyy-MM-dd')) IS NULL))"), + "eventTime > '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(day, 'yyyy-MM-dd') >= unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd'), 'yyyy-MM-dd')) " + + "OR ((unix_timestamp(day, 'yyyy-MM-dd') >= unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd'), 'yyyy-MM-dd')) IS NULL))"), + "eventTime >= '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(day, 'yyyy-MM-dd') >= unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd'), 'yyyy-MM-dd')) " + + "OR ((unix_timestamp(day, 'yyyy-MM-dd') >= unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd'), 'yyyy-MM-dd')) IS NULL))"), + "eventTime is null" -> Seq("(day IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "eventTime TIMESTAMP", + "hour STRING", + Map("hour" -> "(DATE_FORMAT(eventTime, 'yyyy-MM-dd-HH'))"), + expectedPartitionExpr = DateFormatPartitionExpr("hour", "yyyy-MM-dd-HH"), + filterTestCases = Seq( + "eventTime < '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(hour, 'yyyy-MM-dd-HH') <= unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd-HH'), 'yyyy-MM-dd-HH')) " + + "OR ((unix_timestamp(hour, 'yyyy-MM-dd-HH') <= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', " + + "'yyyy-MM-dd-HH'), 'yyyy-MM-dd-HH')) IS NULL))"), + "eventTime <= '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(hour, 'yyyy-MM-dd-HH') <= unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd-HH'), 'yyyy-MM-dd-HH')) " + + "OR ((unix_timestamp(hour, 'yyyy-MM-dd-HH') <= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', " + + "'yyyy-MM-dd-HH'), 'yyyy-MM-dd-HH')) IS NULL))"), + "eventTime = '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(hour, 'yyyy-MM-dd-HH') = unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd-HH'), 'yyyy-MM-dd-HH')) " + + "OR ((unix_timestamp(hour, 'yyyy-MM-dd-HH') = " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', " + + "'yyyy-MM-dd-HH'), 'yyyy-MM-dd-HH')) IS NULL))"), + "eventTime > '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(hour, 'yyyy-MM-dd-HH') >= unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd-HH'), 'yyyy-MM-dd-HH')) " + + "OR ((unix_timestamp(hour, 'yyyy-MM-dd-HH') >= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', " + + "'yyyy-MM-dd-HH'), 'yyyy-MM-dd-HH')) IS NULL))"), + "eventTime >= '2021-06-28 18:00:00'" -> + Seq("((unix_timestamp(hour, 'yyyy-MM-dd-HH') >= unix_timestamp(" + + "date_format(TIMESTAMP '2021-06-28 18:00:00', 'yyyy-MM-dd-HH'), 'yyyy-MM-dd-HH')) " + + "OR ((unix_timestamp(hour, 'yyyy-MM-dd-HH') >= " + + "unix_timestamp(date_format(TIMESTAMP '2021-06-28 18:00:00', " + + "'yyyy-MM-dd-HH'), 'yyyy-MM-dd-HH')) IS NULL))"), + "eventTime is null" -> Seq("(hour IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "eventTime TIMESTAMP", + "date DATE", + Map("date" -> "(trunc(eventTime, 'year'))"), + expectedPartitionExpr = TruncDatePartitionExpr("date", "year"), + filterTestCases = Seq( + "eventTime < '2021-01-01 18:00:00'" 
-> + Seq("((date <= trunc(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE), 'year')) " + + "OR ((date <= trunc(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE), 'year')) IS NULL))"), + "eventTime <= '2021-01-01 18:00:00'" -> + Seq("((date <= trunc(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE), 'year')) " + + "OR ((date <= trunc(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE), 'year')) IS NULL))"), + "eventTime = '2021-01-01 18:00:00'" -> + Seq("((date = trunc(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE), 'year')) " + + "OR ((date = trunc(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE), 'year')) IS NULL))"), + "eventTime > '2021-01-01 18:00:00'" -> + Seq("((date >= trunc(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE), 'year')) " + + "OR ((date >= trunc(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE), 'year')) IS NULL))"), + "eventTime >= '2021-01-01 18:00:00'" -> + Seq("((date >= trunc(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE), 'year')) " + + "OR ((date >= trunc(CAST(TIMESTAMP '2021-01-01 18:00:00' AS DATE), 'year')) IS NULL))"), + "eventTime is null" -> + Seq("(date IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "eventDate DATE", + "date DATE", + Map("date" -> "(trunc(eventDate, 'month'))"), + expectedPartitionExpr = TruncDatePartitionExpr("date", "month"), + filterTestCases = Seq( + "eventDate < '2021-12-01'" -> + Seq("((date <= trunc(DATE '2021-12-01', 'month')) " + + "OR ((date <= trunc(DATE '2021-12-01', 'month')) IS NULL))"), + "eventDate <= '2021-12-01'" -> + Seq("((date <= trunc(DATE '2021-12-01', 'month')) " + + "OR ((date <= trunc(DATE '2021-12-01', 'month')) IS NULL))"), + "eventDate = '2021-12-01'" -> + Seq("((date = trunc(DATE '2021-12-01', 'month')) " + + "OR ((date = trunc(DATE '2021-12-01', 'month')) IS NULL))"), + "eventDate > '2021-12-01'" -> + Seq("((date >= trunc(DATE '2021-12-01', 'month')) " + + "OR ((date >= trunc(DATE '2021-12-01', 'month')) IS NULL))"), + "eventDate >= '2021-12-01'" -> + Seq("((date >= trunc(DATE '2021-12-01', 'month')) " + + "OR ((date >= trunc(DATE '2021-12-01', 'month')) IS NULL))"), + "eventDate is null" -> + Seq("(date IS NULL)") + ) + ) + + testOptimizablePartitionExpression( + "eventDateStr STRING", + "date DATE", + Map("date" -> "(trunc(eventDateStr, 'quarter'))"), + expectedPartitionExpr = TruncDatePartitionExpr("date", "quarter"), + filterTestCases = Seq( + "eventDateStr < '2022-04-01'" -> + Seq("((date <= trunc(CAST('2022-04-01' AS DATE), 'quarter')) " + + "OR ((date <= trunc(CAST('2022-04-01' AS DATE), 'quarter')) IS NULL))"), + "eventDateStr <= '2022-04-01'" -> + Seq("((date <= trunc(CAST('2022-04-01' AS DATE), 'quarter')) " + + "OR ((date <= trunc(CAST('2022-04-01' AS DATE), 'quarter')) IS NULL))"), + "eventDateStr = '2022-04-01'" -> + Seq("((date = trunc(CAST('2022-04-01' AS DATE), 'quarter')) " + + "OR ((date = trunc(CAST('2022-04-01' AS DATE), 'quarter')) IS NULL))"), + "eventDateStr > '2022-04-01'" -> + Seq("((date >= trunc(CAST('2022-04-01' AS DATE), 'quarter')) " + + "OR ((date >= trunc(CAST('2022-04-01' AS DATE), 'quarter')) IS NULL))"), + "eventDateStr >= '2022-04-01'" -> + Seq("((date >= trunc(CAST('2022-04-01' AS DATE), 'quarter')) " + + "OR ((date >= trunc(CAST('2022-04-01' AS DATE), 'quarter')) IS NULL))"), + "eventDateStr is null" -> + Seq("(date IS NULL)") + ) + ) + + test("five digits year in a year month day partition column") { + withTempDir { tempDir => + withTableName("optimizable_partition_expression") { table => + createTable( + table, + Some(tempDir.getCanonicalPath), + "c1 TIMESTAMP, c2 INT, c3 INT, c4 
INT", + Map( + "c2" -> "YEAR(c1)", + "c3" -> "MONTH(c1)", + "c4" -> "DAY(c1)" + ), + Seq("c2", "c3", "c4") + ) + insertInto( + tempDir.getCanonicalPath, + Seq(Tuple1("12345-07-15 18:00:00")) + .toDF("c1") + .withColumn("c1", $"c1".cast(TimestampType)) + ) + + checkAnswer( + sql(s"select * from $table where c1 = CAST('12345-07-15 18:00:00' as timestamp)"), + Row(new Timestamp(327420320400000L), 12345, 7, 15)) + withSQLConf(GENERATED_COLUMN_PARTITION_FILTER_OPTIMIZATION_ENABLED.key -> "false") { + checkAnswer( + sql(s"select * from $table where c1 = CAST('12345-07-15 18:00:00' as timestamp)"), + Row(new Timestamp(327420320400000L), 12345, 7, 15)) + } + } + } + } + + test("five digits year in a date_format yyyy-MM partition column") { + withTempDir { tempDir => + withTableName("optimizable_partition_expression") { table => + createTable( + table, + Some(tempDir.getCanonicalPath), + "c1 TIMESTAMP, c2 STRING", + Map("c2" -> "DATE_FORMAT(c1, 'yyyy-MM')"), + Seq("c2") + ) + insertInto( + tempDir.getCanonicalPath, + Seq(Tuple1("12345-07-15 18:00:00")) + .toDF("c1") + .withColumn("c1", $"c1".cast(TimestampType)) + ) + + checkAnswer( + sql(s"select * from $table where c1 = CAST('12345-07-15 18:00:00' as timestamp)"), + Row(new Timestamp(327420320400000L), "+12345-07")) + withSQLConf(GENERATED_COLUMN_PARTITION_FILTER_OPTIMIZATION_ENABLED.key -> "false") { + checkAnswer( + sql(s"select * from $table where c1 = CAST('12345-07-15 18:00:00' as timestamp)"), + Row(new Timestamp(327420320400000L), "+12345-07")) + } + } + } + } + + test("five digits year in a date_format yyyy-MM-dd-HH partition column") { + withTempDir { tempDir => + withTableName("optimizable_partition_expression") { table => + createTable( + table, + Some(tempDir.getCanonicalPath), + "c1 TIMESTAMP, c2 STRING", + Map("c2" -> "DATE_FORMAT(c1, 'yyyy-MM-dd-HH')"), + Seq("c2") + ) + insertInto( + tempDir.getCanonicalPath, + Seq(Tuple1("12345-07-15 18:00:00")) + .toDF("c1") + .withColumn("c1", $"c1".cast(TimestampType)) + ) + + checkAnswer( + sql(s"select * from $table where c1 = CAST('12345-07-15 18:00:00' as timestamp)"), + Row(new Timestamp(327420320400000L), "+12345-07-15-18")) + withSQLConf(GENERATED_COLUMN_PARTITION_FILTER_OPTIMIZATION_ENABLED.key -> "false") { + checkAnswer( + sql(s"select * from $table where c1 = CAST('12345-07-15 18:00:00' as timestamp)"), + Row(new Timestamp(327420320400000L), "+12345-07-15-18")) + } + } + } + } + + test("end-to-end test of behaviors of write/read null on partition column") { + // unix_timestamp('12345-12', 'yyyy-MM') | unix_timestamp('+12345-12', 'yyyy-MM') + // EXCEPTION fail | 327432240000 + // CORRECTED null | 327432240000 + // LEGACY 327432240000 | null + withTempDir { tempDir => + withTableName("optimizable_partition_expression") { table => + createTable( + table, + Some(tempDir.getCanonicalPath), + "c1 TIMESTAMP, c2 STRING", + Map("c2" -> "DATE_FORMAT(c1, 'yyyy-MM')"), + Seq("c2") + ) + + // write in LEGACY + withSQLConf( + "spark.sql.legacy.timeParserPolicy" -> "CORRECTED" + ) { + insertInto( + tempDir.getCanonicalPath, + Seq(Tuple1("12345-07-01 00:00:00")) + .toDF("c1") + .withColumn("c1", $"c1".cast(TimestampType)) + ) + insertInto( + tempDir.getCanonicalPath, + Seq(Tuple1("+23456-07-20 18:30:00")) + .toDF("c1") + .withColumn("c1", $"c1".cast(TimestampType)) + ) + } + + // write in LEGACY + withSQLConf( + "spark.sql.legacy.timeParserPolicy" -> "LEGACY" + ) { + insertInto( + tempDir.getCanonicalPath, + Seq(Tuple1("+12349-07-01 00:00:00")) + .toDF("c1") + .withColumn("c1", 
$"c1".cast(TimestampType)) + ) + insertInto( + tempDir.getCanonicalPath, + Seq(Tuple1("+30000-12-30 20:00:00")) + .toDF("c1") + .withColumn("c1", $"c1".cast(TimestampType)) + ) + } + + // we have partitions based on CORRECTED + LEGACY parser (with +) + assert(tempDir.listFiles().map(_.getName).toSet == + Set("c2=+23456-07", "c2=12349-07", "c2=30000-12", "c2=+12345-07", "_delta_log")) + + // read behaviors in CORRECTED, we still can query correctly + withSQLConf("spark.sql.legacy.timeParserPolicy" -> "CORRECTED") { + checkAnswer( + sql(s"select (unix_timestamp('+20000-01', 'yyyy-MM')) as value"), + Row(568971849600L) + ) + withSQLConf("spark.sql.ansi.enabled" -> "false") { + checkAnswer( + sql(s"select (unix_timestamp('20000-01', 'yyyy-MM')) as value"), + Row(null) + ) + checkAnswer( + sql(s"select * from $table where " + + s"c1 >= '20000-01-01 12:00:00'"), + // 23456-07-20 18:30:00 + Row(new Timestamp(678050098200000L), "+23456-07") :: + // 30000-12-30 20:00:00 + Row(new Timestamp(884572891200000L), "30000-12") :: Nil + ) + } + } + + // read behaviors in LEGACY, we still can query correctly + withSQLConf("spark.sql.legacy.timeParserPolicy" -> "LEGACY") { + checkAnswer( + sql(s"select (unix_timestamp('20000-01', 'yyyy-MM')) as value"), + Row(568971849600L) + ) + withSQLConf("spark.sql.ansi.enabled" -> "false") { + checkAnswer( + sql(s"select (unix_timestamp('+20000-01', 'yyyy-MM')) as value"), + Row(null) + ) + checkAnswer( + sql(s"select * from $table where " + + s"c1 >= '20000-01-01 12:00:00'"), + // 23456-07-20 18:30:00 + Row(new Timestamp(678050098200000L), "+23456-07") :: + // 30000-12-30 20:00:00 + Row(new Timestamp(884572891200000L), "30000-12") :: Nil + ) + } + } + } + } + } + + test("generated partition filters should avoid conflicts") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + withTableName("avoid_conflicts") { table => + createTable( + table, + Some(path), + "eventTime TIMESTAMP, date DATE", + Map("date" -> "CAST(eventTime AS DATE)"), + Seq("date") + ) + insertInto( + path, + Seq(Tuple1("2021-01-01 00:00:00"), Tuple1("2021-01-02 00:00:00")) + .toDF("eventTime") + .withColumn("eventTime", $"eventTime".cast(TimestampType)) + ) + + val unblockQueries = new CountDownLatch(1) + val waitForAllQueries = new CountDownLatch(2) + + PrepareDeltaScanBase.withCallbackOnGetDeltaScanGenerator(_ => { + waitForAllQueries.countDown() + assert( + unblockQueries.await(30, TimeUnit.SECONDS), + "the main thread didn't wake up queries") + }) { + val threadPool = ThreadUtils.newDaemonFixedThreadPool(2, "test") + try { + // Run two queries that should not conflict with each other if we generate the partition + // filter correctly. 
+ val f1 = threadPool.submit(() => { + spark.read.format("delta").load(path).where("eventTime = '2021-01-01 00:00:00'") + .write.mode("append").format("delta").save(path) + true + }) + val f2 = threadPool.submit(() => { + spark.read.format("delta").load(path).where("eventTime = '2021-01-02 00:00:00'") + .write.mode("append").format("delta").save(path) + true + }) + assert( + waitForAllQueries.await(30, TimeUnit.SECONDS), + "queries didn't finish before timeout") + unblockQueries.countDown() + f1.get(30, TimeUnit.SECONDS) + f2.get(30, TimeUnit.SECONDS) + } finally { + threadPool.shutdownNow() + } + } + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuerySuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuerySuite.scala new file mode 100644 index 00000000000..f8d32b7efb1 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuerySuite.scala @@ -0,0 +1,1016 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.perf + +import scala.collection.mutable + +import org.apache.spark.sql.delta.{DeletionVectorsTestUtils, DeltaColumnMappingEnableIdMode, DeltaColumnMappingEnableNameMode, DeltaLog, DeltaTestUtils} +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.PrepareDeltaScanBase +import org.apache.spark.sql.delta.stats.StatisticsCollection +import org.apache.spark.sql.delta.test.DeltaColumnMappingSelectedTestMixin +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import io.delta.tables.DeltaTable +import org.apache.hadoop.fs.Path +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.sql.{DataFrame, Dataset, QueryTest, Row, SaveMode} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SharedSparkSession + +class OptimizeMetadataOnlyDeltaQuerySuite + extends QueryTest + with SharedSparkSession + with BeforeAndAfterAll + with DeltaSQLCommandTest + with DeletionVectorsTestUtils { + val testTableName = "table_basic" + val noStatsTableName = " table_nostats" + val mixedStatsTableName = " table_mixstats" + + var dfPart1: DataFrame = null + var dfPart2: DataFrame = null + + var totalRows: Long = -1 + var minId: Long = -1 + var maxId: Long = -1 + + override def beforeAll(): Unit = { + super.beforeAll() + + dfPart1 = generateRowsDataFrame(spark.range(1L, 6L)) + dfPart2 = generateRowsDataFrame(spark.range(6L, 11L)) + + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + dfPart1.write.format("delta").mode(SaveMode.Overwrite).saveAsTable(noStatsTableName) + 
dfPart1.write.format("delta").mode(SaveMode.Overwrite).saveAsTable(mixedStatsTableName) + + spark.sql(s"DELETE FROM $noStatsTableName WHERE id = 1") + spark.sql(s"DELETE FROM $mixedStatsTableName WHERE id = 1") + + dfPart2.write.format("delta").mode("append").saveAsTable(noStatsTableName) + } + + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "true") { + dfPart1.write.format("delta").mode(SaveMode.Overwrite).saveAsTable(testTableName) + + spark.sql(s"DELETE FROM $testTableName WHERE id = 1") + + dfPart2.write.format("delta").mode(SaveMode.Append).saveAsTable(testTableName) + dfPart2.write.format("delta").mode(SaveMode.Append).saveAsTable(mixedStatsTableName) + + // Run updates to generate more Delta Log and trigger a checkpoint + // and make sure stats works after checkpoints + for (a <- 1 to 10) { + spark.sql(s"UPDATE $testTableName SET data='$a' WHERE id = 7") + } + spark.sql(s"UPDATE $testTableName SET data=NULL WHERE id = 7") + + // Creates an empty (numRecords == 0) AddFile record + generateRowsDataFrame(spark.range(11L, 12L)) + .write.format("delta").mode("append").saveAsTable(testTableName) + spark.sql(s"DELETE FROM $testTableName WHERE id = 11") + } + + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> "false") { + val result = spark.sql(s"SELECT COUNT(*), MIN(id), MAX(id) FROM $testTableName").head + totalRows = result.getLong(0) + minId = result.getLong(1) + maxId = result.getLong(2) + } + } + + /** Class to hold test parameters */ + case class ScalaTestParams(name: String, queryScala: () => DataFrame, expectedPlan: String) + + Seq( + new ScalaTestParams( + name = "count - simple query", + queryScala = () => spark.read.format("delta").table(testTableName) + .agg(count(col("*"))), + expectedPlan = "LocalRelation [none#0L]"), + new ScalaTestParams( + name = "min-max - simple query", + queryScala = () => spark.read.format("delta").table(testTableName) + .agg(min(col("id")), max(col("id")), + min(col("TinyIntColumn")), max(col("TinyIntColumn")), + min(col("SmallIntColumn")), max(col("SmallIntColumn")), + min(col("IntColumn")), max(col("IntColumn")), + min(col("BigIntColumn")), max(col("BigIntColumn")), + min(col("FloatColumn")), max(col("FloatColumn")), + min(col("DoubleColumn")), max(col("DoubleColumn")), + min(col("DateColumn")), max(col("DateColumn"))), + expectedPlan = "LocalRelation [none#0L, none#1L, none#2, none#3, none#4, none#5, none#6" + + ", none#7, none#8L, none#9L, none#10, none#11, none#12, none#13, none#14, none#15]")) + .foreach { testParams => + test(s"optimization supported - Scala - ${testParams.name}") { + checkResultsAndOptimizedPlan(testParams.queryScala, testParams.expectedPlan) + } + } + + /** Class to hold test parameters */ + case class SqlTestParams( + name: String, + querySql: String, + expectedPlan: String, + querySetup: Option[Seq[String]] = None) + + Seq( + new SqlTestParams( + name = "count - simple query", + querySql = s"SELECT COUNT(*) FROM $testTableName", + expectedPlan = "LocalRelation [none#0L]"), + new SqlTestParams( + name = "min-max - simple query", + querySql = s"SELECT MIN(id), MAX(id)" + + s", MIN(TinyIntColumn), MAX(TinyIntColumn)" + + s", MIN(SmallIntColumn), MAX(SmallIntColumn)" + + s", MIN(IntColumn), MAX(IntColumn)" + + s", MIN(BigIntColumn), MAX(BigIntColumn)" + + s", MIN(FloatColumn), MAX(FloatColumn)" + + s", MIN(DoubleColumn), MAX(DoubleColumn)" + + s", MIN(DateColumn), MAX(DateColumn)" + + s"FROM $testTableName", + expectedPlan = "LocalRelation [none#0L, none#1L, none#2, none#3, none#4, none#5, none#6" + + 
", none#7, none#8L, none#9L, none#10, none#11, none#12, none#13, none#14, none#15]"), + new SqlTestParams( + name = "min-max - column name non-matching case", + querySql = s"SELECT MIN(ID), MAX(iD)" + + s", MIN(tINYINTCOLUMN), MAX(tinyintcolumN)" + + s", MIN(sMALLINTCOLUMN), MAX(smallintcolumN)" + + s", MIN(iNTCOLUMN), MAX(intcolumN)" + + s", MIN(bIGINTCOLUMN), MAX(bigintcolumN)" + + s", MIN(fLOATCOLUMN), MAX(floatcolumN)" + + s", MIN(dOUBLECOLUMN), MAX(doublecolumN)" + + s", MIN(dATECOLUMN), MAX(datecolumN)" + + s"FROM $testTableName", + expectedPlan = "LocalRelation [none#0L, none#1L, none#2, none#3, none#4, none#5, none#6" + + ", none#7, none#8L, none#9L, none#10, none#11, none#12, none#13, none#14, none#15]"), + new SqlTestParams( + name = "count with column name alias", + querySql = s"SELECT COUNT(*) as MyCount FROM $testTableName", + expectedPlan = "LocalRelation [none#0L]"), + new SqlTestParams( + name = "count-min-max with column name alias", + querySql = s"SELECT COUNT(*) as MyCount, MIN(id) as MyMinId, MAX(id) as MyMaxId" + + s" FROM $testTableName", + expectedPlan = "LocalRelation [none#0L, none#1L, none#2L]"), + new SqlTestParams( + name = "count-min-max - table name with alias", + querySql = s"SELECT COUNT(*), MIN(id), MAX(id) FROM $testTableName MyTable", + expectedPlan = "LocalRelation [none#0L, none#1L, none#2L]"), + new SqlTestParams( + name = "count-min-max - query using time travel - version 0", + querySql = s"SELECT COUNT(*), MIN(id), MAX(id) " + + s"FROM $testTableName VERSION AS OF 0", + expectedPlan = "LocalRelation [none#0L, none#1L, none#2L]"), + new SqlTestParams( + name = "count-min-max - query using time travel - version 1", + querySql = s"SELECT COUNT(*), MIN(id), MAX(id) " + + s"FROM $testTableName VERSION AS OF 1", + expectedPlan = "LocalRelation [none#0L, none#1L, none#2L]"), + new SqlTestParams( + name = "count-min-max - query using time travel - version 2", + querySql = s"SELECT COUNT(*), MIN(id), MAX(id) " + + s"FROM $testTableName VERSION AS OF 2", + expectedPlan = "LocalRelation [none#0L, none#1L, none#2L]"), + new SqlTestParams( + name = "count - sub-query", + querySql = s"SELECT (SELECT COUNT(*) FROM $testTableName)", + expectedPlan = "Project [scalar-subquery#0 [] AS #0L]\n" + + ": +- LocalRelation [none#0L]\n+- OneRowRelation"), + new SqlTestParams( + name = "min - sub-query", + querySql = s"SELECT (SELECT MIN(id) FROM $testTableName)", + expectedPlan = "Project [scalar-subquery#0 [] AS #0L]\n" + + ": +- LocalRelation [none#0L]\n+- OneRowRelation"), + new SqlTestParams( + name = "max - sub-query", + querySql = s"SELECT (SELECT MAX(id) FROM $testTableName)", + expectedPlan = "Project [scalar-subquery#0 [] AS #0L]\n" + + ": +- LocalRelation [none#0L]\n+- OneRowRelation"), + new SqlTestParams( + name = "count - sub-query filter", + querySql = s"SELECT 'ABC' WHERE" + + s" (SELECT COUNT(*) FROM $testTableName) = $totalRows", + expectedPlan = "Project [ABC AS #0]\n+- Filter (scalar-subquery#0 [] = " + + totalRows + ")\n : +- LocalRelation [none#0L]\n +- OneRowRelation"), + new SqlTestParams( + name = "min - sub-query filter", + querySql = s"SELECT 'ABC' WHERE" + + s" (SELECT MIN(id) FROM $testTableName) = $minId", + expectedPlan = "Project [ABC AS #0]\n+- Filter (scalar-subquery#0 [] = " + + minId + ")\n : +- LocalRelation [none#0L]\n +- OneRowRelation"), + new SqlTestParams( + name = "max - sub-query filter", + querySql = s"SELECT 'ABC' WHERE" + + s" (SELECT MAX(id) FROM $testTableName) = $maxId", + expectedPlan = "Project [ABC AS #0]\n+- Filter 
(scalar-subquery#0 [] = " + + maxId + ")\n : +- LocalRelation [none#0L]\n +- OneRowRelation"), + // Limit doesn't affect aggregation results + new SqlTestParams( + name = "count-min-max - query with limit", + querySql = s"SELECT COUNT(*), MIN(id), MAX(id) FROM $testTableName LIMIT 3", + expectedPlan = "LocalRelation [none#0L, none#1L, none#2L]"), + new SqlTestParams( + name = "count-min-max - duplicated functions", + querySql = s"SELECT COUNT(*), COUNT(*), MIN(id), MIN(id), MAX(id), MAX(id)" + + s" FROM $testTableName", + expectedPlan = "LocalRelation [none#0L, none#1L, none#2L, none#3L, none#4L, none#5L]"), + new SqlTestParams( + name = "count - empty table", + querySetup = Some(Seq("CREATE TABLE TestEmpty (c1 int) USING DELTA")), + querySql = "SELECT COUNT(*) FROM TestEmpty", + expectedPlan = "LocalRelation [none#0L]"), + /** Dates are stored as Int in literals. This test make sure Date columns works + * and NULL are handled correctly + */ + new SqlTestParams( + name = "min-max - date columns", + querySetup = Some(Seq( + "CREATE TABLE TestDateValues" + + " (Column1 DATE, Column2 DATE, Column3 DATE) USING DELTA;", + "INSERT INTO TestDateValues" + + " (Column1, Column2, Column3) VALUES (NULL, current_date(), current_date());", + "INSERT INTO TestDateValues" + + " (Column1, Column2, Column3) VALUES (NULL, NULL, current_date());")), + querySql = "SELECT COUNT(*), MIN(Column1), MAX(Column1), MIN(Column2)" + + ", MAX(Column2), MIN(Column3), MAX(Column3) FROM TestDateValues", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3, none#4, none#5, none#6]"), + new SqlTestParams( + name = "min-max - floating point infinity", + querySetup = Some(Seq( + "CREATE TABLE TestFloatInfinity (FloatColumn Float, DoubleColumn Double) USING DELTA", + "INSERT INTO TestFloatInfinity (FloatColumn, DoubleColumn) VALUES (1, 1);", + "INSERT INTO TestFloatInfinity (FloatColumn, DoubleColumn) VALUES (NULL, NULL);", + "INSERT INTO TestFloatInfinity (FloatColumn, DoubleColumn) VALUES " + + "(float('inf'), double('inf'))" + + ", (float('+inf'), double('+inf'))" + + ", (float('infinity'), double('infinity'))" + + ", (float('+infinity'), double('+infinity'))" + + ", (float('-inf'), double('-inf'))" + + ", (float('-infinity'), double('-infinity'))" + )), + querySql = "SELECT COUNT(*), MIN(FloatColumn), MAX(FloatColumn), MIN(DoubleColumn)" + + ", MAX(DoubleColumn) FROM TestFloatInfinity", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3, none#4]"), + // NaN is larger than any other value, including Infinity + new SqlTestParams( + name = "min-max - floating point NaN values", + querySetup = Some(Seq( + "CREATE TABLE TestFloatNaN (FloatColumn Float, DoubleColumn Double) USING DELTA", + "INSERT INTO TestFloatNaN (FloatColumn, DoubleColumn) VALUES (1, 1);", + "INSERT INTO TestFloatNaN (FloatColumn, DoubleColumn) VALUES (NULL, NULL);", + "INSERT INTO TestFloatNaN (FloatColumn, DoubleColumn) VALUES " + + "(float('inf'), double('inf'))" + + ", (float('+inf'), double('+inf'))" + + ", (float('infinity'), double('infinity'))" + + ", (float('+infinity'), double('+infinity'))" + + ", (float('-inf'), double('-inf'))" + + ", (float('-infinity'), double('-infinity'))", + "INSERT INTO TestFloatNaN (FloatColumn, DoubleColumn) VALUES " + + "(float('NaN'), double('NaN'));" + )), + querySql = "SELECT COUNT(*), MIN(FloatColumn), MAX(FloatColumn), MIN(DoubleColumn)" + + ", MAX(DoubleColumn) FROM TestFloatNaN", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3, none#4]"), + new SqlTestParams( + name = 
"min-max - floating point min positive value", + querySetup = Some(Seq( + "CREATE TABLE TestFloatPrecision (FloatColumn Float, DoubleColumn Double) USING DELTA", + "INSERT INTO TestFloatPrecision (FloatColumn, DoubleColumn) VALUES " + + "(CAST('1.4E-45' as FLOAT), CAST('4.9E-324' as DOUBLE))" + + ", (CAST('-1.4E-45' as FLOAT), CAST('-4.9E-324' as DOUBLE))" + + ", (0, 0);" + )), + querySql = "SELECT COUNT(*), MIN(FloatColumn), MAX(FloatColumn), MIN(DoubleColumn)" + + ", MAX(DoubleColumn) FROM TestFloatPrecision", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3, none#4]"), + new SqlTestParams( + name = "min-max - NULL and non-NULL values", + querySetup = Some(Seq( + "CREATE TABLE TestNullValues (Column1 INT, Column2 INT, Column3 INT) USING DELTA", + "INSERT INTO TestNullValues (Column1, Column2, Column3) VALUES (NULL, 1, 1);", + "INSERT INTO TestNullValues (Column1, Column2, Column3) VALUES (NULL, NULL, 1);" + )), + querySql = "SELECT COUNT(*), MIN(Column1), MAX(Column1)," + + "MIN(Column2), MAX(Column2) FROM TestNullValues", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3, none#4]"), + new SqlTestParams( + name = "min-max - only NULL values", + querySetup = Some(Seq( + "CREATE TABLE TestOnlyNullValues (Column1 INT, Column2 INT, Column3 INT) USING DELTA", + "INSERT INTO TestOnlyNullValues (Column1, Column2, Column3) VALUES (NULL, NULL, 1);", + "INSERT INTO TestOnlyNullValues (Column1, Column2, Column3) VALUES (NULL, NULL, 2);" + )), + querySql = "SELECT COUNT(*), MIN(Column1), MAX(Column1), MIN(Column2), MAX(Column2), " + + "MIN(Column3), MAX(Column3) FROM TestOnlyNullValues", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3, none#4, none#5, none#6]"), + new SqlTestParams( + name = "min-max - all supported data types", + querySetup = Some(Seq( + "CREATE TABLE TestMinMaxValues (" + + "TINYINTColumn TINYINT, SMALLINTColumn SMALLINT, INTColumn INT, BIGINTColumn BIGINT, " + + "FLOATColumn FLOAT, DOUBLEColumn DOUBLE, DATEColumn DATE) USING DELTA", + "INSERT INTO TestMinMaxValues (TINYINTColumn, SMALLINTColumn, INTColumn, BIGINTColumn," + + " FLOATColumn, DOUBLEColumn, DATEColumn)" + + " VALUES (-128, -32768, -2147483648, -9223372036854775808," + + " -3.4028235E38, -1.7976931348623157E308, CAST('1582-10-15' AS DATE));", + "INSERT INTO TestMinMaxValues (TINYINTColumn, SMALLINTColumn, INTColumn, BIGINTColumn," + + " FLOATColumn, DOUBLEColumn, DATEColumn)" + + " VALUES (127, 32767, 2147483647, 9223372036854775807," + + " 3.4028235E38, 1.7976931348623157E308, CAST('9999-12-31' AS DATE));" + )), + querySql = "SELECT COUNT(*)," + + "MIN(TINYINTColumn), MAX(TINYINTColumn)" + + ", MIN(SMALLINTColumn), MAX(SMALLINTColumn)" + + ", MIN(INTColumn), MAX(INTColumn)" + + ", MIN(BIGINTColumn), MAX(BIGINTColumn)" + + ", MIN(FLOATColumn), MAX(FLOATColumn)" + + ", MIN(DOUBLEColumn), MAX(DOUBLEColumn)" + + ", MIN(DATEColumn), MAX(DATEColumn)" + + " FROM TestMinMaxValues", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3, none#4, none#5, none#6" + + ", none#7L, none#8L, none#9, none#10, none#11, none#12, none#13, none#14]"), + new SqlTestParams( + name = "count-min-max - partitioned table - simple query", + querySetup = Some(Seq( + "CREATE TABLE TestPartitionedTable (Column1 INT, Column2 INT, Column3 INT, Column4 INT)" + + " USING DELTA PARTITIONED BY (Column2, Column3)", + "INSERT INTO TestPartitionedTable" + + " (Column1, Column2, Column3, Column4) VALUES (1, 2, 3, 4);", + "INSERT INTO TestPartitionedTable" + + " (Column1, Column2, Column3, Column4) 
VALUES (2, 2, 3, 5);", + "INSERT INTO TestPartitionedTable" + + " (Column1, Column2, Column3, Column4) VALUES (3, 3, 2, 6);", + "INSERT INTO TestPartitionedTable" + + " (Column1, Column2, Column3, Column4) VALUES (4, 3, 2, 7);" + )), + querySql = "SELECT COUNT(*)" + + ", MIN(Column1), MAX(Column1)" + + ", MIN(Column2), MAX(Column2)" + + ", MIN(Column3), MAX(Column3)" + + ", MIN(Column4), MAX(Column4)" + + " FROM TestPartitionedTable", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3," + + " none#4, none#5, none#6, none#7, none#8]"), + /** Partitioned columns should be able to return MIN and MAX data + * even when there are no column stats */ + new SqlTestParams( + name = "count-min-max - partitioned table - no stats", + querySetup = Some(Seq( + "CREATE TABLE TestPartitionedTableNoStats" + + " (Column1 INT, Column2 INT, Column3 INT, Column4 INT)" + + " USING DELTA PARTITIONED BY (Column2, Column3)" + + " TBLPROPERTIES('delta.dataSkippingNumIndexedCols' = 0)", + "INSERT INTO TestPartitionedTableNoStats" + + " (Column1, Column2, Column3, Column4) VALUES (1, 2, 3, 4);", + "INSERT INTO TestPartitionedTableNoStats" + + " (Column1, Column2, Column3, Column4) VALUES (2, 2, 3, 5);", + "INSERT INTO TestPartitionedTableNoStats" + + " (Column1, Column2, Column3, Column4) VALUES (3, 3, 2, 6);", + "INSERT INTO TestPartitionedTableNoStats" + + " (Column1, Column2, Column3, Column4) VALUES (4, 3, 2, 7);" + )), + querySql = "SELECT COUNT(*)" + + ", MIN(Column2), MAX(Column2)" + + ", MIN(Column3), MAX(Column3)" + + " FROM TestPartitionedTableNoStats", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3, none#4]"), + new SqlTestParams( + name = "min-max - partitioned table - all supported data types", + querySetup = Some(Seq( + "CREATE TABLE TestAllTypesPartitionedTable (" + + "TINYINTColumn TINYINT, SMALLINTColumn SMALLINT, INTColumn INT, BIGINTColumn BIGINT, " + + "FLOATColumn FLOAT, DOUBLEColumn DOUBLE, DATEColumn DATE, Data INT) USING DELTA" + + " PARTITIONED BY (TINYINTColumn, SMALLINTColumn, INTColumn, BIGINTColumn," + + " FLOATColumn, DOUBLEColumn, DATEColumn)", + "INSERT INTO TestAllTypesPartitionedTable" + + " (TINYINTColumn, SMALLINTColumn, INTColumn, BIGINTColumn," + + " FLOATColumn, DOUBLEColumn, DATEColumn, Data)" + + " VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);", + "INSERT INTO TestAllTypesPartitionedTable" + + " (TINYINTColumn, SMALLINTColumn, INTColumn, BIGINTColumn," + + " FLOATColumn, DOUBLEColumn, DATEColumn, Data)" + + " VALUES (-128, -32768, -2147483648, -9223372036854775808," + + " -3.4028235E38, -1.7976931348623157E308, CAST('1582-10-15' AS DATE), 1);" + )), + querySql = "SELECT COUNT(*)," + + "MIN(TINYINTColumn), MAX(TINYINTColumn)" + + ", MIN(SMALLINTColumn), MAX(SMALLINTColumn)" + + ", MIN(INTColumn), MAX(INTColumn)" + + ", MIN(BIGINTColumn), MAX(BIGINTColumn)" + + ", MIN(FLOATColumn), MAX(FLOATColumn)" + + ", MIN(DOUBLEColumn), MAX(DOUBLEColumn)" + + ", MIN(DATEColumn), MAX(DATEColumn)" + + ", MIN(Data), MAX(Data)" + + " FROM TestAllTypesPartitionedTable", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3, none#4, none#5, none#6, " + + "none#7L, none#8L, none#9, none#10, none#11, none#12, none#13, none#14, none#15, none#16]"), + new SqlTestParams( + name = "min-max - partitioned table - only NULL values", + querySetup = Some(Seq( + "CREATE TABLE TestOnlyNullValuesPartitioned (" + + "TINYINTColumn TINYINT, SMALLINTColumn SMALLINT, INTColumn INT, BIGINTColumn BIGINT, " + + "FLOATColumn FLOAT, DOUBLEColumn DOUBLE, DATEColumn 
DATE, Data INT) USING DELTA" + + " PARTITIONED BY (TINYINTColumn, SMALLINTColumn, INTColumn, BIGINTColumn," + + " FLOATColumn, DOUBLEColumn, DATEColumn)", + "INSERT INTO TestOnlyNullValuesPartitioned" + + " (TINYINTColumn, SMALLINTColumn, INTColumn, BIGINTColumn," + + " FLOATColumn, DOUBLEColumn, DATEColumn, Data)" + + " VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);" + )), + querySql = "SELECT COUNT(*)," + + "MIN(TINYINTColumn), MAX(TINYINTColumn)" + + ", MIN(SMALLINTColumn), MAX(SMALLINTColumn)" + + ", MIN(INTColumn), MAX(INTColumn)" + + ", MIN(BIGINTColumn), MAX(BIGINTColumn)" + + ", MIN(FLOATColumn), MAX(FLOATColumn)" + + ", MIN(DOUBLEColumn), MAX(DOUBLEColumn)" + + ", MIN(DATEColumn), MAX(DATEColumn)" + + ", MIN(Data), MAX(Data)" + + " FROM TestOnlyNullValuesPartitioned", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3, none#4, none#5, none#6, " + + "none#7L, none#8L, none#9, none#10, none#11, none#12, none#13, none#14, none#15, none#16]"), + new SqlTestParams( + name = "min-max - partitioned table - NULL and NON-NULL values", + querySetup = Some(Seq( + "CREATE TABLE TestNullPartitioned (Column1 INT, Column2 INT, Column3 INT)" + + " USING DELTA PARTITIONED BY (Column2, Column3)", + "INSERT INTO TestNullPartitioned (Column1, Column2, Column3) VALUES (NULL, NULL, 1);", + "INSERT INTO TestNullPartitioned (Column1, Column2, Column3) VALUES (NULL, NULL, NULL);", + "INSERT INTO TestNullPartitioned (Column1, Column2, Column3) VALUES (NULL, NULL, 2);" + )), + querySql = "SELECT COUNT(*), MIN(Column1), MAX(Column1), MIN(Column2), MAX(Column2), " + + "MIN(Column3), MAX(Column3) FROM TestNullPartitioned", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3, none#4, none#5, none#6]"), + new SqlTestParams( + name = "min-max - column name containing punctuation", + querySetup = Some(Seq( + "CREATE TABLE TestPunctuationColumnName (`My.!?Column` INT) USING DELTA", + "INSERT INTO TestPunctuationColumnName (`My.!?Column`) VALUES (1), (2), (3);" + )), + querySql = "SELECT COUNT(*), MIN(`My.!?Column`), MAX(`My.!?Column`)" + + " FROM TestPunctuationColumnName", + expectedPlan = "LocalRelation [none#0L, none#1, none#2]"), + new SqlTestParams( + name = "min-max - partitioned table - column name containing punctuation", + querySetup = Some(Seq( + "CREATE TABLE TestPartitionedPunctuationColumnName (`My.!?Column` INT, Data INT)" + + " USING DELTA PARTITIONED BY (`My.!?Column`)", + "INSERT INTO TestPartitionedPunctuationColumnName" + + " (`My.!?Column`, Data) VALUES (1, 1), (2, 1), (3, 1);" + )), + querySql = "SELECT COUNT(*), MIN(`My.!?Column`), MAX(`My.!?Column`)" + + " FROM TestPartitionedPunctuationColumnName", + expectedPlan = "LocalRelation [none#0L, none#1, none#2]"), + new SqlTestParams( + name = "min-max - partitioned table - special characters in column name", + querySetup = Some(Seq( + "CREATE TABLE TestColumnMappingPartitioned" + + " (Column1 INT, Column2 INT, `Column3 .,;{}()\n\t=` INT, Column4 INT)" + + " USING DELTA PARTITIONED BY (Column2, `Column3 .,;{}()\n\t=`)" + + " TBLPROPERTIES('delta.columnMapping.mode' = 'name')", + "INSERT INTO TestColumnMappingPartitioned" + + " (Column1, Column2, `Column3 .,;{}()\n\t=`, Column4)" + + " VALUES (1, 2, 3, 4);", + "INSERT INTO TestColumnMappingPartitioned" + + " (Column1, Column2, `Column3 .,;{}()\n\t=`, Column4)" + + " VALUES (2, 2, 3, 5);", + "INSERT INTO TestColumnMappingPartitioned" + + " (Column1, Column2, `Column3 .,;{}()\n\t=`, Column4)" + + " VALUES (3, 3, 2, 6);", + "INSERT INTO 
TestColumnMappingPartitioned" + + " (Column1, Column2, `Column3 .,;{}()\n\t=`, Column4)" + + " VALUES (4, 3, 2, 7);")), + querySql = "SELECT COUNT(*)" + + ", MIN(Column1), MAX(Column1)" + + ", MIN(Column2), MAX(Column2)" + + ", MIN(`Column3 .,;{}()\n\t=`), MAX(`Column3 .,;{}()\n\t=`)" + + ", MIN(Column4), MAX(Column4)" + + " FROM TestColumnMappingPartitioned", + expectedPlan = "LocalRelation [none#0L, none#1, none#2, none#3," + + " none#4, none#5, none#6, none#7, none#8]")) + .foreach { testParams => + test(s"optimization supported - SQL - ${testParams.name}") { + if (testParams.querySetup.isDefined) { + testParams.querySetup.get.foreach(spark.sql) + } + checkResultsAndOptimizedPlan(testParams.querySql, testParams.expectedPlan) + } + } + + test("count-min-max - external table") { + withTempDir { dir => + val testTablePath = dir.getCanonicalPath + dfPart1.write.format("delta").mode("overwrite").save(testTablePath) + DeltaTable.forPath(spark, testTablePath).delete("id = 1") + dfPart2.write.format("delta").mode(SaveMode.Append).save(testTablePath) + + checkResultsAndOptimizedPlan( + s"SELECT COUNT(*), MIN(id), MAX(id) FROM delta.`$testTablePath`", + "LocalRelation [none#0L, none#1L, none#2L]") + } + } + + test("min-max - partitioned column stats disabled") { + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + val tableName = "TestPartitionedNoStats" + + spark.sql(s"CREATE TABLE $tableName (Column1 INT, Column2 INT)" + + " USING DELTA PARTITIONED BY (Column2)") + + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES (1, 3);") + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES (2, 4);") + + // Has no stats, including COUNT + checkOptimizationIsNotTriggered( + s"SELECT COUNT(*), MIN(Column2), MAX(Column2) FROM $tableName") + + // Should work for partitioned columns even without stats + checkResultsAndOptimizedPlan( + s"SELECT MIN(Column2), MAX(Column2) FROM $tableName", + "LocalRelation [none#0, none#1]") + } + } + + test("min-max - recompute column missing stats") { + val tableName = "TestRecomputeMissingStat" + + spark.sql(s"CREATE TABLE $tableName (Column1 INT, Column2 INT) USING DELTA" + + s" TBLPROPERTIES('delta.dataSkippingNumIndexedCols' = 0)") + + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES (1, 4);") + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES (2, 5);") + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES (3, 6);") + + checkOptimizationIsNotTriggered(s"SELECT COUNT(*), MIN(Column1), MAX(Column1) FROM $tableName") + + spark.sql(s"ALTER TABLE $tableName SET TBLPROPERTIES('delta.dataSkippingNumIndexedCols' = 1);") + + StatisticsCollection.recompute( + spark, + DeltaLog.forTable(spark, TableIdentifier(tableName)), + DeltaTableV2(spark, TableIdentifier(tableName)).catalogTable) + + checkResultsAndOptimizedPlan( + s"SELECT COUNT(*), MIN(Column1), MAX(Column1) FROM $tableName", + "LocalRelation [none#0L, none#1, none#2]") + + checkOptimizationIsNotTriggered(s"SELECT COUNT(*), MIN(Column2), MAX(Column2) FROM $tableName") + + spark.sql(s"ALTER TABLE $tableName SET TBLPROPERTIES('delta.dataSkippingNumIndexedCols' = 2);") + + StatisticsCollection.recompute( + spark, + DeltaLog.forTable(spark, TableIdentifier(tableName)), + DeltaTableV2(spark, TableIdentifier(tableName)).catalogTable) + + checkResultsAndOptimizedPlan( + s"SELECT COUNT(*), MIN(Column2), MAX(Column2) FROM $tableName", + "LocalRelation [none#0L, none#1, none#2]") + } + + test("min-max - recompute added column") { + val tableName = 
"TestRecomputeAddedColumn" + + spark.sql(s"CREATE TABLE $tableName (Column1 INT) USING DELTA") + spark.sql(s"INSERT INTO $tableName (Column1) VALUES (1);") + + spark.sql(s"ALTER TABLE $tableName ADD COLUMN (Column2 INT)") + + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES (2, 5);") + + checkResultsAndOptimizedPlan( + s"SELECT COUNT(*), MIN(Column1), MAX(Column1) FROM $tableName", + "LocalRelation [none#0L, none#1, none#2]") + + checkOptimizationIsNotTriggered(s"SELECT COUNT(*), " + + s"MIN(Column1), MAX(Column1), MIN(Column2), MAX(Column2) FROM $tableName") + + StatisticsCollection.recompute( + spark, + DeltaLog.forTable(spark, TableIdentifier(tableName)), + DeltaTableV2(spark, TableIdentifier(tableName)).catalogTable) + + checkResultsAndOptimizedPlan(s"SELECT COUNT(*), " + + s"MIN(Column1), MAX(Column1), MIN(Column2), MAX(Column2) FROM $tableName", + "LocalRelation [none#0L, none#1, none#2, none#3, none#4]") + } + + test("Select Count: snapshot isolation") { + sql(s"CREATE TABLE TestSnapshotIsolation (c1 int) USING DELTA") + spark.sql("INSERT INTO TestSnapshotIsolation VALUES (1)") + + val scannedVersions = mutable.ArrayBuffer[Long]() + val query = "SELECT (SELECT COUNT(*) FROM TestSnapshotIsolation), " + + "(SELECT COUNT(*) FROM TestSnapshotIsolation)" + + checkResultsAndOptimizedPlan( + query, + "Project [scalar-subquery#0 [] AS #0L, scalar-subquery#0 [] AS #1L]\n" + + ": :- LocalRelation [none#0L]\n" + + ": +- LocalRelation [none#0L]\n" + + "+- OneRowRelation") + + PrepareDeltaScanBase.withCallbackOnGetDeltaScanGenerator(scanGenerator => { + // Record the scanned version and make changes to the table. We will verify changes in the + // middle of the query are not visible to the query. + scannedVersions += scanGenerator.snapshotToScan.version + // Insert a row after each call to get scanGenerator + // to test if the count doesn't change in the same query + spark.sql("INSERT INTO TestSnapshotIsolation VALUES (1)") + }) { + val result = spark.sql(query).collect()(0) + val c1 = result.getLong(0) + val c2 = result.getLong(1) + assertResult(c1, "Snapshot isolation should guarantee the results are always the same")(c2) + assert( + scannedVersions.toSet.size == 1, + s"Scanned multiple versions of the same table in one query: ${scannedVersions.toSet}") + } + } + + test(".collect() and .show() both use this optimization") { + var resultRow: Row = null + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> "false") { + resultRow = spark.sql(s"SELECT COUNT(*), MIN(id), MAX(id) FROM $testTableName").head + } + + val totalRows = resultRow.getLong(0) + val minId = resultRow.getLong(1) + val maxId = resultRow.getLong(2) + + val collectPlans = DeltaTestUtils.withLogicalPlansCaptured(spark, optimizedPlan = true) { + spark.sql(s"SELECT COUNT(*) FROM $testTableName").collect() + } + val collectResultData = collectPlans.collect { case x: LocalRelation => x.data } + assert(collectResultData.size === 1) + assert(collectResultData.head.head.getLong(0) === totalRows) + + val showPlans = DeltaTestUtils.withLogicalPlansCaptured(spark, optimizedPlan = true) { + spark.sql(s"SELECT COUNT(*) FROM $testTableName").show() + } + val showResultData = showPlans.collect { case x: LocalRelation => x.data } + assert(showResultData.size === 1) + assert(showResultData.head.head.getString(0).toLong === totalRows) + + val showMultAggPlans = DeltaTestUtils.withLogicalPlansCaptured(spark, optimizedPlan = true) { + spark.sql(s"SELECT COUNT(*), MIN(id), MAX(id) FROM $testTableName").show() + } + + val 
showMultipleAggResultData = showMultAggPlans.collect { case x: LocalRelation => x.data } + assert(showMultipleAggResultData.size === 1) + val firstRow = showMultipleAggResultData.head.head + assert(firstRow.getString(0).toLong === totalRows) + assert(firstRow.getString(1).toLong === minId) + assert(firstRow.getString(2).toLong === maxId) + } + + test("min-max .show() - only NULL values") { + val tableName = "TestOnlyNullValuesShow" + + spark.sql(s"CREATE TABLE $tableName (Column1 INT) USING DELTA") + spark.sql(s"INSERT INTO $tableName (Column1) VALUES (NULL);") + + val showMultAggPlans = DeltaTestUtils.withLogicalPlansCaptured(spark, optimizedPlan = true) { + spark.sql(s"SELECT MIN(Column1), MAX(Column1) FROM $tableName").show() + } + + val showMultipleAggResultData = showMultAggPlans.collect { case x: LocalRelation => x.data } + assert(showMultipleAggResultData.size === 1) + val firstRow = showMultipleAggResultData.head.head + assert(firstRow.getString(0) === "NULL") + assert(firstRow.getString(1) === "NULL") + } + + test("min-max .show() - Date Columns") { + val tableName = "TestDateColumnsShow" + + spark.sql(s"CREATE TABLE $tableName (Column1 DATE, Column2 DATE) USING DELTA") + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES " + + s"(CAST('1582-10-15' AS DATE), NULL);") + + val showMultAggPlans = DeltaTestUtils.withLogicalPlansCaptured(spark, optimizedPlan = true) { + spark.sql(s"SELECT MIN(Column1), MIN(Column2) FROM $tableName").show() + } + + val showMultipleAggResultData = showMultAggPlans.collect { case x: LocalRelation => x.data } + assert(showMultipleAggResultData.size === 1) + val firstRow = showMultipleAggResultData.head.head + assert(firstRow.getString(0) === "1582-10-15") + assert(firstRow.getString(1) === "NULL") + } + + test("count - dv-enabled") { + withTempDir { dir => + val tempPath = dir.getCanonicalPath + spark.range(1, 10, 1, 1).write.format("delta").save(tempPath) + + enableDeletionVectorsInTable(new Path(tempPath), true) + DeltaTable.forPath(spark, tempPath).delete("id = 1") + assert(!getFilesWithDeletionVectors(DeltaLog.forTable(spark, new Path(tempPath))).isEmpty) + + checkResultsAndOptimizedPlan( + s"SELECT COUNT(*) FROM delta.`$tempPath`", + "LocalRelation [none#0L]") + } + } + + test("count - zero rows AddFile") { + withTempDir { dir => + val tempPath = dir.getCanonicalPath + val df = spark.range(1, 10) + val expectedResult = df.count() + df.write.format("delta").save(tempPath) + + // Creates AddFile entries with non-existing files + // The query should read only the delta log and not the parquet files + val log = DeltaLog.forTable(spark, tempPath) + val txn = log.startTransaction() + txn.commitManually( + DeltaTestUtils.createTestAddFile(path = "1.parquet", stats = "{\"numRecords\": 0}"), + DeltaTestUtils.createTestAddFile(path = "2.parquet", stats = "{\"numRecords\": 0}"), + DeltaTestUtils.createTestAddFile(path = "3.parquet", stats = "{\"numRecords\": 0}")) + + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> "true") { + val queryDf = spark.sql(s"SELECT COUNT(*) FROM delta.`$tempPath`") + val optimizedPlan = queryDf.queryExecution.optimizedPlan.canonicalized.toString() + + assert(queryDf.head().getLong(0) === expectedResult) + + assertResult("LocalRelation [none#0L]") { + optimizedPlan.trim + } + } + } + } + + // Tests to validate the optimizer won't incorrectly change queries it can't correctly handle + + Seq((s"SELECT COUNT(*) FROM $mixedStatsTableName", "missing stats"), + (s"SELECT COUNT(*) FROM $noStatsTableName", 
"missing stats"), + (s"SELECT MIN(id), MAX(id) FROM $mixedStatsTableName", "missing stats"), + (s"SELECT MIN(id), MAX(id) FROM $noStatsTableName", "missing stats"), + (s"SELECT group, COUNT(*) FROM $testTableName GROUP BY group", "group by"), + (s"SELECT group, MIN(id), MAX(id) FROM $testTableName GROUP BY group", "group by"), + (s"SELECT COUNT(*) + 1 FROM $testTableName", "plus literal"), + (s"SELECT MAX(id) + 1 FROM $testTableName", "plus literal"), + (s"SELECT COUNT(DISTINCT data) FROM $testTableName", "distinct count"), + (s"SELECT COUNT(*) FROM $testTableName WHERE id > 0", "filter"), + (s"SELECT MAX(id) FROM $testTableName WHERE id > 0", "filter"), + (s"SELECT (SELECT COUNT(*) FROM $testTableName WHERE id > 0)", "sub-query with filter"), + (s"SELECT (SELECT MAX(id) FROM $testTableName WHERE id > 0)", "sub-query with filter"), + (s"SELECT COUNT(ALL data) FROM $testTableName", "count non-null"), + (s"SELECT COUNT(data) FROM $testTableName", "count non-null"), + (s"SELECT COUNT(*) FROM $testTableName A, $testTableName B", "join"), + (s"SELECT MAX(A.id) FROM $testTableName A, $testTableName B", "join"), + (s"SELECT COUNT(*) OVER() FROM $testTableName LIMIT 1", "over"), + ( s"SELECT MAX(id) OVER() FROM $testTableName LIMIT 1", "over") + ) + .foreach { case (query, desc) => + test(s"optimization not supported - $desc - $query") { + checkOptimizationIsNotTriggered(query) + } + } + + test("optimization not supported - min-max unsupported data types") { + val tableName = "TestUnsupportedTypes" + + spark.sql(s"CREATE TABLE $tableName " + + s"(STRINGColumn STRING, DECIMALColumn DECIMAL(38,0)" + + s", TIMESTAMPColumn TIMESTAMP, BINARYColumn BINARY, " + + s"BOOLEANColumn BOOLEAN, ARRAYColumn ARRAY, MAPColumn MAP, " + + s"STRUCTColumn STRUCT) USING DELTA") + + spark.sql(s"INSERT INTO $tableName" + + s" (STRINGColumn, DECIMALColumn, TIMESTAMPColumn, BINARYColumn" + + s", BOOLEANColumn, ARRAYColumn, MAPColumn, STRUCTColumn) VALUES " + + s"('A', -99999999999999999999999999999999999999, CAST('1900-01-01 00:00:00.0' AS TIMESTAMP)" + + s", X'1ABF', TRUE, ARRAY(1, 2, 3), MAP(1, 10, 2, 20), STRUCT(1, 'Spark'));") + + val columnNames = List("STRINGColumn", "DECIMALColumn", "TIMESTAMPColumn", + "BINARYColumn", "BOOLEANColumn", "ARRAYColumn", "STRUCTColumn") + + columnNames.foreach(colName => + checkOptimizationIsNotTriggered(s"SELECT MAX($colName) FROM $tableName") + ) + } + + test("optimization not supported - min-max column without stats") { + val tableName = "TestColumnWithoutStats" + + spark.sql(s"CREATE TABLE $tableName (Column1 INT, Column2 INT) USING DELTA" + + s" TBLPROPERTIES('delta.dataSkippingNumIndexedCols' = 1)") + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES (1, 2);") + + checkOptimizationIsNotTriggered( + s"SELECT MAX(Column2) FROM $tableName") + } + + // For empty tables the stats won't be found and the query should not be optimized + test("optimization not supported - min-max empty table") { + val tableName = "TestMinMaxEmptyTable" + + spark.sql(s"CREATE TABLE $tableName (Column1 INT) USING DELTA") + + checkOptimizationIsNotTriggered( + s"SELECT MIN(Column1), MAX(Column1) FROM $tableName") + } + + test("optimization not supported - min-max dv-enabled") { + withTempDir { dir => + val tempPath = dir.getCanonicalPath + spark.range(1, 10, 1, 1).write.format("delta").save(tempPath) + val querySql = s"SELECT MIN(id), MAX(id) FROM delta.`$tempPath`" + checkResultsAndOptimizedPlan(querySql, "LocalRelation [none#0L, none#1L]") + + enableDeletionVectorsInTable(new Path(tempPath), 
true) + DeltaTable.forPath(spark, tempPath).delete("id = 1") + assert(!getFilesWithDeletionVectors(DeltaLog.forTable(spark, new Path(tempPath))).isEmpty) + checkOptimizationIsNotTriggered(querySql) + } + } + + test("optimization not supported - filter on partitioned column") { + val tableName = "TestPartitionedFilter" + + spark.sql(s"CREATE TABLE $tableName (Column1 INT, Column2 INT)" + + " USING DELTA PARTITIONED BY (Column2)") + + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES (1, 2);") + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES (2, 2);") + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES (3, 3);") + spark.sql(s"INSERT INTO $tableName (Column1, Column2) VALUES (4, 3);") + + // Filter by partition column + checkOptimizationIsNotTriggered( + "SELECT COUNT(*)" + + ", MIN(Column1), MAX(Column1)" + + ", MIN(Column2), MAX(Column2)" + + s" FROM $tableName WHERE Column2 = 2") + + // Filter both partition and data columns + checkOptimizationIsNotTriggered( + "SELECT COUNT(*)" + + ", MIN(Column1), MAX(Column1)" + + ", MIN(Column2), MAX(Column2)" + + s" FROM $tableName WHERE Column1 = 2 AND Column2 = 2") + } + + test("optimization not supported - sub-query with column alias") { + val tableName = "TestColumnAliasSubQuery" + + spark.sql(s"CREATE TABLE $tableName (Column1 INT, Column2 INT, Column3 INT) USING DELTA") + + spark.sql(s"INSERT INTO $tableName (Column1, Column2, Column3) VALUES (1, 2, 3);") + + checkOptimizationIsNotTriggered( + s"SELECT MAX(Column2) FROM (SELECT Column1 AS Column2 FROM $tableName)") + + checkOptimizationIsNotTriggered( + s"SELECT MAX(Column1), MAX(Column2), MAX(Column3) FROM " + + s"(SELECT Column1 AS Column2, Column2 AS Column3, Column3 AS Column1 FROM $tableName)") + } + + test("optimization not supported - nested columns") { + val tableName = "TestNestedColumns" + + spark.sql(s"CREATE TABLE $tableName " + + s"(Column1 STRUCT, " + + s"`Column1.Id` INT) USING DELTA") + + spark.sql(s"INSERT INTO $tableName" + + s" (Column1, `Column1.Id`) VALUES " + + s"(STRUCT(1), 2);") + + // Nested Column + checkOptimizationIsNotTriggered( + s"SELECT MAX(Column1.Id) FROM $tableName") + + checkOptimizationIsNotTriggered( + s"SELECT MAX(Column1.Id) AS XYZ FROM $tableName") + + // Validate the scenario where all the columns are read + // since it creates a different query plan + checkOptimizationIsNotTriggered( + s"SELECT MAX(Column1.Id), " + + s"MAX(`Column1.Id`) FROM $tableName") + + // The optimization for columns with dots should still work + checkResultsAndOptimizedPlan(s"SELECT MAX(`Column1.Id`) FROM $tableName", + "LocalRelation [none#0]") + } + + private def generateRowsDataFrame(source: Dataset[java.lang.Long]): DataFrame = { + import testImplicits._ + + source.select('id, + 'id.cast("tinyint") as 'TinyIntColumn, + 'id.cast("smallint") as 'SmallIntColumn, + 'id.cast("int") as 'IntColumn, + 'id.cast("bigint") as 'BigIntColumn, + ('id / 3.3).cast("float") as 'FloatColumn, + ('id / 3.3).cast("double") as 'DoubleColumn, + date_add(lit("2022-08-31").cast("date"), col("id").cast("int")) as 'DateColumn, + ('id % 2).cast("integer") as 'group, + 'id.cast("string") as 'data) + } + + /** Validate the results of the query is the same with the flag + * DELTA_OPTIMIZE_METADATA_QUERY_ENABLED enabled and disabled. 
+ * And the expected Optimized Query Plan with the flag enabled */ + private def checkResultsAndOptimizedPlan( + query: String, + expectedOptimizedPlan: String): Unit = { + checkResultsAndOptimizedPlan(() => spark.sql(query), expectedOptimizedPlan) + } + + /** Validate the results of the query is the same with the flag + * DELTA_OPTIMIZE_METADATA_QUERY_ENABLED enabled and disabled. + * And the expected Optimized Query Plan with the flag enabled. */ + private def checkResultsAndOptimizedPlan( + generateQueryDf: () => DataFrame, + expectedOptimizedPlan: String): Unit = { + var expectedAnswer: scala.Seq[Row] = null + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> "false") { + expectedAnswer = generateQueryDf().collect() + } + + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> "true") { + val queryDf = generateQueryDf() + val optimizedPlan = queryDf.queryExecution.optimizedPlan.canonicalized.toString() + + assert(queryDf.collect().sameElements(expectedAnswer)) + + assertResult(expectedOptimizedPlan.trim) { + optimizedPlan.trim + } + } + } + + /** + * Verify the query plans and results are the same with/without metadata query optimization. + * This method can be used to verify cases that we shouldn't trigger optimization + * or cases that we can potentially improve. + * @param query + */ + private def checkOptimizationIsNotTriggered(query: String) { + var expectedOptimizedPlan: String = null + var expectedAnswer: scala.Seq[Row] = null + + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> "false") { + + val generateQueryDf = spark.sql(query) + expectedOptimizedPlan = generateQueryDf.queryExecution.optimizedPlan + .canonicalized.toString() + expectedAnswer = generateQueryDf.collect() + } + + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> "true") { + + val generateQueryDf = spark.sql(query) + val optimizationEnabledQueryPlan = generateQueryDf.queryExecution.optimizedPlan + .canonicalized.toString() + + assert(generateQueryDf.collect().sameElements(expectedAnswer)) + + assertResult(expectedOptimizedPlan) { + optimizationEnabledQueryPlan + } + } + } +} + +trait OptimizeMetadataOnlyDeltaQueryColumnMappingSuiteBase + extends DeltaColumnMappingSelectedTestMixin { + override protected def runAllTests = true +} + +class OptimizeMetadataOnlyDeltaQueryIdColumnMappingSuite + extends OptimizeMetadataOnlyDeltaQuerySuite + with DeltaColumnMappingEnableIdMode + with OptimizeMetadataOnlyDeltaQueryColumnMappingSuiteBase + +class OptimizeMetadataOnlyDeltaQueryNameColumnMappingSuite + extends OptimizeMetadataOnlyDeltaQuerySuite + with DeltaColumnMappingEnableNameMode + with OptimizeMetadataOnlyDeltaQueryColumnMappingSuiteBase diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/perf/OptimizedWritesSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/perf/OptimizedWritesSuite.scala new file mode 100644 index 00000000000..f03bc96ec5f --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/perf/OptimizedWritesSuite.scala @@ -0,0 +1,355 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.perf + +import java.io.File + +import scala.language.implicitConversions + +import com.databricks.spark.util.Log4jUsageLogger +import org.apache.spark.sql.delta.{DeltaConfigs, DeltaLog, DeltaOptions, DeltaTestUtils} +import org.apache.spark.sql.delta.CommitStats +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.util.JsonUtils + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.streaming.StreamingQuery +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{LongType, StructType} + +abstract class OptimizedWritesSuiteBase extends QueryTest + with SharedSparkSession { + + import testImplicits._ + + protected def writeTest(testName: String)(f: String => Unit): Unit = { + test(testName) { + withTempDir { dir => + withSQLConf(DeltaConfigs.OPTIMIZE_WRITE.defaultTablePropertyKey -> "true") { + f(dir.getCanonicalPath) + } + } + } + } + + protected def checkResult(df: DataFrame, numFileCheck: Long => Boolean, dir: String): Unit = { + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, dir) + val files = snapshot.numOfFiles + assert(numFileCheck(files), s"file check failed: received $files") + + checkAnswer( + spark.read.format("delta").load(dir), + df + ) + } + + protected implicit def fileToPathString(dir: File): String = dir.getCanonicalPath + + writeTest("non-partitioned write - table config") { dir => + val df = spark.range(0, 100, 1, 4).toDF() + df.write.format("delta").save(dir) + checkResult( + df, + numFileCheck = _ === 1, + dir) + } + + test("non-partitioned write - table config compatibility") { + withTempDir { tempDir => + val dir = tempDir.getCanonicalPath + // When table property is not set, we use session conf value. + // Writes 1 file instead of 4 when OW is enabled + withSQLConf( + DeltaSQLConf.DELTA_OPTIMIZE_WRITE_ENABLED.key -> "true") { + val df = spark.range(0, 100, 1, 4).toDF() + val commitStats = Log4jUsageLogger.track { + df.write.format("delta").mode("append").save(dir) + }.filter(_.tags.get("opType") === Some("delta.commit.stats")) + assert(commitStats.length >= 1) + checkResult( + df, + numFileCheck = _ === 1, + dir) + } + } + + // Test order of precedence between table property "delta.autoOptimize.optimizeWrite" and + // session conf. 
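    // The loop below exercises the expected resolution order (highest to lowest), roughly:
    //   1. per-write option:  DeltaOptions.OPTIMIZE_WRITE_OPTION on the DataFrameWriter
    //   2. session conf:      DeltaSQLConf.DELTA_OPTIMIZE_WRITE_ENABLED
    //   3. table property:    delta.autoOptimize.optimizeWrite
    // which is why the expected file count below depends only on `sqlConf` (2 files when
    // optimized writes kick in, 5 otherwise, counting the initial setup file) and not on
    // `tableProperty`.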
+ for { + sqlConf <- DeltaTestUtils.BOOLEAN_DOMAIN + tableProperty <- DeltaTestUtils.BOOLEAN_DOMAIN + } { + withTempDir { tempDir => + withSQLConf( + DeltaSQLConf.DELTA_OPTIMIZE_WRITE_ENABLED.key -> sqlConf.toString) { + val dir = tempDir.getCanonicalPath + // Write one file to be able to set tblproperties + spark.range(10).coalesce(1).write.format("delta") + .mode("append").save(dir) + + sql(s"ALTER TABLE delta.`$dir` SET TBLPROPERTIES" + + s" (delta.autoOptimize.optimizeWrite = ${tableProperty.toString})") + + val df = spark.range(0, 100, 1, 4).toDF() + // OW adds one file vs non-OW adds 4 files + val expectedNumberOfFiles = if (sqlConf) 2 else 5 + df.write.format("delta").mode("append").save(dir) + checkResult( + df.union(spark.range(10).toDF()), + numFileCheck = _ === expectedNumberOfFiles, + dir) + } + } + } + } + + test("non-partitioned write - data frame config") { + withTempDir { dir => + val df = spark.range(0, 100, 1, 4).toDF() + df.write.format("delta") + .option(DeltaOptions.OPTIMIZE_WRITE_OPTION, "true").save(dir) + checkResult( + df, + numFileCheck = _ === 1, + dir) + } + } + + writeTest("non-partitioned write - data frame config trumps table config") { dir => + val df = spark.range(0, 100, 1, 4).toDF() + df.write.format("delta").option(DeltaOptions.OPTIMIZE_WRITE_OPTION, "false").save(dir) + checkResult( + df, + numFileCheck = _ === 4, + dir) + } + + writeTest("partitioned write - table config") { dir => + val df = spark.range(0, 100, 1, 4) + .withColumn("part", 'id % 5) + + df.write.partitionBy("part").format("delta").save(dir) + checkResult( + df, + numFileCheck = _ <= 5, + dir) + } + + test("partitioned write - data frame config") { + withTempDir { dir => + val df = spark.range(0, 100, 1, 4) + .withColumn("part", 'id % 5) + + df.write.partitionBy("part").option(DeltaOptions.OPTIMIZE_WRITE_OPTION, "true") + .format("delta").save(dir) + + checkResult( + df, + numFileCheck = _ <= 5, + dir) + } + } + + writeTest("partitioned write - data frame config trumps table config") { dir => + val df = spark.range(0, 100, 1, 4) + .withColumn("part", 'id % 5) + + df.write.partitionBy("part").format("delta") + .option(DeltaOptions.OPTIMIZE_WRITE_OPTION, "false").save(dir) + + checkResult( + df, + numFileCheck = _ === 20, + dir) + } + + writeTest("multi-partitions - table config") { dir => + val df = spark.range(0, 100, 1, 4) + .withColumn("part", 'id % 5) + .withColumn("part2", ('id / 20).cast("int")) + + df.write.partitionBy("part", "part2").format("delta").save(dir) + + checkResult( + df, + numFileCheck = _ <= 25, + dir) + } + + test("multi-partitions - data frame config") { + withTempDir { dir => + val df = spark.range(0, 100, 1, 4) + .withColumn("part", 'id % 5) + .withColumn("part2", ('id / 20).cast("int")) + + df.write.partitionBy("part", "part2") + .option(DeltaOptions.OPTIMIZE_WRITE_OPTION, "true").format("delta").save(dir) + + checkResult( + df, + numFileCheck = _ <= 25, + dir) + } + } + + test("optimized writes used if enabled when a stream starts") { + withTempDir { f => + // Write some data into the table so it already exists + Seq(1).toDF().write.format("delta").save(f) + + // Use optimized writes just when starting the stream + val inputData = MemoryStream[Int] + + val df = inputData.toDF().repartition(10) + var stream: StreamingQuery = null + + // Start the stream with optimized writes enabled, and then reset the conf + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_ENABLED.key -> "true") { + val checkpoint = new File(f, "checkpoint").getCanonicalPath + stream = 
df.writeStream.format("delta").option("checkpointLocation", checkpoint).start(f) + } + try { + inputData.addData(1 to 100) + stream.processAllAvailable() + } finally { + stream.stop() + } + + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, f) + assert(snapshot.numOfFiles == 2, "Optimized writes were not used") + } + } + + writeTest("multi-partitions - data frame config trumps table config") { dir => + val df = spark.range(0, 100, 1, 4) + .withColumn("part", 'id % 5) + .withColumn("part2", ('id / 20).cast("int")) + + df.write.partitionBy("part", "part2") + .option(DeltaOptions.OPTIMIZE_WRITE_OPTION, "false").format("delta").save(dir) + + checkResult( + df, + numFileCheck = _ > 25, + dir) + } + + writeTest("optimize should not leverage optimized writes") { dir => + val df = spark.range(0, 10, 1, 2) + + val logs1 = Log4jUsageLogger.track { + df.write.format("delta").mode("append").save(dir) + df.write.format("delta").mode("append").save(dir) + }.filter(_.metric == "tahoeEvent") + + assert(logs1.count(_.tags.get("opType") === Some("delta.optimizeWrite.planned")) === 2) + + val logs2 = Log4jUsageLogger.track { + sql(s"optimize delta.`$dir`") + }.filter(_.metric == "tahoeEvent") + + assert(logs2.count(_.tags.get("opType") === Some("delta.optimizeWrite.planned")) === 0) + } + + writeTest("map task with more partitions than target shuffle blocks - non-partitioned") { dir => + val df = spark.range(0, 20, 1, 4) + + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_SHUFFLE_BLOCKS.key -> "2") { + df.write.format("delta").mode("append").save(dir) + } + + checkResult( + df.toDF(), + numFileCheck = _ === 1, + dir) + } + + writeTest("map task with more partitions than target shuffle blocks - partitioned") { dir => + val df = spark.range(0, 20, 1, 4).withColumn("part", 'id % 5) + + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_SHUFFLE_BLOCKS.key -> "2") { + df.write.format("delta").partitionBy("part").mode("append").save(dir) + } + + checkResult( + df, + numFileCheck = _ === 5, + dir) + } + + writeTest("zero partition dataframe write") { dir => + val df = spark.range(0, 20, 1, 4).withColumn("part", 'id % 5) + df.write.format("delta").partitionBy("part").mode("append").save(dir) + val schema = new StructType().add("id", LongType).add("part", LongType) + + spark.createDataFrame(sparkContext.emptyRDD[Row], schema).write.format("delta") + .partitionBy("part").mode("append").save(dir) + + checkResult( + df, + numFileCheck = _ === 5, + dir) + } + + test("OptimizedWriterBlocks is not serializable") { + assert(!new OptimizedWriterBlocks(Array.empty).isInstanceOf[Serializable], + "The blocks should not be serializable so that they don't get shipped to executors.") + } + + writeTest("single partition dataframe write") { dir => + val df = spark.range(0, 20).repartition(1).withColumn("part", 'id % 5) + val logs1 = Log4jUsageLogger.track { + df.write.format("delta").partitionBy("part").mode("append").save(dir) + }.filter(_.metric == "tahoeEvent") + + // doesn't use optimized writes + assert(logs1.count(_.tags.get("opType") === Some("delta.optimizeWrite.planned")) === 0) + + checkResult( + df, + numFileCheck = _ === 5, + dir) + } + + writeTest("do not create tons of shuffle partitions during optimized writes") { dir => + // 50M shuffle blocks would've led to 25M shuffle partitions + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_SHUFFLE_BLOCKS.key -> "50000000") { + val df = spark.range(0, 20).repartition(2).withColumn("part", 'id % 5) + val logs1 = Log4jUsageLogger.track { + 
df.write.format("delta").partitionBy("part").mode("append").save(dir) + }.filter(_.metric == "tahoeEvent") + .filter(_.tags.get("opType") === Some("delta.optimizeWrite.planned")) + + assert(logs1.length === 1) + val blob = JsonUtils.fromJson[Map[String, Any]](logs1.head.blob) + assert(blob("outputPartitions") === 5) + assert(blob("originalPartitions") === 2) + assert(blob("numShuffleBlocks") === 50000000) + assert(blob("shufflePartitions") === + spark.conf.get(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_MAX_SHUFFLE_PARTITIONS)) + + checkResult( + df, + numFileCheck = _ === 5, + dir) + } + } +} + +class OptimizedWritesSuite extends OptimizedWritesSuiteBase with DeltaSQLCommandTest {} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/rowid/RowIdSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/rowid/RowIdSuite.scala new file mode 100644 index 00000000000..bfd07754bc3 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/rowid/RowIdSuite.scala @@ -0,0 +1,284 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.rowid + +import org.apache.spark.sql.delta.{DeltaConfigs, DeltaIllegalStateException, DeltaLog, RowId, Serializable, SnapshotIsolation, WriteSerializable} +import org.apache.spark.sql.delta.DeltaOperations.ManualUpdate +import org.apache.spark.sql.delta.RowId.RowTrackingMetadataDomain +import org.apache.spark.sql.delta.actions.CommitInfo +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.FileNames + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.test.SharedSparkSession + +class RowIdSuite extends QueryTest + with SharedSparkSession + with RowIdTestUtils { + test("Enabling row IDs on existing table does not set row IDs as readable") { + withRowTrackingEnabled(enabled = false) { + withTable("tbl") { + spark.range(10).write.format("delta") + .saveAsTable("tbl") + + sql( + s""" + |ALTER TABLE tbl + |SET TBLPROPERTIES ( + |'$rowTrackingFeatureName' = 'supported', + |'delta.minWriterVersion' = $TABLE_FEATURES_MIN_WRITER_VERSION)""".stripMargin) + + val (log, snapshot) = DeltaLog.forTableWithSnapshot(spark, TableIdentifier("tbl")) + assert(RowId.isSupported(snapshot.protocol)) + assert(!RowId.isEnabled(snapshot.protocol, snapshot.metadata)) + } + } + } + + test("row ids are assigned when they are enabled") { + withRowTrackingEnabled(enabled = true) { + withTempDir { dir => + spark.range(start = 0, end = 1000, step = 1, numPartitions = 10) + .write.format("delta").save(dir.getAbsolutePath) + val log = DeltaLog.forTable(spark, dir) + assertRowIdsAreValid(log) + + spark.range(start = 1000, end = 1500, step = 1, numPartitions = 3) + 
.write.format("delta").mode("append").save(dir.getAbsolutePath) + assertRowIdsAreValid(log) + } + } + } + + test("row ids are not assigned when they are disabled") { + withRowTrackingEnabled(enabled = false) { + withTempDir { dir => + spark.range(start = 0, end = 1000, step = 1, numPartitions = 10) + .write.format("delta").save(dir.getAbsolutePath) + val log = DeltaLog.forTable(spark, dir) + assertRowIdsAreNotSet(log) + + spark.range(start = 1000, end = 1500, step = 1, numPartitions = 3) + .write.format("delta").mode("append").save(dir.getAbsolutePath) + assertRowIdsAreNotSet(log) + } + } + } + + test("row ids can be disabled") { + withRowTrackingEnabled(enabled = true) { + withTempDir { dir => + spark.range(start = 0, end = 1000, step = 1, numPartitions = 10) + .write.format("delta").save(dir.getAbsolutePath) + val log = DeltaLog.forTable(spark, dir) + assertRowIdsAreValid(log) + + sql(s"ALTER TABLE delta.`${dir.getAbsolutePath}` " + + s"SET TBLPROPERTIES ('${DeltaConfigs.ROW_TRACKING_ENABLED.key}' = false)") + checkAnswer( + spark.read.load(dir.getAbsolutePath), + (0 until 1000).map(Row(_))) + } + } + } + + test("high watermark survives checkpointing") { + withRowTrackingEnabled(enabled = true) { + withTempDir { dir => + spark.range(start = 0, end = 1000, step = 1, numPartitions = 10) + .write.format("delta").save(dir.getAbsolutePath) + val log1 = DeltaLog.forTable(spark, dir) + assertRowIdsAreValid(log1) + + // Force a checkpoint and add an empty commit, so that we can delete the first commit + log1.checkpoint(log1.update()) + log1.startTransaction().commit(Nil, ManualUpdate) + DeltaLog.clearCache() + + // Delete the first commit and all checksum files to force the next read to read the high + // watermark from the checkpoint. + val fs = log1.logPath.getFileSystem(log1.newDeltaHadoopConf()) + fs.delete(FileNames.deltaFile(log1.logPath, version = 0), true) + fs.delete(FileNames.checksumFile(log1.logPath, version = 0), true) + fs.delete(FileNames.checksumFile(log1.logPath, version = 1), true) + + spark.range(start = 1000, end = 1500, step = 1, numPartitions = 3) + .write.format("delta").mode("append").save(dir.getAbsolutePath) + val log2 = DeltaLog.forTable(spark, dir) + assertRowIdsAreValid(log2) + } + } + } + + test("re-added files keep their row ids") { + withRowTrackingEnabled(enabled = true) { + withTempDir { dir => + spark.range(start = 0, end = 1000, step = 1, numPartitions = 10) + .write.format("delta").save(dir.getAbsolutePath) + val log = DeltaLog.forTable(spark, dir) + assertRowIdsAreValid(log) + + val filesBefore = log.update().allFiles.collect() + val baseRowIdsBefore = filesBefore.map(f => f.path -> f.baseRowId.get).toMap + + log.startTransaction().commit(filesBefore, ManualUpdate) + assertRowIdsAreValid(log) + + val filesAfter = log.update().allFiles.collect() + val baseRowIdsAfter = filesAfter.map(f => f.path -> f.baseRowId.get).toMap + + assert(baseRowIdsBefore == baseRowIdsAfter) + } + } + } + + test("RESTORE retains high watermark") { + withRowTrackingEnabled(enabled = true) { + withTempDir { dir => + // version 0: high watermark = 9 + spark.range(start = 0, end = 10) + .write.format("delta").save(dir.getAbsolutePath) + val log = DeltaLog.forTable(spark, dir) + val deltaTable = io.delta.tables.DeltaTable.forPath(spark, dir.getAbsolutePath) + + // version 1: high watermark = 19 + spark.range(start = 10, end = 20) + .write.mode("append").format("delta").save(dir.getAbsolutePath) + val highWatermarkBeforeRestore = RowId.extractHighWatermark(log.update()) + + // back to 
version 0: high watermark should be still equal to before the restore. + deltaTable.restoreToVersion(0) + + val highWatermarkAfterRestore = RowId.extractHighWatermark(log.update()) + assert(highWatermarkBeforeRestore == highWatermarkAfterRestore) + assertRowIdsDoNotOverlap(log) + + // version 1 (overridden): high watermark = 29 + spark.range(start = 10, end = 20) + .write.mode("append").format("delta").save(dir.getAbsolutePath) + assertHighWatermarkIsCorrectAfterUpdate( + log, + highWatermarkBeforeUpdate = highWatermarkAfterRestore.get, + expectedNumRecordsWritten = 10) + assertRowIdsDoNotOverlap(log) + val highWatermarkWithNewData = RowId.extractHighWatermark(log.update()) + + // back to version 0: high watermark should still be 29. + deltaTable.restoreToVersion(0) + + val highWatermarkWithNewDataAfterRestore = + RowId.extractHighWatermark(log.update()) + assert(highWatermarkWithNewData == highWatermarkWithNewDataAfterRestore) + assertRowIdsDoNotOverlap(log) + + } + } + } + + test("row_id column with row ids disabled") { + withRowTrackingEnabled(enabled = false) { + withTempDir { dir => + spark.range(start = 0, end = 1000, step = 1, numPartitions = 5) + .select((col("id") + 10000L).as("row_id")) + .write.format("delta").save(dir.getAbsolutePath) + + checkAnswer( + spark.read.load(dir.getAbsolutePath), + (0 until 1000).map(i => Row(i + 10000L)) + ) + } + } + } + + test("Throws error when assigning row IDs without stats") { + withSQLConf( + DeltaConfigs.ROW_TRACKING_ENABLED.defaultTablePropertyKey -> "true", + DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + withTable("target") { + val err = intercept[DeltaIllegalStateException] { + spark.range(end = 10).write.format("delta").saveAsTable("target") + } + checkError(err, "DELTA_ROW_ID_ASSIGNMENT_WITHOUT_STATS") + } + } + } + + test("manually setting row ID high watermark is not allowed") { + withRowTrackingEnabled(enabled = true) { + withTempDir { dir => + spark.range(start = 0, end = 1000, step = 1, numPartitions = 10) + .write.format("delta").save(dir.getAbsolutePath) + + val log = DeltaLog.forTable(spark, dir) + + val exception = intercept[IllegalStateException] { + log.startTransaction().commit( + Seq(RowTrackingMetadataDomain(rowIdHighWaterMark = 9001).toDomainMetadata), + ManualUpdate) + } + assert(exception.getMessage.contains( + "Manually setting the Row ID high water mark is not allowed")) + } + } + } + + for (prevIsolationLevel <- Seq( + Serializable)) + test(s"Maintenance operations can downgrade to snapshot isolation, " + + s"previousIsolationLevel = $prevIsolationLevel") { + withTable("table") { + withSQLConf( + DeltaConfigs.ROW_TRACKING_ENABLED.defaultTablePropertyKey -> "true", + DeltaConfigs.ISOLATION_LEVEL.defaultTablePropertyKey -> prevIsolationLevel.toString) { + // Create two files that will be picked up by OPTIMIZE + spark.range(10).repartition(2).write.format("delta").saveAsTable("table") + val (log, snapshot) = DeltaLog.forTableWithSnapshot(spark, TableIdentifier("table")) + val versionBeforeOptimize = snapshot.version + + spark.sql("OPTIMIZE table").collect() + + val commitInfos = log.getChanges(versionBeforeOptimize + 1).flatMap(_._2).flatMap { + case commitInfo: CommitInfo => Some(commitInfo) + case _ => None + }.toList + assert(commitInfos.size == 1) + assert(commitInfos.forall(_.isolationLevel.get == SnapshotIsolation.toString)) + } + } + } + + test("ALTER TABLE cannot enable Row IDs on existing table") { + withRowTrackingEnabled(enabled = false) { + withTable("tbl") { + 
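+ // Enabling row tracking on an already-existing table is not supported; the ALTER TABLE
+ // below is expected to be rejected with an UnsupportedOperationException.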
spark.range(10).write.format("delta").saveAsTable("tbl") + + val (log, snapshot) = DeltaLog.forTableWithSnapshot(spark, TableIdentifier("tbl")) + assert(!RowId.isEnabled(snapshot.protocol, snapshot.metadata)) + + val err = intercept[UnsupportedOperationException] { + sql(s"ALTER TABLE tbl " + + s"SET TBLPROPERTIES ('${DeltaConfigs.ROW_TRACKING_ENABLED.key}' = true)") + } + assert(err.getMessage === "Cannot enable Row IDs on an existing table.") + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/rowid/RowIdTestUtils.scala b/spark/src/test/scala/org/apache/spark/sql/delta/rowid/RowIdTestUtils.scala new file mode 100644 index 00000000000..2852d608f0b --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/rowid/RowIdTestUtils.scala @@ -0,0 +1,81 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.rowid + +import org.apache.spark.sql.delta.{DeltaLog, RowId} +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.rowtracking.RowTrackingTestUtils +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +trait RowIdTestUtils extends RowTrackingTestUtils with DeltaSQLCommandTest { + protected def getRowIdRangeInclusive(f: AddFile): (Long, Long) = { + val min = f.baseRowId.get + val max = min + f.numPhysicalRecords.get - 1L + (min, max) + } + + def assertRowIdsDoNotOverlap(log: DeltaLog): Unit = { + val files = log.update().allFiles.collect() + + val sortedRanges = files + .map(f => (f.path, getRowIdRangeInclusive(f))) + .sortBy { case (_, (min, _)) => min } + + for (i <- sortedRanges.indices.dropRight(1)) { + val (curPath, (_, curMax)) = sortedRanges(i) + val (nextPath, (nextMin, _)) = sortedRanges(i + 1) + assert(curMax < nextMin, s"$curPath and $nextPath have overlapping row IDs") + } + } + + def assertHighWatermarkIsCorrect(log: DeltaLog): Unit = { + val snapshot = log.update() + val files = snapshot.allFiles.collect() + + val highWatermarkOpt = RowId.extractHighWatermark(snapshot) + if (files.isEmpty) { + assert(highWatermarkOpt.isDefined) + } else { + val maxAssignedRowId = files + .map(a => a.baseRowId.get + a.numPhysicalRecords.get - 1L) + .max + assert(highWatermarkOpt.get == maxAssignedRowId) + } + } + + def assertRowIdsAreValid(log: DeltaLog): Unit = { + assertRowIdsDoNotOverlap(log) + assertHighWatermarkIsCorrect(log) + } + + def assertHighWatermarkIsCorrectAfterUpdate( + log: DeltaLog, highWatermarkBeforeUpdate: Long, expectedNumRecordsWritten: Long): Unit = { + val highWaterMarkAfterUpdate = RowId.extractHighWatermark(log.update()).get + assert((highWatermarkBeforeUpdate + expectedNumRecordsWritten) === highWaterMarkAfterUpdate) + assertRowIdsAreValid(log) + } + + def assertRowIdsAreNotSet(log: DeltaLog): Unit = { + val snapshot = log.update() + + val highWatermarks = RowId.extractHighWatermark(snapshot) + assert(highWatermarks.isEmpty) + + val files = snapshot.allFiles.collect() + 
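+ // With row tracking disabled, no AddFile should have been assigned a base row ID.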
assert(files.forall(_.baseRowId.isEmpty)) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/DefaultRowCommitVersionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/DefaultRowCommitVersionSuite.scala new file mode 100644 index 00000000000..81b7eff8298 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/DefaultRowCommitVersionSuite.scala @@ -0,0 +1,221 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.rowtracking + +import scala.collection.mutable + +import org.apache.spark.sql.delta.{DeltaConfigs, DeltaLog, RowTrackingFeature} +import org.apache.spark.sql.delta.DeltaOperations.ManualUpdate +import org.apache.spark.sql.delta.actions.{AddFile, Metadata, Protocol, RemoveFile} +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils.{TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION} +import org.apache.spark.sql.delta.rowid.RowIdTestUtils +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.test.SharedSparkSession + +class DefaultRowCommitVersionSuite extends QueryTest + with SharedSparkSession + with RowIdTestUtils { + def expectedCommitVersionsForAllFiles(deltaLog: DeltaLog): Map[String, Long] = { + val commitVersionForFiles = mutable.Map.empty[String, Long] + deltaLog.getChanges(startVersion = 0).foreach { case (commitVersion, actions) => + actions.foreach { + case a: AddFile if !commitVersionForFiles.contains(a.path) => + commitVersionForFiles += a.path -> commitVersion + case r: RemoveFile if commitVersionForFiles.contains(r.path) => + assert(r.defaultRowCommitVersion.contains(commitVersionForFiles(r.path))) + case _ => + // Do nothing + } + } + commitVersionForFiles.toMap + } + + test("defaultRowCommitVersion is not set when feature is disabled") { + withRowTrackingEnabled(enabled = false) { + withTempDir { tempDir => + spark.range(start = 0, end = 100, step = 1, numPartitions = 1) + .write.format("delta").mode("overwrite").save(tempDir.getAbsolutePath) + spark.range(start = 100, end = 200, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(tempDir.getAbsolutePath) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + deltaLog.update().allFiles.collect().foreach { f => + assert(f.defaultRowCommitVersion.isEmpty) + } + } + } + } + + test("checkpoint preserves defaultRowCommitVersion") { + withRowTrackingEnabled(enabled = true) { + withTempDir { tempDir => + spark.range(start = 0, end = 100, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(tempDir.getAbsolutePath) + spark.range(start = 100, end = 200, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(tempDir.getAbsolutePath) + spark.range(start = 200, end = 300, step = 1, 
numPartitions = 1) + .write.format("delta").mode("append").save(tempDir.getAbsolutePath) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val commitVersionForFiles = expectedCommitVersionsForAllFiles(deltaLog) + + deltaLog.update().allFiles.collect().foreach { f => + assert(f.defaultRowCommitVersion.contains(commitVersionForFiles(f.path))) + } + + deltaLog.checkpoint(deltaLog.update()) + + deltaLog.update().allFiles.collect().foreach { f => + assert(f.defaultRowCommitVersion.contains(commitVersionForFiles(f.path))) + } + } + } + } + + test("data skipping reads defaultRowCommitVersion") { + withRowTrackingEnabled(enabled = true) { + withTempDir { tempDir => + spark.range(start = 0, end = 100, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(tempDir.getAbsolutePath) + spark.range(start = 100, end = 200, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(tempDir.getAbsolutePath) + spark.range(start = 200, end = 300, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(tempDir.getAbsolutePath) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val commitVersionForFiles = expectedCommitVersionsForAllFiles(deltaLog) + + val filters = Seq(col("id = 150").expr) + val scan = deltaLog.update().filesForScan(filters) + + scan.files.foreach { f => + assert(f.defaultRowCommitVersion.contains(commitVersionForFiles(f.path))) + } + } + } + } + + test("clone does not preserve default row commit versions") { + withRowTrackingEnabled(enabled = true) { + withTempDir { sourceDir => + spark.range(start = 0, end = 100, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(sourceDir.getAbsolutePath) + spark.range(start = 100, end = 200, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(sourceDir.getAbsolutePath) + spark.range(start = 200, end = 300, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(sourceDir.getAbsolutePath) + + withTable("target") { + spark.sql(s"CREATE TABLE target SHALLOW CLONE delta.`${sourceDir.getAbsolutePath}` " + + s"TBLPROPERTIES ('${DeltaConfigs.ROW_TRACKING_ENABLED.key}' = 'true')") + + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, TableIdentifier("target")) + snapshot.allFiles.collect().foreach { f => + assert(f.defaultRowCommitVersion.contains(0L)) + } + } + } + } + } + + test("restore does preserve default row commit versions") { + withRowTrackingEnabled(enabled = true) { + withTempDir { tempDir => + spark.range(start = 0, end = 100, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(tempDir.getAbsolutePath) + spark.range(start = 100, end = 200, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(tempDir.getAbsolutePath) + spark.range(start = 200, end = 300, step = 1, numPartitions = 1) + .write.format("delta").mode("append").save(tempDir.getAbsolutePath) + + val deltaLog = DeltaLog.forTable(spark, tempDir) + val commitVersionForFiles = expectedCommitVersionsForAllFiles(deltaLog) + + spark.sql(s"RESTORE delta.`${tempDir.getAbsolutePath}` TO VERSION AS OF 1") + + deltaLog.update().allFiles.collect().foreach { f => + assert(f.defaultRowCommitVersion.contains(commitVersionForFiles(f.path))) + } + } + } + } + + test("default row commit versions are reassigned on conflict") { + withTempDir { tempDir => + val deltaLog = DeltaLog.forTable(spark, tempDir) + + // Initial setup - version 0 + val protocol = Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + 
.withFeature(RowTrackingFeature) + val metadata = Metadata() + deltaLog.startTransaction().commit(Seq(protocol, metadata), ManualUpdate) + + // Start a transaction + val txn = deltaLog.startTransaction() + + // Commit two concurrent transactions - version 1 and 2 + deltaLog.startTransaction().commit(Nil, ManualUpdate) + deltaLog.startTransaction().commit(Nil, ManualUpdate) + + // Commit the transaction - version 3 + val addA = AddFile(path = "a", partitionValues = Map.empty, size = 1, modificationTime = 1, + dataChange = true, stats = "{\"numRecords\": 1}") + val addB = AddFile(path = "b", partitionValues = Map.empty, size = 1, modificationTime = 1, + dataChange = true, stats = "{\"numRecords\": 1}") + txn.commit(Seq(addA, addB), ManualUpdate) + + deltaLog.update().allFiles.collect().foreach { f => + assert(f.defaultRowCommitVersion.contains(3)) + } + } + } + + test("default row commit versions are assigned when concurrent txn enables row tracking") { + withTempDir { tempDir => + val deltaLog = DeltaLog.forTable(spark, tempDir) + + // Initial setup - version 0 + val protocolWithoutRowTracking = + Protocol(TABLE_FEATURES_MIN_READER_VERSION, TABLE_FEATURES_MIN_WRITER_VERSION) + val metadata = Metadata() + deltaLog.startTransaction().commit(Seq(protocolWithoutRowTracking, metadata), ManualUpdate) + + // Start a transaction + val txn = deltaLog.startTransaction() + + // Commit concurrent transactions enabling row tracking - version 1 and 2 + val protocolWithRowTracking = protocolWithoutRowTracking.withFeature(RowTrackingFeature) + deltaLog.startTransaction().commit(Seq(protocolWithRowTracking), ManualUpdate) + deltaLog.startTransaction().commit(Nil, ManualUpdate) + + // Commit the transaction - version 3 + val addA = AddFile(path = "a", partitionValues = Map.empty, size = 1, modificationTime = 1, + dataChange = true, stats = "{\"numRecords\": 1}") + val addB = AddFile(path = "b", partitionValues = Map.empty, size = 1, modificationTime = 1, + dataChange = true, stats = "{\"numRecords\": 1}") + txn.commit(Seq(addA, addB), ManualUpdate) + + deltaLog.update().allFiles.collect().foreach { f => + assert(f.defaultRowCommitVersion.contains(3)) + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/MaterializedColumnSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/MaterializedColumnSuite.scala new file mode 100644 index 00000000000..d68bb59437c --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/MaterializedColumnSuite.scala @@ -0,0 +1,187 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.rowtracking + +import org.apache.spark.sql.delta.{DeltaConfigs, DeltaLog, DeltaRuntimeException, MaterializedRowCommitVersion, MaterializedRowId, RowTracking} +import org.apache.spark.sql.delta.rowid.RowIdTestUtils + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.execution.datasources.parquet.ParquetTest + +class MaterializedColumnSuite extends RowIdTestUtils + with ParquetTest { + + private val testTableName = "target" + private val testDataColumnName = "test_data" + + private def withTestTable(testFunction: => Unit): Unit = { + withTable(testTableName) { + spark.range(end = 10).toDF(testDataColumnName) + .write.format("delta").saveAsTable(testTableName) + testFunction + } + } + + private def getMaterializedRowIdColumnName(tableName: String): Option[String] = { + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, TableIdentifier(tableName)) + snapshot.metadata.configuration.get(MaterializedRowId.MATERIALIZED_COLUMN_NAME_PROP) + } + + private def getMaterializedRowCommitVersionColumnName(tableName: String): Option[String] = { + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, TableIdentifier(tableName)) + snapshot.metadata.configuration.get( + MaterializedRowCommitVersion.MATERIALIZED_COLUMN_NAME_PROP) + } + + for ((name, getMaterializedColumnName) <- Map( + "row ids" -> getMaterializedRowIdColumnName _, + "row commit versions" -> getMaterializedRowCommitVersionColumnName _ + )) { + test(s"materialized $name column name is stored when row tracking is enabled") { + withRowTrackingEnabled(enabled = true) { + withTestTable { + assert(getMaterializedColumnName(testTableName).isDefined) + } + } + } + + test(s"materialized $name column name is not stored when row tracking is disabled") { + withRowTrackingEnabled(enabled = false) { + withTestTable { + assert(getMaterializedColumnName(testTableName).isEmpty) + } + } + } + + test(s"adding a column with the same name as the materialized $name column name fails") { + withRowTrackingEnabled(enabled = true) { + withTestTable { + val materializedColumnName = getMaterializedColumnName(testTableName).get + val error = intercept[DeltaRuntimeException] { + sql(s"ALTER TABLE $testTableName ADD COLUMN (`$materializedColumnName` BIGINT)") + } + checkError(error, "DELTA_ADDING_COLUMN_WITH_INTERNAL_NAME_FAILED", + parameters = Map("colName" -> materializedColumnName)) + } + } + } + + test(s"renaming a column to the materialized $name column name fails") { + withRowTrackingEnabled(enabled = true) { + withSQLConf(DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey -> "name") { + withTestTable { + val materializedColumnName = getMaterializedColumnName(testTableName).get + val error = intercept[DeltaRuntimeException] { + sql(s"ALTER TABLE $testTableName " + + s"RENAME COLUMN $testDataColumnName TO `$materializedColumnName`") + } + checkError(error, errorClass = "DELTA_ADDING_COLUMN_WITH_INTERNAL_NAME_FAILED", + parameters = Map("colName" -> materializedColumnName)) + } + } + } + } + + test(s"cloning a table with a column equal to the materialized $name column name fails") { + val targetName = "target" + val sourceName = "source" + withTable(targetName, sourceName) { + withRowTrackingEnabled(enabled = true) { + spark.range(0).toDF("val") + .write.format("delta").saveAsTable(targetName) + + val materializedColumnName = getMaterializedColumnName(targetName).get + spark.range(0).toDF(materializedColumnName) + .write.format("delta").saveAsTable(sourceName) + + val error = 
intercept[DeltaRuntimeException] { + sql(s"CREATE OR REPLACE TABLE $targetName SHALLOW CLONE $sourceName") + } + checkError(error, errorClass = "DELTA_ADDING_COLUMN_WITH_INTERNAL_NAME_FAILED", + parameters = Map("colName" -> materializedColumnName)) + } + } + } + + test(s"replace keeps the materialized $name column name") { + withRowTrackingEnabled(enabled = true) { + withTestTable { + val materializedColumnNameBefore = getMaterializedColumnName(testTableName) + sql( + s""" + |CREATE OR REPLACE TABLE $testTableName + |USING delta AS + |SELECT * FROM VALUES (0), (1) + |""".stripMargin) + val materializedColumnNameAfter = getMaterializedColumnName(testTableName) + assert(materializedColumnNameBefore == materializedColumnNameAfter) + } + } + } + + test(s"restore keeps the materialized $name column name") { + withRowTrackingEnabled(enabled = true) { + withTestTable { + spark.range(end = 100).toDF(testDataColumnName) + .write.format("delta").mode("overwrite").saveAsTable(testTableName) + + val materializedColumnNameBefore = getMaterializedColumnName(testTableName) + io.delta.tables.DeltaTable.forName(testTableName).restoreToVersion(0) + val materializedColumnNameAfter = getMaterializedColumnName(testTableName) + assert(materializedColumnNameBefore == materializedColumnNameAfter) + } + } + } + + test(s"clone assigns a materialized $name column when table property is set") { + val sourceTableName = "source" + val targetTableName = "target" + + withTable(sourceTableName, targetTableName) { + withRowTrackingEnabled(enabled = false) { + spark.range(end = 1).write.format("delta").saveAsTable(sourceTableName) + assert(getMaterializedColumnName(sourceTableName).isEmpty) + + sql(s"CREATE OR REPLACE TABLE $targetTableName SHALLOW CLONE $sourceTableName " + + s"TBLPROPERTIES ('${DeltaConfigs.ROW_TRACKING_ENABLED.key}' = 'true')") + + assert(getMaterializedColumnName(targetTableName).isDefined) + } + } + } + + test(s"clone does not assign a materialized $name column when table property is not set") { + val sourceTableName = "source" + val targetTableName = "target" + + withTable(sourceTableName, targetTableName) { + withRowTrackingEnabled(enabled = true) { + spark.range(end = 1).toDF("col1").write.format("delta").saveAsTable(sourceTableName) + + sql(s"CREATE TABLE $targetTableName SHALLOW CLONE $sourceTableName") + + val sourceTableColumnName = getMaterializedColumnName(sourceTableName) + val targetTableColumnName = getMaterializedColumnName(targetTableName) + + assert(sourceTableColumnName.isDefined) + assert(targetTableColumnName.isEmpty) + } + } + } + + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/RowTrackingConflictResolutionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/RowTrackingConflictResolutionSuite.scala new file mode 100644 index 00000000000..4221512c5d9 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/RowTrackingConflictResolutionSuite.scala @@ -0,0 +1,150 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.rowtracking + +import org.apache.spark.sql.delta.{DeltaLog, DeltaOperations, RowId, RowTrackingFeature} +import org.apache.spark.sql.delta.actions.{Action, AddFile} +import org.apache.spark.sql.delta.rowid.RowIdTestUtils +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.test.SharedSparkSession + +class RowTrackingConflictResolutionSuite extends QueryTest + with SharedSparkSession with RowIdTestUtils { + + private val testTableName = "test_table" + + private def deltaLog = DeltaLog.forTable(spark, TableIdentifier(testTableName)) + private def latestSnapshot = deltaLog.update() + + private def withTestTable(testBlock: => Unit): Unit = { + withTable(testTableName) { + withRowTrackingEnabled(enabled = false) { + // Table is initially empty. + spark.range(end = 0).toDF().write.format("delta").saveAsTable(testTableName) + + testBlock + } + } + } + + /** Create an AddFile action for testing purposes. */ + private def addFile(path: String): AddFile = { + AddFile( + path = path, + partitionValues = Map.empty, + size = 1337, + modificationTime = 1, + dataChange = true, + stats = """{ "numRecords": 1 }""" + ) + } + + /** Add Row tracking table feature support. */ + private def activateRowTracking(): Unit = { + require(!latestSnapshot.protocol.isFeatureSupported(RowTrackingFeature)) + deltaLog.upgradeProtocol(Action.supportedProtocolVersion()) + } + + // Add 'numRecords' records to the table. + private def commitRecords(numRecords: Int): Unit = { + spark.range(numRecords).write.format("delta").mode("append").saveAsTable(testTableName) + } + + test("Set baseRowId if table feature was committed concurrently") { + withTestTable { + val txn = deltaLog.startTransaction() + activateRowTracking() + txn.commit(Seq(addFile(path = "file_path")), DeltaOperations.ManualUpdate) + + assertRowIdsAreValid(deltaLog) + } + } + + test("Set valid baseRowId if table feature and RowIdHighWaterMark are committed concurrently") { + withTestTable { + val filePath = "file_path" + val numConcurrentRecords = 11 + + val txn = deltaLog.startTransaction() + activateRowTracking() + commitRecords(numConcurrentRecords) + txn.commit(Seq(addFile(filePath)), DeltaOperations.ManualUpdate) + + assertRowIdsAreValid(deltaLog) + val committedAddFile = latestSnapshot.allFiles.collect().filter(_.path == filePath) + assert(committedAddFile.size === 1) + assert(committedAddFile.head.baseRowId === Some(numConcurrentRecords)) + } + } + + test("Conflict resolution if table feature and initial AddFiles are in the same commit") { + withTestTable { + val filePath = "file_path" + + val txn = deltaLog.startTransaction() + deltaLog.startTransaction().commit( + Seq(Action.supportedProtocolVersion(), addFile("other_path")), DeltaOperations.ManualUpdate) + txn.commit(Seq(addFile(filePath)), DeltaOperations.ManualUpdate) + + assertRowIdsAreValid(deltaLog) + val committedAddFile = latestSnapshot.allFiles.collect().filter(_.path == filePath) + assert(committedAddFile.size === 1) + assert(committedAddFile.head.baseRowId === Some(1)) + } + } + + test("Conflict resolution with concurrent INSERT") { + withTestTable { + val filePath = "file_path" + val numInitialRecords = 7 + val numConcurrentRecords = 11 + + activateRowTracking() + commitRecords(numInitialRecords) + val txn = 
deltaLog.startTransaction() + commitRecords(numConcurrentRecords) + txn.commit(Seq(addFile(filePath)), DeltaOperations.ManualUpdate) + + assertRowIdsAreValid(deltaLog) + val committedAddFile = latestSnapshot.allFiles.collect().filter(_.path == filePath) + assert(committedAddFile.size === 1) + assert(committedAddFile.head.baseRowId === Some(numInitialRecords + numConcurrentRecords)) + val currentHighWaterMark = RowId.extractHighWatermark(latestSnapshot).get + assert(currentHighWaterMark === numInitialRecords + numConcurrentRecords) + } + } + + test("Handle commits that do not bump the high water mark") { + withTestTable { + val filePath = "file_path" + val numInitialRecords = 7 + activateRowTracking() + commitRecords(numInitialRecords) + + val txn = deltaLog.startTransaction() + val concurrentTxn = deltaLog.startTransaction() + val updatedProtocol = latestSnapshot.protocol + concurrentTxn.commit(Seq(updatedProtocol), DeltaOperations.ManualUpdate) + txn.commit(Seq(addFile(filePath)), DeltaOperations.ManualUpdate) + + assertRowIdsAreValid(deltaLog) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/RowTrackingTestUtils.scala b/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/RowTrackingTestUtils.scala new file mode 100644 index 00000000000..6b54f607d44 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/rowtracking/RowTrackingTestUtils.scala @@ -0,0 +1,36 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.rowtracking + +import org.apache.spark.sql.delta.{DeltaConfigs, RowTrackingFeature} +import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf + +import org.apache.spark.SparkConf +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSparkSession + +trait RowTrackingTestUtils extends QueryTest with SharedSparkSession { + lazy val rowTrackingFeatureName: String = + TableFeatureProtocolUtils.propertyKey(RowTrackingFeature) + lazy val defaultRowTrackingFeatureProperty: String = + TableFeatureProtocolUtils.defaultPropertyKey(RowTrackingFeature) + + def withRowTrackingEnabled(enabled: Boolean)(f: => Unit): Unit = { + withSQLConf(DeltaConfigs.ROW_TRACKING_ENABLED.defaultTablePropertyKey -> enabled.toString)(f) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/schema/CaseSensitivitySuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/schema/CaseSensitivitySuite.scala new file mode 100644 index 00000000000..10f8f38beb5 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/schema/CaseSensitivitySuite.scala @@ -0,0 +1,222 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.schema + +import java.io.File + +import org.apache.spark.sql.delta.DeltaLog +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql._ +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.{StreamingQuery, StreamingQueryException} +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + +class CaseSensitivitySuite extends QueryTest + with SharedSparkSession + with SQLTestUtils + with DeltaSQLCommandTest { + + import testImplicits._ + + private def testWithCaseSensitivity(name: String)(f: => Unit): Unit = { + testQuietly(name) { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + f + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + f + } + } + } + + private def getPartitionValues(allFiles: Dataset[AddFile], colName: String): Array[String] = { + allFiles.select(col(s"partitionValues.$colName")).where(col(colName).isNotNull) + .distinct().as[String].collect() + } + + testWithCaseSensitivity("case sensitivity of partition fields") { + withTempDir { tempDir => + val query = "SELECT id + 1 as Foo, id as Bar FROM RANGE(1)" + sql(query).write.partitionBy("foo").format("delta").save(tempDir.getAbsolutePath) + checkAnswer( + sql(query), + spark.read.format("delta").load(tempDir.getAbsolutePath) + ) + + val allFiles = DeltaLog.forTable(spark, tempDir.getAbsolutePath).snapshot.allFiles + assert(getPartitionValues(allFiles, "Foo") === Array("1")) + checkAnswer( + spark.read.format("delta").load(tempDir.getAbsolutePath), + Row(1L, 0L) + ) + } + } + + testQuietly("case sensitivity of partition fields (stream)") { + // DataStreamWriter auto normalizes partition columns, therefore we don't need to check + // case sensitive case + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + withTempDir { tempDir => + val memSource = MemoryStream[(Long, Long)] + val stream1 = startStream(memSource.toDF().toDF("Foo", "Bar"), tempDir) + try { + memSource.addData((1L, 0L)) + stream1.processAllAvailable() + } finally { + stream1.stop() + } + + checkAnswer( + spark.read.format("delta").load(tempDir.getAbsolutePath), + Row(1L, 0L) + ) + + val allFiles = DeltaLog.forTable(spark, tempDir.getAbsolutePath).snapshot.allFiles + assert(getPartitionValues(allFiles, "Foo") === Array("1")) + } + } + } + + testWithCaseSensitivity("two fields with same name") { + withTempDir { tempDir => + intercept[AnalysisException] { + val query = "SELECT id as Foo, id as foo FROM RANGE(1)" + sql(query).write.partitionBy("foo").format("delta").save(tempDir.getAbsolutePath) + } + } + } + + testWithCaseSensitivity("two fields with same name (stream)") { + withTempDir { tempDir => + val memSource = MemoryStream[(Long, Long)] + val stream1 = startStream(memSource.toDF().toDF("Foo", "foo"), tempDir) + try { + val e = 
intercept[StreamingQueryException] { + memSource.addData((0L, 0L)) + stream1.processAllAvailable() + } + assert(e.cause.isInstanceOf[AnalysisException]) + } finally { + stream1.stop() + } + } + } + + testWithCaseSensitivity("schema merging is case insensitive but preserves original case") { + withTempDir { tempDir => + val query1 = "SELECT id as foo, id as bar FROM RANGE(1)" + sql(query1).write.format("delta").save(tempDir.getAbsolutePath) + + val query2 = "SELECT id + 1 as Foo, id as bar FROM RANGE(1)" // notice how 'F' is capitalized + sql(query2).write.format("delta").mode("append").save(tempDir.getAbsolutePath) + + val query3 = "SELECT id as bAr, id + 2 as Foo FROM RANGE(1)" // changed order as well + sql(query3).write.format("delta").mode("append").save(tempDir.getAbsolutePath) + + val df = spark.read.format("delta").load(tempDir.getAbsolutePath) + checkAnswer( + df, + Row(0, 0) :: Row(1, 0) :: Row(2, 0) :: Nil + ) + assert(df.schema.fieldNames === Seq("foo", "bar")) + } + } + + testWithCaseSensitivity("schema merging preserving column case (stream)") { + withTempDir { tempDir => + val memSource = MemoryStream[(Long, Long)] + val stream1 = startStream(memSource.toDF().toDF("Foo", "Bar"), tempDir, None) + try { + memSource.addData((0L, 0L)) + stream1.processAllAvailable() + } finally { + stream1.stop() + } + val stream2 = startStream(memSource.toDF().toDF("foo", "Bar"), tempDir, None) + try { + memSource.addData((1L, 2L)) + stream2.processAllAvailable() + } finally { + stream2.stop() + } + + val df = spark.read.format("delta").load(tempDir.getAbsolutePath) + checkAnswer( + df, + Row(0L, 0L) :: Row(1L, 2L) :: Nil + ) + assert(df.schema.fieldNames === Seq("Foo", "Bar")) + } + } + + test("SC-12677: replaceWhere predicate should be case insensitive") { + withTempDir { tempDir => + val path = tempDir.getCanonicalPath + Seq((1, "a"), (2, "b")).toDF("Key", "val").write + .partitionBy("key").format("delta").mode("append").save(path) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + Seq((2, "c")).toDF("Key", "val").write + .format("delta") + .mode("overwrite") + .option("replaceWhere", "key = 2") // note the different case + .save(path) + } + + checkAnswer( + spark.read.format("delta").load(path), + Row(1, "a") :: Row(2, "c") :: Nil + ) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val e = intercept[AnalysisException] { + Seq((2, "d")).toDF("Key", "val").write + .format("delta") + .mode("overwrite") + .option("replaceWhere", "key = 2") // note the different case + .save(path) + } + assert(e.getErrorClass == "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION" + || e.getErrorClass == "MISSING_COLUMN" + || e.getErrorClass == "UNRESOLVED_COLUMN.WITH_SUGGESTION") + } + + checkAnswer( + spark.read.format("delta").load(path), + Row(1, "a") :: Row(2, "c") :: Nil + ) + } + } + + private def startStream( + df: Dataset[_], + tempDir: File, + partitionBy: Option[String] = Some("foo")): StreamingQuery = { + val writer = df.writeStream + .option("checkpointLocation", new File(tempDir, "_checkpoint").getAbsolutePath) + .format("delta") + partitionBy.foreach(writer.partitionBy(_)) + writer.start(tempDir.getAbsolutePath) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/schema/CheckConstraintsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/schema/CheckConstraintsSuite.scala new file mode 100644 index 00000000000..fd1f6a8baca --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/schema/CheckConstraintsSuite.scala @@ -0,0 +1,422 @@ +/* + * Copyright (2021) The 
Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.schema + +import scala.collection.JavaConverters._ + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.constraints.CharVarcharConstraint +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.TaskFailedReason +import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types.{ArrayType, IntegerType, MapType, StringType, StructField, StructType} + +class CheckConstraintsSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest + with SQLTestUtils { + + + import testImplicits._ + + private def withTestTable(thunk: String => Unit) = { + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "3") { + withTable("checkConstraintsTest") { + Seq( + (1, "a"), (2, "b"), (3, "c"), + (4, "d"), (5, "e"), (6, "f") + ).toDF("num", "text").write.format("delta").saveAsTable("checkConstraintsTest") + thunk("checkConstraintsTest") + } + } + } + + private def errorContains(errMsg: String, str: String): Unit = { + assert(errMsg.contains(str), s"Error message did not contain '$str'. Actual message: $errMsg") + } + + test("can't add unparseable constraint") { + withTestTable { table => + val e = intercept[ParseException] { + sql(s"ALTER TABLE $table\nADD CONSTRAINT lessThan5 CHECK (id <)") + } + // Make sure we're still getting a useful parse error, even though we do some complicated + // internal stuff to persist the constraint. Unfortunately this test may be a bit fragile. + errorContains(e.getMessage, "Syntax error at or near end of input") + errorContains(e.getMessage, + """ + |== SQL == + |id < + |----^^^ + |""".stripMargin) + } + } + + test("constraint must be boolean") { + withTestTable { table => + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $table ADD CONSTRAINT integerVal CHECK (3)") + } + errorContains(e.getMessage, + "CHECK constraint 'integerVal' (3) should be a boolean expression.") + } + } + + test("can't add constraint with duplicate name") { + withTestTable { table => + sql(s"ALTER TABLE $table ADD CONSTRAINT trivial CHECK (true)") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $table ADD CONSTRAINT trivial CHECK (true)") + } + errorContains(e.getMessage, + s"Constraint 'trivial' already exists as a CHECK constraint. 
Please delete the " + + s"old constraint first.\nOld constraint:\ntrue") + } + } + + test("can't add constraint with names that are reserved for internal usage") { + withTestTable { table => + val reservedName = CharVarcharConstraint.INVARIANT_NAME + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $table ADD CONSTRAINT $reservedName CHECK (true)") + } + errorContains(e.getMessage, s"Cannot use '$reservedName' as the name of a CHECK constraint") + } + } + + test("duplicate constraint check is case insensitive") { + withTestTable { table => + sql(s"ALTER TABLE $table ADD CONSTRAINT trivial CHECK (true)") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $table ADD CONSTRAINT TRIVIAL CHECK (true)") + } + errorContains(e.getMessage, + s"Constraint 'TRIVIAL' already exists as a CHECK constraint. Please delete the " + + s"old constraint first.\nOld constraint:\ntrue") + } + } + + testQuietly("can't add already violated constraint") { + withTestTable { table => + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $table ADD CONSTRAINT lessThan5 CHECK (num < 5 and text < 'd')") + } + errorContains(e.getMessage, + s"violate the new CHECK constraint (num < 5 and text < 'd')") + } + } + + testQuietly("can't add row violating constraint") { + withTestTable { table => + sql(s"ALTER TABLE $table ADD CONSTRAINT lessThan10 CHECK (num < 10 and text < 'g')") + sql(s"INSERT INTO $table VALUES (5, 'a')") + val e = intercept[InvariantViolationException] { + sql(s"INSERT INTO $table VALUES (11, 'a')") + } + errorContains(e.getMessage, + s"CHECK constraint lessthan10 ((num < 10) AND (text < 'g')) violated") + } + } + + test("drop constraint that doesn't exist throws an exception") { + withTestTable { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table DROP CONSTRAINT myConstraint") + } + } + + withSQLConf((DeltaSQLConf.DELTA_ASSUMES_DROP_CONSTRAINT_IF_EXISTS.key, "false")) { + withTestTable { table => + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $table DROP CONSTRAINT myConstraint") + } + assert(e.getErrorClass == "DELTA_CONSTRAINT_DOES_NOT_EXIST") + errorContains(e.getMessage, + "nonexistent constraint myconstraint from table `default`.`checkconstraintstest`") + errorContains(e.getMessage, + "databricks.spark.delta.constraints.assumesDropIfExists.enabled to true") + } + } + } + + test("can drop constraint that doesn't exist with IF EXISTS") { + withTestTable { table => + sql(s"ALTER TABLE $table DROP CONSTRAINT IF EXISTS myConstraint") + } + + withSQLConf((DeltaSQLConf.DELTA_ASSUMES_DROP_CONSTRAINT_IF_EXISTS.key, "true")) { + withTestTable { table => + sql(s"ALTER TABLE $table DROP CONSTRAINT myConstraint") + } + } + } + + + test("drop constraint is case insensitive") { + withTestTable { table => + sql(s"ALTER TABLE $table ADD CONSTRAINT myConstraint CHECK (true)") + sql(s"ALTER TABLE $table DROP CONSTRAINT MYCONSTRAINT") + } + } + + testQuietly("add row violating constraint after it's dropped") { + withTestTable { table => + sql(s"ALTER TABLE $table ADD CONSTRAINT lessThan10 CHECK (num < 10 and text < 'g')") + intercept[InvariantViolationException] { + sql(s"INSERT INTO $table VALUES (11, 'a')") + } + sql(s"ALTER TABLE $table DROP CONSTRAINT lessThan10") + sql(s"INSERT INTO $table VALUES (11, 'a')") + checkAnswer(sql(s"SELECT num FROM $table"), Seq(1, 2, 3, 4, 5, 6, 11).toDF()) + } + } + + test("see constraints in table properties") { + withTestTable { table => + sql(s"ALTER TABLE $table ADD CONSTRAINT toBeDropped CHECK (text < 'n')") + sql(s"ALTER 
TABLE $table ADD CONSTRAINT trivial CHECK (true)") + sql(s"ALTER TABLE $table ADD CONSTRAINT numLimit CHECK (num < 10)") + sql(s"ALTER TABLE $table ADD CONSTRAINT combo CHECK (concat(num, text) != '9i')") + sql(s"ALTER TABLE $table DROP CONSTRAINT toBeDropped") + val props = + sql(s"DESCRIBE DETAIL $table").selectExpr("properties").head().getMap[String, String](0) + // We've round-tripped through the parser, so the text of the constraints stored won't exactly + // match what was originally given. + assert(props == Map( + "delta.constraints.trivial" -> "true", + "delta.constraints.numlimit" -> "num < 10", + "delta.constraints.combo" -> "concat ( num , text ) != '9i'" + )) + } + } + + test("delta history for constraints") { + withTestTable { table => + sql(s"ALTER TABLE $table ADD CONSTRAINT lessThan10 CHECK (num < 10)") + checkAnswer( + sql(s"DESCRIBE HISTORY $table") + .where("operation = 'ADD CONSTRAINT'") + .selectExpr("operation", "operationParameters"), + Seq(("ADD CONSTRAINT", Map("name" -> "lessThan10", "expr" -> "num < 10"))).toDF()) + + sql(s"ALTER TABLE $table DROP CONSTRAINT IF EXISTS lessThan10") + checkAnswer( + sql(s"DESCRIBE HISTORY $table") + .where("operation = 'DROP CONSTRAINT'") + .selectExpr("operation", "operationParameters"), + Seq(( + "DROP CONSTRAINT", + Map("name" -> "lessThan10", "expr" -> "num < 10", "existed" -> "true") + )).toDF()) + sql(s"ALTER TABLE $table DROP CONSTRAINT IF EXISTS lessThan10") + checkAnswer( + sql(s"DESCRIBE HISTORY $table") + .where("operation = 'DROP CONSTRAINT'") + .selectExpr("operation", "operationParameters"), + Seq( + ("DROP CONSTRAINT", + Map("name" -> "lessThan10", "expr" -> "num < 10", "existed" -> "true")), + ("DROP CONSTRAINT", + Map("name" -> "lessThan10", "existed" -> "false")) + ).toDF()) + } + } + + testQuietly("constraint on builtin methods") { + withTestTable { table => + sql(s"ALTER TABLE $table ADD CONSTRAINT textSize CHECK (LENGTH(text) < 10)") + sql(s"INSERT INTO $table VALUES (11, 'abcdefg')") + val e = intercept[InvariantViolationException] { + sql(s"INSERT INTO $table VALUES (12, 'abcdefghijklmnop')") + } + errorContains(e.getMessage, "constraint textsize (LENGTH(text) < 10) violated by row") + } + } + + testQuietly("constraint with implicit casts") { + withTestTable { table => + sql(s"ALTER TABLE $table ADD CONSTRAINT maxWithImplicitCast CHECK (num < '10')") + val e = intercept[InvariantViolationException] { + sql(s"INSERT INTO $table VALUES (11, 'data')") + } + errorContains(e.getMessage, "constraint maxwithimplicitcast (num < '10') violated by row") + } + } + + testQuietly("constraint with nested parentheses") { + withTestTable { table => + sql(s"ALTER TABLE $table ADD CONSTRAINT maxWithParens " + + s"CHECK (( (num < '10') AND ((LENGTH(text)) < 100) ))") + val e = intercept[InvariantViolationException] { + sql(s"INSERT INTO $table VALUES (11, 'data')") + } + errorContains(e.getMessage, + "constraint maxwithparens ((num < '10') AND (LENGTH(text) < 100)) violated by row") + } + } + + testQuietly("constraint with analyzer-evaluated expressions") { + withTestTable { table => + // We use current_timestamp() as the most convenient analyzer-evaluated expression - of course + // in a realistic use case it'd probably not be right to add a constraint on a + // nondeterministic expression. 
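+ // The check below uses unix_timestamp(), which evaluates to the current epoch seconds
+ // (still well below Int.MaxValue), so the insert of Int.MaxValue should violate it.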
+ sql(s"ALTER TABLE $table ADD CONSTRAINT maxWithAnalyzerEval " + + s"CHECK (num < unix_timestamp())") + val e = intercept[InvariantViolationException] { + sql(s"INSERT INTO $table VALUES (${Int.MaxValue}, 'data')") + } + errorContains(e.getMessage, + "maxwithanalyzereval (num < unix_timestamp()) violated by row") + } + } + + testQuietly("constraints with nulls") { + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "3") { + withTable("checkConstraintsTest") { + val rows = Range(0, 10).map { i => + Row( + i, + null, + Row("constantWithinStruct", Map(i -> i), Array(i, null, i + 2))) + } + + val schema = new StructType(Array( + StructField("id", IntegerType), + StructField("text", StringType), + StructField("nested", new StructType(Array( + StructField("constant", StringType), + StructField("m", MapType(IntegerType, IntegerType, valueContainsNull = true)), + StructField("arr", ArrayType(IntegerType, containsNull = true))))))) + spark.createDataFrame(rows.toList.asJava, schema) + .write.format("delta").saveAsTable("checkConstraintsTest") + + // Constraints checking for a null value should work. + sql("ALTER TABLE checkConstraintsTest ADD CONSTRAINT textNull CHECK (text IS NULL)") + sql("ALTER TABLE checkConstraintsTest ADD CONSTRAINT arr1Null " + + "CHECK (nested.arr[1] IS NULL)") + + // Constraints incompatible with a null value will of course fail, but they should fail with + // the same clear error as normal. + var e: Exception = intercept[AnalysisException] { + sql("ALTER TABLE checkConstraintsTest ADD CONSTRAINT arrLessThan5 " + + "CHECK (nested.arr[1] < 5)") + } + errorContains(e.getMessage, + s"10 rows in default.checkconstraintstest violate the new CHECK constraint " + + s"(nested . arr [ 1 ] < 5)") + + // Adding a null value into a constraint should fail similarly, even if it's null + // because a parent field is null. + sql("ALTER TABLE checkConstraintsTest ADD CONSTRAINT arr0 " + + "CHECK (nested.arr[0] < 100)") + val newRows = Seq( + Row(10, null, Row("c", Map(10 -> null), Array(null, null, 12))), + Row(11, null, Row("c", Map(11 -> null), null)), + Row(12, null, null)) + newRows.foreach { r => + e = intercept[InvariantViolationException] { + spark.createDataFrame(List(r).asJava, schema) + .write.format("delta").mode("append").saveAsTable("checkConstraintsTest") + } + errorContains(e.getMessage, + "CHECK constraint arr0 (nested.arr[0] < 100) violated by row") + } + + // On the other hand, existing constraints like arr1Null which do allow null values should + // permit new rows even if the value's parent is null. 
+ sql("ALTER TABLE checkConstraintsTest DROP CONSTRAINT arr0") + newRows.foreach { r => + spark.createDataFrame(List(r).asJava, schema) + .write.format("delta").mode("append").saveAsTable("checkConstraintsTest") + } + checkAnswer( + spark.read.format("delta").table("checkConstraintsTest").select("id"), + (0 to 12).toDF("id")) + } + } + } + + testQuietly("complex constraints") { + withSQLConf( + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_READER_VERSION.key -> "1", + DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key -> "3") { + withTable("checkConstraintsTest") { + val rows = Range(0, 10).map { i => + Row( + i, + ('a' + i).toString, + Row("constantWithinStruct", Map(i -> i), Array(i, i + 1, i + 2))) + } + val schema = new StructType(Array( + StructField("id", IntegerType), + StructField("text", StringType), + StructField("nested", new StructType(Array( + StructField("constant", StringType), + StructField("m", MapType(IntegerType, IntegerType, valueContainsNull = false)), + StructField("arr", ArrayType(IntegerType, containsNull = false))))))) + spark.createDataFrame(rows.toList.asJava, schema) + .write.format("delta").saveAsTable("checkConstraintsTest") + sql("ALTER TABLE checkConstraintsTest ADD CONSTRAINT arrLen CHECK (SIZE(nested.arr) = 3)") + sql("ALTER TABLE checkConstraintsTest ADD CONSTRAINT mapIntegrity " + + "CHECK (nested.m[id] = id)") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE checkConstraintsTest ADD CONSTRAINT violated " + + s"CHECK (nested.arr[0] < id)") + } + errorContains(e.getMessage, + s"violate the new CHECK constraint (nested . arr [ 0 ] < id)") + } + } + } + + + // TODO: https://github.com/delta-io/delta/issues/831 + test("SET NOT NULL constraint fails") { + withTable("my_table") { + sql("CREATE TABLE my_table (id INT) USING DELTA;") + sql("INSERT INTO my_table VALUES (1);") + val e = intercept[AnalysisException] { + sql("ALTER TABLE my_table CHANGE COLUMN id SET NOT NULL;") + }.getMessage() + assert(e.contains("Cannot change nullable column to non-nullable")) + } + } + + testQuietly("ending semi-colons no longer makes ADD, DROP constraint commands fail") { + withTable("my_table") { + sql("CREATE TABLE my_table (birthday DATE) USING DELTA;") + sql("INSERT INTO my_table VALUES ('2021-11-11');") + + sql("ALTER TABLE my_table ADD CONSTRAINT aaa CHECK (birthday > '1900-01-01')") + sql("ALTER TABLE my_table ADD CONSTRAINT bbb CHECK (birthday > '1900-02-02')") + sql("ALTER TABLE my_table ADD CONSTRAINT ccc CHECK (birthday > '1900-03-03');") // semi-colon + + sql("ALTER TABLE my_table DROP CONSTRAINT aaa") + sql("ALTER TABLE my_table DROP CONSTRAINT bbb;") // semi-colon + } + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/schema/InvariantEnforcementSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/schema/InvariantEnforcementSuite.scala new file mode 100644 index 00000000000..b8715423d2c --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/schema/InvariantEnforcementSuite.scala @@ -0,0 +1,769 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.schema + +// scalastyle:off import.ordering.noEmptyLine +import java.io.File +import java.sql.Date + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.delta.{CheckConstraintsTableFeature, DeltaLog, DeltaOperations} +import org.apache.spark.sql.delta.actions.{Metadata, TableFeatureProtocolUtils} +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.constraints.{Constraint, Constraints, Invariants} +import org.apache.spark.sql.delta.constraints.Constraints.NotNull +import org.apache.spark.sql.delta.constraints.Invariants.PersistedExpression +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.streaming.StreamingQueryException +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ + +class InvariantEnforcementSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest + with SQLTestUtils { + + + import testImplicits._ + + private def tableWithSchema(schema: StructType)(f: String => Unit): Unit = { + withTempDir { tempDir => + val deltaLog = DeltaLog.forTable(spark, tempDir) + val txn = deltaLog.startTransaction() + txn.commit(Metadata(schemaString = schema.json) :: Nil, DeltaOperations.ManualUpdate) + spark.read.format("delta") + .load(tempDir.getAbsolutePath) + .write + .format("delta") + .mode("overwrite") + .save(tempDir.getAbsolutePath) + f(tempDir.getAbsolutePath) + } + } + + private def testBatchWriteRejection( + invariant: Constraint, + schema: StructType, + df: Dataset[_], + expectedErrors: String*): Unit = { + tableWithSchema(schema) { path => + val e = intercept[InvariantViolationException] { + df.write.mode("append").format("delta").save(path) + } + checkConstraintException(e, (invariant.name +: expectedErrors): _*) + } + } + + private def checkConstraintException( + e: InvariantViolationException, expectedErrors: String*): Unit = { + val error = e.getMessage + val allExpected = expectedErrors + allExpected.foreach { expected => + assert(error.contains(expected), s"$error didn't contain $expected") + } + } + + private def testStreamingWriteRejection[T: Encoder]( + invariant: Constraint, + schema: StructType, + toDF: MemoryStream[T] => DataFrame, + data: Seq[T], + expectedErrors: String*): Unit = { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + val txn = deltaLog.startTransaction() + txn.commit(Metadata(schemaString = schema.json) :: Nil, DeltaOperations.ManualUpdate) + val memStream = MemoryStream[T] + val stream = toDF(memStream).writeStream + .outputMode("append") + .format("delta") + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .start(dir.getAbsolutePath) + try { + val e = intercept[StreamingQueryException] { + memStream.addData(data) + stream.processAllAvailable() + } + // Produce a good error if the cause isn't the right type - just an assert makes it hard to + // see what the wrong exception was. 
+ intercept[InvariantViolationException] { throw e.getCause } + + checkConstraintException( + e.getCause.asInstanceOf[InvariantViolationException], + (invariant.name +: expectedErrors): _*) + } finally { + stream.stop() + } + } + } + + test("reject non-nullable top level column") { + val schema = new StructType() + .add("key", StringType, nullable = false) + .add("value", IntegerType) + testBatchWriteRejection( + NotNull(Seq("key")), + schema, + Seq[(String, Int)](("a", 1), (null, 2)).toDF("key", "value"), + "key" + ) + testStreamingWriteRejection[(String, Int)]( + NotNull(Seq("key")), + schema, + _.toDF().toDF("key", "value"), + Seq[(String, Int)](("a", 1), (null, 2)), + "key" + ) + } + + test("reject non-nullable top level column - column doesn't exist") { + val schema = new StructType() + .add("key", StringType, nullable = false) + .add("value", IntegerType) + testBatchWriteRejection( + NotNull(Seq("key")), + schema, + Seq[Int](1, 2).toDF("value"), + "key" + ) + testStreamingWriteRejection[Int]( + NotNull(Seq("key")), + schema, + _.toDF().toDF("value"), + Seq[Int](1, 2), + "key" + ) + } + + testQuietly("write empty DataFrame - zero rows") { + val schema = new StructType() + .add("key", StringType, nullable = false) + .add("value", IntegerType) + tableWithSchema(schema) { path => + spark.createDataFrame(Seq.empty[Row].asJava, schema.asNullable).write + .mode("append").format("delta").save(path) + } + } + + test("write empty DataFrame - zero columns") { + val schema = new StructType() + .add("key", StringType, nullable = false) + .add("value", IntegerType) + testBatchWriteRejection( + NotNull(Seq("key")), + schema, + Seq[Int](1, 2).toDF("value").drop("value"), + "key" + ) + testStreamingWriteRejection[Int]( + NotNull(Seq("key")), + schema, + _.toDF().toDF("value").drop("value"), + Seq[Int](1, 2), + "key" + ) + } + + testQuietly("reject non-nullable nested column") { + val schema = new StructType() + .add("top", new StructType() + .add("key", StringType, nullable = false) + .add("value", IntegerType)) + testBatchWriteRejection( + NotNull(Seq("key")), + schema, + spark.createDataFrame(Seq(Row(Row("a", 1)), Row(Row(null, 2))).asJava, schema.asNullable), + "top.key" + ) + testBatchWriteRejection( + NotNull(Seq("key")), + schema, + spark.createDataFrame(Seq(Row(Row("a", 1)), Row(null)).asJava, schema.asNullable), + "top.key" + ) + } + + testQuietly("reject non-nullable array column") { + val schema = new StructType() + .add("top", ArrayType(ArrayType(new StructType() + .add("key", StringType) + .add("value", IntegerType))), nullable = false) + testBatchWriteRejection( + NotNull(Seq("top", "value")), + schema, + spark.createDataFrame(Seq(Row(Seq(Seq(Row("a", 1)))), Row(null)).asJava, schema.asNullable), + "top" + ) + } + + test("reject expression invariant on top level column") { + val expr = "value < 3" + val rule = Constraints.Check("", spark.sessionState.sqlParser.parseExpression(expr)) + val metadata = new MetadataBuilder() + .putString(Invariants.INVARIANTS_FIELD, PersistedExpression(expr).json) + .build() + val schema = new StructType() + .add("key", StringType) + .add("value", IntegerType, nullable = true, metadata) + testBatchWriteRejection( + rule, + schema, + Seq[(String, Int)](("a", 1), (null, 5)).toDF("key", "value"), + "value", "5" + ) + testStreamingWriteRejection[(String, Int)]( + rule, + schema, + _.toDF().toDF("key", "value"), + Seq[(String, Int)](("a", 1), (null, 5)), + "value" + ) + } + + testQuietly("reject expression invariant on nested column") { + val expr = "top.value < 
3" + val rule = Constraints.Check("", spark.sessionState.sqlParser.parseExpression(expr)) + val metadata = new MetadataBuilder() + .putString(Invariants.INVARIANTS_FIELD, PersistedExpression(expr).json) + .build() + val schema = new StructType() + .add("top", new StructType() + .add("key", StringType) + .add("value", IntegerType, nullable = true, metadata)) + testBatchWriteRejection( + rule, + schema, + spark.createDataFrame(Seq(Row(Row("a", 1)), Row(Row(null, 5))).asJava, schema.asNullable), + "top.value", "5" + ) + } + + testQuietly("reject write on top level expression invariant when field is null") { + val expr = "value < 3" + val rule = Constraints.Check("", spark.sessionState.sqlParser.parseExpression(expr)) + val metadata = new MetadataBuilder() + .putString(Invariants.INVARIANTS_FIELD, PersistedExpression(expr).json) + .build() + val schema = new StructType() + .add("key", StringType) + .add("value", IntegerType, nullable = true, metadata) + testBatchWriteRejection( + rule, + schema, + Seq[String]("a", "b").toDF("key"), + " - value : null" + ) + testBatchWriteRejection( + rule, + schema, + Seq[(String, Integer)](("a", 1), ("b", null)).toDF("key", "value"), + " - value : null" + ) + } + + testQuietly("reject write on nested expression invariant when field is null") { + val expr = "top.value < 3" + val metadata = new MetadataBuilder() + .putString(Invariants.INVARIANTS_FIELD, PersistedExpression(expr).json) + .build() + val rule = Constraints.Check("", spark.sessionState.sqlParser.parseExpression(expr)) + val schema = new StructType() + .add("top", new StructType() + .add("key", StringType) + .add("value", IntegerType, nullable = true, metadata)) + testBatchWriteRejection( + rule, + schema, + spark.createDataFrame(Seq(Row(Row("a", 1)), Row(Row("b", null))).asJava, schema.asNullable), + " - top.value : null" + ) + val schema2 = new StructType() + .add("top", new StructType() + .add("key", StringType)) + testBatchWriteRejection( + rule, + schema, + spark.createDataFrame(Seq(Row(Row("a")), Row(Row("b"))).asJava, schema2.asNullable), + " - top.value : null" + ) + } + + testQuietly("is null on top level expression invariant when field is null") { + val expr = "value is null or value < 3" + val metadata = new MetadataBuilder() + .putString(Invariants.INVARIANTS_FIELD, PersistedExpression(expr).json) + .build() + val schema = new StructType() + .add("key", StringType) + .add("value", IntegerType, nullable = true, metadata) + tableWithSchema(schema) { path => + Seq[String]("a", "b").toDF("key").write + .mode("append").format("delta").save(path) + Seq[(String, Integer)](("a", 1), ("b", null)).toDF("key", "value").write + .mode("append").format("delta").save(path) + } + } + + testQuietly("is null on nested expression invariant when field is null") { + val expr = "top.value is null or top.value < 3" + val metadata = new MetadataBuilder() + .putString(Invariants.INVARIANTS_FIELD, PersistedExpression(expr).json) + .build() + val schema = new StructType() + .add("top", new StructType() + .add("key", StringType) + .add("value", IntegerType, nullable = true, metadata)) + val schema2 = new StructType() + .add("top", new StructType() + .add("key", StringType)) + tableWithSchema(schema) { path => + spark.createDataFrame(Seq(Row(Row("a", 1)), Row(Row("b", null))).asJava, schema.asNullable) + .write.mode("append").format("delta").save(path) + spark.createDataFrame(Seq(Row(Row("a")), Row(Row("b"))).asJava, schema2.asNullable) + .write.mode("append").format("delta").save(path) + } + } + + 
testQuietly("complex expressions - AND") { + val expr = "value < 3 AND value > 0" + val metadata = new MetadataBuilder() + .putString(Invariants.INVARIANTS_FIELD, PersistedExpression(expr).json) + .build() + val schema = new StructType() + .add("key", StringType) + .add("value", IntegerType, nullable = true, metadata) + tableWithSchema(schema) { path => + Seq(1, 2).toDF("value").write.mode("append").format("delta").save(path) + intercept[InvariantViolationException] { + Seq(1, 4).toDF("value").write.mode("append").format("delta").save(path) + } + intercept[InvariantViolationException] { + Seq(-1, 2).toDF("value").write.mode("append").format("delta").save(path) + } + } + } + + testQuietly("complex expressions - IN SET") { + val expr = "key in ('a', 'b', 'c')" + val metadata = new MetadataBuilder() + .putString(Invariants.INVARIANTS_FIELD, PersistedExpression(expr).json) + .build() + val schema = new StructType() + .add("key", StringType, nullable = true, metadata) + .add("value", IntegerType) + tableWithSchema(schema) { tempDir => + Seq("a", "b").toDF("key").write.mode("append").format("delta").save(tempDir) + intercept[InvariantViolationException] { + Seq("a", "d").toDF("key").write.mode("append").format("delta").save(tempDir) + } + intercept[InvariantViolationException] { + Seq("e").toDF("key").write.mode("append").format("delta").save(tempDir) + } + } + } + + test("CHECK constraint can't be created through SET TBLPROPERTIES") { + withTable("noCheckConstraints") { + spark.range(10).write.format("delta").saveAsTable("noCheckConstraints") + val ex = intercept[AnalysisException] { + spark.sql( + "ALTER TABLE noCheckConstraints SET TBLPROPERTIES ('delta.constraints.mychk' = '1')") + } + assert(ex.getMessage.contains("ALTER TABLE ADD CONSTRAINT")) + } + } + + for (writerVersion <- Seq(2, TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION)) + testQuietly("CHECK constraint is enforced if somehow created (writerVersion = " + + s"$writerVersion)") { + withSQLConf((DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key, writerVersion.toString)) { + withTable("constraint") { + spark.range(10).selectExpr("id AS valueA", "id AS valueB", "id AS valueC") + .write.format("delta").saveAsTable("constraint") + val table = DeltaTableV2(spark, TableIdentifier("constraint", None)) + val txn = table.startTransactionWithInitialSnapshot() + val newMetadata = txn.metadata.copy( + configuration = txn.metadata.configuration + + ("delta.constraints.mychk" -> "valueA < valueB")) + assert(txn.protocol.minWriterVersion === writerVersion) + txn.commit(Seq(newMetadata), DeltaOperations.ManualUpdate) + val upVersion = if (TableFeatureProtocolUtils.supportsWriterFeatures(writerVersion)) { + TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION + } else { + CheckConstraintsTableFeature.minWriterVersion + } + assert(table.deltaLog.unsafeVolatileSnapshot.protocol.minWriterVersion === upVersion) + spark.sql("INSERT INTO constraint VALUES (50, 100, null)") + val e = intercept[InvariantViolationException] { + spark.sql("INSERT INTO constraint VALUES (100, 50, null)") + } + checkConstraintException(e, + s"""CHECK constraint mychk (valueA < valueB) violated by row with values: + | - valueA : 100 + | - valueB : 50""".stripMargin) + + val e2 = intercept[InvariantViolationException] { + spark.sql("INSERT INTO constraint VALUES (100, null, null)") + } + checkConstraintException(e2, + s"""CHECK constraint mychk (valueA < valueB) violated by row with values: + | - valueA : 100 + | - valueB : null""".stripMargin) + } + } + 
}
+
+  test("table with CHECK constraint accepts other metadata changes") {
+    withSQLConf((DeltaSQLConf.DELTA_PROTOCOL_DEFAULT_WRITER_VERSION.key, "3")) {
+      withTable("constraint") {
+        spark.range(10).selectExpr("id AS valueA", "id AS valueB")
+          .write.format("delta").saveAsTable("constraint")
+        val table = DeltaTableV2(spark, TableIdentifier("constraint", None))
+        val txn = table.startTransactionWithInitialSnapshot()
+        val newMetadata = txn.metadata.copy(
+          configuration = txn.metadata.configuration +
+            ("delta.constraints.mychk" -> "valueA < valueB"))
+        txn.commit(Seq(newMetadata), DeltaOperations.ManualUpdate)
+        spark.sql("ALTER TABLE constraint ADD COLUMN valueC INT")
+      }
+    }
+  }
+
+  def testUnenforcedNestedConstraints(
+      testName: String,
+      schemaString: String,
+      expectedError: String,
+      data: Row): Unit = {
+    testQuietly(testName) {
+      val nullTable = "nullTbl"
+      withTable(nullTable) {
+        // Try creating the table with the check enabled first, which should fail, then create it
+        // for real with the check off, which should succeed.
+        if (expectedError != null) {
+          val ex = intercept[AnalysisException] {
+            sql(s"CREATE TABLE $nullTable ($schemaString) USING delta")
+          }
+          assert(ex.getMessage.contains(expectedError))
+        }
+        withSQLConf(("spark.databricks.delta.constraints.allowUnenforcedNotNull.enabled", "true")) {
+          sql(s"CREATE TABLE $nullTable ($schemaString) USING delta")
+        }
+
+        // Once we've created the table, writes should succeed even if they violate the constraint.
+        spark.createDataFrame(
+          Seq(data).asJava,
+          spark.table(nullTable).schema
+        ).write.mode("append").format("delta").saveAsTable(nullTable)
+
+        if (expectedError != null) {
+          val ex = intercept[AnalysisException] {
+            sql(s"REPLACE TABLE $nullTable ($schemaString) USING delta")
+          }
+          assert(ex.getMessage.contains(expectedError))
+        }
+        withSQLConf(("spark.databricks.delta.constraints.allowUnenforcedNotNull.enabled", "true")) {
+          sql(s"REPLACE TABLE $nullTable ($schemaString) USING delta")
+        }
+      }
+    }
+  }
+
+  testUnenforcedNestedConstraints(
+    "not null within array",
+    schemaString = "arr array<struct<name string, mailbox string NOT NULL>> NOT NULL",
+    expectedError = "The element type of the field arr contains a NOT NULL constraint.",
+    data = Row(Seq(Row("myName", null))))
+
+  testUnenforcedNestedConstraints(
+    "not null within map key",
+    schemaString = "m map<struct<name string, mailbox string NOT NULL>, int> NOT NULL",
+    expectedError = "The key type of the field m contains a NOT NULL constraint.",
+    data = Row(Map(Row("myName", null) -> 1)))
+
+  testUnenforcedNestedConstraints(
+    "not null within map value",
+    schemaString = "m map<int, struct<name string, mailbox string NOT NULL>> NOT NULL",
+    expectedError = "The value type of the field m contains a NOT NULL constraint.",
+    data = Row(Map(1 -> Row("myName", null))))
+
+  testUnenforcedNestedConstraints(
+    "not null within nested array",
+    schemaString =
+      "s struct<n int, arr array<struct<name string, mailbox string NOT NULL>> NOT NULL>",
+    expectedError = "The element type of the field s.arr contains a NOT NULL constraint.",
+    data = Row(Row(1, Seq(Row("myName", null)))))
+
+
+  // Helper function to construct the full test name as "RuntimeReplaceable: func"
+  private def testReplaceableExpr(targetFunc: String, testTags: org.scalatest.Tag*)
+      (testFun: => Any)
+      (implicit pos: org.scalactic.source.Position): Unit = {
+    val fullTestName = s"RuntimeReplaceable: ${targetFunc}"
+    // Suppress exceptions output for invariant violations
+    super.test(fullTestName) {
+      testFun
+    }
+  }
+
+  private def testReplaceable[T: Encoder](
+      exprStr: String,
+      colType: DataType,
+      badValue: T) = {
+    val rule = Constraints.Check("",
spark.sessionState.sqlParser.parseExpression(exprStr)) + val metadata = new MetadataBuilder() + .putString(Invariants.INVARIANTS_FIELD, PersistedExpression(exprStr).json) + .build() + val schema = new StructType() + .add("value", colType, nullable = true, metadata) + val rows = Seq(Row(badValue)) + testBatchWriteRejection( + rule, + schema, + spark.createDataFrame(rows.toList.asJava, schema), + "violated by row with values" + ) + testStreamingWriteRejection[T]( + rule, + schema, + _.toDF().toDF("value"), + Seq[T](badValue), + "violated by row with values" + ) + } + + testReplaceableExpr("assert_true") { + testReplaceable("assert_true(value < 2) is not null", IntegerType, 1) + } + + testReplaceableExpr("date_part") { + testReplaceable("date_part('YEAR', value) < 2000", DateType, Date.valueOf("2001-01-01")) + } + + testReplaceableExpr("decode") { + testReplaceable("decode(encode(value, 'utf-8'), 'utf-8') = 'abc'", StringType, "a") + } + + testReplaceableExpr("extract") { + testReplaceable("extract(YEAR FROM value) < 2000", DateType, Date.valueOf("2001-01-01")) + } + + testReplaceableExpr("ifnull") { + testReplaceable("ifnull(value, 1) = 1", IntegerType, 2) + } + + testReplaceableExpr("left") { + testReplaceable("left(value, 1) = 'a'", StringType, "b") + } + + testReplaceableExpr("right") { + testReplaceable("right(value, 1) = 'a'", StringType, "b") + } + + testReplaceableExpr("nullif") { + testReplaceable("nullif(value, 1) = 2", IntegerType, 1) + } + + testReplaceableExpr("nvl") { + testReplaceable("nvl(value, 1) = 1", IntegerType, 2) + } + + testReplaceableExpr("nvl2") { + testReplaceable("nvl2(value, 1, 2) = 3", IntegerType, 2) + } + + testReplaceableExpr("to_date") { + testReplaceable("to_date(value) = '2001-01-01'", StringType, "2002-01-01") + } + + testReplaceableExpr("to_timestamp") { + testReplaceable( + "to_timestamp(value) = '2001-01-01'", + StringType, + "2002-01-01 00:12:00") + } + + + // Helper function to test with empty to null conf on and off. 
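+  // When DeltaSQLConf.CONVERT_EMPTY_TO_NULL_FOR_STRING_PARTITION_COL is enabled, an empty string
+  // written to a string partition column is stored as null, so a NOT NULL or CHECK constraint on
+  // that column rejects the row with an InvariantViolationException. When the conf is disabled,
+  // the body is expected to fail for a different reason (typically its intercept failing because
+  // no violation occurs), so the helper only asserts that some exception is thrown. A minimal
+  // sketch of how the helper is used (hypothetical table name):
+  //
+  //   testEmptyToNull("empty partition value is rejected") {
+  //     sql("CREATE TABLE t (c1 INT, c2 STRING NOT NULL) USING delta PARTITIONED BY (c2)")
+  //     intercept[InvariantViolationException] { sql("INSERT INTO t VALUES (1, '')") }
+  //   }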
+ private def testEmptyToNull(name: String)(f: => Any): Unit = { + // Suppress exceptions output for invariant violations + testQuietly(name) { + Seq(true, false).foreach { enabled => + withSQLConf( + DeltaSQLConf.CONVERT_EMPTY_TO_NULL_FOR_STRING_PARTITION_COL.key -> enabled.toString) { + if (enabled) { + f + } else { + intercept[Exception](f) + } + } + } + } + } + + testEmptyToNull("reject empty string for NOT NULL string partition column - create") { + val tblName = "empty_string_test" + withTable(tblName) { + sql( + s""" + |CREATE TABLE $tblName ( + | c1 INT, + | c2 STRING NOT NULL + |) USING delta + |PARTITIONED BY (c2) + |""".stripMargin) + val ex = intercept[InvariantViolationException] ( + sql( + s""" + |INSERT INTO $tblName values (1, '') + |""".stripMargin) + ) + assert(ex.getMessage.contains("violated")) + } + } + + testEmptyToNull("reject empty string for NOT NULL string partition column - multiple") { + val tblName = "empty_string_test" + withTable(tblName) { + sql( + s""" + |CREATE TABLE $tblName ( + | c1 INT, + | c2 STRING NOT NULL, + | c3 STRING + |) USING delta + |PARTITIONED BY (c2, c3) + |""".stripMargin) + val ex = intercept[InvariantViolationException] ( + sql( + s""" + |INSERT INTO $tblName values (1, '', 'a') + |""".stripMargin) + ) + assert(ex.getMessage.contains("violated")) + sql( + s""" + |INSERT INTO $tblName values (1, 'a', '') + |""".stripMargin) + checkAnswer( + sql(s"SELECT COUNT(*) from $tblName where c3 IS NULL"), + Row(1L) + ) + } + } + + testEmptyToNull("reject empty string for NOT NULL string partition column - multiple not null") { + val tblName = "empty_string_test" + withTable(tblName) { + sql( + s""" + |CREATE TABLE $tblName ( + | c1 INT, + | c2 STRING NOT NULL, + | c3 STRING NOT NULL + |) USING delta + |PARTITIONED BY (c2, c3) + |""".stripMargin) + val ex1 = intercept[InvariantViolationException] ( + sql( + s""" + |INSERT INTO $tblName values (1, '', 'a') + |""".stripMargin) + ) + assert(ex1.getMessage.contains("violated")) + val ex2 = intercept[InvariantViolationException] ( + sql( + s""" + |INSERT INTO $tblName values (1, 'a', '') + |""".stripMargin) + ) + assert(ex2.getMessage.contains("violated")) + val ex3 = intercept[InvariantViolationException] ( + sql( + s""" + |INSERT INTO $tblName values (1, '', '') + |""".stripMargin) + ) + assert(ex3.getMessage.contains("violated")) + } + } + + + testEmptyToNull("reject empty string in check constraint") { + val tblName = "empty_string_test" + withTable(tblName) { + sql( + s""" + |CREATE TABLE $tblName ( + | c1 INT, + | c2 STRING + |) USING delta + |PARTITIONED BY (c2); + |""".stripMargin) + sql( + s""" + |ALTER TABLE $tblName ADD CONSTRAINT test CHECK (c2 IS NOT NULL) + |""".stripMargin) + intercept[InvariantViolationException] ( + sql( + s""" + |INSERT INTO ${tblName} VALUES (1, "") + |""".stripMargin) + ) + } + } + + test("streaming with additional project") { + withSQLConf(DeltaSQLConf.CONVERT_EMPTY_TO_NULL_FOR_STRING_PARTITION_COL.key -> "true") { + val tblName = "test" + withTable(tblName) { + withTempDir { checkpointDir => + sql( + s""" + |CREATE TABLE $tblName ( + | c1 INT, + | c2 STRING + |) USING delta + |PARTITIONED BY (c2); + |""".stripMargin) + sql( + s""" + |ALTER TABLE $tblName ADD CONSTRAINT cons CHECK (c1 > 0) + |""".stripMargin) + val path = DeltaLog.forTable(spark, TableIdentifier(tblName)).dataPath.toString + val stream = MemoryStream[Int] + val q = stream.toDF() + .map(_ => Tuple2(1, "a")) + .toDF("c1", "c2") + .writeStream + .option("checkpointLocation", 
checkpointDir.getCanonicalPath) + .format("delta") + .start(path) + stream.addData(1) + q.processAllAvailable() + q.stop() + } + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaEnforcementSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaEnforcementSuite.scala new file mode 100644 index 00000000000..3751b13adda --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaEnforcementSuite.scala @@ -0,0 +1,904 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.schema + +import java.io.File + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.{DeltaLog, DeltaOptions} +import org.apache.spark.sql.delta.actions.SingleAction +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} + +import org.apache.spark.SparkConf +import org.apache.spark.sql._ +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.StreamingQueryException +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ + +sealed trait SaveOperation { + def apply(dfw: DataFrameWriter[_]): Unit +} + +case class SaveWithPath(path: String = null) extends SaveOperation { + override def apply(dfw: DataFrameWriter[_]): Unit = { + if (path == null) dfw.save() else dfw.save(path) + } +} + +case class SaveAsTable(tableName: String) extends SaveOperation { + override def apply(dfw: DataFrameWriter[_]): Unit = dfw.saveAsTable(tableName) +} + +sealed trait SchemaEnforcementSuiteBase extends QueryTest + with SharedSparkSession { + protected def enableAutoMigration(f: => Unit): Unit = { + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> "true") { + f + } + } + + protected def disableAutoMigration(f: => Unit): Unit = { + withSQLConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE.key -> "false") { + f + } + } +} + +sealed trait BatchWriterTest extends SchemaEnforcementSuiteBase with SharedSparkSession { + + def saveOperation: SaveOperation + + implicit class RichDataFrameWriter(dfw: DataFrameWriter[_]) { + def append(path: File): Unit = { + saveOperation(dfw.format("delta").mode("append").option("path", path.getAbsolutePath)) + } + + def overwrite(path: File): Unit = { + saveOperation(dfw.format("delta").mode("overwrite").option("path", path.getAbsolutePath)) + } + } + + def equivalenceTest(testName: String)(f: => Unit): Unit = { + test(s"batch: $testName") { + saveOperation match { + case _: SaveWithPath => f + case SaveAsTable(tbl) => withTable(tbl) { f } + } + } + } +} + +trait AppendSaveModeNullTests extends BatchWriterTest { + import testImplicits._ + + 
equivalenceTest("JSON ETL workflow, NullType being only data column") { + enableAutoMigration { + val row1 = """{"key":"abc","id":null}""" + withTempDir { dir => + val schema1 = new StructType().add("key", StringType).add("id", NullType) + val e = intercept[AnalysisException] { + spark.read.schema(schema1).json(Seq(row1).toDS()).write.partitionBy("key").append(dir) + } + assert(e.getMessage.contains("NullType have been dropped")) + } + } + } + + equivalenceTest("JSON ETL workflow, schema merging NullTypes") { + enableAutoMigration { + val row1 = """{"key":"abc","id":null,"extra":1}""" + val row2 = """{"key":"def","id":2,"extra":null}""" + val row3 = """{"key":"ghi","id":null,"extra":3}""" + withTempDir { dir => + val schema1 = new StructType() + .add("key", StringType).add("id", NullType).add("extra", IntegerType) + val schema2 = new StructType() + .add("key", StringType).add("id", IntegerType).add("extra", NullType) + spark.read.schema(schema1).json(Seq(row1).toDS()).write.append(dir) + + // NullType will be removed during the read + checkAnswer( + spark.read.format("delta").load(dir.getAbsolutePath), + Row("abc", 1) :: Nil + ) + + spark.read.schema(schema2).json(Seq(row2).toDS()).write.append(dir) + spark.read.schema(schema1).json(Seq(row3).toDS()).write.append(dir) + + checkAnswer( + spark.read.format("delta").load(dir.getAbsolutePath), + Row("abc", null, 1) :: Row("def", 2, null) :: Row("ghi", null, 3) :: Nil + ) + } + } + } + + equivalenceTest("JSON ETL workflow, schema merging NullTypes - nested struct") { + enableAutoMigration { + val row1 = """{"key":"abc","top":{"id":null,"extra":1}}""" + val row2 = """{"key":"def","top":{"id":2,"extra":null}}""" + val row3 = """{"key":"ghi","top":{"id":null,"extra":3}}""" + withTempDir { dir => + val schema1 = new StructType().add("key", StringType) + .add("top", new StructType().add("id", NullType).add("extra", IntegerType)) + val schema2 = new StructType().add("key", StringType) + .add("top", new StructType().add("id", IntegerType).add("extra", NullType)) + val mergedSchema = new StructType().add("key", StringType) + .add("top", new StructType().add("id", IntegerType).add("extra", IntegerType)) + spark.read.schema(schema1).json(Seq(row1).toDS()).write.append(dir) + // NullType will be removed during the read + checkAnswer( + spark.read.format("delta").load(dir.getAbsolutePath), + Row("abc", Row(1)) :: Nil + ) + + spark.read.schema(schema2).json(Seq(row2).toDS()).write.append(dir) + assert(spark.read.format("delta").load(dir.getAbsolutePath).schema === mergedSchema) + spark.read.schema(schema1).json(Seq(row3).toDS()).write.append(dir) + assert(spark.read.format("delta").load(dir.getAbsolutePath).schema === mergedSchema) + + checkAnswer( + spark.read.format("delta").load(dir.getAbsolutePath), + Row("abc", Row(null, 1)) :: Row("def", Row(2, null)) :: Row("ghi", Row(null, 3)) :: Nil + ) + } + } + } + + equivalenceTest("JSON ETL workflow, schema merging NullTypes - throw error on complex types") { + enableAutoMigration { + val row1 = """{"key":"abc","top":[]}""" + val row2 = """{"key":"abc","top":[{"id":null}]}""" + withTempDir { dir => + val schema1 = new StructType().add("key", StringType).add("top", ArrayType(NullType)) + val schema2 = new StructType().add("key", StringType) + .add("top", ArrayType(new StructType().add("id", NullType))) + val e1 = intercept[AnalysisException] { + spark.read.schema(schema1).json(Seq(row1).toDS()).write.append(dir) + } + assert(e1.getMessage.contains("NullType")) + val e2 = intercept[AnalysisException] { + 
spark.read.schema(schema2).json(Seq(row2).toDS()).write.append(dir) + } + assert(e2.getMessage.contains("NullType")) + } + } + } +} + +trait AppendSaveModeTests extends BatchWriterTest { + import testImplicits._ + + equivalenceTest("reject schema changes by default") { + disableAutoMigration { + withTempDir { dir => + spark.range(10).write.append(dir) + val e = intercept[AnalysisException] { + spark.range(10).withColumn("part", 'id + 1).write.append(dir) + } + assert(e.getMessage.contains(DeltaOptions.MERGE_SCHEMA_OPTION)) + } + } + } + + equivalenceTest("allow schema changes when autoMigrate is enabled") { + enableAutoMigration { + withTempDir { dir => + spark.range(10).write.append(dir) + spark.range(10).withColumn("part", 'id + 1).write.append(dir) + assert(spark.read.format("delta").load(dir.getAbsolutePath).schema.length == 2) + } + } + } + + equivalenceTest("disallow schema changes when autoMigrate enabled but writer config disabled") { + enableAutoMigration { + withTempDir { dir => + spark.range(10).write.append(dir) + val e = intercept[AnalysisException] { + spark.range(10).withColumn("part", 'id + 1).write + .option(DeltaOptions.MERGE_SCHEMA_OPTION, "false").append(dir) + } + assert(e.getMessage.contains(DeltaOptions.MERGE_SCHEMA_OPTION)) + } + } + } + + equivalenceTest("allow schema change with option") { + disableAutoMigration { + withTempDir { dir => + spark.range(10).write.append(dir) + spark.range(10).withColumn("part", 'id + 1).write + .option(DeltaOptions.MERGE_SCHEMA_OPTION, "true").append(dir) + assert(spark.read.format("delta").load(dir.getAbsolutePath).schema.length == 2) + } + } + } + + equivalenceTest("JSON ETL workflow, NullType partition column should fail") { + enableAutoMigration { + val row1 = """{"key":"abc","id":null}""" + withTempDir { dir => + val schema1 = new StructType().add("key", StringType).add("id", NullType) + intercept[AnalysisException] { + spark.read.schema(schema1).json(Seq(row1).toDS()).write.partitionBy("id").append(dir) + } + intercept[AnalysisException] { + // check case sensitivity with regards to column dropping + spark.read.schema(schema1).json(Seq(row1).toDS()).write.partitionBy("iD").append(dir) + } + } + } + } + + equivalenceTest("reject columns that only differ by case - append") { + withTempDir { dir => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + intercept[AnalysisException] { + spark.range(10).withColumn("ID", 'id + 1).write.append(dir) + } + + intercept[AnalysisException] { + spark.range(10).withColumn("ID", 'id + 1).write + .option(DeltaOptions.MERGE_SCHEMA_OPTION, "true").append(dir) + } + + intercept[AnalysisException] { + spark.range(10).withColumn("a", 'id + 1).write + .partitionBy("a", "A") + .option(DeltaOptions.MERGE_SCHEMA_OPTION, "true").append(dir) + } + } + } + } + + equivalenceTest("ensure schema mismatch error message contains table ID") { + disableAutoMigration { + withTempDir { dir => + spark.range(10).write.append(dir) + val e = intercept[AnalysisException] { + spark.range(10).withColumn("part", 'id + 1).write.append(dir) + } + assert(e.getMessage.contains("schema mismatch detected")) + assert(e.getMessage.contains(s"Table ID: ${DeltaLog.forTable(spark, dir).tableId}")) + } + } + } +} + +trait AppendOutputModeTests extends SchemaEnforcementSuiteBase with SharedSparkSession + with SQLTestUtils { + import testImplicits._ + + testQuietly("reject schema changes by default - streaming") { + withTempDir { dir => + spark.range(10).write.format("delta").save(dir.getAbsolutePath) + + val memStream = 
MemoryStream[Long] + val stream = memStream.toDS().toDF("value1234") // different column name + .writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .format("delta") + .start(dir.getAbsolutePath) + try { + disableAutoMigration { + val e = intercept[StreamingQueryException] { + memStream.addData(1L) + stream.processAllAvailable() + } + assert(e.cause.isInstanceOf[AnalysisException]) + assert(e.cause.getMessage.contains(DeltaOptions.MERGE_SCHEMA_OPTION)) + } + } finally { + stream.stop() + } + } + } + + testQuietly("reject schema changes when autoMigrate enabled but writer config disabled") { + withTempDir { dir => + spark.range(10).write.format("delta").save(dir.getAbsolutePath) + + val memStream = MemoryStream[Long] + val stream = memStream.toDS().toDF("value1234") // different column name + .writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .format("delta") + .option(DeltaOptions.MERGE_SCHEMA_OPTION, "false") + .start(dir.getAbsolutePath) + try { + enableAutoMigration { + val e = intercept[StreamingQueryException] { + memStream.addData(1L) + stream.processAllAvailable() + } + assert(e.cause.isInstanceOf[AnalysisException]) + assert(e.cause.getMessage.contains(DeltaOptions.MERGE_SCHEMA_OPTION)) + } + } finally { + stream.stop() + } + } + } + + test("allow schema changes when autoMigrate is enabled - streaming") { + withTempDir { dir => + spark.range(10).write.format("delta").save(dir.getAbsolutePath) + + enableAutoMigration { + val memStream = MemoryStream[Long] + val stream = memStream.toDS().toDF("value1234") // different column name + .writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .format("delta") + .start(dir.getAbsolutePath) + try { + memStream.addData(1L) + stream.processAllAvailable() + + assert(spark.read.format("delta").load(dir.getAbsolutePath).schema.length == 2) + } finally { + stream.stop() + } + } + } + } + + test("allow schema change with option - streaming") { + withTempDir { dir => + spark.range(10).write.format("delta").save(dir.getAbsolutePath) + + val memStream = MemoryStream[Long] + val stream = memStream.toDS().toDF("value1234") // different column name + .writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .option(DeltaOptions.MERGE_SCHEMA_OPTION, "true") + .format("delta") + .start(dir.getAbsolutePath) + try { + disableAutoMigration { + memStream.addData(1L) + stream.processAllAvailable() + + assert(spark.read.format("delta").load(dir.getAbsolutePath).schema.length == 2) + } + } finally { + stream.stop() + } + } + } + + testQuietly("JSON ETL workflow, reject NullTypes") { + enableAutoMigration { + val row1 = """{"key":"abc","id":null}""" + withTempDir { dir => + val schema = new StructType().add("key", StringType).add("id", NullType) + + val memStream = MemoryStream[String] + val stream = memStream.toDS().select(from_json('value, schema).as("value")) + .select($"value.*") + .writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .format("delta") + .start(dir.getAbsolutePath) + + try { + val e = intercept[StreamingQueryException] { + memStream.addData(row1) + stream.processAllAvailable() + } + assert(e.cause.isInstanceOf[AnalysisException]) + assert(e.cause.getMessage.contains("NullType")) + } finally { + stream.stop() + } + } + } + } + + testQuietly("JSON ETL workflow, reject NullTypes on nested column") { + enableAutoMigration { + val row1 = 
"""{"key":"abc","id":{"a":null}}""" + withTempDir { dir => + val schema = new StructType().add("key", StringType) + .add("id", new StructType().add("a", NullType)) + + val memStream = MemoryStream[String] + val stream = memStream.toDS().select(from_json('value, schema).as("value")) + .select($"value.*") + .writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .format("delta") + .start(dir.getAbsolutePath) + + try { + val e = intercept[StreamingQueryException] { + memStream.addData(row1) + stream.processAllAvailable() + } + assert(e.cause.isInstanceOf[AnalysisException]) + assert(e.cause.getMessage.contains("NullType")) + } finally { + stream.stop() + } + } + } + } +} + +trait OverwriteSaveModeTests extends BatchWriterTest { + import testImplicits._ + + equivalenceTest("reject schema overwrites by default") { + disableAutoMigration { + withTempDir { dir => + spark.range(10).write.overwrite(dir) + val e = intercept[AnalysisException] { + spark.range(10).withColumn("part", 'id + 1).write.overwrite(dir) + } + assert(e.getMessage.contains(DeltaOptions.OVERWRITE_SCHEMA_OPTION)) + } + } + } + + equivalenceTest("can overwrite schema when using overwrite mode - option") { + disableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").write.overwrite(dir) + spark.range(5).toDF("value").write.option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .overwrite(dir) + + val df = spark.read.format("delta").load(dir.getAbsolutePath) + assert(df.schema.fieldNames === Array("value")) + } + } + } + + equivalenceTest("when autoMerge sqlConf is enabled, we merge schemas") { + enableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").write.overwrite(dir) + spark.range(5).toDF("value").write.overwrite(dir) + + val df = spark.read.format("delta").load(dir.getAbsolutePath) + assert(df.schema.fieldNames === Array("id", "value")) + } + } + } + + equivalenceTest("reject migration when autoMerge sqlConf is enabled and writer config disabled") { + enableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").write.overwrite(dir) + intercept[AnalysisException] { + spark.range(5).toDF("value").write.option(DeltaOptions.MERGE_SCHEMA_OPTION, "false") + .overwrite(dir) + } + + val df = spark.read.format("delta").load(dir.getAbsolutePath) + assert(df.schema.fieldNames === Array("id")) + } + } + } + + equivalenceTest("schema merging with replaceWhere - sqlConf") { + enableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").withColumn("part", 'id % 2).write + .partitionBy("part") + .overwrite(dir) + Seq((1L, 0L), (2L, 0L)).toDF("value", "part").write + .option(DeltaOptions.REPLACE_WHERE_OPTION, "part = 0") + .overwrite(dir) + + val df = spark.read.format("delta").load(dir.getAbsolutePath) + assert(df.schema.fieldNames === Array("id", "part", "value")) + } + } + } + + equivalenceTest("schema merging with replaceWhere - option") { + disableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").withColumn("part", 'id % 2).write + .partitionBy("part") + .overwrite(dir) + Seq((1L, 0L), (2L, 0L)).toDF("value", "part").write + .option(DeltaOptions.REPLACE_WHERE_OPTION, "part = 0") + .option(DeltaOptions.MERGE_SCHEMA_OPTION, "true") + .overwrite(dir) + + val df = spark.read.format("delta").load(dir.getAbsolutePath) + assert(df.schema.fieldNames === Array("id", "part", "value")) + } + } + } + + equivalenceTest("schema merging with replaceWhere - option case insensitive") { + disableAutoMigration { + withTempDir { dir => + 
spark.range(5).toDF("id").withColumn("part", 'id % 2).write + .partitionBy("part") + .overwrite(dir) + Seq((1L, 0L), (2L, 0L)).toDF("value", "part").write + .option("RePlAcEwHeRe", "part = 0") + .option("mErGeScHeMa", "true") + .overwrite(dir) + + val df = spark.read.format("delta").load(dir.getAbsolutePath) + assert(df.schema.fieldNames === Array("id", "part", "value")) + } + } + } + + equivalenceTest("reject schema merging with replaceWhere - overwrite option") { + disableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").withColumn("part", 'id % 2).write + .partitionBy("part") + .overwrite(dir) + val e = intercept[AnalysisException] { + Seq((1L, 0L), (2L, 0L)).toDF("value", "part").write + .option(DeltaOptions.REPLACE_WHERE_OPTION, "part = 0") + .option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .overwrite(dir) + } + assert(e.getMessage.contains(DeltaOptions.MERGE_SCHEMA_OPTION)) + } + } + } + + equivalenceTest("reject schema merging with replaceWhere - no option") { + disableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").withColumn("part", 'id % 2).write + .partitionBy("part") + .overwrite(dir) + val e = intercept[AnalysisException] { + Seq((1L, 0L), (2L, 0L)).toDF("value", "part").write + .partitionBy("part") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "part = 0") + .overwrite(dir) + } + assert(e.getMessage.contains(DeltaOptions.MERGE_SCHEMA_OPTION)) + } + } + } + + equivalenceTest("reject schema merging with replaceWhere - option set to false, config true") { + enableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").withColumn("part", 'id % 2).write + .partitionBy("part") + .overwrite(dir) + val e = intercept[AnalysisException] { + Seq((1L, 0L), (2L, 0L)).toDF("value", "part").write + .partitionBy("part") + .option(DeltaOptions.REPLACE_WHERE_OPTION, "part = 0") + .option(DeltaOptions.MERGE_SCHEMA_OPTION, "false") + .overwrite(dir) + } + assert(e.getMessage.contains(DeltaOptions.MERGE_SCHEMA_OPTION)) + } + } + } + + equivalenceTest("reject change partitioning with overwrite - sqlConf") { + enableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").write + .overwrite(dir) + val e = intercept[AnalysisException] { + spark.range(5).toDF("id").withColumn("part", 'id % 2).write + .partitionBy("part") + .overwrite(dir) + } + assert(e.getMessage.contains(DeltaOptions.OVERWRITE_SCHEMA_OPTION)) + + val deltaLog = DeltaLog.forTable(spark, dir) + assert(deltaLog.snapshot.metadata.partitionColumns === Nil) + assert(deltaLog.snapshot.metadata.schema.fieldNames === Array("id")) + } + } + } + + equivalenceTest("can change partitioning with overwrite - option") { + disableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").write + .overwrite(dir) + spark.range(5).toDF("id").withColumn("part", 'id % 2).write + .partitionBy("part") + .option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .overwrite(dir) + + val deltaLog = DeltaLog.forTable(spark, dir) + assert(deltaLog.snapshot.metadata.partitionColumns === Seq("part")) + assert(deltaLog.snapshot.metadata.schema.fieldNames === Array("id", "part")) + } + } + } + + equivalenceTest("can't change partitioning with overwrite and replaceWhere - option") { + disableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").withColumn("part", 'id % 2).write + .partitionBy("part") + .overwrite(dir) + + intercept[AnalysisException] { + spark.range(5).toDF("id").withColumn("part", lit(0L)).withColumn("test", 'id + 1).write + .partitionBy("part", "test") + 
.option(DeltaOptions.REPLACE_WHERE_OPTION, "part = 0") + .option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .overwrite(dir) + } + } + } + } + + equivalenceTest("can drop columns with overwriteSchema") { + disableAutoMigration { + withTempDir { dir => + spark.range(5).toDF("id").withColumn("part", 'id % 2).write + .overwrite(dir) + spark.range(5).toDF("id").write + .option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .overwrite(dir) + + val deltaLog = DeltaLog.forTable(spark, dir) + assert(deltaLog.snapshot.metadata.partitionColumns === Nil) + assert(deltaLog.snapshot.metadata.schema.fieldNames === Array("id")) + } + } + } + + equivalenceTest("can change column data type with overwriteSchema") { + disableAutoMigration { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + spark.range(5).toDF("id").write + .overwrite(dir) + assert(deltaLog.snapshot.metadata.schema.head === StructField("id", LongType)) + spark.range(5).toDF("id").selectExpr("cast(id as string) as id").write + .option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .overwrite(dir) + assert(deltaLog.snapshot.metadata.schema.head === StructField("id", StringType)) + } + } + } + + equivalenceTest("reject columns that only differ by case - overwrite") { + withTempDir { dir => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + intercept[AnalysisException] { + spark.range(10).withColumn("ID", 'id + 1).write.overwrite(dir) + } + + intercept[AnalysisException] { + spark.range(10).withColumn("ID", 'id + 1).write + .option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .overwrite(dir) + } + + intercept[AnalysisException] { + spark.range(10).withColumn("a", 'id + 1).write + .partitionBy("a", "A") + .option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .overwrite(dir) + } + } + } + } +} + +trait CompleteOutputModeTests extends SchemaEnforcementSuiteBase with SharedSparkSession + with SQLTestUtils { + import testImplicits._ + + testQuietly("reject complete mode with new schema by default") { + disableAutoMigration { + withTempDir { dir => + val memStream = MemoryStream[Long] + val query = memStream.toDS().toDF("id") + .withColumn("part", 'id % 3) + .groupBy("part") + .count() + + val stream1 = query.writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .outputMode("complete") + .format("delta") + .start(dir.getAbsolutePath) + try { + memStream.addData(1L) + stream1.processAllAvailable() + } finally { + stream1.stop() + } + + assert(spark.read.format("delta").load(dir.getAbsolutePath).schema.length == 2) + + val stream2 = query.withColumn("test", lit("abc")).writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .outputMode("complete") + .format("delta") + .start(dir.getAbsolutePath) + try { + val e = intercept[StreamingQueryException] { + memStream.addData(2L) + stream2.processAllAvailable() + } + assert(e.cause.isInstanceOf[AnalysisException]) + assert(e.cause.getMessage.contains(DeltaOptions.OVERWRITE_SCHEMA_OPTION)) + + } finally { + stream2.stop() + } + } + } + } + + test("complete mode can overwrite schema with option") { + disableAutoMigration { + withTempDir { dir => + val memStream = MemoryStream[Long] + val query = memStream.toDS().toDF("id") + .withColumn("part", 'id % 3) + .groupBy("part") + .count() + + val stream1 = query.writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .outputMode("complete") + .format("delta") + 
.start(dir.getAbsolutePath) + try { + memStream.addData(1L) + stream1.processAllAvailable() + } finally { + stream1.stop() + } + + assert(spark.read.format("delta").load(dir.getAbsolutePath).schema.length == 2) + + val stream2 = query.withColumn("test", lit("abc")).writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .option(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "true") + .outputMode("complete") + .format("delta") + .start(dir.getAbsolutePath) + try { + memStream.addData(2L) + stream2.processAllAvailable() + + memStream.addData(3L) + stream2.processAllAvailable() + } finally { + stream2.stop() + } + + val df = spark.read.format("delta").load(dir.getAbsolutePath) + assert(df.schema.length == 3) + + val deltaLog = DeltaLog.forTable(spark, dir) + val hadoopConf = deltaLog.newDeltaHadoopConf() + val lastCommitFile = deltaLog.store + .listFrom(FileNames.listingPrefix(deltaLog.logPath, 0L), hadoopConf) + .map(_.getPath).filter(FileNames.isDeltaFile).toArray.last + val lastCommitContainsMetadata = deltaLog.store.read(lastCommitFile, hadoopConf) + .exists(JsonUtils.mapper.readValue[SingleAction](_).metaData != null) + + assert(!lastCommitContainsMetadata, + "Metadata shouldn't be updated as long as schema doesn't change") + + checkAnswer( + df, + Row(0L, 1L, "abc") :: Row(1L, 1L, "abc") :: Row(2L, 1L, "abc") :: Nil) + } + } + } + + test("complete mode behavior with autoMigrate enabled is to migrate schema") { + enableAutoMigration { + withTempDir { dir => + val memStream = MemoryStream[Long] + val query = memStream.toDS().toDF("id") + .withColumn("part", 'id % 3) + .groupBy("part") + .count() + + val stream1 = query.writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .outputMode("complete") + .format("delta") + .start(dir.getAbsolutePath) + try { + memStream.addData(1L) + stream1.processAllAvailable() + } finally { + stream1.stop() + } + + assert(spark.read.format("delta").load(dir.getAbsolutePath).schema.length == 2) + + val stream2 = query.withColumn("test", lit("abc")).writeStream + .option("checkpointLocation", new File(dir, "_checkpoint").getAbsolutePath) + .outputMode("complete") + .format("delta") + .start(dir.getAbsolutePath) + try { + memStream.addData(2L) + stream2.processAllAvailable() + + memStream.addData(3L) + stream2.processAllAvailable() + } finally { + stream2.stop() + } + + val df = spark.read.format("delta").load(dir.getAbsolutePath) + assert(df.schema.length == 3) + + val deltaLog = DeltaLog.forTable(spark, dir) + val hadoopConf = deltaLog.newDeltaHadoopConf() + val lastCommitFile = deltaLog.store + .listFrom(FileNames.listingPrefix(deltaLog.logPath, 0L), hadoopConf) + .map(_.getPath).filter(FileNames.isDeltaFile).toArray.last + val lastCommitContainsMetadata = deltaLog.store.read(lastCommitFile, hadoopConf) + .exists(JsonUtils.mapper.readValue[SingleAction](_).metaData != null) + + assert(!lastCommitContainsMetadata, + "Metadata shouldn't be updated as long as schema doesn't change") + + checkAnswer( + df, + Row(0L, 1L, "abc") :: Row(1L, 1L, "abc") :: Row(2L, 1L, "abc") :: Nil) + } + } + } +} + +class SchemaEnforcementWithPathSuite + extends AppendSaveModeTests + with AppendSaveModeNullTests + with OverwriteSaveModeTests + with DeltaSQLCommandTest { + override val saveOperation = SaveWithPath() +} + +class SchemaEnforcementWithTableSuite + extends AppendSaveModeTests + with OverwriteSaveModeTests + with DeltaSQLCommandTest { + + override val saveOperation = SaveAsTable("delta_schema_test") +} + 
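+// The concrete suites above and below only bind the shared test traits to a concrete write path:
+// the batch suites choose a SaveOperation (save to a path or saveAsTable), while the streaming
+// suite mixes in the output-mode traits. A further combination could be added the same way; for
+// example, a hypothetical suite running the NullType tests against saveAsTable might look like:
+//
+//   class SchemaEnforcementNullWithTableSuite
+//     extends AppendSaveModeNullTests
+//     with DeltaSQLCommandTest {
+//     override val saveOperation = SaveAsTable("delta_schema_null_test")
+//   }
+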
+class SchemaEnforcementStreamingSuite + extends AppendOutputModeTests + with CompleteOutputModeTests + with DeltaSQLCommandTest { +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaUtilsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaUtilsSuite.scala new file mode 100644 index 00000000000..8baf0820f68 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaUtilsSuite.scala @@ -0,0 +1,2178 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.schema + +// scalastyle:off import.ordering.noEmptyLine +import java.util.Locale +import java.util.regex.Pattern + +import org.apache.spark.sql.delta.commands.cdc.CDCReader +import org.apache.spark.sql.delta.schema.SchemaMergingUtils._ +import org.apache.spark.sql.delta.sources.DeltaSourceUtils.GENERATION_EXPRESSION_METADATA_KEY +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import io.delta.tables.DeltaTable +import org.scalatest.GivenWhenThen + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ + +class SchemaUtilsSuite extends QueryTest + with SharedSparkSession + with GivenWhenThen + with SQLTestUtils + with DeltaSQLCommandTest { + import SchemaUtils._ + import testImplicits._ + + private def expectFailure(shouldContain: String*)(f: => Unit): Unit = { + val e = intercept[AnalysisException] { + f + } + val msg = e.getMessage.toLowerCase(Locale.ROOT) + assert(shouldContain.map(_.toLowerCase(Locale.ROOT)).forall(msg.contains), + s"Error message '$msg' didn't contain: $shouldContain") + } + + private def expectFailurePattern(shouldContainPatterns: String*)(f: => Unit): Unit = { + val e = intercept[AnalysisException] { + f + } + val patterns = + shouldContainPatterns.map(regex => Pattern.compile(regex, Pattern.CASE_INSENSITIVE)) + assert(patterns.forall(_.matcher(e.getMessage).find()), + s"Error message '${e.getMessage}' didn't contain the patterns: $shouldContainPatterns") + } + + ///////////////////////////// + // Duplicate Column Checks + ///////////////////////////// + + test("duplicate column name in top level") { + val schema = new StructType() + .add("dupColName", IntegerType) + .add("b", IntegerType) + .add("dupColName", StringType) + expectFailure("dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in top level - case sensitivity") { + val schema = new StructType() + .add("dupColName", IntegerType) + .add("b", IntegerType) + .add("dupCOLNAME", StringType) + expectFailure("dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name for nested 
column + non-nested column") { + val schema = new StructType() + .add("dupColName", new StructType() + .add("a", IntegerType) + .add("b", IntegerType)) + .add("dupColName", IntegerType) + expectFailure("dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name for nested column + non-nested column - case sensitivity") { + val schema = new StructType() + .add("dupColName", new StructType() + .add("a", IntegerType) + .add("b", IntegerType)) + .add("dupCOLNAME", IntegerType) + expectFailure("dupCOLNAME") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in nested level") { + val schema = new StructType() + .add("top", new StructType() + .add("dupColName", IntegerType) + .add("b", IntegerType) + .add("dupColName", StringType) + ) + expectFailure("top.dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in nested level - case sensitivity") { + val schema = new StructType() + .add("top", new StructType() + .add("dupColName", IntegerType) + .add("b", IntegerType) + .add("dupCOLNAME", StringType) + ) + expectFailure("top.dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in double nested level") { + val schema = new StructType() + .add("top", new StructType() + .add("b", new StructType() + .add("dupColName", StringType) + .add("c", IntegerType) + .add("dupColName", StringType)) + .add("d", IntegerType) + ) + expectFailure("top.b.dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in double nested array") { + val schema = new StructType() + .add("top", new StructType() + .add("b", ArrayType(ArrayType(new StructType() + .add("dupColName", StringType) + .add("c", IntegerType) + .add("dupColName", StringType)))) + .add("d", IntegerType) + ) + expectFailure("top.b.element.element.dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in double nested map") { + val keyType = new StructType() + .add("dupColName", IntegerType) + .add("d", StringType) + expectFailure("top.b.key.dupColName") { + val schema = new StructType() + .add("top", new StructType() + .add("b", MapType(keyType.add("dupColName", StringType), keyType)) + ) + checkColumnNameDuplication(schema, "") + } + expectFailure("top.b.value.dupColName") { + val schema = new StructType() + .add("top", new StructType() + .add("b", MapType(keyType, keyType.add("dupColName", StringType))) + ) + checkColumnNameDuplication(schema, "") + } + // This is okay + val schema = new StructType() + .add("top", new StructType() + .add("b", MapType(keyType, keyType)) + ) + checkColumnNameDuplication(schema, "") + } + + test("duplicate column name in nested array") { + val schema = new StructType() + .add("top", ArrayType(new StructType() + .add("dupColName", IntegerType) + .add("b", IntegerType) + .add("dupColName", StringType)) + ) + expectFailure("top.element.dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in nested array - case sensitivity") { + val schema = new StructType() + .add("top", ArrayType(new StructType() + .add("dupColName", IntegerType) + .add("b", IntegerType) + .add("dupCOLNAME", StringType)) + ) + expectFailure("top.element.dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("non duplicate column because of back tick") { + val schema = new StructType() + .add("top", new StructType() + .add("a", IntegerType) + .add("b", IntegerType)) + .add("top.a", IntegerType) + 
    checkColumnNameDuplication(schema, "")
+  }
+
+  test("non duplicate column because of back tick - nested") {
+    val schema = new StructType()
+      .add("first", new StructType()
+        .add("top", new StructType()
+          .add("a", IntegerType)
+          .add("b", IntegerType))
+        .add("top.a", IntegerType))
+    checkColumnNameDuplication(schema, "")
+  }
+
+  test("duplicate column with back ticks - nested") {
+    val schema = new StructType()
+      .add("first", new StructType()
+        .add("top.a", StringType)
+        .add("b", IntegerType)
+        .add("top.a", IntegerType))
+    expectFailure("first.`top.a`") { checkColumnNameDuplication(schema, "") }
+  }
+
+  test("duplicate column with back ticks - nested and case sensitivity") {
+    val schema = new StructType()
+      .add("first", new StructType()
+        .add("TOP.a", StringType)
+        .add("b", IntegerType)
+        .add("top.a", IntegerType))
+    expectFailure("first.`top.a`") { checkColumnNameDuplication(schema, "") }
+  }
+
+  /////////////////////////////
+  // Read Compatibility Checks
+  /////////////////////////////
+
+  /**
+   * Tests change of datatype within a schema.
+   *  - the make() function is a "factory" function to create schemas that vary only by the
+   *    given datatype in a specific position in the schema.
+   *  - other tests will call this method with different make() functions to test datatype
+   *    incompatibility in all the different places within a schema (in a top-level struct,
+   *    in a nested struct, as the element type of an array, etc.)
+   */
+  def testDatatypeChange(scenario: String)(make: DataType => StructType): Unit = {
+    val schemas = Map(
+      ("int", make(IntegerType)),
+      ("string", make(StringType)),
+      ("struct", make(new StructType().add("a", StringType))),
+      ("array", make(ArrayType(IntegerType))),
+      ("map", make(MapType(StringType, FloatType)))
+    )
+    test(s"change of datatype should fail read compatibility - $scenario") {
+      for (a <- schemas.keys; b <- schemas.keys if a != b) {
+        assert(!isReadCompatible(schemas(a), schemas(b)),
+          s"isReadCompatible should have failed for: ${schemas(a)}, ${schemas(b)}")
+      }
+    }
+  }
+
+  /**
+   * Tests change of nullability within a schema (making a field nullable is not allowed,
+   * but making a nullable field non-nullable is ok).
+   *  - the make() function is a "factory" function to create schemas that vary only by the
+   *    nullability (of a field, array element, or map values) in a specific position in the schema.
+   *  - other tests will call this method with different make() functions to test nullability
+   *    incompatibility in all the different places within a schema (in a top-level struct,
+   *    in a nested struct, for the element type of an array, etc.)
+   */
+  def testNullability(scenario: String)(make: Boolean => StructType): Unit = {
+    val nullable = make(true)
+    val nonNullable = make(false)
+    Seq(true, false).foreach { forbidTightenNullability =>
+      val (blockedCase, blockedExisting, blockedRead) = if (forbidTightenNullability) {
+        (s"tighten nullability should fail read compatibility " +
+          s"(forbidTightenNullability=$forbidTightenNullability) - $scenario",
+          nullable, nonNullable)
+      } else {
+        (s"relax nullability should fail read compatibility " +
+          s"(forbidTightenNullability=$forbidTightenNullability) - $scenario",
+          nonNullable, nullable)
+      }
+      val (allowedCase, allowedExisting, allowedRead) = if (forbidTightenNullability) {
+        (s"relax nullability should not fail read compatibility " +
+          s"(forbidTightenNullability=$forbidTightenNullability) - $scenario",
+          nonNullable, nullable)
+      } else {
+        (s"tighten nullability should not fail read compatibility " +
+          s"(forbidTightenNullability=$forbidTightenNullability) - $scenario",
+          nullable, nonNullable)
+      }
+      test(blockedCase) {
+        assert(!isReadCompatible(blockedExisting, blockedRead, forbidTightenNullability))
+      }
+      test(allowedCase) {
+        assert(isReadCompatible(allowedExisting, allowedRead, forbidTightenNullability))
+      }
+    }
+  }
+
+  /**
+   * Tests for fields of a struct: adding/dropping fields, changing nullability, case variation
+   *  - The make() function is a "factory" method to produce schemas. It takes a function that
+   *    mutates a struct (for example, by adding a column, or it could just make no change at all).
+   *  - The following tests will call this method with different factory methods, to mutate the
+   *    various places where a struct can appear (at the top-level, nested in another struct,
+   *    within an array, etc.)
+   *  - This lets us share a single piece of test code to check compatibility of a struct field
+   *    in all the different places where it may occur.
+ */ + def testColumnVariations(scenario: String) + (make: (StructType => StructType) => StructType): Unit = { + + // generate one schema without extra column, one with, one nullable, and one with mixed case + val withoutExtra = make(struct => struct) // produce struct WITHOUT extra field + val withExtraNullable = make(struct => struct.add("extra", StringType)) + val withExtraMixedCase = make(struct => struct.add("eXtRa", StringType)) + val withExtraNonNullable = make(struct => struct.add("extra", StringType, nullable = false)) + + test(s"dropping a field should fail read compatibility - $scenario") { + assert(!isReadCompatible(withExtraNullable, withoutExtra)) + } + test(s"adding a nullable field should not fail read compatibility - $scenario") { + assert(isReadCompatible(withoutExtra, withExtraNullable)) + } + test(s"adding a non-nullable field should not fail read compatibility - $scenario") { + assert(isReadCompatible(withoutExtra, withExtraNonNullable)) + } + test(s"case variation of field name should fail read compatibility - $scenario") { + assert(!isReadCompatible(withExtraNullable, withExtraMixedCase)) + } + testNullability(scenario)(b => make(struct => struct.add("extra", StringType, nullable = b))) + testDatatypeChange(scenario)(datatype => make(struct => struct.add("extra", datatype))) + } + + // -------------------------------------------------------------------- + // tests for all kinds of places where a field can appear in a struct + // -------------------------------------------------------------------- + + testColumnVariations("top level")( + f => f(new StructType().add("a", IntegerType))) + + testColumnVariations("nested struct")( + f => new StructType() + .add("a", f(new StructType().add("b", IntegerType)))) + + testColumnVariations("nested in array")( + f => new StructType() + .add("array", ArrayType( + f(new StructType().add("b", IntegerType))))) + + testColumnVariations("nested in map key")( + f => new StructType() + .add("map", MapType( + f(new StructType().add("b", IntegerType)), + StringType))) + + testColumnVariations("nested in map value")( + f => new StructType() + .add("map", MapType( + StringType, + f(new StructType().add("b", IntegerType))))) + + // -------------------------------------------------------------------- + // tests for data type change in places other than struct + // -------------------------------------------------------------------- + + testDatatypeChange("array element")( + datatype => new StructType() + .add("array", ArrayType(datatype))) + + testDatatypeChange("map key")( + datatype => new StructType() + .add("map", MapType(datatype, StringType))) + + testDatatypeChange("map value")( + datatype => new StructType() + .add("map", MapType(StringType, datatype))) + + // -------------------------------------------------------------------- + // tests for nullability change in places other than struct + // -------------------------------------------------------------------- + + testNullability("array contains null")( + b => new StructType() + .add("array", ArrayType(StringType, containsNull = b))) + + testNullability("map contains null values")( + b => new StructType() + .add("map", MapType(IntegerType, StringType, valueContainsNull = b))) + + testNullability("map nested in array")( + b => new StructType() + .add("map", ArrayType( + MapType(IntegerType, StringType, valueContainsNull = b)))) + + testNullability("array nested in map")( + b => new StructType() + .add("map", MapType( + IntegerType, + ArrayType(StringType, containsNull = b)))) + + 
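To make the factory pattern above concrete, here is a minimal sketch; the `nestedInArray` name and the inline comments are illustrative only, but the closure shape matches what `testColumnVariations("nested in array")` passes in above. A single `make` closure pins the variation to one position in the schema, so the shared assertions in `testDatatypeChange`, `testNullability` and `testColumnVariations` can exercise that position without duplicating test code:

    import org.apache.spark.sql.types._

    // Factory for the "nested in array" position: whatever mutation `f` applies to the
    // inner struct becomes the element type of the array field.
    val nestedInArray: (StructType => StructType) => StructType =
      f => new StructType()
        .add("array", ArrayType(f(new StructType().add("b", IntegerType))))

    // Two schemas that differ only in that one position:
    val withoutExtra = nestedInArray(identity)                    // struct<array: array<struct<b:int>>>
    val withExtra    = nestedInArray(_.add("extra", StringType))  // adds "extra" inside the element struct

    // The suite then compares such pairs, for example
    // SchemaUtils.isReadCompatible(withoutExtra, withExtra) for the "adding a nullable field" case.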
//////////////////////////// + // reportDifference + //////////////////////////// + + /** + * @param existing the existing schema to compare to + * @param specified the new specified schema + * @param expected an expected list of messages, each describing a schema difference. + * Every expected message is actually a regex patterns that is matched + * against all diffs that are returned. This is necessary to tolerate + * variance in ordering of field names, for example in a message such as + * "Specified schema has additional field(s): x, y", we cannot predict + * the order of x and y. + */ + def testReportDifferences(testName: String) + (existing: StructType, specified: StructType, expected: String*): Unit = { + test(testName) { + val differences = SchemaUtils.reportDifferences(existing, specified) + // make sure every expected difference is reported + expected foreach ((exp: String) => + assert(differences.exists(message => exp.r.findFirstMatchIn(message).isDefined), + s"""Difference not reported. + |Expected: + |- $exp + |Reported: ${differences.mkString("\n- ", "\n- ", "")} + """.stripMargin)) + // make sure there are no extra differences reported + assert(expected.size == differences.size, + s"""Too many differences reported. + |Expected: ${expected.mkString("\n- ", "\n- ", "")} + |Reported: ${differences.mkString("\n- ", "\n- ", "")} + """.stripMargin) + } + } + + testReportDifferences("extra columns should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType), + specified = new StructType() + .add("a", IntegerType) + .add("b", StringType), + expected = "additional field[(]s[)]: b" + ) + + testReportDifferences("missing columns should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType) + .add("b", StringType), + specified = new StructType() + .add("a", IntegerType), + expected = "missing field[(]s[)]: b" + ) + + testReportDifferences("making a column nullable should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType, nullable = false) + .add("b", StringType, nullable = true), + specified = new StructType() + .add("a", IntegerType, nullable = true) + .add("b", StringType, nullable = true), + expected = "a is nullable in specified schema but non-nullable in existing schema" + ) + + testReportDifferences("making a column non-nullable should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType, nullable = false) + .add("b", StringType, nullable = true), + specified = new StructType() + .add("a", IntegerType, nullable = false) + .add("b", StringType, nullable = false), + expected = "b is non-nullable in specified schema but nullable in existing schema" + ) + + testReportDifferences("change in column metadata should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType, nullable = true, new MetadataBuilder().putString("x", "1").build()) + .add("b", StringType), + specified = new StructType() + .add("a", IntegerType, nullable = true, new MetadataBuilder().putString("x", "2").build()) + .add("b", StringType), + expected = "metadata for field a is different" + ) + + testReportDifferences("change in generation expression for generated columns")( + existing = new StructType() + .add("a", IntegerType, nullable = true, + new MetadataBuilder() + .putString(GENERATION_EXPRESSION_METADATA_KEY, "b + 1") + .putString("x", "1").build()) + .add("b", StringType), + specified = new StructType() + .add("a", IntegerType, nullable = true, new 
MetadataBuilder() + .putString(GENERATION_EXPRESSION_METADATA_KEY, "1 + b") + .putString("x", "1").build()) + .add("b", StringType), + // Regex flags: DOTALL and MULTILINE + expected = "(?sm)generation expression for field a is different" + + // Not include + "(?!.*metadata for field a is different)" + ) + + testReportDifferences("change in column metadata for generated columns")( + existing = new StructType() + .add("a", IntegerType, nullable = true, + new MetadataBuilder() + .putString(GENERATION_EXPRESSION_METADATA_KEY, "b + 1") + .putString("x", "1").build()) + .add("b", StringType), + specified = new StructType() + .add("a", IntegerType, nullable = true, new MetadataBuilder() + .putString(GENERATION_EXPRESSION_METADATA_KEY, "b + 1") + .putString("x", "2").build()) + .add("b", StringType), + expected = "metadata for field a is different" + ) + + testReportDifferences("change in generation expression and metadata for generated columns")( + existing = new StructType() + .add("a", IntegerType, nullable = true, + new MetadataBuilder() + .putString(GENERATION_EXPRESSION_METADATA_KEY, "b + 1") + .putString("x", "1").build()) + .add("b", StringType), + specified = new StructType() + .add("a", IntegerType, nullable = true, new MetadataBuilder() + .putString(GENERATION_EXPRESSION_METADATA_KEY, "b + 2") + .putString("x", "2").build()) + .add("b", StringType), + // Regex flags: DOTALL and MULTILINE + expected = "(?sm)generation expression for field a is different" + + ".*metadata for field a is different" + ) + + testReportDifferences("change of column type should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType) + .add("b", StringType), + specified = new StructType() + .add("a", IntegerType) + .add("b", new ArrayType( + StringType, containsNull = false)), + expected = "type for b is different" + ) + + testReportDifferences("change of array nullability should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType) + .add("b", new ArrayType( + new StructType().add("x", LongType), containsNull = true)), + specified = new StructType() + .add("a", IntegerType) + .add("b", new ArrayType( + new StructType().add("x", LongType), containsNull = false)), + expected = "b\\[\\] can not contain null in specified schema but can in existing" + ) + + testReportDifferences("change of element type should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType) + .add("b", new ArrayType(LongType, containsNull = true)), + specified = new StructType() + .add("a", IntegerType) + .add("b", new ArrayType(StringType, containsNull = true)), + expected = "type for b\\[\\] is different" + ) + + testReportDifferences("change of element struct type should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType) + .add("b", new ArrayType( + new StructType() + .add("x", LongType), + containsNull = true)), + specified = new StructType() + .add("a", IntegerType) + .add("b", new ArrayType( + new StructType() + .add("x", StringType), + containsNull = true)), + expected = "type for b\\[\\].x is different" + ) + + testReportDifferences("change of map value nullability should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType) + .add("b", new MapType( + StringType, + new StructType().add("x", LongType), valueContainsNull = true)), + specified = new StructType() + .add("a", IntegerType) + .add("b", new MapType( + StringType, + new StructType().add("x", LongType), valueContainsNull 
= false)), + expected = "b can not contain null values in specified schema but can in existing" + ) + + testReportDifferences("change of map key type should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType) + .add("b", new MapType(LongType, StringType, valueContainsNull = true)), + specified = new StructType() + .add("a", IntegerType) + .add("b", new MapType(StringType, StringType, valueContainsNull = true)), + expected = "type for b\\[key\\] is different" + ) + + testReportDifferences("change of value struct type should be reported as a difference")( + existing = new StructType() + .add("a", IntegerType) + .add("b", new MapType( + StringType, + new StructType().add("x", LongType), + valueContainsNull = true)), + specified = new StructType() + .add("a", IntegerType) + .add("b", new MapType( + StringType, + new StructType().add("x", FloatType), + valueContainsNull = true)), + expected = "type for b\\[value\\].x is different" + ) + + testReportDifferences("nested extra columns should be reported as a difference")( + existing = new StructType() + .add("x", new StructType() + .add("a", IntegerType)), + specified = new StructType() + .add("x", new StructType() + .add("a", IntegerType) + .add("b", StringType) + .add("c", LongType)), + expected = "additional field[(]s[)]: (x.b, x.c|x.c, x.b)" + ) + + testReportDifferences("nested missing columns should be reported as a difference")( + existing = new StructType() + .add("x", new StructType() + .add("a", IntegerType) + .add("b", StringType) + .add("c", FloatType)), + specified = new StructType() + .add("x", new StructType() + .add("a", IntegerType)), + expected = "missing field[(]s[)]: (x.b, x.c|x.c, x.b)" + ) + + testReportDifferences("making a nested column nullable should be reported as a difference")( + existing = new StructType() + .add("x", new StructType() + .add("a", IntegerType, nullable = false) + .add("b", StringType, nullable = true)), + specified = new StructType() + .add("x", new StructType() + .add("a", IntegerType, nullable = true) + .add("b", StringType, nullable = true)), + expected = "x.a is nullable in specified schema but non-nullable in existing schema" + ) + + testReportDifferences("making a nested column non-nullable should be reported as a difference")( + existing = new StructType() + .add("x", new StructType() + .add("a", IntegerType, nullable = false) + .add("b", StringType, nullable = true)), + specified = new StructType() + .add("x", new StructType() + .add("a", IntegerType, nullable = false) + .add("b", StringType, nullable = false)), + expected = "x.b is non-nullable in specified schema but nullable in existing schema" + ) + + testReportDifferences("change in nested column metadata should be reported as a difference")( + existing = new StructType() + .add("x", new StructType() + .add("a", IntegerType, nullable = true, new MetadataBuilder().putString("x", "1").build()) + .add("b", StringType)), + specified = new StructType() + .add("x", new StructType() + .add("a", IntegerType, nullable = true, new MetadataBuilder().putString("x", "2").build()) + .add("b", StringType)), + expected = "metadata for field x.a is different" + ) + + testReportDifferences("change of nested column type should be reported as a difference")( + existing = new StructType() + .add("x", new StructType() + .add("a", IntegerType) + .add("b", StringType)), + specified = new StructType() + .add("x", new StructType() + .add("a", IntegerType) + .add("b", new ArrayType( + StringType, containsNull = false))), + expected 
= "type for x.b is different" + ) + + testReportDifferences("change of nested array nullability should be reported as a difference")( + existing = new StructType() + .add("x", new StructType() + .add("a", IntegerType) + .add("b", new ArrayType( + new StructType() + .add("x", LongType), + containsNull = true))), + specified = new StructType() + .add("x", new StructType() + .add("a", IntegerType) + .add("b", new ArrayType( + new StructType() + .add("x", LongType), + containsNull = false))), + expected = "x.b\\[\\] can not contain null in specified schema but can in existing" + ) + + testReportDifferences("change of nested element type should be reported as a difference")( + existing = new StructType() + .add("x", new StructType() + .add("a", IntegerType) + .add("b", new ArrayType(LongType, containsNull = true))), + specified = new StructType() + .add("x", new StructType() + .add("a", IntegerType) + .add("b", new ArrayType(StringType, containsNull = true))), + expected = "type for x.b\\[\\] is different" + ) + + testReportDifferences("change of nested element struct type should be reported as a difference")( + existing = new StructType() + .add("x", new StructType() + .add("a", IntegerType) + .add("b", new ArrayType( + new StructType() + .add("x", LongType), + containsNull = true))), + specified = new StructType() + .add("x", new StructType() + .add("a", IntegerType) + .add("b", new ArrayType( + new StructType() + .add("x", StringType), + containsNull = true))), + expected = "type for x.b\\[\\].x is different" + ) + + private val piiTrue = new MetadataBuilder().putBoolean("pii", value = true).build() + private val piiFalse = new MetadataBuilder().putBoolean("pii", value = false).build() + + testReportDifferences("multiple differences should be reported")( + existing = new StructType() + .add("a", IntegerType) + .add("b", StringType) + .add("c", BinaryType) + .add("f", LongType, nullable = true, piiTrue) + .add("g", new MapType( + IntegerType, + new StructType() + .add("a", IntegerType, nullable = false, piiFalse) + .add("b", StringType) + .add("d", new ArrayType( + LongType, + containsNull = false + )), + valueContainsNull = true)) + .add("h", new MapType( + LongType, + StringType, + valueContainsNull = true)), + specified = new StructType() + .add("a", FloatType) + .add("d", StringType) + .add("e", LongType) + .add("f", LongType, nullable = false, piiFalse) + .add("g", new MapType( + StringType, + new StructType() + .add("a", LongType, nullable = true) + .add("c", StringType) + .add("d", new ArrayType( + BooleanType, + containsNull = true + )), + valueContainsNull = false)) + .add("h", new MapType( + LongType, + new ArrayType(IntegerType, containsNull = false), + valueContainsNull = true)), + "type for a is different", + "additional field[(]s[)]: (d, e|e, d)", + "missing field[(]s[)]: (b, c|c, b)", + "f is non-nullable in specified schema but nullable", + "metadata for field f is different", + "type for g\\[key\\] is different", + "g can not contain null values in specified schema but can in existing", + "additional field[(]s[)]: g\\[value\\].c", + "missing field[(]s[)]: g\\[value\\].b", + "type for g\\[value\\].a is different", + "g\\[value\\].a is nullable in specified schema but non-nullable in existing", + "metadata for field g\\[value\\].a is different", + "field g\\[value\\].d\\[\\] can contain null in specified schema but can not in existing", + "type for g\\[value\\].d\\[\\] is different", + "type for h\\[value\\] is different" + ) + + //////////////////////////// + // 
findColumnPosition + //////////////////////////// + + test("findColumnPosition") { + val schema = new StructType() + .add("struct", new StructType() + .add("a", IntegerType) + .add("b", IntegerType)) + .add("array", ArrayType(new StructType() + .add("c", IntegerType) + .add("d", IntegerType))) + .add("field", StringType) + .add("map", MapType( + new StructType() + .add("e", IntegerType), + new StructType() + .add("f", IntegerType))) + .add("mapStruct", MapType( + IntegerType, + new StructType() + .add("g", new StructType() + .add("h", IntegerType)))) + .add("arrayMap", ArrayType( + MapType( + new StructType() + .add("i", IntegerType), + new StructType() + .add("j", IntegerType)))) + + val List(structIdx, arrayIdx, fieldIdx, mapIdx, mapStructIdx, arrayMapIdx) = (0 to 5).toList + val ARRAY_ELEMENT_INDEX = 0 + val MAP_KEY_INDEX = 0 + val MAP_VALUE_INDEX = 1 + + def checkPosition(column: Seq[String], position: Seq[Int]): Unit = + assert(SchemaUtils.findColumnPosition(column, schema) === position) + + checkPosition(Seq("struct"), Seq(structIdx)) + checkPosition(Seq("STRucT"), Seq(structIdx)) + expectFailure("Couldn't find", schema.treeString) { + SchemaUtils.findColumnPosition(Seq("struct", "array"), schema) + } + checkPosition(Seq("struct", "a"), Seq(structIdx, 0)) + checkPosition(Seq("STRucT", "a"), Seq(structIdx, 0)) + checkPosition(Seq("struct", "A"), Seq(structIdx, 0)) + checkPosition(Seq("STRucT", "A"), Seq(structIdx, 0)) + checkPosition(Seq("struct", "b"), Seq(structIdx, 1)) + checkPosition(Seq("array"), Seq(arrayIdx)) + checkPosition(Seq("array", "element", "C"), Seq(arrayIdx, ARRAY_ELEMENT_INDEX, 0)) + checkPosition(Seq("array", "element", "d"), Seq(arrayIdx, ARRAY_ELEMENT_INDEX, 1)) + checkPosition(Seq("field"), Seq(fieldIdx)) + checkPosition(Seq("map"), Seq(mapIdx)) + checkPosition(Seq("map", "key", "e"), Seq(mapIdx, MAP_KEY_INDEX, 0)) + checkPosition(Seq("map", "value", "f"), Seq(mapIdx, MAP_VALUE_INDEX, 0)) + checkPosition(Seq("map", "value", "F"), Seq(mapIdx, MAP_VALUE_INDEX, 0)) + checkPosition(Seq("mapStruct", "key"), Seq(mapStructIdx, MAP_KEY_INDEX)) + checkPosition(Seq("mapStruct", "value", "g"), Seq(mapStructIdx, MAP_VALUE_INDEX, 0)) + checkPosition(Seq("mapStruct", "key"), Seq(mapStructIdx, MAP_KEY_INDEX)) + checkPosition(Seq("mapStruct", "value"), Seq(mapStructIdx, MAP_VALUE_INDEX)) + checkPosition(Seq("arrayMap"), Seq(arrayMapIdx)) + checkPosition(Seq("arrayMap", "element"), Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX)) + checkPosition( + Seq("arrayMap", "element", "key"), + Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX, MAP_KEY_INDEX)) + checkPosition( + Seq("arrayMap", "element", "value"), + Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX, MAP_VALUE_INDEX)) + checkPosition( + Seq("arrayMap", "element", "key", "i"), + Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX, MAP_KEY_INDEX, 0)) + checkPosition( + Seq("arrayMap", "element", "value", "j"), + Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX, MAP_VALUE_INDEX, 0)) + + val resolver = org.apache.spark.sql.catalyst.analysis.caseSensitiveResolution + Seq(Seq("STRucT", "b"), Seq("struct", "B"), Seq("array", "element", "C"), + Seq("map", "key", "E")).foreach { column => + expectFailure("Couldn't find", schema.treeString) { + SchemaUtils.findColumnPosition(column, schema, resolver) + } + } + } + + test("findColumnPosition that doesn't exist") { + val schema = new StructType() + .add("a", IntegerType) + .add("b", MapType(StringType, StringType)) + .add("c", ArrayType(IntegerType)) + expectFailure("Couldn't find", schema.treeString) { + SchemaUtils.findColumnPosition(Seq("d"), 
schema) + } + expectFailure("A MapType was found", "mapType", schema.treeString) { + SchemaUtils.findColumnPosition(Seq("b", "c"), schema) + } + expectFailure("An ArrayType was found", "arrayType", schema.treeString) { + SchemaUtils.findColumnPosition(Seq("c", "b"), schema) + } + } + + //////////////////////////// + // getNestedFieldFromPosition + //////////////////////////// + + test("getNestedFieldFromPosition") { + val a = StructField("a", IntegerType) + val b = StructField("b", IntegerType) + val c = StructField("c", IntegerType) + val d = StructField("d", IntegerType) + val e = StructField("e", IntegerType) + val f = StructField("f", IntegerType) + val g = StructField("g", IntegerType) + + val field = StructField("field", StringType) + val struct = StructField("struct", new StructType().add(a).add(b)) + val arrayElement = StructField("element", new StructType().add(c)) + val array = StructField("array", ArrayType(arrayElement.dataType)) + val mapKey = StructField("key", new StructType().add(d)) + val mapValue = StructField("value", new StructType().add(e)) + val map = StructField("map", MapType( + keyType = mapKey.dataType, + valueType = mapValue.dataType)) + val arrayMapKey = StructField("key", new StructType().add(f)) + val arrayMapValue = StructField("value", new StructType().add(g)) + val arrayMapElement = StructField("element", MapType( + keyType = arrayMapKey.dataType, + valueType = arrayMapValue.dataType)) + val arrayMap = StructField("arrayMap", ArrayType(arrayMapElement.dataType)) + + val root = StructField("root", StructType(Seq(field, struct, array, map, arrayMap))) + + val List(fieldIdx, structIdx, arrayIdx, mapIdx, arrayMapIdx) = (0 to 4).toList + val ARRAY_ELEMENT_INDEX = 0 + val MAP_KEY_INDEX = 0 + val MAP_VALUE_INDEX = 1 + + def checkField(position: Seq[Int], expected: StructField): Unit = + assert(getNestedFieldFromPosition(root, position) === expected) + + checkField(Seq.empty, root) + checkField(Seq(fieldIdx), field) + checkField(Seq(structIdx), struct) + checkField(Seq(structIdx, 0), a) + checkField(Seq(structIdx, 1), b) + checkField(Seq(arrayIdx), array) + checkField(Seq(arrayIdx, ARRAY_ELEMENT_INDEX), arrayElement) + checkField(Seq(arrayIdx, ARRAY_ELEMENT_INDEX, 0), c) + checkField(Seq(mapIdx), map) + checkField(Seq(mapIdx, MAP_KEY_INDEX), mapKey) + checkField(Seq(mapIdx, MAP_VALUE_INDEX), mapValue) + checkField(Seq(mapIdx, MAP_KEY_INDEX, 0), d) + checkField(Seq(mapIdx, MAP_VALUE_INDEX, 0), e) + checkField(Seq(arrayMapIdx), arrayMap) + checkField(Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX), arrayMapElement) + checkField(Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX, MAP_KEY_INDEX), arrayMapKey) + checkField(Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX, MAP_VALUE_INDEX), arrayMapValue) + checkField(Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX, MAP_KEY_INDEX, 0), f) + checkField(Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX, MAP_VALUE_INDEX, 0), g) + + def checkError(position: Seq[Int]): Unit = + assertThrows[IllegalArgumentException] { + getNestedFieldFromPosition(root, position) + } + + checkError(Seq(-1)) + checkError(Seq(fieldIdx, 0)) + checkError(Seq(structIdx, -1)) + checkError(Seq(structIdx, 2)) + checkError(Seq(arrayIdx, ARRAY_ELEMENT_INDEX - 1)) + checkError(Seq(arrayIdx, ARRAY_ELEMENT_INDEX + 1)) + checkError(Seq(mapIdx, MAP_KEY_INDEX - 1)) + checkError(Seq(mapIdx, MAP_VALUE_INDEX + 1)) + checkError(Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX - 1)) + checkError(Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX + 1)) + checkError(Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX, MAP_KEY_INDEX - 1)) + 
checkError(Seq(arrayMapIdx, ARRAY_ELEMENT_INDEX, MAP_VALUE_INDEX + 1)) + checkError(Seq(arrayMapIdx + 1)) + } + + test("getNestedTypeFromPosition") { + val schema = new StructType().add("a", IntegerType) + assert(getNestedTypeFromPosition(schema, Seq.empty) === schema) + assert(getNestedTypeFromPosition(schema, Seq(0)) === IntegerType) + assertThrows[IllegalArgumentException] { + getNestedTypeFromPosition(schema, Seq(-1)) + } + assertThrows[IllegalArgumentException] { + getNestedTypeFromPosition(schema, Seq(1)) + } + } + + //////////////////////////// + // addColumn + //////////////////////////// + + test("addColumn - simple") { + val a = StructField("a", IntegerType) + val b = StructField("b", StringType) + val schema = new StructType().add(a).add(b) + + val x = StructField("x", LongType) + assert(SchemaUtils.addColumn(schema, x, Seq(0)) === new StructType().add(x).add(a).add(b)) + assert(SchemaUtils.addColumn(schema, x, Seq(1)) === new StructType().add(a).add(x).add(b)) + assert(SchemaUtils.addColumn(schema, x, Seq(2)) === new StructType().add(a).add(b).add(x)) + + expectFailure("Index -1", "lower than 0") { + SchemaUtils.addColumn(schema, x, Seq(-1)) + } + expectFailure("Index 3", "larger than struct length: 2") { + SchemaUtils.addColumn(schema, x, Seq(3)) + } + expectFailure("parent is not a structtype") { + SchemaUtils.addColumn(schema, x, Seq(0, 0)) + } + } + + test("addColumn - nested struct") { + val a = StructField("a", IntegerType) + val b = StructField("b", StringType) + val first = StructField("first", new StructType().add(a).add(b)) + val middle = StructField("middle", new StructType().add(a).add(b)) + val last = StructField("last", new StructType().add(a).add(b)) + val schema = new StructType().add(first).add(middle).add(last) + + val x = StructField("x", LongType) + assert(SchemaUtils.addColumn(schema, x, Seq(0)) === + new StructType().add(x).add(first).add(middle).add(last)) + assert(SchemaUtils.addColumn(schema, x, Seq(1)) === + new StructType().add(first).add(x).add(middle).add(last)) + assert(SchemaUtils.addColumn(schema, x, Seq(2)) === + new StructType().add(first).add(middle).add(x).add(last)) + assert(SchemaUtils.addColumn(schema, x, Seq(3)) === + new StructType().add(first).add(middle).add(last).add(x)) + + assert(SchemaUtils.addColumn(schema, x, Seq(0, 2)) === + new StructType().add("first", new StructType().add(a).add(b).add(x)).add(middle).add(last)) + assert(SchemaUtils.addColumn(schema, x, Seq(0, 1)) === + new StructType().add("first", new StructType().add(a).add(x).add(b)).add(middle).add(last)) + assert(SchemaUtils.addColumn(schema, x, Seq(0, 0)) === + new StructType().add("first", new StructType().add(x).add(a).add(b)).add(middle).add(last)) + assert(SchemaUtils.addColumn(schema, x, Seq(1, 0)) === + new StructType().add(first).add("middle", new StructType().add(x).add(a).add(b)).add(last)) + assert(SchemaUtils.addColumn(schema, x, Seq(2, 0)) === + new StructType().add(first).add(middle).add("last", new StructType().add(x).add(a).add(b))) + + expectFailure("Index -1", "lower than 0") { + SchemaUtils.addColumn(schema, x, Seq(0, -1)) + } + expectFailure("Index 3", "larger than struct length: 2") { + SchemaUtils.addColumn(schema, x, Seq(0, 3)) + } + expectFailure("Struct not found at position 2") { + SchemaUtils.addColumn(schema, x, Seq(0, 2, 0)) + } + expectFailure("parent is not a structtype") { + SchemaUtils.addColumn(schema, x, Seq(0, 0, 0)) + } + } + + test("addColumn - nested map") { + val k = StructField("k", IntegerType) + val v = StructField("v", 
StringType) + val schema = new StructType().add("m", MapType( + keyType = new StructType().add(k), + valueType = new StructType().add(v))) + + val MAP_KEY_INDEX = 0 + val MAP_VALUE_INDEX = 1 + + val x = StructField("x", LongType) + assert(SchemaUtils.addColumn(schema, x, Seq(0, MAP_KEY_INDEX, 0)) === + new StructType().add("m", MapType( + keyType = new StructType().add(x).add(k), + valueType = new StructType().add(v)))) + + assert(SchemaUtils.addColumn(schema, x, Seq(0, MAP_KEY_INDEX, 1)) === + new StructType().add("m", MapType( + keyType = new StructType().add(k).add(x), + valueType = new StructType().add(v)))) + + assert(SchemaUtils.addColumn(schema, x, Seq(0, MAP_VALUE_INDEX, 0)) === + new StructType().add("m", MapType( + keyType = new StructType().add(k), + valueType = new StructType().add(x).add(v)))) + + assert(SchemaUtils.addColumn(schema, x, Seq(0, MAP_VALUE_INDEX, 1)) === + new StructType().add("m", MapType( + keyType = new StructType().add(k), + valueType = new StructType().add(v).add(x)))) + + expectFailure("parent is not a structtype") { + SchemaUtils.addColumn(schema, x, Seq(0, MAP_KEY_INDEX - 1, 0)) + } + expectFailure("parent is not a structtype") { + SchemaUtils.addColumn(schema, x, Seq(0, MAP_VALUE_INDEX + 1, 0)) + } + } + + test("addColumn - nested maps") { + // Helper method to create a 2-level deep nested map of structs. The tests below each cover + // adding a field to one of the leaf struct. + def schema( + kk: StructType = new StructType().add("kk", IntegerType), + kv: StructType = new StructType().add("kv", IntegerType), + vk: StructType = new StructType().add("vk", IntegerType), + vv: StructType = new StructType().add("vv", IntegerType)) + : StructType = new StructType().add("m", MapType( + keyType = MapType( + keyType = kk, + valueType = kv), + valueType = MapType( + keyType = vk, + valueType = vv))) + + val MAP_KEY_INDEX = 0 + val MAP_VALUE_INDEX = 1 + + val x = StructField("x", LongType) + // Add field `x` at the front of each leaf struct. + assert(SchemaUtils.addColumn(schema(), x, Seq(0, MAP_KEY_INDEX, MAP_KEY_INDEX, 0)) === + schema(kk = new StructType().add(x).add("kk", IntegerType))) + assert(SchemaUtils.addColumn(schema(), x, Seq(0, MAP_VALUE_INDEX, MAP_KEY_INDEX, 0)) === + schema(vk = new StructType().add(x).add("vk", IntegerType))) + assert(SchemaUtils.addColumn(schema(), x, Seq(0, MAP_KEY_INDEX, MAP_VALUE_INDEX, 0)) === + schema(kv = new StructType().add(x).add("kv", IntegerType))) + assert(SchemaUtils.addColumn(schema(), x, Seq(0, MAP_VALUE_INDEX, MAP_VALUE_INDEX, 0)) === + schema(vv = new StructType().add(x).add("vv", IntegerType))) + + // Add field `x` at the back of each leaf struct. + assert(SchemaUtils.addColumn(schema(), x, Seq(0, MAP_KEY_INDEX, MAP_KEY_INDEX, 1)) === + schema(kk = new StructType().add("kk", IntegerType).add(x))) + assert(SchemaUtils.addColumn(schema(), x, Seq(0, MAP_VALUE_INDEX, MAP_KEY_INDEX, 1)) === + schema(vk = new StructType().add("vk", IntegerType).add(x))) + assert(SchemaUtils.addColumn(schema(), x, Seq(0, MAP_KEY_INDEX, MAP_VALUE_INDEX, 1)) === + schema(kv = new StructType().add("kv", IntegerType).add(x))) + assert(SchemaUtils.addColumn(schema(), x, Seq(0, MAP_VALUE_INDEX, MAP_VALUE_INDEX, 1)) === + schema(vv = new StructType().add("vv", IntegerType).add(x))) + + // Invalid map access. 
+ expectFailure("parent is not a structtype") { + SchemaUtils.addColumn(schema(), x, Seq(0, MAP_KEY_INDEX, MAP_KEY_INDEX - 1, 0)) + } + expectFailure("parent is not a structtype") { + SchemaUtils.addColumn(schema(), x, Seq(0, MAP_KEY_INDEX - 1, MAP_KEY_INDEX, 0)) + } + expectFailure("parent is not a structtype") { + SchemaUtils.addColumn(schema(), x, Seq(0, MAP_KEY_INDEX, MAP_VALUE_INDEX + 1, 0)) + } + expectFailure("parent is not a structtype") { + SchemaUtils.addColumn(schema(), x, Seq(0, MAP_VALUE_INDEX + 1, MAP_KEY_INDEX, 0)) + } + } + + test("addColumn - nested array") { + val e = StructField("e", IntegerType) + val schema = new StructType().add("a", ArrayType(new StructType().add(e))) + val x = StructField("x", LongType) + + val ARRAY_ELEMENT_INDEX = 0 + + // Add field `x` at the front of the leaf struct. + assert(SchemaUtils.addColumn(schema, x, Seq(0, ARRAY_ELEMENT_INDEX, 0)) === + new StructType().add("a", ArrayType(new StructType().add(x).add(e)))) + // Add field `x` at the back of the leaf struct. + assert(SchemaUtils.addColumn(schema, x, Seq(0, ARRAY_ELEMENT_INDEX, 1)) === + new StructType().add("a", ArrayType(new StructType().add(e).add(x)))) + + // Invalid array access. + expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.addColumn(schema, x, Seq(0, ARRAY_ELEMENT_INDEX - 1, 0)) + } + expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.addColumn(schema, x, Seq(0, ARRAY_ELEMENT_INDEX + 1, 0)) + } + } + + test("addColumn - nested arrays") { + val e = StructField("e", IntegerType) + val schema = new StructType().add("a", ArrayType(ArrayType(new StructType().add(e)))) + val x = StructField("x", LongType) + + val ARRAY_ELEMENT_INDEX = 0 + + // Add field `x` at the front of the leaf struct. + assert(SchemaUtils.addColumn(schema, x, Seq(0, ARRAY_ELEMENT_INDEX, ARRAY_ELEMENT_INDEX, 0)) === + new StructType().add("a", ArrayType(ArrayType(new StructType().add(x).add(e))))) + // Add field `x` at the back of the leaf struct. + assert(SchemaUtils.addColumn(schema, x, Seq(0, ARRAY_ELEMENT_INDEX, ARRAY_ELEMENT_INDEX, 1)) === + new StructType().add("a", ArrayType(ArrayType(new StructType().add(e).add(x))))) + + // Invalid array access. 
+ expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.addColumn(schema, x, Seq(0, ARRAY_ELEMENT_INDEX, ARRAY_ELEMENT_INDEX - 1, 0)) + } + expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.addColumn(schema, x, Seq(0, ARRAY_ELEMENT_INDEX - 1, ARRAY_ELEMENT_INDEX, 0)) + } + expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.addColumn(schema, x, Seq(0, ARRAY_ELEMENT_INDEX, ARRAY_ELEMENT_INDEX + 1, 0)) + } + expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.addColumn(schema, x, Seq(0, ARRAY_ELEMENT_INDEX + 1, ARRAY_ELEMENT_INDEX, 0)) + } + } + + //////////////////////////// + // dropColumn + //////////////////////////// + + test("dropColumn - simple") { + val a = StructField("a", IntegerType) + val b = StructField("b", StringType) + val schema = new StructType().add(a).add(b) + + assert(SchemaUtils.dropColumn(schema, Seq(0)) === ((new StructType().add(b), a))) + assert(SchemaUtils.dropColumn(schema, Seq(1)) === ((new StructType().add(a), b))) + + expectFailure("Index -1", "lower than 0") { + SchemaUtils.dropColumn(schema, Seq(-1)) + } + expectFailure("Index 2", "equals to or is larger than struct length: 2") { + SchemaUtils.dropColumn(schema, Seq(2)) + } + expectFailure("Can only drop nested columns from StructType") { + SchemaUtils.dropColumn(schema, Seq(0, 0)) + } + } + + test("dropColumn - nested struct") { + val a = StructField("a", IntegerType) + val b = StructField("b", StringType) + val c = StructField("c", StringType) + val first = StructField("first", new StructType().add(a).add(b).add(c)) + val middle = StructField("middle", new StructType().add(a).add(b).add(c)) + val last = StructField("last", new StructType().add(a).add(b).add(c)) + val schema = new StructType().add(first).add(middle).add(last) + + assert(SchemaUtils.dropColumn(schema, Seq(0)) === + new StructType().add(middle).add(last) -> first) + assert(SchemaUtils.dropColumn(schema, Seq(1)) === + new StructType().add(first).add(last) -> middle) + assert(SchemaUtils.dropColumn(schema, Seq(2)) === + new StructType().add(first).add(middle) -> last) + + assert(SchemaUtils.dropColumn(schema, Seq(0, 2)) === + new StructType().add("first", new StructType().add(a).add(b)).add(middle).add(last) -> c) + assert(SchemaUtils.dropColumn(schema, Seq(0, 1)) === + new StructType().add("first", new StructType().add(a).add(c)).add(middle).add(last) -> b) + assert(SchemaUtils.dropColumn(schema, Seq(0, 0)) === + new StructType().add("first", new StructType().add(b).add(c)).add(middle).add(last) -> a) + assert(SchemaUtils.dropColumn(schema, Seq(1, 0)) === + new StructType().add(first).add("middle", new StructType().add(b).add(c)).add(last) -> a) + assert(SchemaUtils.dropColumn(schema, Seq(2, 0)) === + new StructType().add(first).add(middle).add("last", new StructType().add(b).add(c)) -> a) + + expectFailure("Index -1", "lower than 0") { + SchemaUtils.dropColumn(schema, Seq(0, -1)) + } + expectFailure("Index 3", "equals to or is larger than struct length: 3") { + SchemaUtils.dropColumn(schema, Seq(0, 3)) + } + expectFailure("Can only drop nested columns from StructType") { + SchemaUtils.dropColumn(schema, Seq(0, 0, 0)) + } + } + + test("dropColumn - nested map") { + val a = StructField("a", IntegerType) + val b = StructField("b", StringType) + val c = StructField("c", LongType) + val d = StructField("d", DateType) + val schema = new StructType().add("m", MapType( + keyType = new StructType().add(a).add(b), + valueType = new StructType().add(c).add(d))) + + val MAP_KEY_INDEX = 0 + 
val MAP_VALUE_INDEX = 1 + + assert(SchemaUtils.dropColumn(schema, Seq(0, MAP_KEY_INDEX, 0)) === + (new StructType().add("m", MapType( + keyType = new StructType().add(b), + valueType = new StructType().add(c).add(d))), + a)) + + assert(SchemaUtils.dropColumn(schema, Seq(0, MAP_KEY_INDEX, 1)) === + (new StructType().add("m", MapType( + keyType = new StructType().add(a), + valueType = new StructType().add(c).add(d))), + b)) + + assert(SchemaUtils.dropColumn(schema, Seq(0, MAP_VALUE_INDEX, 0)) === + (new StructType().add("m", MapType( + keyType = new StructType().add(a).add(b), + valueType = new StructType().add(d))), + c)) + + assert(SchemaUtils.dropColumn(schema, Seq(0, MAP_VALUE_INDEX, 1)) === + (new StructType().add("m", MapType( + keyType = new StructType().add(a).add(b), + valueType = new StructType().add(c))), + d)) + + expectFailure("can only drop nested columns from structtype") { + SchemaUtils.dropColumn(schema, Seq(0, MAP_KEY_INDEX - 1, 0)) + } + expectFailure("can only drop nested columns from structtype") { + SchemaUtils.dropColumn(schema, Seq(0, MAP_VALUE_INDEX + 1, 0)) + } + } + + test("dropColumn - nested maps") { + // Helper method to create a 2-level deep nested map of structs. The tests below each cover + // dropping a field to one of the leaf struct. Each test adds an extra field `a` at a specific + // position then drops it to end up with the default schema returned by `schema()` + def schema( + kk: StructType = new StructType().add("kk", IntegerType), + kv: StructType = new StructType().add("kv", IntegerType), + vk: StructType = new StructType().add("vk", IntegerType), + vv: StructType = new StructType().add("vv", IntegerType)) + : StructType = new StructType().add("m", MapType( + keyType = MapType( + keyType = kk, + valueType = kv), + valueType = MapType( + keyType = vk, + valueType = vv))) + + val a = StructField("a", LongType) + + val MAP_KEY_INDEX = 0 + val MAP_VALUE_INDEX = 1 + + def checkDrop(initialSchema: StructType, position: Seq[Int]): Unit = + assert(SchemaUtils.dropColumn(initialSchema, position) === (schema(), a)) + // Drop field `a` from the front of each leaf struct. + checkDrop( + initialSchema = schema(kk = new StructType().add(a).add("kk", IntegerType)), + position = Seq(0, MAP_KEY_INDEX, MAP_KEY_INDEX, 0)) + + checkDrop( + initialSchema = schema(kv = new StructType().add(a).add("kv", IntegerType)), + position = Seq(0, MAP_KEY_INDEX, MAP_VALUE_INDEX, 0)) + + checkDrop( + initialSchema = schema(vk = new StructType().add(a).add("vk", IntegerType)), + position = Seq(0, MAP_VALUE_INDEX, MAP_KEY_INDEX, 0)) + + checkDrop( + initialSchema = schema(vv = new StructType().add(a).add("vv", IntegerType)), + position = Seq(0, MAP_VALUE_INDEX, MAP_VALUE_INDEX, 0)) + + // Drop field `a` from the back of each leaf struct. + checkDrop( + initialSchema = schema(kk = new StructType().add("kk", IntegerType).add(a)), + position = Seq(0, MAP_KEY_INDEX, MAP_KEY_INDEX, 1)) + + checkDrop( + initialSchema = schema(kv = new StructType().add("kv", IntegerType).add(a)), + position = Seq(0, MAP_KEY_INDEX, MAP_VALUE_INDEX, 1)) + + checkDrop( + initialSchema = schema(vk = new StructType().add("vk", IntegerType).add(a)), + position = Seq(0, MAP_VALUE_INDEX, MAP_KEY_INDEX, 1)) + + checkDrop( + initialSchema = schema(vv = new StructType().add("vv", IntegerType).add(a)), + position = Seq(0, MAP_VALUE_INDEX, MAP_VALUE_INDEX, 1)) + + // Invalid map access. 
+ expectFailure("can only drop nested columns from structtype") { + SchemaUtils.dropColumn(schema(), Seq(0, MAP_KEY_INDEX, MAP_KEY_INDEX - 1, 0)) + } + expectFailure("can only drop nested columns from structtype") { + SchemaUtils.dropColumn(schema(), Seq(0, MAP_KEY_INDEX - 1, MAP_KEY_INDEX, 0)) + } + expectFailure("can only drop nested columns from structtype") { + SchemaUtils.dropColumn(schema(), Seq(0, MAP_KEY_INDEX, MAP_VALUE_INDEX + 1, 0)) + } + expectFailure("can only drop nested columns from structtype") { + SchemaUtils.dropColumn(schema(), Seq(0, MAP_VALUE_INDEX + 1, MAP_KEY_INDEX, 0)) + } + } + + test("dropColumn - nested array") { + val e = StructField("e", IntegerType) + val f = StructField("f", IntegerType) + val schema = new StructType().add("a", ArrayType(new StructType().add(e).add(f))) + + val ARRAY_ELEMENT_INDEX = 0 + + // Drop field from the front of the leaf struct. + assert(SchemaUtils.dropColumn(schema, Seq(0, ARRAY_ELEMENT_INDEX, 0)) === + (new StructType().add("a", ArrayType(new StructType().add(f))), e)) + // Drop field from the back of the leaf struct. + assert(SchemaUtils.dropColumn(schema, Seq(0, ARRAY_ELEMENT_INDEX, 1)) === + (new StructType().add("a", ArrayType(new StructType().add(e))), f)) + + // Invalid array access. + expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.dropColumn(schema, Seq(0, ARRAY_ELEMENT_INDEX - 1, 0)) + } + expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.dropColumn(schema, Seq(0, ARRAY_ELEMENT_INDEX + 1, 0)) + } + } + + test("dropColumn - nested arrays") { + val e = StructField("e", IntegerType) + val f = StructField("f", IntegerType) + val schema = new StructType().add("a", ArrayType(ArrayType(new StructType().add(e).add(f)))) + + val ARRAY_ELEMENT_INDEX = 0 + + // Drop field `x` from the front of the leaf struct. + assert(SchemaUtils.dropColumn(schema, Seq(0, ARRAY_ELEMENT_INDEX, ARRAY_ELEMENT_INDEX, 0)) === + (new StructType().add("a", ArrayType(ArrayType(new StructType().add(f)))), e)) + // Drop field `x` from the back of the leaf struct. + assert(SchemaUtils.dropColumn(schema, Seq(0, ARRAY_ELEMENT_INDEX, ARRAY_ELEMENT_INDEX, 1)) === + (new StructType().add("a", ArrayType(ArrayType(new StructType().add(e)))), f)) + + // Invalid array access. 
+ expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.dropColumn(schema, Seq(0, ARRAY_ELEMENT_INDEX, ARRAY_ELEMENT_INDEX - 1, 0)) + } + expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.dropColumn(schema, Seq(0, ARRAY_ELEMENT_INDEX - 1, ARRAY_ELEMENT_INDEX, 0)) + } + expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.dropColumn(schema, Seq(0, ARRAY_ELEMENT_INDEX, ARRAY_ELEMENT_INDEX + 1, 0)) + } + expectFailure("Incorrectly accessing an ArrayType") { + SchemaUtils.dropColumn(schema, Seq(0, ARRAY_ELEMENT_INDEX + 1, ARRAY_ELEMENT_INDEX, 0)) + } + } + + //////////////////////////// + // normalizeColumnNames + //////////////////////////// + + test("normalize column names") { + val df = Seq((1, 2, 3)).toDF("Abc", "def", "gHi") + val schema = new StructType() + .add("abc", IntegerType) + .add("Def", IntegerType) + .add("ghi", IntegerType) + assert(normalizeColumnNames(schema, df).schema.fieldNames === schema.fieldNames) + } + + test("normalize column names - different ordering") { + val df = Seq((1, 2, 3)).toDF("def", "gHi", "abC") + val schema = new StructType() + .add("abc", IntegerType) + .add("Def", IntegerType) + .add("ghi", IntegerType) + assert(normalizeColumnNames(schema, df).schema.fieldNames === Seq("Def", "ghi", "abc")) + } + + test("normalize column names - dots in the name") { + val df = Seq((1, 2)).toDF("a.b", "c.D") + val schema = new StructType().add("a.b", IntegerType).add("c.d", IntegerType) + assert(normalizeColumnNames(schema, df).schema.fieldNames === Seq("a.b", "c.d")) + } + + test("throw error if nested column cases don't match") { + val df = spark.read.json(Seq("""{"a":1,"b":{"X":1,"y":2}}""").toDS()) + val schema = new StructType() + .add("a", IntegerType) + .add("b", new StructType() + .add("x", IntegerType) + .add("y", IntegerType)) + expectFailure("[b.X]", "b.x") { + normalizeColumnNames(schema, df) + } + } + + test("can rename top level nested column") { + val df = spark.read.json(Seq("""{"a":1,"B":{"x":1,"y":2}}""").toDS()).select('a, 'b) + val schema = new StructType() + .add("a", IntegerType) + .add("b", new StructType() + .add("x", IntegerType) + .add("y", IntegerType)) + assert(normalizeColumnNames(schema, df).schema.fieldNames === Seq("a", "b")) + } + + test("can normalize CDC type column") { + val df = Seq((1, 2, 3, 4)).toDF("Abc", "def", "gHi", CDCReader.CDC_TYPE_COLUMN_NAME) + val schema = new StructType() + .add("abc", IntegerType) + .add("Def", IntegerType) + .add("ghi", IntegerType) + assert(normalizeColumnNames(schema, df).schema.fieldNames === + schema.fieldNames :+ CDCReader.CDC_TYPE_COLUMN_NAME) + } + + //////////////////////////// + // mergeSchemas + //////////////////////////// + + test("mergeSchemas: missing columns in df") { + val base = new StructType().add("a", IntegerType).add("b", IntegerType) + val write = new StructType().add("a", IntegerType) + assert(mergeSchemas(base, write) === base) + } + + test("mergeSchemas: missing columns in df - case sensitivity") { + val base = new StructType().add("a", IntegerType).add("b", IntegerType) + val write = new StructType().add("A", IntegerType) + assert(mergeSchemas(base, write) === base) + } + + test("new columns get added to the tail of the schema") { + val base = new StructType().add("a", IntegerType) + val write = new StructType().add("a", IntegerType).add("b", IntegerType) + val write2 = new StructType().add("b", IntegerType).add("a", IntegerType) + assert(mergeSchemas(base, write) === write) + assert(mergeSchemas(base, write2) === write) + } + + 
test("new columns get added to the tail of the schema - nested") { + val base = new StructType() + .add("regular", StringType) + .add("struct", new StructType() + .add("a", IntegerType)) + + val write = new StructType() + .add("other", StringType) + .add("struct", new StructType() + .add("b", DateType) + .add("a", IntegerType)) + .add("this", StringType) + + val expected = new StructType() + .add("regular", StringType) + .add("struct", new StructType() + .add("a", IntegerType) + .add("b", DateType)) + .add("other", StringType) + .add("this", StringType) + assert(mergeSchemas(base, write) === expected) + } + + test("schema merging of incompatible types") { + val base = new StructType() + .add("top", StringType) + .add("struct", new StructType() + .add("a", IntegerType)) + .add("array", ArrayType(new StructType() + .add("b", DecimalType(18, 10)))) + .add("map", MapType(StringType, StringType)) + + expectFailure("StringType", "IntegerType") { + mergeSchemas(base, new StructType().add("top", IntegerType)) + } + expectFailure("IntegerType", "DateType") { + mergeSchemas(base, new StructType() + .add("struct", new StructType().add("a", DateType))) + } + // StructType's toString is different between Scala 2.12 and 2.13. + // - In Scala 2.12, it extends `scala.collection.Seq` which returns + // `StructType(StructField(a,IntegerType,true))`. + // - In Scala 2.13, it extends `scala.collection.immutable.Seq` which returns + // `Seq(StructField(a,IntegerType,true))`. + expectFailurePattern("'struct'", "StructType|Seq\\(", "MapType") { + mergeSchemas(base, new StructType() + .add("struct", MapType(StringType, IntegerType))) + } + expectFailure("'array'", "DecimalType", "DoubleType") { + mergeSchemas(base, new StructType() + .add("array", ArrayType(new StructType().add("b", DoubleType)))) + } + expectFailure("'array'", "scale") { + mergeSchemas(base, new StructType() + .add("array", ArrayType(new StructType().add("b", DecimalType(18, 12))))) + } + expectFailure("'array'", "precision") { + mergeSchemas(base, new StructType() + .add("array", ArrayType(new StructType().add("b", DecimalType(16, 10))))) + } + // See the above comment about `StructType` + expectFailurePattern("'map'", "MapType", "StructType|Seq\\(") { + mergeSchemas(base, new StructType() + .add("map", new StructType().add("b", StringType))) + } + expectFailure("'map'", "StringType", "IntegerType") { + mergeSchemas(base, new StructType() + .add("map", MapType(StringType, IntegerType))) + } + expectFailure("'map'", "StringType", "IntegerType") { + mergeSchemas(base, new StructType() + .add("map", MapType(IntegerType, StringType))) + } + } + + test("schema merging should pick current nullable and metadata") { + val m = new MetadataBuilder().putDouble("a", 0.2).build() + val base = new StructType() + .add("top", StringType, nullable = false, m) + .add("struct", new StructType() + .add("a", IntegerType, nullable = false, m)) + .add("array", ArrayType(new StructType() + .add("b", DecimalType(18, 10))), nullable = false, m) + .add("map", MapType(StringType, StringType), nullable = false, m) + + assert(mergeSchemas(base, new StructType().add("top", StringType)) === base) + assert(mergeSchemas(base, new StructType().add("struct", new StructType() + .add("a", IntegerType))) === base) + assert(mergeSchemas(base, new StructType().add("array", ArrayType(new StructType() + .add("b", DecimalType(18, 10))))) === base) + assert(mergeSchemas(base, new StructType() + .add("map", MapType(StringType, StringType))) === base) + } + + test("schema merging null 
type") { + val base = new StructType().add("top", NullType) + val update = new StructType().add("top", StringType) + + assert(mergeSchemas(base, update) === update) + assert(mergeSchemas(update, base) === update) + } + + test("schema merging performs upcast between ByteType, ShortType, and IntegerType") { + val byteType = new StructType().add("top", ByteType) + val shortType = new StructType().add("top", ShortType) + val intType = new StructType().add("top", IntegerType) + + assert(mergeSchemas(byteType, shortType) === shortType) + assert(mergeSchemas(byteType, intType) === intType) + assert(mergeSchemas(shortType, intType) === intType) + assert(mergeSchemas(shortType, byteType) === shortType) + assert(mergeSchemas(intType, shortType) === intType) + assert(mergeSchemas(intType, byteType) === intType) + + val structInt = new StructType().add("top", new StructType().add("leaf", IntegerType)) + val structShort = new StructType().add("top", new StructType().add("leaf", ShortType)) + assert(mergeSchemas(structInt, structShort) === structInt) + + val map1 = new StructType().add("top", new MapType(IntegerType, ShortType, true)) + val map2 = new StructType().add("top", new MapType(ShortType, IntegerType, true)) + val mapMerged = new StructType().add("top", new MapType(IntegerType, IntegerType, true)) + assert(mergeSchemas(map1, map2) === mapMerged) + + val arrInt = new StructType().add("top", new ArrayType(IntegerType, true)) + val arrShort = new StructType().add("top", new ArrayType(ShortType, true)) + assert(mergeSchemas(arrInt, arrShort) === arrInt) + } + + test("schema merging allows upcasting to LongType with allowImplicitConversions") { + val byteType = new StructType().add("top", ByteType) + val shortType = new StructType().add("top", ShortType) + val intType = new StructType().add("top", IntegerType) + val longType = new StructType().add("top", LongType) + + Seq(byteType, shortType, intType).foreach { sourceType => + assert( + longType === mergeSchemas( + longType, sourceType, allowImplicitConversions = true)) + val e = intercept[AnalysisException] { + mergeSchemas(longType, sourceType) + } + assert(e.getMessage.contains( + s"Failed to merge incompatible data types LongType and ${sourceType.head.dataType}")) + } + } + + test("Upcast between ByteType, ShortType and IntegerType is OK for parquet") { + import org.apache.spark.sql.functions._ + def testParquetUpcast(): Unit = { + withTempDir { dir => + val tempDir = dir.getCanonicalPath + spark.range(1.toByte).select(col("id") cast ByteType).write.save(tempDir + "/byte") + spark.range(1.toShort).select(col("id") cast ShortType).write.save(tempDir + "/short") + spark.range(1).select(col("id") cast IntegerType).write.save(tempDir + "/int") + + val shortSchema = new StructType().add("id", ShortType) + val intSchema = new StructType().add("id", IntegerType) + + spark.read.schema(shortSchema).parquet(tempDir + "/byte").collect() === Seq(Row(1.toShort)) + spark.read.schema(intSchema).parquet(tempDir + "/short").collect() === Seq(Row(1)) + spark.read.schema(intSchema).parquet(tempDir + "/byte").collect() === Seq(Row(1)) + } + } + + testParquetUpcast() + + } + //////////////////////////// + // transformColumns + //////////////////////////// + + test("transform columns - simple") { + val base = new StructType() + .add("a", IntegerType) + .add("b", StringType) + val update = new StructType() + .add("c", IntegerType) + .add("b", StringType) + + // Identity. 
+ var visitedFields = 0 + val res1 = SchemaMergingUtils.transformColumns(base) { + case (Seq(), field, _) => + visitedFields += 1 + field + } + assert(visitedFields === 2) + assert(base === res1) + + // Rename a -> c + visitedFields = 0 + val res2 = SchemaMergingUtils.transformColumns(base) { + case (Seq(), field, _) => + visitedFields += 1 + val name = field.name + field.copy(name = if (name == "a") "c" else name) + } + assert(visitedFields === 2) + assert(update === res2) + + // Rename a -> c; using input map. + visitedFields = 0 + val res3 = transformColumns(base, (Seq("A"), "c") :: Nil) { + case (Seq(), field, Seq((_, newName))) => + visitedFields += 1 + field.copy(name = newName) + } + assert(visitedFields === 1) + assert(update === res3) + } + + test("transform element field type") { + val base = new StructType() + .add("a", new StructType() + .add("element", StringType)) + + val update = new StructType() + .add("a", new StructType() + .add("element", IntegerType)) + + // Update type + var visitedFields = 0 + val res = SchemaMergingUtils.transformColumns(base) { (path, field, _) => + visitedFields += 1 + val dataType = path :+ field.name match { + case Seq("a", "element") => IntegerType + case _ => field.dataType + } + field.copy(dataType = dataType) + } + assert(visitedFields === 2) + assert(update === res) + } + + test("transform array nested field type") { + val nested = new StructType() + .add("s1", IntegerType) + .add("s2", LongType) + val base = new StructType() + .add("arr", ArrayType(nested)) + + val updatedNested = new StructType() + .add("s1", StringType) + .add("s2", LongType) + val update = new StructType() + .add("arr", ArrayType(updatedNested)) + + // Update type + var visitedFields = 0 + val res = SchemaMergingUtils.transformColumns(base) { (path, field, _) => + visitedFields += 1 + val dataType = path :+ field.name match { + case Seq("arr", "element", "s1") => StringType + case _ => field.dataType + } + field.copy(dataType = dataType) + } + assert(visitedFields === 3) + assert(update === res) + } + + test("transform map nested field type") { + val nested = new StructType() + .add("s1", IntegerType) + .add("s2", LongType) + val base = new StructType() + .add("m", MapType(StringType, nested)) + + val updatedNested = new StructType() + .add("s1", StringType) + .add("s2", LongType) + val update = new StructType() + .add("m", MapType(StringType, updatedNested)) + + // Update type + var visitedFields = 0 + val res = SchemaMergingUtils.transformColumns(base) { (path, field, _) => + visitedFields += 1 + val dataType = path :+ field.name match { + case Seq("m", "value", "s1") => StringType + case _ => field.dataType + } + field.copy(dataType = dataType) + } + assert(visitedFields === 3) + assert(update === res) + } + + test("transform map type") { + val base = new StructType() + .add("m", MapType(StringType, IntegerType)) + val update = new StructType() + .add("m", MapType(StringType, StringType)) + + // Update type + var visitedFields = 0 + val res = SchemaMergingUtils.transformColumns(base) { (path, field, _) => + visitedFields += 1 + val dataType = path :+ field.name match { + case Seq("m") => MapType(field.dataType.asInstanceOf[MapType].keyType, StringType) + case _ => field.dataType + } + field.copy(dataType = dataType) + } + assert(visitedFields === 1) + assert(update === res) + } + + test("transform columns - nested") { + val nested = new StructType() + .add("s1", IntegerType) + .add("s2", LongType) + val base = new StructType() + .add("nested", nested) + .add("arr", 
ArrayType(nested)) + .add("kvs", MapType(nested, nested)) + val update = new StructType() + .add("nested", + new StructType() + .add("t1", IntegerType) + .add("s2", LongType)) + .add("arr", ArrayType( + new StructType() + .add("s1", IntegerType) + .add("a2", LongType))) + .add("kvs", MapType( + new StructType() + .add("k1", IntegerType) + .add("s2", LongType), + new StructType() + .add("s1", IntegerType) + .add("v2", LongType))) + + // Identity. + var visitedFields = 0 + val res1 = SchemaMergingUtils.transformColumns(base) { + case (_, field, _) => + visitedFields += 1 + field + } + assert(visitedFields === 11) + assert(base === res1) + + // Rename + visitedFields = 0 + val res2 = SchemaMergingUtils.transformColumns(base) { (path, field, _) => + visitedFields += 1 + val name = path :+ field.name match { + case Seq("nested", "s1") => "t1" + case Seq("arr", "element", "s2") => "a2" + case Seq("kvs", "key", "s1") => "k1" + case Seq("kvs", "value", "s2") => "v2" + case _ => field.name + } + field.copy(name = name) + } + assert(visitedFields === 11) + assert(update === res2) + + // Rename; using map + visitedFields = 0 + val mapping = Seq( + Seq("nested", "s1") -> "t1", + Seq("arr", "element", "s2") -> "a2", + Seq("kvs", "key", "S1") -> "k1", + Seq("kvs", "value", "s2") -> "v2") + val res3 = transformColumns(base, mapping) { + case (_, field, Seq((_, name))) => + visitedFields += 1 + field.copy(name = name) + } + assert(visitedFields === 4) + assert(update === res3) + } + + //////////////////////////// + // checkFieldNames + //////////////////////////// + + test("check non alphanumeric column characters") { + val badCharacters = " ,;{}()\n\t=" + val goodCharacters = "#.`!@$%^&*~_<>?/:" + + badCharacters.foreach { char => + Seq(s"a${char}b", s"${char}ab", s"ab${char}", char.toString).foreach { name => + checkError( + exception = intercept[AnalysisException] { + SchemaUtils.checkFieldNames(Seq(name)) + }, + errorClass = "INVALID_COLUMN_NAME_AS_PATH", + parameters = Map("datasource" -> "delta", "columnName" -> s"`$name`") + ) + } + } + + goodCharacters.foreach { char => + // no issues here + SchemaUtils.checkFieldNames(Seq(s"a${char}b", s"${char}ab", s"ab${char}", char.toString)) + } + } + + test("fieldToColumn") { + assert(SchemaUtils.fieldToColumn(StructField("a", IntegerType)).expr == + new UnresolvedAttribute("a" :: Nil)) + // Dot in the column name should be converted correctly + assert(SchemaUtils.fieldToColumn(StructField("a.b", IntegerType)).expr == + new UnresolvedAttribute("a.b" :: Nil)) + } + + //////////////////////////// + // findNestedFieldIgnoreCase + //////////////////////////// + + test("complex schema access") { + val st = StringType + val it = IntegerType + def m(a: DataType, b: DataType): MapType = MapType(a, b) + def a(el: DataType): ArrayType = ArrayType(el) + def struct(el: DataType): StructType = new StructType().add("f1", el) + + val schema = new StructType() + .add("a", it) + .add("b", struct(st)) + .add("c", struct(struct(struct(st)))) + .add("d", a(it)) + .add("e", a(a(it))) + .add("f", a(a(struct(st)))) + .add("g", m(m(st, it), m(st, it))) + .add("h", m(a(st), a(it))) + .add("i", m(a(struct(st)), a(struct(st)))) + .add("j", m(m(struct(st), struct(it)), m(struct(st), struct(it)))) + .add("k", m(struct(a(a(struct(a(struct(st)))))), + m(m(struct(st), struct(it)), m(struct(st), struct(it))))) + + def find(names: Seq[String]): Option[StructField] = + SchemaUtils.findNestedFieldIgnoreCase(schema, names, true) + + val checks = Map( + "a" -> it, + "b" -> struct(st), + "b.f1" 
-> st, + "c.f1.f1.f1" -> st, + "d.element" -> it, + "e.element.element" -> it, + "f.element.element.f1" -> st, + "g.key.key" -> st, + "g.key.value" -> it, + "g.value.key" -> st, + "g.value.value" -> it, + "h.key.element" -> st, + "h.value.element" -> it, + "i.key.element.f1" -> st, + "i.value.element.f1" -> st, + "j.key.key.f1" -> st, + "j.key.value.f1" -> it, + "j.value.key.f1" -> st, + "j.value.value.f1" -> it, + "k.key.f1.element.element.f1.element.f1" -> st, + "k.value.key.key.f1" -> st, + "k.value.key.value.f1" -> it, + "k.value.value.key.f1" -> st, + "k.value.value.value.f1" -> it + ) + + checks.foreach { pair => + val (key, t) = pair + val path = key.split('.') + val f = find(path) + assert(f.isDefined, s"cannot find $key") + assert(f.get.name == path.last && f.get.dataType == t) + } + + val negativeChecks = Seq( + "x", + "b.f2", + "c.f1.f2", + "c.f1.f1.f2", + "d.f1", + "d.element.f1", + "e.element.element.f1", + "f.element.key.f1", + "g.key.element", + "g.key.keyy", + "g.key.valuee", + "h.key.element.f1", + "k.key.f1.element.element.f2.element.f1", + "k.value.value.f1" + ) + + negativeChecks.foreach { key => + val path = key.split('.') + val f = find(path) + assert(f.isEmpty, s"$key should be empty") + } + + } + + test("findUnsupportedDataTypes") { + def assertUnsupportedDataType( + dataType: DataType, + expected: Seq[UnsupportedDataTypeInfo]): Unit = { + val schema = StructType(Seq(StructField("col", dataType))) + assert(findUnsupportedDataTypes(schema) == expected) + } + + assertUnsupportedDataType(NullType, Nil) + assertUnsupportedDataType(BooleanType, Nil) + assertUnsupportedDataType(ByteType, Nil) + assertUnsupportedDataType(ShortType, Nil) + assertUnsupportedDataType(IntegerType, Nil) + assertUnsupportedDataType(LongType, Nil) + assertUnsupportedDataType( + YearMonthIntervalType.DEFAULT, + Seq(UnsupportedDataTypeInfo("col", YearMonthIntervalType.DEFAULT))) + assertUnsupportedDataType( + DayTimeIntervalType.DEFAULT, + Seq(UnsupportedDataTypeInfo("col", DayTimeIntervalType.DEFAULT))) + assertUnsupportedDataType(FloatType, Nil) + assertUnsupportedDataType(DoubleType, Nil) + assertUnsupportedDataType(StringType, Nil) + assertUnsupportedDataType(DateType, Nil) + assertUnsupportedDataType(TimestampType, Nil) + assertUnsupportedDataType( + CalendarIntervalType, + Seq(UnsupportedDataTypeInfo("col", CalendarIntervalType))) + assertUnsupportedDataType(BinaryType, Nil) + assertUnsupportedDataType(DataTypes.createDecimalType(), Nil) + assertUnsupportedDataType( + UnsupportedDataType, + Seq(UnsupportedDataTypeInfo("col", UnsupportedDataType))) + + // array + assertUnsupportedDataType(ArrayType(IntegerType, true), Nil) + assertUnsupportedDataType( + ArrayType(UnsupportedDataType, true), + Seq(UnsupportedDataTypeInfo("col[]", UnsupportedDataType))) + + // map + assertUnsupportedDataType(MapType(IntegerType, IntegerType, true), Nil) + assertUnsupportedDataType( + MapType(UnsupportedDataType, IntegerType, true), + Seq(UnsupportedDataTypeInfo("col[key]", UnsupportedDataType))) + assertUnsupportedDataType( + MapType(IntegerType, UnsupportedDataType, true), + Seq(UnsupportedDataTypeInfo("col[value]", UnsupportedDataType))) + assertUnsupportedDataType( + MapType(UnsupportedDataType, UnsupportedDataType, true), + Seq( + UnsupportedDataTypeInfo("col[key]", UnsupportedDataType), + UnsupportedDataTypeInfo("col[value]", UnsupportedDataType))) + + // struct + assertUnsupportedDataType(StructType(StructField("f", LongType) :: Nil), Nil) + assertUnsupportedDataType( + StructType(StructField("a", 
LongType) :: StructField("dot.name", UnsupportedDataType) :: Nil), + Seq(UnsupportedDataTypeInfo("col.`dot.name`", UnsupportedDataType))) + val nestedStructType = StructType(Seq( + StructField("a", LongType), + StructField("b", StructType(Seq( + StructField("c", LongType), + StructField("d", UnsupportedDataType) + ))), + StructField("e", StructType(Seq( + StructField("f", LongType), + StructField("g", UnsupportedDataType) + ))) + )) + assertUnsupportedDataType( + nestedStructType, + Seq( + UnsupportedDataTypeInfo("col.b.d", UnsupportedDataType), + UnsupportedDataTypeInfo("col.e.g", UnsupportedDataType))) + + // udt + assertUnsupportedDataType(new PointUDT, Nil) + assertUnsupportedDataType( + new UnsupportedUDT, + Seq(UnsupportedDataTypeInfo("col", UnsupportedDataType))) + } + + test("findUndefinedTypes: basic types") { + val schema = StructType(Seq( + StructField("c1", NullType), + StructField("c2", BooleanType), + StructField("c3", ByteType), + StructField("c4", ShortType), + StructField("c5", IntegerType), + StructField("c6", LongType), + StructField("c7", FloatType), + StructField("c8", DoubleType), + StructField("c9", StringType), + StructField("c10", DateType), + StructField("c11", TimestampType), + StructField("c12", BinaryType), + StructField("c13", DataTypes.createDecimalType()), + // undefined types + StructField("c14", TimestampNTZType), + StructField("c15", YearMonthIntervalType.DEFAULT), + StructField("c16", DayTimeIntervalType.DEFAULT), + StructField("c17", new PointUDT) // UserDefinedType + )) + val udts = findUndefinedTypes(schema) + assert(udts.map(_.getClass.getName.stripSuffix("$")) == + Seq( + classOf[TimestampNTZType], + classOf[YearMonthIntervalType], + classOf[DayTimeIntervalType], + classOf[PointUDT] + ).map(_.getName.stripSuffix("$")) + ) + } + + test("findUndefinedTypes: complex types") { + val schema = StructType(Seq( + StructField("c1", new PointUDT), + StructField("c2", ArrayType(new PointUDT, true)), + StructField("c3", MapType(new PointUDT, new PointUDT, true)), + StructField("c4", StructType(Seq( + StructField("c1", new PointUDT), + StructField("c2", ArrayType(new PointUDT, true)), + StructField("c3", MapType(new PointUDT, new PointUDT, true)) + ))) + )) + val udts = findUndefinedTypes(schema) + assert(udts.size == 8) + assert(udts.map(_.getClass.getName).toSet == Set(classOf[PointUDT].getName)) + } + +} + +object UnsupportedDataType extends DataType { + override def defaultSize: Int = throw new UnsupportedOperationException("defaultSize") + override def asNullable: DataType = throw new UnsupportedOperationException("asNullable") + override def toString: String = "UnsupportedDataType" +} + +@SQLUserDefinedType(udt = classOf[PointUDT]) +case class Point(x: Int, y: Int) + +class PointUDT extends UserDefinedType[Point] { + override def sqlType: DataType = StructType(Array( + StructField("x", IntegerType, nullable = false), + StructField("y", IntegerType, nullable = false))) + + override def serialize(obj: Point): Any = InternalRow(obj.x, obj.y) + + override def deserialize(datum: Any): Point = datum match { + case row: InternalRow => Point(row.getInt(0), row.getInt(1)) + } + + override def userClass: Class[Point] = classOf[Point] + + override def toString: String = "PointUDT" +} + +class UnsupportedUDT extends PointUDT { + override def sqlType: DataType = UnsupportedDataType +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/skipping/ClusteredTableTestUtils.scala 
b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/ClusteredTableTestUtils.scala new file mode 100644 index 00000000000..5045a2a9ba2 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/ClusteredTableTestUtils.scala @@ -0,0 +1,148 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.skipping + +import org.apache.spark.sql.delta.skipping.clustering.{ClusteredTableUtils, ClusteringColumn} +import org.apache.spark.sql.delta.{DeltaLog, Snapshot} +import org.apache.spark.sql.delta.commands.optimize.OptimizeMetrics + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +trait ClusteredTableTestUtilsBase extends SparkFunSuite with SharedSparkSession { + import testImplicits._ + + /** + * Helper for running optimize on the table with different APIs. + * @param table the name of table + */ + def optimizeTable(table: String): DataFrame = { + sql(s"OPTIMIZE $table") + } + + /** + * Runs optimize on the table and calls postHook on the metrics. + * @param table the name of table + * @param postHook callback triggered with OptimizeMetrics returned by the OPTIMIZE command + */ + def runOptimize(table: String)(postHook: OptimizeMetrics => Unit): Unit = { + postHook(optimizeTable(table).select($"metrics.*").as[OptimizeMetrics].head()) + } + + def verifyClusteringColumnsInDomainMetadata( + snapshot: Snapshot, + expectedLogicalClusteringColumns: String): Unit = { + val logicalColumnNames = if (expectedLogicalClusteringColumns.trim.isEmpty) { + Seq.empty[String] + } else { + expectedLogicalClusteringColumns.split(",").map(_.trim).toSeq + } + val expectedClusteringColumns = logicalColumnNames.map(ClusteringColumn(snapshot.schema, _)) + val actualClusteringColumns = + ClusteredTableUtils.getClusteringColumnsOptional(snapshot).getOrElse(Seq.empty) + assert(expectedClusteringColumns == actualClusteringColumns) + } + + def withClusteredTable[T]( + table: String, + schema: String, + clusterBy: String, + tableProperties: Map[String, String] = Map.empty, + location: Option[String] = None)(f: => T): T = { + createOrReplaceClusteredTable("CREATE", table, schema, clusterBy, tableProperties, location) + + Utils.tryWithSafeFinally(f) { + spark.sql(s"DROP TABLE IF EXISTS $table") + } + } + + /** + * Helper for creating or replacing table with different APIs. 
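+ * Optionally adds a TBLPROPERTIES clause built from `tableProperties` and a
+ * LOCATION clause from `location` when they are provided.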
+ * @param clause clause for SQL API ('CREATE', 'REPLACE', 'CREATE OR REPLACE') + * @param table the name of table + * @param schema comma separated list of "colName dataType" + * @param clusterBy comma separated list of clustering columns + */ + def createOrReplaceClusteredTable( + clause: String, + table: String, + schema: String, + clusterBy: String, + tableProperties: Map[String, String] = Map.empty, + location: Option[String] = None): Unit = { + val locationClause = if (location.isEmpty) "" else s"LOCATION '${location.get}'" + val tablePropertiesClause = if (!tableProperties.isEmpty) { + val tablePropertiesString = tableProperties.map { + case (key, value) => s"'$key' = '$value'" + }.mkString(",") + s"TBLPROPERTIES($tablePropertiesString)" + } else { + "" + } + spark.sql(s"$clause TABLE $table ($schema) USING delta CLUSTER BY ($clusterBy) " + + s"$tablePropertiesClause $locationClause") + } + + protected def createOrReplaceAsSelectClusteredTable( + clause: String, + table: String, + srcTable: String, + clusterBy: String, + location: Option[String] = None): Unit = { + val locationClause = if (location.isEmpty) "" else s"LOCATION '${location.get}'" + spark.sql(s"$clause TABLE $table USING delta CLUSTER BY ($clusterBy) " + + s"$locationClause AS SELECT * FROM $srcTable") + } + + def verifyClusteringColumns( + tableIdentifier: TableIdentifier, + expectedLogicalClusteringColumns: String + ): Unit = { + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, tableIdentifier) + verifyClusteringColumnsInternal( + snapshot, + tableIdentifier.table, + expectedLogicalClusteringColumns + ) + } + + def verifyClusteringColumns( + dataPath: String, + expectedLogicalClusteringColumns: String + ): Unit = { + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, dataPath) + verifyClusteringColumnsInternal( + snapshot, + s"delta.`$dataPath`", + expectedLogicalClusteringColumns + ) + } + + def verifyClusteringColumnsInternal( + snapshot: Snapshot, + tableNameOrPath: String, + expectedLogicalClusteringColumns: String + ): Unit = { + assert(ClusteredTableUtils.isSupported(snapshot.protocol) === true) + verifyClusteringColumnsInDomainMetadata(snapshot, expectedLogicalClusteringColumns) + } +} + +trait ClusteredTableTestUtils extends ClusteredTableTestUtilsBase diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/skipping/MultiDimClusteringFunctionsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/MultiDimClusteringFunctionsSuite.scala new file mode 100644 index 00000000000..7c4e9039799 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/MultiDimClusteringFunctionsSuite.scala @@ -0,0 +1,254 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.skipping + +import java.nio.ByteBuffer + +import scala.util.Random + +import org.apache.spark.sql.delta.expressions.{HilbertByteArrayIndex, HilbertLongIndex} +import org.apache.spark.sql.delta.skipping.MultiDimClusteringFunctions._ +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.SparkException +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.expressions.Cast +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +/** Tests for [[MultiDimClusterFunctions]] */ +class MultiDimClusteringFunctionsSuite extends QueryTest + with SharedSparkSession with DeltaSQLCommandTest { + import testImplicits._ + + test("range_partition_id(): simple") { + val numTuples = 20 + val data = 0.to(numTuples - 1) + + for { div <- Seq(1, 2, 4, 5, 10, 20) } { + checkAnswer( + Random.shuffle(data).toDF("col") + .withColumn("rpi", range_partition_id($"col", data.size / div)), + data.map(i => Row(i, i / div)) + ) + } + } + + test("range_partition_id(): two columns") { + val data = Seq("a" -> 10, "b" -> 20, "c" -> 30, "d" -> 40) + + checkAnswer( + // randomize the order and expect the partition ids assigned correctly in sorted order + Random.shuffle(data).toDF("c1", "c2") + .withColumn("r1", range_partition_id($"c1", 2)) + .withColumn("r2", range_partition_id($"c2", 4)), + Seq( + // Column c1 has values (a, b, c, d). Splitting this value range into two partitions + // gets ranges [a, b] and [c, d]. Values in each range map to partition 0 and 1. + // Similarly column c2 has values (10, 20, 30, 40). Splitting this into four partitions + // gets ranges [10], [20], [30] and [40] which map to partition ids 0 to 3. 
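+ // Hence the expected (r1, r2) pairs below: ("a", 10) -> (0, 0), ("b", 20) -> (0, 1),
+ // ("c", 30) -> (1, 2) and ("d", 40) -> (1, 3).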
+ Row("a", 10, 0, 0), + Row("b", 20, 0, 1), + Row("c", 30, 1, 2), + Row("d", 40, 1, 3))) + + checkAnswer( + Random.shuffle(data).toDF("c1", "c2") + .withColumn("r1", range_partition_id($"c1", 2)) + .distinct + .withColumn("r2", range_partition_id($"c2", 4)), + Seq( + Row("a", 10, 0, 0), + Row("b", 20, 0, 1), + Row("c", 30, 1, 2), + Row("d", 40, 1, 3))) + + checkAnswer( + Random.shuffle(data).toDF("c1", "c2") + .where(range_partition_id($"c1", 2) === 0) + .sort(range_partition_id($"c2", 4)), + Seq( + Row("a", 10), + Row("b", 20))) + } + + testQuietly("range_partition_id(): corner cases") { + // invalid number of partitions + val ex1 = intercept[IllegalArgumentException] { + spark.range(10).select(range_partition_id($"id", 0)).show + } + assert(ex1.getMessage contains "expected the number partitions to be greater than zero") + + val ex2 = intercept[IllegalArgumentException] { + withSQLConf(SQLConf.RANGE_EXCHANGE_SAMPLE_SIZE_PER_PARTITION.key -> "0") { + spark.range(10).withColumn("rpi", range_partition_id($"id", 10)).show + } + } + assert(ex2.getMessage contains "Sample points per partition must be greater than 0 but found 0") + + // Number of partitions is way more than the cardinality of input column values + checkAnswer( + spark.range(1).withColumn("rpi", range_partition_id($"id", 1000)), + Row(0, 0)) + + // compute range_partition_id on a dataframe with zero rows + checkAnswer( + spark.range(0).withColumn("rpi", range_partition_id($"id", 1000)), + Seq.empty[Row]) + + // compute range_partition_id on column with null values + checkAnswer( + Seq("a", null, "b", null).toDF("id").withColumn("rpi", range_partition_id($"id", 10)), + Seq( + Row("a", 0), + Row("b", 1), + Row(null, 0), + Row(null, 0))) + + // compute range_partition_id on column with one value which is null + checkAnswer( + spark.range(1).withColumn("id", lit(null)).withColumn("rpi", range_partition_id($"id", 10)), + Row(null, 0)) + + // compute range_partition_id on array type column + checkAnswer( + spark.range(1).withColumn("id", lit(Array(1, 2))) + .withColumn("rpi", range_partition_id($"id", 10)), + Row(Array(1, 2), 0)) + } + + test("interleave_bits(): 1 input = cast to binary") { + val data = Seq.fill(100)(Random.nextInt()) + checkAnswer( + data.toDF("id").select(interleave_bits($"id")), + data.map(i => Row(intToBinary(i))) + ) + } + + test(s"interleave_bits(): arbitrary num inputs") { + val n = 1 + Random.nextInt(7) + val zDF = spark.range(1).select() + + // Output is an array with number of elements equal to 4 * num_of_input_columns to interleave + + // Multiple columns each has value 0. Expect the final output an array of zeros + checkAnswer( + 1.to(n).foldLeft(zDF)((df, i) => df.withColumn(s"c$i", lit(0x00000000))) + .select(interleave_bits(1.to(n).map(i => $"c$i"): _*)), + Row(Array.fill(n * 4)(0x00.toByte)) + ) + + // Multiple column each has value 1. 
As the bits are interleaved expect the following output + // Inputs: c1=0x00000001, c2=0x00000001, c3=0x00000001, c4=0x00000001 + // Output (divided into array of 4 bytes for readability) + // [0x00, 0x00, 0x00, 0x00] [0x00, 0x00, 0x00, 0x00] + // [0x00, 0x00, 0x00, 0x00] [0x00, 0x00, 0x00, 0x08] + // (Inputs have last bit as 1 as we are interleaving bits across columns, all these + // bits of value 1 they will end up as last 4 bits in the last byte of the output) + checkAnswer( + 1.to(n).foldLeft(zDF)((df, i) => df.withColumn(s"c$i", lit(0x00000001))) + .select(interleave_bits(1.to(n).map(i => $"c$i"): _*)), + Row(Array.fill(n * 4 - 1)(0x00.toByte) :+ ((1 << n) - 1).toByte) + ) + + // Multiple columns each has value 0xFFFFFFFF. Expect the final output an array of 0xFF + checkAnswer( + 1.to(n).foldLeft(zDF)((df, i) => df.withColumn(s"c$i", lit(0xffffffff))) + .select(interleave_bits(1.to(n).map(i => $"c$i"): _*)), + Row(Array.fill(n * 4)(0xff.toByte)) + ) + } + + test("interleave_bits(): corner cases") { + // null input + checkAnswer( + spark.range(1).select(interleave_bits(lit(null))), + Row(Array.fill(4)(0x00.toByte)) + ) + + // no inputs to interleave_bits -> expect an empty row + checkAnswer( + spark.range(1).select(interleave_bits()), + Row(Array.empty[Byte]) + ) + + // Non-integer type as input column + val ex = intercept[AnalysisException] { + Seq(false).toDF("col").select(interleave_bits($"col")).show + } + assert(ex.getMessage contains "") + + def invalidColumnTypeInput(df: DataFrame): Unit = { + val ex = intercept[AnalysisException] { + df.select(interleave_bits($"col")).show + } + assert(ex.getMessage contains "") + } + + // Expect failure when a non-int type column is provided as input + invalidColumnTypeInput(Seq(0L).toDF("col")) + invalidColumnTypeInput(Seq(0.0).toDF("col")) + invalidColumnTypeInput(Seq("asd").toDF("col")) + invalidColumnTypeInput(Seq(Array(1, 2, 3)).toDF("col")) + } + + test("interleave_bits(range_partition_ids)") { + // test the combination of range_partition_id and interleave + checkAnswer( + spark.range(100).select(interleave_bits(range_partition_id($"id", 10))), + 0.until(100).map(i => Row(intToBinary(i / 10))) + ) + + // test the combination of range_partition_id and interleave on multiple columns + checkAnswer( + Seq( + (false, 0, "0"), + (true, 1, "1") + ).toDF("c1", "c2", "c3") + .select(interleave_bits( + range_partition_id($"c1", 2), + range_partition_id($"c2", 2), + range_partition_id($"c3", 2) + )), + Seq( + Row(Array.fill(3 * 4)(0x00.toByte)), + Row(Array.fill(3 * 4 - 1)(0x00.toByte) :+ 0x07.toByte) + ) + ) + } + + test("hilbert_index selects underlying expression correctly") { + assert(hilbert_index(10, Seq($"c1", $"c2", $"c3", $"c4", $"c5", $"c6"): _*).expr + .isInstanceOf[HilbertLongIndex]) + assert( + hilbert_index( + 10, + Seq($"c1", $"c2", $"c3", $"c4", $"c5", $"c6", $"c7", $"c8", $"c9"): _*) + .expr.asInstanceOf[Cast].child.isInstanceOf[HilbertByteArrayIndex]) + val e = intercept[SparkException]( + hilbert_index( + 11, + Seq($"c1", $"c2", $"c3", $"c4", $"c5", $"c6", $"c7", $"c8", $"c9", $"c10"): _*) + .expr.isInstanceOf[HilbertByteArrayIndex]) + assert(e.getMessage.contains("Hilbert indexing can only be used on 9 or fewer columns.")) + } + + private def intToBinary(x: Int): Array[Byte] = { + ByteBuffer.allocate(4).putInt(x).array() + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/skipping/MultiDimClusteringSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/MultiDimClusteringSuite.scala new file mode 
100644 index 00000000000..28baf076870 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/MultiDimClusteringSuite.scala @@ -0,0 +1,208 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.skipping + +import java.io.{File, FilenameFilter} + +import scala.util.Random + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.sources.DeltaSQLConf._ +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.functions.expr +import org.apache.spark.sql.test.SharedSparkSession + +class MultiDimClusteringSuite extends QueryTest + with SharedSparkSession with DeltaSQLCommandTest { + + private lazy val sparkSession = spark + // scalastyle:off sparkimplicits + import sparkSession.implicits._ + // scalastyle:on sparkimplicits + + test("Negative case - ZOrder clustering expression with zero columns") { + val ex = intercept[AssertionError] { + ZOrderClustering.getClusteringExpression(Seq.empty, 20) + } + assert(ex.getMessage contains "Cannot do Z-Order clustering by zero columns!") + } + + test("ZOrder clustering expression with one column") { + val cluster = ZOrderClustering.getClusteringExpression(Seq(expr("col1")), 20) + assert(cluster.expr.toString === + "cast(interleavebits(rangepartitionid('col1, 20)) as string)") + } + + test("ZOrder clustering expression with two column") { + val cluster = ZOrderClustering.getClusteringExpression(Seq(expr("col1"), expr("col2")), 20) + assert(cluster.expr.toString === + "cast(interleavebits(rangepartitionid('col1, 20), rangepartitionid('col2, 20)) as string)") + } + + test("ensure records with close Z-order values are close in the output") { + withTempDir { tempDir => + withSQLConf( + MDC_NUM_RANGE_IDS.key -> "4", + MDC_ADD_NOISE.key -> "false") { + val data = Seq( + // "c1" -> "c2", // (rangeId_c1, rangeId_c2) -> ZOrder (decimal Z-Order) + "a" -> 20, "a" -> 20, // (0, 0) -> 0b000000 (0) + "b" -> 20, // (0, 0) -> 0b000000 (0) + "c" -> 30, // (1, 1) -> 0b000011 (3) + "d" -> 70, // (1, 2) -> 0b001011 (3) + "e" -> 90, "e" -> 90, "e" -> 90, // (1, 2) -> 0b001001 (9) + "f" -> 200, // (2, 3) -> 0b001110 (14) + "g" -> 10, // (3, 0) -> 0b000101 (5) + "h" -> 20) // (3, 0) -> 0b000101 (5) + + // Randomize the data. Use seed for deterministic input. 
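+ // Aside, illustrative only (not part of the original test): interleaving the bits of
+ // the two 2-bit range ids, with rangeId_c1 as the lower bit of each pair, reproduces
+ // the decimal Z-values cited in the comments above. `zValue` is just a local helper
+ // sketch, not a Delta API.
+ def zValue(x: Int, y: Int): Int =
+ (0 until 2).map(i => (((x >> i) & 1) << (2 * i)) | (((y >> i) & 1) << (2 * i + 1))).sum
+ assert(zValue(0, 0) == 0 && zValue(1, 1) == 3 && zValue(1, 2) == 9 && zValue(2, 3) == 14)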
+ val inputDf = new Random(seed = 101).shuffle(data) + .toDF("c1", "c2") + + // Cluster the data and range partition into four partitions + val outputDf = MultiDimClustering.cluster( + inputDf, + approxNumPartitions = 4, + colNames = Seq("c1", "c2"), + curve = "zorder") + outputDf.write.parquet(new File(tempDir, "source").getCanonicalPath) + + // Load the partition 0 and verify that it contains (a, 20), (a, 20), (b, 20) + checkAnswer( + Seq("a" -> 20, "a" -> 20, "b" -> 20).toDF("c1", "c2"), + sparkSession.read.parquet(new File(tempDir, "source/part-00000*").getCanonicalPath)) + + // partition 1 + checkAnswer( + Seq("c" -> 30, "d" -> 70, "e" -> 90, "e" -> 90, "e" -> 90).toDF("c1", "c2"), + sparkSession.read.parquet(new File(tempDir, "source/part-00001*").getCanonicalPath)) + + // partition 2 + checkAnswer( + Seq("h" -> 20, "g" -> 10).toDF("c1", "c2"), + sparkSession.read.parquet(new File(tempDir, "source/part-00002*").getCanonicalPath)) + + // partition 3 + checkAnswer( + Seq("f" -> 200).toDF("c1", "c2"), + sparkSession.read.parquet(new File(tempDir, "source/part-00003*").getCanonicalPath)) + } + } + } + + test("ensure records with close Hilbert curve values are close in the output") { + withTempDir { tempDir => + withSQLConf(MDC_NUM_RANGE_IDS.key -> "4", MDC_ADD_NOISE.key -> "false") { + val data = Seq( + // "c1" -> "c2", // (rangeId_c1, rangeId_c2) -> Decimal Hilbert index + "a" -> 20, "a" -> 20, // (0, 0) -> 0 + "b" -> 20, // (0, 0) -> 0 + "c" -> 30, // (1, 1) -> 2 + "d" -> 70, // (1, 2) -> 13 + "e" -> 90, "e" -> 90, "e" -> 90, // (1, 2) -> 13 + "f" -> 200, // (2, 3) -> 11 + "g" -> 10, // (3, 0) -> 5 + "h" -> 20) // (3, 0) -> 5 + + // Randomize the data. Use seed for deterministic input. + val inputDf = new Random(seed = 101) + .shuffle(data) + .toDF("c1", "c2") + + // Cluster the data and range partition into four partitions + val outputDf = MultiDimClustering.cluster( + inputDf, + approxNumPartitions = 2, + colNames = Seq("c1", "c2"), + curve = "hilbert") + outputDf.write.parquet(new File(tempDir, "source").getCanonicalPath) + + // Load the partition 0 and verify its records. + checkAnswer( + Seq("a" -> 20, "a" -> 20, "b" -> 20, "c" -> 30, "g" -> 10, "h" -> 20).toDF("c1", "c2"), + sparkSession.read.parquet(new File(tempDir, "source/part-00000*").getCanonicalPath) + ) + + // partition 1 + checkAnswer( + Seq("d" -> 70, "e" -> 90, "e" -> 90, "e" -> 90, "f" -> 200).toDF("c1", "c2"), + sparkSession.read.parquet(new File(tempDir, "source/part-00001*").getCanonicalPath) + ) + } + } + } + + test("noise is helpful in skew handling") { + Seq("zorder", "hilbert").foreach { curve => + Seq("true", "false").foreach { addNoise => + withTempDir { tempDir => + withSQLConf( + MDC_NUM_RANGE_IDS.key -> "4", + MDC_ADD_NOISE.key -> addNoise) { + val data = Array.fill(100)(20, 20) // all records have the same values + val inputDf = data.toSeq.toDF("c1", "c2") + + // Cluster the data and range partition into four partitions + val outputDf = MultiDimClustering.cluster( + inputDf, + approxNumPartitions = 4, + colNames = Seq("c1", "c2"), + curve) + + outputDf.write.parquet(new File(tempDir, "source").getCanonicalPath) + + // If there is no noise added, expect only one partition, otherwise four partition + // as mentioned in the cluster command above. 
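+ // With noise enabled the clustering key gains a random component, so even these
+ // identical records can be spread across the four requested partitions; without it
+ // they all collapse into a single range partition.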
+ val partCount = new File(tempDir, "source").listFiles(new FilenameFilter { + override def accept(dir: File, name: String): Boolean = { + name.startsWith("part-0000") + } + }).length + + if ("true".equals(addNoise)) { + assert(4 === partCount, s"Incorrect number of partitions when addNoise=$addNoise") + } else { + assert(1 === partCount, s"Incorrect number of partitions when addNoise=$addNoise") + } + } + } + } + } + } + + test(s"try clustering with different ranges and noise flag on/off") { + Seq("zorder", "hilbert").foreach { curve => + Seq("true", "false").foreach { addNoise => + Seq("20", "100", "200", "1000").foreach { numRanges => + withSQLConf(MDC_NUM_RANGE_IDS.key -> numRanges, MDC_ADD_NOISE.key -> addNoise) { + val data = Seq.range(0, 100) + val inputDf = Random.shuffle(data).map(x => (x, x * 113 % 101)).toDF("col1", "col2") + val outputDf = MultiDimClustering.cluster( + inputDf, + approxNumPartitions = 10, + colNames = Seq("col1", "col2"), + curve) + // Underlying data shouldn't change + checkAnswer(outputDf, inputDf) + } + } + } + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableDDLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableDDLSuite.scala new file mode 100644 index 00000000000..60ab3883a1e --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableDDLSuite.scala @@ -0,0 +1,651 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.skipping.clustering + +import java.io.File + +import org.apache.spark.sql.delta.skipping.ClusteredTableTestUtils +import org.apache.spark.sql.delta.{DeltaAnalysisException, DeltaColumnMappingEnableIdMode, DeltaColumnMappingEnableNameMode, DeltaConfigs, DeltaLog, DeltaUnsupportedOperationException} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.stats.SkippingEligibleDataType +import org.apache.spark.sql.delta.test.{DeltaColumnMappingSelectedTestMixin, DeltaSQLCommandTest} + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{ArrayType, IntegerType, StructField, StructType} + +trait ClusteredTableCreateOrReplaceDDLSuiteBase + extends QueryTest with SharedSparkSession with ClusteredTableTestUtils { + + protected val testTable: String = "test_ddl_table" + protected val sourceTable: String = "test_ddl_source" + protected val targetTable: String = "test_ddl_target" + + protected def isPathBased: Boolean = false + + protected def supportedClauses: Seq[String] + + testCtasRtasHelper(supportedClauses) + testClusteringColumnsPartOfStatsColumn(supportedClauses) + testColTypeValidation("CREATE") + + def testCtasRtasHelper(clauses: Seq[String]): Unit = { + Seq( + ("", + "a INT, b STRING, ts TIMESTAMP", + "a, b"), + (" multipart name", + "a STRUCT, ts TIMESTAMP", + "a.b, ts") + ).foreach { case (testSuffix, columns, clusteringColumns) => + test(s"create/replace table$testSuffix") { + withTable(testTable) { + clauses.foreach { clause => + createOrReplaceClusteredTable(clause, testTable, columns, clusteringColumns) + verifyClusteringColumns(TableIdentifier(testTable), clusteringColumns) + } + } + } + + test(s"ctas/rtas$testSuffix") { + withTable(sourceTable, targetTable) { + sql(s"CREATE TABLE $sourceTable($columns) USING delta") + withTempDirIfNecessary { location => + clauses.foreach { clause => + createOrReplaceAsSelectClusteredTable( + clause, targetTable, sourceTable, clusteringColumns, location = location) + verifyClusteringColumns(targetTable, clusteringColumns, location) + } + } + } + } + + if (clauses.contains("REPLACE")) { + test(s"Replace from non clustered table$testSuffix") { + withTable(targetTable) { + sql(s"CREATE TABLE $targetTable($columns) USING delta") + createOrReplaceClusteredTable("REPLACE", targetTable, columns, clusteringColumns) + verifyClusteringColumns(TableIdentifier(targetTable), clusteringColumns) + } + } + } + } + } + + protected def createTableWithStatsColumns( + clause: String, + table: String, + clusterColumns: Seq[String], + numIndexedColumns: Int, + tableSchema: Option[String], + statsColumns: Seq[String] = Seq.empty, + location: Option[String] = None): Unit = { + val clusterSpec = clusterColumns.mkString(",") + val updatedTableProperties = + collection.mutable.Map("delta.dataSkippingNumIndexedCols" -> s"$numIndexedColumns") + if (statsColumns.nonEmpty) { + updatedTableProperties(DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS.key) = + statsColumns.mkString(",") + } + val tablePropertiesString = updatedTableProperties.map { + case (key, value) => s"'$key' = '$value'" + }.mkString(",") + val locationClause = if (location.isEmpty) "" else s"LOCATION '${location.get}'" + if (clause == "REPLACE") { + // Create the default before it can be replaced. 
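+ // (REPLACE TABLE requires an existing table, so bootstrap an empty one first.)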
+ sql(s"CREATE TABLE IF NOT EXISTS $table USING DELTA $locationClause") + } + if (tableSchema.isEmpty) { + sql( + s""" + |$clause TABLE $table USING DELTA CLUSTER BY ($clusterSpec) + |TBLPROPERTIES($tablePropertiesString) + |$locationClause + |AS SELECT * FROM $sourceTable + |""".stripMargin) + } else { + createOrReplaceClusteredTable( + clause, table, tableSchema.get, clusterSpec, updatedTableProperties.toMap, location) + } + } + + protected def testStatsCollectionHelper( + tableSchema: String, + numberOfIndexedCols: Int)(cb: => Unit): Unit = { + withTable(sourceTable) { + // Create a source table for CTAS. + sql( + s""" + | CREATE TABLE $sourceTable($tableSchema) USING DELTA + | TBLPROPERTIES('delta.dataSkippingNumIndexedCols' = '$numberOfIndexedCols') + |""".stripMargin) + // Run additional steps. + cb + } + } + + protected def testColTypeValidation(clause: String): Unit = { + test(s"validate column datatype checking on $clause table") { + withTable("srcTbl", "dstTbl") { + // Create reference table for CTAS/RTAS. + sql(s"CREATE table srcTbl (a STRUCT, d BOOLEAN, e MAP) USING delta") + + val data = (0 to 1000).map(i => Row(Row(i + 1, i * 10), i % 2 == 0, Map(i -> i))) + val schema = StructType(List( + StructField("a", StructType( + Array( + StructField("b", IntegerType), + StructField("c", IntegerType) + ) + )))) + spark.createDataFrame(spark.sparkContext.parallelize(data), StructType(schema)) + .write.mode("append").format("delta").saveAsTable("srcTbl") + + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, new TableIdentifier("srcTbl")) + // Test multiple data types. + Seq("a", "d", "e").foreach { colName => + withTempDir { tmpDir => + // Since validation happens both on create and replace, validate for both cases to + // ensure that datatype validation behaves consistently between the two. + if (clause == "REPLACE") { + sql("DROP TABLE IF EXISTS dstTbl") + sql(s"CREATE TABLE dstTbl LIKE srcTbl LOCATION '${tmpDir.getAbsolutePath}'") + } + + Seq( + // Scenario 1: Standard CREATE/REPLACE TABLE. + () => { + val schema = "a STRUCT, d BOOLEAN, e MAP" + createOrReplaceClusteredTable( + clause, "dstTbl", schema, colName, location = Some(tmpDir.getAbsolutePath)) + }, + // Scenario 2: CTAS/RTAS. 
+ () => + createOrReplaceAsSelectClusteredTable( + clause, "dstTbl", "srcTbl", colName, location = Some(tmpDir.getAbsolutePath))) + .foreach { f => + val e = intercept[DeltaAnalysisException] { + f() + } + checkError( + exception = e, + errorClass = "DELTA_CLUSTERING_COLUMN_MISSING_STATS", + parameters = Map( + "columns" -> colName, + "schema" -> """root + | |-- a: struct (nullable = true) + | | |-- b: integer (nullable = true) + | | |-- c: integer (nullable = true) + | |-- d: boolean (nullable = true) + | |-- e: map (nullable = true) + | | |-- key: integer + | | |-- value: integer (valueContainsNull = true) + |""".stripMargin) + ) + } + } + } + } + } + } + + test("cluster by with more than 4 columns - create table") { + val testTable = "test_table" + withTable(testTable) { + val e = intercept[DeltaAnalysisException] { + createOrReplaceClusteredTable( + "CREATE", testTable, "a INT, b INT, c INT, d INT, e INT", "a, b, c, d, e") + } + checkError( + exception = e, + errorClass = "DELTA_CLUSTER_BY_INVALID_NUM_COLUMNS", + parameters = Map("numColumnsLimit" -> "4", "actualNumColumns" -> "5") + ) + } + } + + test("cluster by with more than 4 columns - ctas") { + val testTable = "test_table" + val schema = "a INT, b INT, c INT, d INT, e INT" + withTempDirIfNecessary { location => + withTable(sourceTable, testTable) { + sql(s"CREATE TABLE $sourceTable($schema) USING delta") + val e = intercept[DeltaAnalysisException] { + createOrReplaceAsSelectClusteredTable( + "CREATE", testTable, sourceTable, "a, b, c, d, e", location = location) + } + checkError( + exception = e, + errorClass = "DELTA_CLUSTER_BY_INVALID_NUM_COLUMNS", + parameters = Map("numColumnsLimit" -> "4", "actualNumColumns" -> "5") + ) + } + } + } + + protected def verifyPartitionColumns( + tableIdentifier: TableIdentifier, + expectedPartitionColumns: Seq[String]): Unit = { + val (_, snapshot) = DeltaLog.forTableWithSnapshot(spark, tableIdentifier) + assert(snapshot.metadata.partitionColumns === expectedPartitionColumns) + } + + protected def verifyClusteringColumns( + table: String, + expectedLogicalClusteringColumns: String, + locationOpt: Option[String]): Unit = { + locationOpt.map { location => + verifyClusteringColumns( + location, expectedLogicalClusteringColumns + ) + }.getOrElse { + verifyClusteringColumns(TableIdentifier(table), expectedLogicalClusteringColumns) + } + } + + def testClusteringColumnsPartOfStatsColumn(clauses: Seq[String]): Unit = { + clauses.foreach { clause => + val mode = if (clause == "CREATE") "create table" else "replace table" + test(s"Validate clustering columns part of stats columns - $mode") { + val tableSchema = "col0 int, col1 STRUCT, col2 int" + val indexedColumns = 2 + testStatsCollectionHelper( + tableSchema = tableSchema, + numberOfIndexedCols = indexedColumns) { + withTable(targetTable) { + val deltaLogSrc = DeltaLog.forTable(spark, TableIdentifier(sourceTable)) + // Validate the 3rd column `col1.col12` and 4th column `col2` can not be + // clustering columns. 
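+ // With dataSkippingNumIndexedCols = 2, only the first two leaf columns (col0 and
+ // col1.col11) get statistics, so clustering on col1.col12 or col2 must be rejected.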
+ val e = intercept[DeltaAnalysisException]( + createTableWithStatsColumns( + clause, + targetTable, + "col0" :: "col1.col11" :: "col1.col12" :: "col2" :: Nil, + indexedColumns, + Some(tableSchema))) + checkError( + exception = e, + errorClass = "DELTA_CLUSTERING_COLUMN_MISSING_STATS", + parameters = Map( + "columns" -> "col1.col12, col2", + "schema" -> """root + | |-- col0: integer (nullable = true) + | |-- col1: struct (nullable = true) + | | |-- col11: integer (nullable = true) + |""".stripMargin) + ) + // Validate the first two columns can be clustering columns. + createTableWithStatsColumns( + clause, + targetTable, + "col0" :: "col1.col11" :: Nil, + indexedColumns, + Some(tableSchema)) + } + } + } + } + + clauses.foreach { clause => + val mode = if (clause == "CREATE") "ctas" else "rtas" + test(s"Validate clustering columns part of stats columns - $mode") { + // Add a suffix for the target table name to work around the issue that delta table's + // location isn't removed by the DROP TABLE from ctas/rtas test cases. + val table = targetTable + "_" + clause + + val tableSchema = "col0 int, col1 STRUCT, col2 int" + val indexedColumns = 2 + testStatsCollectionHelper( + tableSchema = tableSchema, + numberOfIndexedCols = indexedColumns) { + withTable(table) { + withTempDir { dir => + val deltaLogSrc = DeltaLog.forTable(spark, TableIdentifier(sourceTable)) + val targetLog = DeltaLog.forTable(spark, s"${dir.getPath}") + val dataPath = new File(targetLog.dataPath.toString.replace("file:", "")) + val initialNumFiles = + if (dataPath.listFiles() != null) { // Returns null if directory doesn't exist -> 0 + dataPath.listFiles().size + } + else { + 0 + } + // Validate the 3rd column `col1.col12` and 4th column `col2` can not be + // clustering columns. + val e = intercept[DeltaAnalysisException]( + createTableWithStatsColumns( + clause, + table, + "col0" :: "col1.col11" :: "col1.col12" :: "col2" :: Nil, + indexedColumns, + None, + location = Some(dir.getPath))) + checkError( + exception = e, + errorClass = "DELTA_CLUSTERING_COLUMN_MISSING_STATS", + parameters = Map( + "columns" -> "col1.col12, col2", + "schema" -> """root + | |-- col0: integer (nullable = true) + | |-- col1: struct (nullable = true) + | | |-- col11: integer (nullable = true) + |""".stripMargin) + ) + + // Validate the first two columns can be clustering columns. + createTableWithStatsColumns( + clause, + table, + "col0" :: "col1.col11" :: Nil, + indexedColumns, + None) + } + } + } + } + } + } + + test("Validate clustering columns cannot be non-eligible data types") { + val indexedColumns = 3 + // Validate non-eligible column stat data type. + val nonEligibleType = ArrayType(IntegerType) + assert(!SkippingEligibleDataType(nonEligibleType)) + val nonEligibleTableSchema = s"col0 int, col1 STRUCT, col12: string>" + testStatsCollectionHelper( + tableSchema = nonEligibleTableSchema, + numberOfIndexedCols = indexedColumns) { + withTable(targetTable) { + val deltaLogSrc = DeltaLog.forTable(spark, TableIdentifier(sourceTable)) + // Validate the 2nd column `col1.col11` cannot be clustering column. 
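+ // col1.col11 is an array column, which is not a skipping-eligible data type, so no
+ // statistics are collected for it and clustering on it must be rejected.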
+ val e = intercept[DeltaAnalysisException]( + createTableWithStatsColumns( + "CREATE", + targetTable, + "col0" :: "col1.col11" :: Nil, + indexedColumns, + Some(nonEligibleTableSchema))) + checkError( + exception = e, + errorClass = "DELTA_CLUSTERING_COLUMN_MISSING_STATS", + parameters = Map( + "columns" -> "col1.col11", + "schema" -> """root + | |-- col0: integer (nullable = true) + | |-- col1: struct (nullable = true) + | | |-- col11: array (nullable = true) + | | | |-- element: integer (containsNull = true) + | | |-- col12: string (nullable = true) + |""".stripMargin) + ) + } + } + } + + protected def withTempDirIfNecessary(f: Option[String] => Unit): Unit = { + if (isPathBased) { + withTempDir { dir => + f(Some(dir.getAbsolutePath)) + } + } else { + f(None) + } + } +} + +trait ClusteredTableDDLWithColumnMapping + extends ClusteredTableCreateOrReplaceDDLSuite + with DeltaColumnMappingSelectedTestMixin { + + override protected def runOnlyTests: Seq[String] = Seq( + "validate dropping clustering column is not allowed: single clustering column", + "validate dropping clustering column is not allowed: multiple clustering columns", + "validate dropping clustering column is not allowed: clustering column + " + + "non-clustering column" + ) + + test("validate dropping clustering column is not allowed: single clustering column") { + withClusteredTable(testTable, "col1 INT, col2 STRING, col3 LONG", "col1") { + val e = intercept[DeltaAnalysisException] { + sql(s"ALTER TABLE $testTable DROP COLUMNS (col1)") + } + checkError( + exception = e, + errorClass = "DELTA_UNSUPPORTED_DROP_CLUSTERING_COLUMN", + parameters = Map("columnList" -> "col1") + ) + // Drop non-clustering columns are allowed. + sql(s"ALTER TABLE $testTable DROP COLUMNS (col2)") + } + } + + test("validate dropping clustering column is not allowed: multiple clustering columns") { + withClusteredTable(testTable, "col1 INT, col2 STRING, col3 LONG", "col1, col2") { + val e = intercept[DeltaAnalysisException] { + sql(s"ALTER TABLE $testTable DROP COLUMNS (col1, col2)") + } + checkError( + exception = e, + errorClass = "DELTA_UNSUPPORTED_DROP_CLUSTERING_COLUMN", + parameters = Map("columnList" -> "col1,col2") + ) + } + } + + test("validate dropping clustering column is not allowed: clustering column + " + + "non-clustering column") { + withClusteredTable(testTable, "col1 INT, col2 STRING, col3 LONG", "col1, col2") { + val e = intercept[DeltaAnalysisException] { + sql(s"ALTER TABLE $testTable DROP COLUMNS (col1, col3)") + } + checkError( + exception = e, + errorClass = "DELTA_UNSUPPORTED_DROP_CLUSTERING_COLUMN", + parameters = Map("columnList" -> "col1") + ) + } + } +} + +trait ClusteredTableDDLWithColumnMappingV2Base extends ClusteredTableDDLWithColumnMapping + +trait ClusteredTableDDLWithColumnMappingV2 + extends ClusteredTableDDLWithColumnMappingV2Base + +trait ClusteredTableCreateOrReplaceDDLSuite + extends ClusteredTableCreateOrReplaceDDLSuiteBase + +trait ClusteredTableDDLSuiteBase + extends ClusteredTableCreateOrReplaceDDLSuite + with DeltaSQLCommandTest { + + test("optimize clustered table - error scenarios") { + withClusteredTable(testTable, "a INT, b STRING", "a") { + // Specify partition predicate. + val e = intercept[DeltaUnsupportedOperationException] { + sql(s"OPTIMIZE $testTable WHERE a > 0 and b = foo") + } + checkError( + e, + "DELTA_CLUSTERING_WITH_PARTITION_PREDICATE", + parameters = Map("predicates" -> "a > 0 and b = foo") + ) + + // Specify ZORDER BY. 
+ val e2 = intercept[DeltaAnalysisException] { + sql(s"OPTIMIZE $testTable ZORDER BY (a)") + } + checkError( + exception = e2, + errorClass = "DELTA_CLUSTERING_WITH_ZORDER_BY", + parameters = Map("zOrderBy" -> "a") + ) + } + } +} + +trait ClusteredTableDDLSuite extends ClusteredTableDDLSuiteBase +trait ClusteredTableDDLWithNameColumnMapping + extends ClusteredTableCreateOrReplaceDDLSuite with DeltaColumnMappingEnableNameMode + +trait ClusteredTableDDLWithIdColumnMapping + extends ClusteredTableCreateOrReplaceDDLSuite with DeltaColumnMappingEnableIdMode + +trait ClusteredTableDDLWithV2Base + extends ClusteredTableCreateOrReplaceDDLSuite + with SharedSparkSession { + override protected def supportedClauses: Seq[String] = Seq("CREATE", "REPLACE") + + testColTypeValidation("REPLACE") + + test("replace with different clustering columns") { + withTable(sourceTable) { + sql(s"CREATE TABLE $sourceTable(i int, s string) USING delta") + // Validate REPLACE TABLE (AS SELECT). + Seq("REPLACE", "CREATE OR REPLACE").foreach { clause => + Seq(true, false).foreach { isRTAS => + withTempDirIfNecessary { location => + withClusteredTable(testTable, "a int", "a", location = location) { + if (isRTAS) { + createOrReplaceAsSelectClusteredTable( + clause, testTable, sourceTable, "i", location = location) + } else { + createOrReplaceClusteredTable( + clause, testTable, "i int, b string", "i", location = location) + } + verifyClusteringColumns(testTable, "i", location) + } + } + } + } + } + } + + test("Validate replacing clustered tables with partitioned tables is not allowed") { + withTable(sourceTable) { + sql(s"CREATE TABLE $sourceTable(i int, s string) USING delta") + + // Validate REPLACE TABLE (AS SELECT). + Seq("REPLACE", "CREATE OR REPLACE").foreach { clause => + withClusteredTable(testTable, "a int", "a") { + verifyClusteringColumns(TableIdentifier(testTable), "a") + + Seq(true, false).foreach { isRTAS => + val e = intercept[DeltaAnalysisException] { + if (isRTAS) { + sql(s"$clause TABLE $testTable USING delta PARTITIONED BY (i) " + + s"AS SELECT * FROM $sourceTable") + } else { + sql(s"$clause TABLE $testTable (i int, b string) USING delta PARTITIONED BY (i)") + } + } + checkError( + e, + "DELTA_CLUSTERING_REPLACE_TABLE_WITH_PARTITIONED_TABLE" + ) + } + } + } + } + } + + test("Validate replacing partitioned tables with clustered tables is allowed") { + withTable(sourceTable) { + sql(s"CREATE TABLE $sourceTable(i int, s string) USING delta") + + // Validate REPLACE TABLE (AS SELECT). 
+ Seq("REPLACE", "CREATE OR REPLACE").foreach { clause => + Seq(true, false).foreach { isRTAS => + withTable(testTable) { + withTempDirIfNecessary { location => + val locationClause = if (location.isEmpty) "" else s"LOCATION '${location.get}'" + sql(s"CREATE TABLE $testTable USING delta PARTITIONED BY (i) $locationClause" + + s" SELECT 1 i, 'a' s") + verifyPartitionColumns(TableIdentifier(testTable), Seq("i")) + if (isRTAS) { + createOrReplaceAsSelectClusteredTable( + clause, testTable, sourceTable, "i", location = location) + } else { + createOrReplaceClusteredTable( + clause, testTable, "i int, b string", "i", location = location) + } + verifyClusteringColumns(testTable, "i", location) + verifyPartitionColumns(TableIdentifier(testTable), Seq()) + } + } + } + } + } + } + + Seq( + ("", + "a INT, b STRING, ts TIMESTAMP", + "a, b"), + (" multipart name", + "a STRUCT, ts TIMESTAMP", + "a.b, ts") + ).foreach { case (testSuffix, columns, clusteringColumns) => + test(s"create/replace table createOrReplace$testSuffix") { + withTable(testTable) { + // Repeat two times to test both create and replace cases. + (1 to 2).foreach { _ => + createOrReplaceClusteredTable("CREATE OR REPLACE", testTable, columns, clusteringColumns) + verifyClusteringColumns(TableIdentifier(testTable), clusteringColumns) + } + } + } + + test(s"ctas/rtas createOrReplace$testSuffix") { + withTable(sourceTable, targetTable) { + sql(s"CREATE TABLE $sourceTable($columns) USING delta") + withTempDirIfNecessary { location => + // Repeat two times to test both create and replace cases. + (1 to 2).foreach { _ => + createOrReplaceAsSelectClusteredTable( + "CREATE OR REPLACE", targetTable, sourceTable, clusteringColumns, location = location) + verifyClusteringColumns(targetTable, clusteringColumns, location) + } + } + } + } + } +} + +trait ClusteredTableDDLWithV2 + extends ClusteredTableDDLWithV2Base + +trait ClusteredTableDDLDataSourceV2SuiteBase + extends ClusteredTableDDLWithV2 + with ClusteredTableDDLSuite + +class ClusteredTableDDLDataSourceV2Suite + extends ClusteredTableDDLDataSourceV2SuiteBase + +class ClusteredTableDDLDataSourceV2IdColumnMappingSuite + extends ClusteredTableDDLWithIdColumnMapping + with ClusteredTableDDLWithV2 + with ClusteredTableDDLWithColumnMappingV2 + with ClusteredTableDDLSuite + +class ClusteredTableDDLDataSourceV2NameColumnMappingSuite + extends ClusteredTableDDLWithNameColumnMapping + with ClusteredTableDDLWithV2 + with ClusteredTableDDLWithColumnMappingV2 + with ClusteredTableDDLSuite diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteringProviderSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteringProviderSuite.scala new file mode 100644 index 00000000000..50b4c63683b --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteringProviderSuite.scala @@ -0,0 +1,76 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.skipping.clustering + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta.{DeltaConfigs, DeltaLog, DeltaOperations} +import org.apache.spark.sql.delta.actions.{AddFile, Metadata} +import org.apache.spark.sql.delta.actions.SingleAction._ +import org.apache.spark.sql.delta.stats.DataSkippingReader +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSparkSession + +class ClusteringProviderSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + + private def testAddFileWithSnapshotReconstructionHelper( + prefix: String)(collectFiles: DeltaLog => Seq[AddFile]): Unit = { + for (checkpointPolicy <- Seq("none", "classic", "v2")) { + test(s"$prefix - Validate clusteringProvider in snapshot reconstruction, " + + s"checkpointPolicy = $checkpointPolicy") { + val file = AddFile( + path = "path", + partitionValues = Map.empty, + size = 1, + modificationTime = 1, + dataChange = true, + clusteringProvider = Some("liquid")) + + withTempDir { dir => + val log = DeltaLog.forTable(spark, new Path(dir.getCanonicalPath)) + log.startTransaction(None).commit(Metadata() :: Nil, DeltaOperations.ManualUpdate) + log.startTransaction(None).commit(file :: Nil, DeltaOperations.ManualUpdate) + + if (checkpointPolicy != "none") { + spark.sql(s"ALTER TABLE delta.`${dir.getAbsolutePath}` SET TBLPROPERTIES " + + s"('${DeltaConfigs.CHECKPOINT_POLICY.key}' = '$checkpointPolicy')") + log.checkpoint(log.update()) + // clear cache to force the snapshot reconstruction. + DeltaLog.clearCache() + } + val files = collectFiles(log) + assert(files.size === 1) + assert(files.head.clusteringProvider === Some("liquid")) + } + } + } + } + + testAddFileWithSnapshotReconstructionHelper("Default snapshot reconstruction") { log => + log.update().allFiles.collect() + } + + testAddFileWithSnapshotReconstructionHelper("AddFile with stats") { log => + val statsDF = log.update().withStats.withColumn("stats", DataSkippingReader.nullStringLiteral) + statsDF.as[AddFile].collect() + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/stats/DataSkippingDeltaTests.scala b/spark/src/test/scala/org/apache/spark/sql/delta/stats/DataSkippingDeltaTests.scala new file mode 100644 index 00000000000..c48adab3e3d --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/stats/DataSkippingDeltaTests.scala @@ -0,0 +1,2151 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.stats + +import java.io.File + +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.AddFile +import org.apache.spark.sql.delta.metering.ScanReport +import org.apache.spark.sql.delta.schema.SchemaUtils +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.test.ScanReportHelper +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.Path +import org.scalatest.GivenWhenThen + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.SparkConf +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.QueryPlanningTracker +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, PredicateHelper} +import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + +trait DataSkippingDeltaTestsBase extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest + with PredicateHelper + with GivenWhenThen + with ScanReportHelper { + + val defaultNumIndexedCols = DeltaConfigs.DATA_SKIPPING_NUM_INDEXED_COLS.fromString( + DeltaConfigs.DATA_SKIPPING_NUM_INDEXED_COLS.defaultValue) + + import testImplicits._ + + protected def checkpointAndCreateNewLogIfNecessary(log: DeltaLog): DeltaLog = log + + protected val tableSchemaOnlyTag = org.scalatest.Tag("StatsCollectionWithTableSchemaOnly") + + /** + * Test stats collection using both the table schema and DataFrame schema (if applicable) + * TODO(lin): remove this after we remove the DELTA_COLLECT_STATS_USING_TABLE_SCHEMA flag + */ + protected override def test(testName: String, testTags: org.scalatest.Tag*) + (testFun: => Any) + (implicit pos: org.scalactic.source.Position): Unit = { + super.test(testName, testTags : _*)(testFun)(pos) + if (!testTags.contains(tableSchemaOnlyTag)) { + super.test(testName + " - old behavior with DataFrame schema", testTags: _*) { + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS_USING_TABLE_SCHEMA.key -> "false") { + testFun + } + } + } + } + + testSkipping( + "top level, single 1", + """{"a": 1}""", + hits = Seq( + "True", // trivial base case + "a = 1", + "a <=> 1", + "a >= 1", + "a <= 1", + "a <= 2", + "a >= 0", + "1 = a", + "1 <=> a", + "1 <= a", + "1 >= a", + "2 >= a", + "0 <= a", + "NOT a <=> 2" + ), + misses = Seq( + "NOT a = 1", + "NOT a <=> 1", + "a = 2", + "a <=> 2", + "a != 1", + "2 = a", + "2 <=> a", + "1 != a", + "a > 1", + "a < 1", + "a >= 2", + "a <= 0", + "1 < a", + "1 > a", + "2 <= a", + "0 >= a" + ) + ) + + testSkipping( + "nested, single 1", + """{"a": {"b": 1}}""", + hits = Seq( + "a.b = 1", + "a.b >= 1", + "a.b <= 1", + "a.b <= 2", + "a.b >= 0" + ), + misses = Seq( + "a.b = 2", + "a.b > 1", + "a.b < 1" + ) + ) + + testSkipping( + "double nested, single 1", + """{"a": {"b": {"c": 1}}}""", + hits = Seq( + "a.b.c = 1", + "a.b.c >= 1", + "a.b.c <= 1", + "a.b.c <= 2", + "a.b.c >= 0" + ), + misses = Seq( + "a.b.c = 2", + "a.b.c > 1", + "a.b.c < 1" + ) + ) + + private def longString(str: String) = str * 1000 + + testSkipping( + "long strings - long min", + s""" + {"a": '${longString("A")}'} + {"a": 'B'} + {"a": 'C'} + """, + hits = Seq( + "a like 'A%'", + s"a = '${longString("A")}'", + "a > 'BA'", + "a < 'AB'" + ), + misses = 
Seq( + "a < 'AA'", + "a > 'CD'" + ) + ) + + testSkipping( + "long strings - long max", + s""" + {"a": 'A'} + {"a": 'B'} + {"a": '${longString("C")}'} + """, + hits = Seq( + "a like 'A%'", + "a like 'C%'", + s"a = '${longString("C")}'", + "a > 'BA'", + "a < 'AB'", + "a > 'CC'" + ), + misses = Seq( + "a >= 'D'", + "a > 'CD'" + ) + ) + + testSkipping( + "starts with", + """ + {"a": 'apple'} + {"a": 'microsoft'} + """, + hits = Seq( + "a like 'a%'", + "a like 'ap%'", + "a like 'm%'", + "a like 'mic%'", + "a like '%'" + ), + misses = Seq( + "a like 'xyz%'" + ) + ) + + testSkipping( + "starts with, nested", + """ + {"a":{"b": 'apple'}} + {"a":{"b": 'microsoft'}} + """, + hits = Seq( + "a.b like 'a%'", + "a.b like 'ap%'", + "a.b like 'm%'", + "a.b like 'mic%'", + "a.b like '%'" + ), + misses = Seq( + "a.b like 'xyz%'" + ) + ) + + testSkipping( + "and statements - simple", + """ + {"a": 1} + {"a": 2} + """, + hits = Seq( + "a > 0 AND a < 3", + "a <= 1 AND a > -1" + ), + misses = Seq( + "a < 0 AND a > -2" + ) + ) + + testSkipping( + "and statements - two fields", + """ + {"a": 1, "b": "2017-09-01"} + {"a": 2, "b": "2017-08-31"} + """, + hits = Seq( + "a > 0 AND b = '2017-09-01'", + "a = 2 AND b >= '2017-08-30'", + "a >= 2 AND b like '2017-08-%'" + ), + misses = Seq( + "a > 0 AND b like '2016-%'" + ) + ) + + // One side of AND by itself still has pruning power. + testSkipping( + "and statements - one side unsupported", + """ + {"a": 10, "b": 10} + {"a": 20: "b": 20} + """, + hits = Seq( + "a % 100 < 10 AND b % 100 > 20" + ), + misses = Seq( + "a < 10 AND b % 100 > 20", + "a % 100 < 10 AND b > 20" + ) + ) + + testSkipping( + "or statements - simple", + """ + {"a": 1} + {"a": 2} + """, + hits = Seq( + "a > 0 or a < -3", + "a >= 2 or a < -1" + ), + misses = Seq( + "a > 5 or a < -2" + ) + ) + + testSkipping( + "or statements - two fields", + """ + {"a": 1, "b": "2017-09-01"} + {"a": 2, "b": "2017-08-31"} + """, + hits = Seq( + "a < 0 or b = '2017-09-01'", + "a = 2 or b < '2017-08-30'", + "a < 2 or b like '2017-08-%'", + "a >= 2 or b like '2016-08-%'" + ), + misses = Seq( + "a < 0 or b like '2016-%'" + ) + ) + + // One side of OR by itself isn't powerful enough to prune any files. + testSkipping( + "or statements - one side unsupported", + """ + {"a": 10, "b": 10} + {"a": 20: "b": 20} + """, + hits = Seq( + "a % 100 < 10 OR b > 20", + "a < 10 OR b % 100 > 20" + ), + misses = Seq( + "a < 10 OR b > 20" + ) + ) + + testSkipping( + "not statements - simple", + """ + {"a": 1} + {"a": 2} + """, + hits = Seq( + "not a < 0" + ), + misses = Seq( + "not a > 0" + ) + ) + + // NOT(AND(a, b)) === OR(NOT(a), NOT(b)) ==> One side by itself cannot prune. + testSkipping( + "not statements - and", + """ + {"a": 10, "b": 10} + {"a": 20: "b": 20} + """, + hits = Seq( + "NOT(a % 100 >= 10 AND b % 100 <= 20)", + "NOT(a >= 10 AND b % 100 <= 20)", + "NOT(a % 100 >= 10 AND b <= 20)" + ), + misses = Seq( + "NOT(a >= 10 AND b <= 20)" + ) + ) + + // NOT(OR(a, b)) === AND(NOT(a), NOT(b)) => One side by itself is enough to prune. + testSkipping( + "not statements - or", + """ + {"a": 1, "b": 10} + {"a": 2, "b": 20} + """, + hits = Seq( + "NOT(a < 1 OR b > 20)", + "NOT(a % 100 >= 1 OR b % 100 <= 20)" + ), + misses = Seq( + "NOT(a >= 1 OR b <= 20)", + "NOT(a % 100 >= 1 OR b <= 20)", + "NOT(a >= 1 OR b % 100 <= 20)" + ) + ) + + // If a column does not have stats, it does not participate in data skipping, which disqualifies + // that leg of whatever conjunct it was part of. 
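+  // A minimal toy model of that rule, kept here purely for illustration; the real logic lives in
+  // DataSkippingReader and none of these names are part of its API. Each leg of a predicate
+  // yields Some(canMatch) when stats exist for its column and None when they do not:
+  // Some(false) means the file provably contains no matches (skip it), while Some(true) or None
+  // mean it must be read. AND can still prune using only the legs it knows about, whereas OR has
+  // to give up as soon as any leg is unknown.
+  private def toyCanFileMatch(
+      legs: Seq[Option[Boolean]],
+      isConjunction: Boolean): Option[Boolean] = {
+    if (isConjunction) {
+      // Unknown legs are simply dropped; the known legs alone may still rule the file out.
+      legs.flatten.reduceOption(_ && _)
+    } else if (legs.forall(_.isDefined)) {
+      // A disjunction can only rule a file out when every leg is known to be non-matching.
+      Some(legs.flatten.reduce(_ || _))
+    } else {
+      None // at least one unknown leg => the file must be read
+    }
+  }
+
+  // The "missing stats columns" test below exercises exactly this asymmetry with stats on `a`
+  // only: `a < 1 AND b < 10` can still skip via `a`, while `a < 1 OR b < 10` cannot.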
+ testSkipping( + "missing stats columns", + """ + {"a": 1, "b": 10} + {"a": 2, "b": 20} + """, + hits = Seq( + "b < 10", // disqualified + "a < 1 OR b < 10", // a disqualified by b (same conjunct) + "a < 1 OR (a >= 1 AND b < 10)" // ==> a < 1 OR a >=1 ==> TRUE + ), + misses = Seq( + "a < 1 AND b < 10", // ==> a < 1 ==> FALSE + "a < 1 OR (a > 10 AND b < 10)" // ==> a < 1 OR a > 10 ==> FALSE + ), + indexedCols = 1 + ) + + private def generateJsonData(numCols: Int): String = { + val fields = (0 until numCols).map(i => s""""col${"%02d".format(i)}":$i""".stripMargin) + + "{" + fields.mkString(",") + "}" + } + + testSkipping( + "more columns than indexed", + generateJsonData(defaultNumIndexedCols + 1), + hits = Seq( + "col00 = 0", + s"col$defaultNumIndexedCols = $defaultNumIndexedCols", + s"col$defaultNumIndexedCols = -1" + ), + misses = Seq( + "col00 = 1" + ) + ) + + testSkipping( + "nested schema - # indexed column = 3", + """{ + "a": 1, + "b": { + "c": { + "d": 2, + "e": 3, + "f": { + "g": 4, + "h": 5, + "i": 6 + }, + "j": 7, + "k": 8 + }, + "l": 9 + }, + "m": 10 + }""".replace("\n", ""), + hits = Seq( + "a = 1", + "b.c.d = 2", + "b.c.e = 3", + // below matches due to missing stats + "b.c.f.g < 0", + "b.c.f.i < 0", + "b.l < 0"), + misses = Seq( + "a < 0", + "b.c.d < 0", + "b.c.e < 0"), + indexedCols = 3 + ) + + testSkipping( + "nested schema - # indexed column = 6", + """{ + "a": 1, + "b": { + "c": { + "d": 2, + "e": 3, + "f": { + "g": 4, + "h": 5, + "i": 6 + }, + "j": 7, + "k": 8 + }, + "l": 9 + }, + "m": 10 + }""".replace("\n", ""), + hits = Seq( + "b.c.f.i = 6", + // below matches are due to missing stats + "b.c.j < 0", + "b.c.k < 0", + "b.l < 0"), + misses = Seq( + "a < 0", + "b.c.f.i < 0" + ), + indexedCols = 6 + ) + + testSkipping( + "nested schema - # indexed column = 9", + """{ + "a": 1, + "b": { + "c": { + "d": 2, + "e": 3, + "f": { + "g": 4, + "h": 5, + "i": 6 + }, + "j": 7, + "k": 8 + }, + "l": 9 + }, + "m": 10 + }""".replace("\n", ""), + hits = Seq( + "b.c.d = 2", + "b.c.f.i = 6", + "b.l = 9", + // below matches are due to missing stats + "m < 0"), + misses = Seq( + "b.l < 0", + "b.c.f.i < 0" + ), + indexedCols = 9 + ) + + testSkipping( + "nested schema - # indexed column = 0", + """{ + "a": 1, + "b": { + "c": { + "d": 2, + "e": 3, + "f": { + "g": 4, + "h": 5, + "i": 6 + }, + "j": 7, + "k": 8 + }, + "l": 9 + }, + "m": 10 + }""".replace("\n", ""), + hits = Seq( + // all included due to missing stats + "a < 0", + "b.c.d < 0", + "b.c.f.i < 0", + "b.l < 0", + "m < 0"), + misses = Seq(), + indexedCols = 0 + ) + + testSkipping( + "indexed column names - empty list disables stats collection", + """{ + "a": 1, + "b": 2, + "c": 3, + "d": 4 + }""".replace("\n", ""), + hits = Seq( + "a < 0", + "b < 0", + "c < 0", + "d < 0" + ), + misses = Seq(), + indexedCols = 3, + deltaStatsColNamesOpt = Some(" ") + ) + + testSkipping( + "indexed column names - naming a nested column indexes all leaf fields of that column", + """{ + "a": 1, + "b": { + "c": { + "d": 2, + "e": 3, + "f": { + "g": 4, + "h": 5, + "i": 6 + }, + "j": 7, + "k": 8 + }, + "l": 9 + }, + "m": 10 + }""".replace("\n", ""), + hits = Seq( + // these all have missing stats + "a < 0", + "b.l < 0", + "m < 0" + ), + misses = Seq( + "b.c.d < 0", + "b.c.e < 0", + "b.c.f.g < 0", + "b.c.f.h < 0", + "b.c.f.i < 0", + "b.c.j < 0", + "b.c.k < 0" + ), + indexedCols = 3, + deltaStatsColNamesOpt = Some("b.c") + ) + + testSkipping( + "indexed column names - index only a subset of leaf columns", + """{ + "a": 1, + "b": { + "c": { + "d": 2, + "e": 3, + 
"f": { + "g": 4, + "h": 5, + "i": 6 + }, + "j": 7, + "k": 8 + }, + "l": 9 + }, + "m": 10 + }""".replace("\n", ""), + hits = Seq( + // these all have missing stats + "a < 0", + "b.c.d < 0", + "b.c.f.g < 0", + "b.c.f.i < 0", + "b.c.j < 0", + "m < 0" + ), + misses = Seq( + "b.c.e < 0", + "b.c.f.h < 0", + "b.c.k < 0", + "b.l < 0" + ), + indexedCols = 3, + deltaStatsColNamesOpt = Some("b.c.e, b.c.f.h, b.c.k, b.l") + ) + + testSkipping( + "indexed column names - backtick escapes work as expected", + """{ + "a": 1, + "b.c": 2, + "b": { + "c": 3, + "d": 4 + } + }""".replace("\n", ""), + hits = Seq( + "b.c < 0" + ), + misses = Seq( + "a < 0", + "`b.c` < 0", + "b.d < 0" + ), + indexedCols = 3, + deltaStatsColNamesOpt = Some("`a`, `b.c`, `b`.`d`") + ) + + testSkipping( + "boolean comparisons", + """{"a": false}""", + hits = Seq( + "!a", + "NOT a", + "a", // there is no skipping for BooleanValues + "a = false", + "NOT a = false", + "a > true", + "a <= false", + "true = a", + "true < a", + "false = a or a" + ), + misses = Seq() + ) + + // Data skipping by stats should still work even when the only data in file is null, in spite of + // the NULL min/max stats that result -- this is different to having no stats at all. + testSkipping( + "nulls - only null in file", + """ + {"a": null } + """, + schema = new StructType().add(new StructField("a", IntegerType)), + hits = Seq( + "a IS NULL", + "a = NULL", // Ideally this should not hit as it is always FALSE, but its correct to not skip + "NOT a = NULL", // Same as previous case + "a <=> NULL", // This is optimized to `IsNull(a)` by NullPropagation + "TRUE", + "FALSE", // Ideally this should not hit, but its correct to not skip + "NULL AND a = 1", // This is optimized to FALSE by ReplaceNullWithFalse, so it's same as above + "NOT a <=> 1" + ), + misses = Seq( + // stats tell us a is always NULL, so any predicate that requires non-NULL a should skip + "a IS NOT NULL", + "NOT a <=> NULL", // This is optimized to `IsNotNull(a)` + "a = 1", + "NOT a = 1", + "a > 1", + "a < 1", + "a <> 1", + "a <=> 1" + ) + ) + + testSkipping( + "nulls - null + not-null in same file", + """ + {"a": null } + {"a": 1 } + """, + schema = new StructType().add(new StructField("a", IntegerType)), + hits = Seq( + "a IS NULL", + "a IS NOT NULL", + "a = NULL", // Ideally this should not hit as it is always FALSE, but its correct to not skip + "NOT a = NULL", // Same as previous case + "a <=> NULL", // This is optimized to `IsNull(a)` by NullPropagation + "NOT a <=> NULL", // This is optimized to `IsNotNull(a)` + "a = 1", + "a <=> 1", + "TRUE", + "FALSE", // Ideally this should not hit, but its correct to not skip + "NULL AND a = 1", // This is optimized to FALSE by ReplaceNullWithFalse, so it's same as above + "NOT a <=> 1" + ), + misses = Seq( + "a <> 1", + "a > 1", + "a < 1", + "NOT a = 1" + ) + ) + + test("data skipping with missing stats") { + val tempDir = Utils.createTempDir() + Seq(1, 2, 3).toDF().write.format("delta").save(tempDir.toString) + val log = DeltaLog.forTable(spark, new Path(tempDir.toString)) + val txn = log.startTransaction() + val noStats = txn.filterFiles(Nil).map(_.copy(stats = null)) + txn.commit(noStats, DeltaOperations.ComputeStats(Nil)) + + val df = spark.read.format("delta").load(tempDir.toString) + checkAnswer(df.where("value > 0"), Seq(Row(1), Row(2), Row(3))) + } + + test("data skipping stats before and after optimize") { + val tempDir = Utils.createTempDir() + var r = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + val (numTuples, numFiles) 
= (10, 2) + val data = spark.range(0, numTuples, 1, 2).repartition(numFiles) + data.write.format("delta").save(r.dataPath.toString) + r = checkpointAndCreateNewLogIfNecessary(r) + def rStats: DataFrame = + getStatsDf(r, $"numRecords", $"minValues.id".as("id_min"), $"maxValues.id".as("id_max")) + + checkAnswer(rStats, Seq(Row(4, 0, 8), Row(6, 1, 9))) + val optimizeDf = sql(s"OPTIMIZE '$tempDir'") + checkAnswer(rStats, Seq(Row(10, 0, 9))) + } + + test("number of indexed columns") { + val numTotalCols = defaultNumIndexedCols + 5 + val path = Utils.createTempDir().getCanonicalPath + var r = DeltaLog.forTable(spark, new Path(path)) + val data = spark.range(10).select(Seq.tabulate(numTotalCols)(i => lit(i) as s"col$i"): _*) + data.coalesce(1).write.format("delta").save(r.dataPath.toString) + + def checkNumIndexedCol(numIndexedCols: Int): Unit = { + if (defaultNumIndexedCols != numTotalCols) { + setNumIndexedColumns(r.dataPath.toString, numIndexedCols) + } + data.coalesce(1).write.format("delta").mode("overwrite").save(r.dataPath.toString) + r = checkpointAndCreateNewLogIfNecessary(r) + + if (numIndexedCols == 0) { + intercept[AnalysisException] { + getStatsDf(r, $"numRecords", $"minValues.col0").first() + } + } else if (numIndexedCols < numTotalCols) { + checkAnswer( + getStatsDf(r, $"numRecords", $"minValues.col${numIndexedCols - 1}"), + Seq(Row(10, numIndexedCols - 1))) + intercept[AnalysisException] { + getStatsDf(r, $"minValues.col$numIndexedCols").first() + } + } else { + checkAnswer( + getStatsDf(r, $"numRecords", $"minValues.col${numTotalCols - 1}"), + Seq(Row(10, numTotalCols - 1))) + intercept[AnalysisException] { + getStatsDf(r, $"minValues.col$numTotalCols").first() + } + } + } + + checkNumIndexedCol(defaultNumIndexedCols) + checkNumIndexedCol(numTotalCols - 1) + checkNumIndexedCol(numTotalCols) + checkNumIndexedCol(numTotalCols + 1) + checkNumIndexedCol(0) + } + + test("remove redundant stats column references in data skipping expression") { + withTable("table") { + val colNames = (0 to 100).map(i => s"col_$i") + sql(s"""CREATE TABLE `table` (${colNames.map(x => x + " INT").mkString(", ")}) using delta""") + val conditions = colNames.map(i => s"$i != 1") + val whereClause = conditions.mkString("WHERE ", " AND ", "") + + // This query reproduces the issue raised by running TPC-DS q41. Basically the breaking + // condition is when the query involves a big boolean expression. As data skipping + // generates many redundant null checks on the non-leaf stats columns, e.g., stats + // and stats.minValues, the query complexity is amplified in the data skipping expression. + // This fix was to simply apply a distinct() on stats column references before generating + // the data skipping expression. + sql(s"select col_0 from table $whereClause").collect + } + } + + test("data skipping shouldn't use expressions involving a subquery ") { + withTable("t1", "t2") { + sql(s"CREATE TABLE t1(i int, p string) USING delta partitioned by (i)") + sql("INSERT INTO t1 SELECT 1, 'a1'") + sql("INSERT INTO t1 SELECT 2, 'a2'") + sql("INSERT INTO t1 SELECT 3, 'a3'") + sql("INSERT INTO t1 SELECT 4, 'a4'") + + sql("CREATE TABLE t2(j int, q string) USING delta") + sql("INSERT INTO t2 SELECT 1, 'b1'") + sql("INSERT INTO t2 SELECT 2, 'b2'") + + // This query would fail before the fix, i.e., when skipping considers subquery filters. 
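+      // For reference: q = 'b2' matches only the t2 row with j = 2, and the join condition
+      // i + 2 = j + 1 then admits only i = 1 from t1, hence the single expected Row(1) below.
+      // The scan-report assertions that follow check the skipping behaviour itself.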
+ checkAnswer(sql("SELECT i FROM t1 join t2 on i + 2 = j + 1 where q = 'b2'"), Row(1)) + + // Partition filter with subquery should be ignored for skipping + val r1 = getScanReport { checkAnswer( + sql("SELECT p from t1 where i in (select j from t2 where q = 'b1')"), + Seq(Row("a1"))) + } + assert(isFullScan(r1(0))) + + + // Partition filter with subquery should be ignored for skipping + val r3 = getScanReport { checkAnswer( + sql("SELECT p from t1 where i in (select j from t2 where q = 'b1') and p = 'a2'"), Nil) + } + assert(r3(0).size("scanned").rows === Some(1)) + } + } + + test("support case insensitivity for partitioning filters") { + withTable("table") { + sql(s"CREATE TABLE table(Year int, P string, Y int) USING delta partitioned by (Year)") + sql("INSERT INTO table SELECT 1999, 'a1', 1990") + sql("INSERT INTO table SELECT 1989, 'a2', 1990") + + val Seq(r1) = getScanReport { + checkAnswer(sql("SELECT * from table where year > 1990"), Row(1999, "a1", 1990)) + } + assert(!isFullScan(r1)) + + val Seq(r2) = getScanReport { + checkAnswer( + sql("SELECT * from table where year > 1990 and p = 'a1'"), Row(1999, "a1", 1990)) + } + assert(!isFullScan(r2)) + + val Seq(r3) = getScanReport { + checkAnswer(sql("SELECT * from table where p = 'a1'"), Row(1999, "a1", 1990)) + } + assert(!isFullScan(r3)) + + + checkAnswer(sql("SELECT * from table where year < y"), Row(1989, "a2", 1990)) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + intercept[AnalysisException] { + sql("SELECT * from table where year > 1990") + } + } + } + } + + test("Test file pruning metrics with data skipping") { + withTempDir { tempDir => + withTempView("t1", "t2") { + val data = spark.range(10).toDF("col1") + .withColumn("col2", 'col1./(3).cast(DataTypes.IntegerType)) + data.write.format("delta").partitionBy("col1") + .save(tempDir.getCanonicalPath) + spark.read.format("delta").load(tempDir.getAbsolutePath).createTempView("t1") + val deltaLog = DeltaLog.forTable(spark, tempDir.toString()) + + val query = "SELECT * from t1 where col1 > 5" + val Seq(r1) = getScanReport { + assert(sql(query).collect().length == 4) + } + val inputFiles = spark.sql(query).inputFiles + assert(deltaLog.snapshot.numOfFiles - inputFiles.length == 6) + + val allQuery = "SELECT * from t1" + val Seq(r2) = getScanReport { + assert(sql(allQuery).collect().length == 10) + } + } + } + } + + test("loading data from Delta to parquet should skip data") { + withTempDir { dir => + val path = dir.getCanonicalPath + spark.range(5).write.format("delta").save(path) + spark.range(5, 10).write.format("delta").mode("append").save(path) + + withTempDir { dir2 => + val path2 = dir2.getCanonicalPath + val scans = getScanReport { + spark.read.format("delta").load(path).where("id < 2") + .write.format("parquet").mode("overwrite").save(path2) + } + assert(scans.size == 1) + assert( + scans.head.size("scanned").bytesCompressed != scans.head.size("total").bytesCompressed) + } + } + } + + test("data skipping with a different DataFrame schema order", tableSchemaOnlyTag) { + withTable("table") { + sql("CREATE TABLE table (col1 Int, col2 Int, col3 Int) USING delta") + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + // Only index the first two columns + setNumIndexedColumns(r.dataPath.toString, 2) + val dataSeq = Seq((1, 2, 3)) + // We should use the table schema to create stats and the DataFrame schema should be ignored + dataSeq.toDF("col1", "col2", "col3") + .select("col2", "col3", "col1") // DataFrame schema order + .write.mode("append").format("delta") 
+ .save(r.dataPath.toString) + + var hits = Seq( + "col3 = 10", + "col1 = 1", + "col2 = 2", + "col3 = 3" + ) + var misses = Seq( + "col1 = 5", + "col1 = 5 AND col2 = 10", + "col1 = 5 and col3 = 10", + "col2 = 10", + "col2 = 5 and col3 = 10", + "col1 = 5 and col2 = 10 and col3 = 10" + ) + + checkSkipping(r, hits, misses, dataSeq.toString(), false) + + // Change the statsSchema to 3 columns. But there are only two columns in the stats from + // the file + setNumIndexedColumns(r.dataPath.toString, 3) + hits = Seq( + "col3 = 3", // 3 is in col3, but no stats + "col3 = 10", // No stats on col3 + // The data skipping filters will be generated but verifyStatsForFilter will invalidate + // the entire predicate + "col1 = 5 and col3 = 10" + ) + misses = Seq( + "col1 = 5", + "col1 = 5 AND col2 = 10" + ) + checkSkipping(r, hits, misses, dataSeq.toString(), false) + } + } + + test("data skipping with a different DataFrame schema and column name case", tableSchemaOnlyTag) { + withTable("table") { + sql("CREATE TABLE table (col1 Int, col2 Int, col3 Int) USING delta") + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + // Only index the first two columns + setNumIndexedColumns(r.dataPath.toString, 2) + val dataSeq = Seq((1, 2, 3)) + // We should use the table schema to create stats and the DataFrame schema should be ignored + dataSeq.toDF("col1", "col2", "col3") + .select("COL2", "Col3", "coL1") // DataFrame schema order + .write.mode("append").format("delta") + .save(r.dataPath.toString) + + val hits = Seq( + "col3 = 10", // No stats for col3 + // These values should be in the columns + "col1 = 1", + "col2 = 2", + "col3 = 3" + ) + val misses = Seq( + "col1 = 5", + "col1 = 5 AND col2 = 10", + "col1 = 5 and col3 = 10", + "col2 = 10", + "col2 = 5 and col3 = 10", + "col1 = 5 and col2 = 10 and col3 = 10" + ) + + checkSkipping(r, hits, misses, dataSeq.toString(), false) + } + } + + test("data skipping with a different DataFrame schema order and nested columns", + tableSchemaOnlyTag) { + withTempDir { dir => + val structureData = Seq( + Row(Row("James ", "", "Smith"), "36636", "M", 3100) + ) + + val structureDataSchema = new StructType() + .add("name", new StructType() + .add("firstname", StringType) + .add("middlename", StringType) + .add("lastname", StringType)) + .add("id", StringType) + .add("gender", StringType) + .add("salary", IntegerType) + + val data = spark.createDataFrame( + spark.sparkContext.parallelize(structureData), structureDataSchema) + + data.write.partitionBy("id").format("delta").save(dir.getAbsolutePath) + // Only index the first three columns (unnested), excluding partition column id + val deltaLog = DeltaLog.forTable(spark, new Path(dir.getCanonicalPath)) + setNumIndexedColumns(deltaLog.dataPath.toString, 3) + + val structureDfData = Seq( + // The same content as previous row but different DataFrame schema order + Row(3100, "M", Row("James ", "", "Smith"), "36636") + ) + val structureDfSchema = new StructType() + .add("salary", IntegerType) + .add("gender", StringType) + .add("name", new StructType() + .add("firstname", StringType) + .add("middlename", StringType) + .add("lastname", StringType)) + .add("id", StringType) + + // middlename is missing, but we collect NULL_COUNT for it + val df = spark.createDataFrame( + spark.sparkContext.parallelize(structureDfData), structureDfSchema) + df.write.mode("append").format("delta").save(dir.getAbsolutePath) + + val hits = Seq( + // Can't skip them since stats schema only has three columns now + "gender = 'M'", + "salary = 3100" + 
) + val misses = Seq( + "name.firstname = 'Michael'", + "name.middlename = 'L'", + "name.lastname = 'Miller'", + "id = '10000'", + "name.firstname = 'Robert' and name.middlename = ''", + "name.firstname = 'Robert' and salary = 3100" + ) + checkSkipping(deltaLog, hits, misses, structureDfData.toString(), false) + } + } + + test("compatibility with the old behavior that collect stats based on DataFrame schema", + tableSchemaOnlyTag) { + withTable("table") { + sql("CREATE TABLE table (col2 Int, col3 Int, col1 Int) USING delta") + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + // Only index the first two columns + setNumIndexedColumns(r.dataPath.toString, 2) + val dataSeq = Seq((1, 2, 3)) + // Only collect stats for col2 and col3 + dataSeq.toDF("col1", "col2", "col3") + .select("col2", "col3", "col1") // DataFrame schema order + .write.mode("append").format("delta") + .save(r.dataPath.toString) + + // Change the schema to (col1, col2, col3). The final result would be the same as using the + // old approach to collect stats based on the DataFrame schema + sql("ALTER TABLE table ALTER COLUMN col1 FIRST") + + // Since the stats schema is (col1, col2), and we only have stats on col2 and col3, only + // the predicate on col2 can be used for filters + val hits = Seq( + "col1 = 1", + "col2 = 2", + "col3 = 3", + "col1 = 5", + "col3 = 10", + "col1 = 5 AND col2 = 10", + "col1 = 5 and col3 = 10", + "col1 = 5 and col2 = 10 and col3 = 10" + ) + val misses = Seq( + "col2 = 10", + "col2 = 5 and col3 = 10" // This can pass because stats also exists on col3 + ) + + checkSkipping(r, hits, misses, dataSeq.toString(), false) + } + } + + // TODO(lin): remove this after we remove the DELTA_COLLECT_STATS_USING_TABLE_SCHEMA flag + test("old behavior with DELTA_COLLECT_STATS_USING_TABLE_SCHEMA set to false") { + // This force the system restore the old stats collection behavior based on the DataFrame schema + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS_USING_TABLE_SCHEMA.key -> "false") { + withTable("table") { + sql("CREATE TABLE table (col1 Int, col2 Int, col3 Int) USING delta") + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + // Only index the first two columns + setNumIndexedColumns(r.dataPath.toString, 2) + val dataSeq = Seq((1, 2, 3)) + // Only collect stats for col2 and col3 + dataSeq.toDF("col1", "col2", "col3") + .select("col2", "col3", "col1") // DataFrame schema order + .write.mode("append").format("delta") + .save(r.dataPath.toString) + + // Since the stats schema is (col1, col2), and we only have stats on col2 and col3, only + // the predicate on col2 can be used for filters + val hits = Seq( + "col1 = 1", + "col2 = 2", + "col3 = 3", + "col1 = 5", + "col3 = 10", + "col1 = 5 AND col2 = 10", + "col1 = 5 and col3 = 10", + "col1 = 5 and col2 = 10 and col3 = 10" + ) + val misses = Seq( + "col2 = 10", + "col2 = 5 and col3 = 10" // This can pass because stats also exists on col3 + ) + + checkSkipping(r, hits, misses, dataSeq.toString(), false) + } + } + } + + test("data skipping with missing columns in DataFrame", tableSchemaOnlyTag) { + // case-1: dataframe schema has less columns than the dataSkippingNumIndexedCols + withTable("table") { + sql("CREATE TABLE table (a Int, b Int, c Int, d Int, e Int) USING delta PARTITIONED BY(b)") + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + // Only index the first three columns, excluding partition column b + setNumIndexedColumns(r.dataPath.toString, 3) + val dataSeq = Seq((1, 2, 3, 4, 5)) + + dataSeq.toDF("a", "b", 
"c", "d", "e") + .select("a", "b") // DataFrame schema order + .write.mode("append").format("delta") + .save(r.dataPath.toString) + + val hits = Seq( + // These values are in the table + "a = 1", + "b = 2", + "c <=> null", + "d is null", + // No stats for e + "e = 10" + ) + val misses = Seq( + "a = 10", + "b = 10", + "c = 10", + "c is not null", + "d = 10", + "isnotnull(d)" + ) + checkSkipping(r, hits, misses, dataSeq.toString(), false) + } + + // case-2: dataframe schema lacks columns that are supposed to be part of the stats schema, + // but has an additional column that should not collect stats on + withTable("table") { + sql("CREATE TABLE table (a Int, b Int, c Int, d Int, e Int) USING delta PARTITIONED BY(b)") + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + // Only index the first three columns, excluding partition column b + setNumIndexedColumns(r.dataPath.toString, 3) + val dataSeq = Seq((1, 2, 3, 4, 5)) + + dataSeq.toDF("a", "b", "c", "d", "e") + .select("a", "b", "d", "e") // DataFrame schema order + .write.mode("append").format("delta") + .save(r.dataPath.toString) + + val hits = Seq( + "a = 1", // In table + "isnull(c)", // In table + "e = 20" // No stats + ) + val misses = Seq( + "a = 20", + "b = 20", + "c = 20", + "d = 20", + "a = 20 and c = 20", + "a = 20 and e = 20" + ) + checkSkipping(r, hits, misses, dataSeq.toString(), false) + } + + // case-3: Structured data with some columns missing and some additional columns + withTempDir { dir => + val structureData = Seq( + Row(Row("James ", "", "Smith"), "36636", "M", 3100) + ) + + val structureDataSchema = new StructType() + .add("name", new StructType() + .add("firstname", StringType) + .add("middlename", StringType) + .add("lastname", StringType)) + .add("id", StringType) + .add("gender", StringType) + .add("salary", IntegerType) + + val data = spark.createDataFrame( + spark.sparkContext.parallelize(structureData), structureDataSchema) + + data.write.partitionBy("id").format("delta").save(dir.getAbsolutePath) + // Only index the first three columns (unnested), excluding partition column id + val deltaLog = DeltaLog.forTable(spark, new Path(dir.getCanonicalPath)) + setNumIndexedColumns(deltaLog.dataPath.toString, 3) + + val structureDfData = Seq( + Row(2000, Row("Robert ", "Johnson"), "40000") + ) + val structureDfSchema = new StructType() + .add("salary", IntegerType) + .add("name", new StructType() + .add("firstname", StringType) + .add("lastname", StringType)) + .add("id", StringType) + + // middlename is missing, but we collect NULL_COUNT for it + val df = spark.createDataFrame( + spark.sparkContext.parallelize(structureDfData), structureDfSchema) + df.write.mode("append").format("delta").save(dir.getAbsolutePath) + + val hits = Seq( + "gender = 'M'", // No stats + "salary = 1000" // No stats + ) + val misses = Seq( + "name.firstname = 'Michael'", + "name.middlename = 'L'", + "name.lastname = 'Miller'", + "id = '10000'", + "name.firstname = 'Robert' and name.middlename = 'L'" + ) + checkSkipping(deltaLog, hits, misses, structureDfData.toString(), false) + } + + // case-4: dataframe schema does not have any columns within the first + // dataSkippingNumIndexedCols columns of the table schema + withTable("table") { + sql("CREATE TABLE table (a Int, b Int, c Int, d Int, e Int) USING delta") + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + // Only index the first three columns + setNumIndexedColumns(r.dataPath.toString, 3) + val dataSeq = Seq((1, 2, 3, 4, 5)) + + dataSeq.toDF("a", "b", "c", 
"d", "e") + .select("d", "e") // DataFrame schema order + .write.mode("append").format("delta") + .save(r.dataPath.toString) + + val hits = Seq( + "d = 40", // No stats + "e = 40" // No stats + ) + // We can still collect NULL_COUNT for a, b, and c + val misses = Seq( + "a = 40", + "b = 40", + "c = 40" + ) + checkSkipping(r, hits, misses, dataSeq.toString(), false) + } + + // case-5: The first dataSkippingNumIndexedCols columns of the table schema has map or array + // types, which we only collect NULL_COUNT + withTable("table") { + sql("CREATE TABLE table (a Int, b Map, c Array, d Int, e Int)" + + " USING delta") + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + // Only index the first three columns + setNumIndexedColumns(r.dataPath.toString, 3) + val dataSeq = Seq((1, Map("key" -> 2), Seq(3, 3, 3), 4, 5)) + + dataSeq.toDF("a", "b", "c", "d", "e") + .select("b", "c", "d") // DataFrame schema order + .write.mode("append").format("delta") + .save(r.dataPath.toString) + + val hits = Seq( + "d = 50", // No stats + "e = 50", // No stats + // No min/max stats for c. We couldn't check = for b since EqualTo does not support + // ordering on type maP + "c = array(50, 50)", + // b and c should have NULL_COUNT stats, but currently they're not SkippingEligibleColumn + // (since they're not AtomicType), we couldn't skip for them + "isnull(b)", + "c is null" + ) + val misses = Seq( + // a has NULL_COUNT stats since it's missing from DataFrame schema + "a = 50" + ) + checkSkipping(r, hits, misses, dataSeq.toString(), false) + } + } + + + test("data skipping with generated column") { + withTable("table") { + // OSS does not support the generated column syntax in SQL so we have to use table builder + val tableBuilder = io.delta.tables.DeltaTable.create(spark).tableName("table") + // add regular columns + val col1 = io.delta.tables.DeltaTable.columnBuilder(spark, "col1") + .dataType("int") + .build() + val col2 = io.delta.tables.DeltaTable.columnBuilder(spark, "col2") + .dataType("string") + .build() + // add generated column + val genCol3 = io.delta.tables.DeltaTable.columnBuilder(spark, "genCol3") + .dataType("string") + .generatedAlwaysAs("substring(col2, 3, 2)") + .build() + + tableBuilder + .addColumn(col1) + .addColumn(col2) + .addColumn(genCol3) + .execute() + // Only pass in two columns, and col3 will be generated as "st" + val tableData = Seq((1, "test string")) + tableData.toDF("col1", "col2") + .write.format("delta").mode("append") + .saveAsTable("table") + + val hits = Seq( + "genCol3 = 'st'" + ) + val misses = Seq( + "col1 = 10", + "col2 = 'test'", + "genCol3 = 'test'" + ) + + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + checkSkipping(r, hits, misses, tableData.toString(), false) + } + } + + test("data skipping by partitions and data values - nulls") { + val tableDir = Utils.createTempDir().getAbsolutePath + val dataSeqs = Seq( // each sequence produce a single file + Seq((null, null)), + Seq((null, "a")), + Seq((null, "b")), + Seq(("a", "a"), ("a", null)), + Seq(("b", null)) + ) + dataSeqs.foreach { seq => + seq.toDF("key", "value").coalesce(1) + .write.format("delta").partitionBy("key").mode("append").save(tableDir) + } + val allData = dataSeqs.flatten + + def checkResults( + predicate: String, + expResults: Seq[(String, String)], + expNumPartitions: Int, + expNumFiles: Long): Unit = + checkResultsWithPartitions(tableDir, predicate, expResults, expNumPartitions, expNumFiles) + + // Trivial base case + checkResults( + predicate = "True", + expResults = 
allData, + expNumPartitions = 3, + expNumFiles = 5) + + // Conditions on partition key + checkResults( + predicate = "key IS NULL", + expResults = allData.filter(_._1 == null), + expNumPartitions = 1, + expNumFiles = 3) // 3 files with key = null + + checkResults( + predicate = "key IS NOT NULL", + expResults = allData.filter(_._1 != null), + expNumPartitions = 2, + expNumFiles = 2) // 2 files with key = 'a', and 1 file with key = 'b' + + checkResults( + predicate = "key <=> NULL", + expResults = allData.filter(_._1 == null), + expNumPartitions = 1, + expNumFiles = 3) // 3 files with key = null + + checkResults( + predicate = "key = 'a'", + expResults = allData.filter(_._1 == "a"), + expNumPartitions = 1, + expNumFiles = 1) // 1 files with key = 'a' + + checkResults( + predicate = "key <=> 'a'", + expResults = allData.filter(_._1 == "a"), + expNumPartitions = 1, + expNumFiles = 1) // 1 files with key <=> 'a' + + checkResults( + predicate = "key = 'b'", + expResults = allData.filter(_._1 == "b"), + expNumPartitions = 1, + expNumFiles = 1) // 1 files with key = 'b' + + checkResults( + predicate = "key <=> 'b'", + expResults = allData.filter(_._1 == "b"), + expNumPartitions = 1, + expNumFiles = 1) // 1 files with key <=> 'b' + + // Conditions on partitions keys and values + checkResults( + predicate = "value IS NULL", + expResults = allData.filter(_._2 == null), + expNumPartitions = 3, + expNumFiles = 3) // files with all non-NULL values get skipped + + checkResults( + predicate = "value IS NOT NULL", + expResults = allData.filter(_._2 != null), + expNumPartitions = 2, // one of the partitions has no files left after data skipping + expNumFiles = 3) // files with all NULL values get skipped + + checkResults( + predicate = "value <=> NULL", + expResults = allData.filter(_._2 == null), + expNumPartitions = 3, + expNumFiles = 3) // same as IS NULL case above + + checkResults( + predicate = "value = 'a'", + expResults = allData.filter(_._2 == "a"), + expNumPartitions = 2, // one partition has no files left after data skipping + expNumFiles = 2) // only two files contain "a" + + checkResults( + predicate = "value <=> 'a'", + expResults = allData.filter(_._2 == "a"), + expNumPartitions = 2, // one partition has no files left after data skipping + expNumFiles = 2) // only two files contain "a" + + checkResults( + predicate = "value <> 'a'", + expResults = allData.filter(x => x._2 != "a" && x._2 != null), // i.e., only (null, b) + expNumPartitions = 1, + expNumFiles = 1) // only one file contains 'b' + + checkResults( + predicate = "value = 'b'", + expResults = allData.filter(_._2 == "b"), + expNumPartitions = 1, + expNumFiles = 1) // same as previous case + + checkResults( + predicate = "value <=> 'b'", + expResults = allData.filter(_._2 == "b"), + expNumPartitions = 1, + expNumFiles = 1) // same as previous case + + // Conditions on both, partition keys and values + checkResults( + predicate = "key IS NULL AND value = 'a'", + expResults = Seq((null, "a")), + expNumPartitions = 1, + expNumFiles = 1) // only one file in the partition has (*, "a") + + checkResults( + predicate = "key IS NOT NULL AND value IS NOT NULL", + expResults = Seq(("a", "a")), + expNumPartitions = 1, + expNumFiles = 1) // 1 file with (*, a) + + checkResults( + predicate = "key <=> NULL AND value <=> NULL", + expResults = Seq((null, null)), + expNumPartitions = 1, + expNumFiles = 1) // 3 files with key = null, but only 1 with val = null. 
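+    // Recap of the five files written above (one file per inner Seq of dataSeqs):
+    //   f1: (null, null)   f2: (null, 'a')   f3: (null, 'b')
+    //   f4: ('a', 'a'), ('a', null)          f5: ('b', null)
+    // A disjunction can only skip a file when both sides are provably false for every row;
+    // every file here has either a null key or at least one null value, so the OR check below
+    // has to read all five files.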
+ + checkResults( + predicate = "key <=> NULL OR value <=> NULL", + expResults = allData.filter(_ != (("a", "a"))), + expNumPartitions = 3, + expNumFiles = 5) // all 5 files + } + + // Note that we cannot use testSkipping here, because the JSON parsing bug we're working around + // prevents specifying a microsecond timestamp as input data. + for (timestampType <- Seq("TIMESTAMP", "TIMESTAMP_NTZ")) { + test(s"data skipping on $timestampType") { + val data = "2019-09-09 01:02:03.456789" + val df = Seq(data).toDF("strTs") + .selectExpr( + s"CAST(strTs AS $timestampType) AS ts", + s"STRUCT(CAST(strTs AS $timestampType) AS ts) AS nested") + + val tempDir = Utils.createTempDir() + val r = DeltaLog.forTable(spark, tempDir) + df.coalesce(1).write.format("delta").save(r.dataPath.toString) + + // Check to ensure that the value actually in the file is always in range queries. + val hits = Seq( + s"""ts >= cast("2019-09-09 01:02:03.456789" AS $timestampType)""", + s"""ts <= cast("2019-09-09 01:02:03.456789" AS $timestampType)""", + s"""nested.ts >= cast("2019-09-09 01:02:03.456789" AS $timestampType)""", + s"""nested.ts <= cast("2019-09-09 01:02:03.456789" AS $timestampType)""", + s"""TS >= cast("2019-09-09 01:02:03.456789" AS $timestampType)""", + s"""nEstED.tS >= cast("2019-09-09 01:02:03.456789" AS $timestampType)""") + + // Check the range of values that are far enough away to be data skipped. Note that the values + // are aligned with millisecond boundaries because of the JSON serialization truncation. + val misses = Seq( + s"""ts >= cast("2019-09-09 01:02:03.457001" AS $timestampType)""", + s"""ts <= cast("2019-09-04 01:02:03.455999" AS $timestampType)""", + s"""nested.ts >= cast("2019-09-09 01:02:03.457001" AS $timestampType)""", + s"""nested.ts <= cast("2019-09-09 01:02:03.455999" AS $timestampType)""", + s"""TS >= cast("2019-09-09 01:02:03.457001" AS $timestampType)""", + s"""nEstED.tS >= cast("2019-09-09 01:02:03.457001" AS $timestampType)""") + + hits.foreach { predicate => + Given(predicate) + if (filesRead(r, predicate) != 1) { + failPretty(s"Expected hit but got miss for $predicate", predicate, data) + } + } + + misses.foreach { predicate => + Given(predicate) + if (filesRead(r, predicate) != 0) { + failPretty(s"Expected miss but got hit for $predicate", predicate, data) + } + } + } + } + + test("Ensure that we don't reuse scans when tables are different") { + withTempDir { dir => + val table1 = new File(dir, "tbl1") + val table1Dir = table1.getCanonicalPath + val table2 = new File(dir, "tbl2") + val table2Dir = table2.getCanonicalPath + spark.range(100).withColumn("part", 'id % 5).withColumn("id2", 'id) + .write.format("delta").partitionBy("part").save(table1Dir) + + FileUtils.copyDirectory(table1, table2) + + sql(s"DELETE FROM delta.`$table2Dir` WHERE part = 0 and id < 65") + + val query = sql(s"SELECT * FROM delta.`$table1Dir` WHERE part = 0 AND id2 < 85 AND " + + s"id NOT IN (SELECT id FROM delta.`$table2Dir` WHERE part = 0 AND id2 < 85)") + + checkAnswer( + query, + sql(s"SELECT * FROM delta.`$table1Dir` WHERE part = 0 and id < 65")) + } + } + + test("Data skipping should always return files from latest commit version") { + withTempDir { dir => + // If this test is flacky it is broken + Seq("aaa").toDF().write.format("delta").save(dir.getCanonicalPath) + val (log, snapshot) = DeltaLog.forTableWithSnapshot(spark, dir.getPath) + val addFile = snapshot.allFiles.collect().head + val fileWithStat = snapshot.getSpecificFilesWithStats(Seq(addFile.path)).head + // Ensure the stats has 
actual stats, not {} + assert(fileWithStat.stats.size > 2) + log.startTransaction().commitManually(addFile.copy(stats = "{}")) + + // Delta dedup should always keep AddFile from newer version so + // getSpecificFilesWithStats should return the AddFile with empty stats + log.update() + val newfileWithStat = + log.unsafeVolatileSnapshot.getSpecificFilesWithStats(Seq(addFile.path)).head + assert(newfileWithStat.stats === "{}") + } + } + + Seq("create", "alter").foreach { label => + test(s"Basic: Data skipping with delta statistic column $label") { + withTable("table") { + val tableProperty = if (label == "create") { + "TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c1,c2,c3,c4,c5,c6,c9')" + } else { + "" + } + sql( + s"""CREATE TABLE table( + |c1 long, c2 STRING, c3 FLOAT, c4 DOUBLE, c5 TIMESTAMP, c6 DATE, + |c7 BINARY, c8 BOOLEAN, c9 DECIMAL(3, 2) + |) USING delta $tableProperty""".stripMargin) + if (label == "alter") { + sql( + s"""ALTER TABLE table + |SET TBLPROPERTIES ( + | 'delta.dataSkippingStatsColumns' = 'c1,c2,c3,c4,c5,c6,c9' + |)""".stripMargin) + } + sql( + """insert into table values + |(1, '1', 1.0, 1.0, TIMESTAMP'2001-01-01 01:00', DATE'2001-01-01', '1111', true, 1.0), + |(2, '2', 2.0, 2.0, TIMESTAMP'2002-02-02 02:00', DATE'2002-02-02', '2222', false, 2.0) + |""".stripMargin).count() + val hits = Seq( + "c1 = 1", + "c2 = \'2\'", + "c3 < 1.5", + "c4 > 1.0", + "c5 >= \"2001-01-01 01:00:00\"", + "c6 = \"2002-02-02\"", + "c7 = HEX(\"1111\")", // Binary Column doesn't support delta statistics. + "c7 = HEX(\"3333\")", // Binary Column doesn't support delta statistics. + "c8 = true", + "c8 = false", + "c9 > 1.5" + ) + val misses = Seq( + "c1 = 10", + "c2 = \'4\'", + "c3 < 0.5", + "c4 > 5.0", + "c5 >= \"2003-01-01 01:00:00\"", + "c6 = \"2003-02-02\"", + "c9 > 2.5" + ) + val dataSeq = Seq( + (1L, "1", 1.0f, 1.0d, "2002-01-01 01:00", "2001-01-01", "1111", true, 1.0f), + (2L, "2", 2.0f, 2.0d, "2002-02-02 02:00", "2002-02-02", "2222", false, 2.0f) + ) + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + checkSkipping(r, hits, misses, dataSeq.toString(), false) + } + } + } + + test(s"Data skipping with delta statistic column rename column") { + withTable("table") { + sql( + s"""CREATE TABLE table( + |c1 long, c2 STRING, c3 FLOAT, c4 DOUBLE, c5 TIMESTAMP, c6 DATE, + |c7 BINARY, c8 BOOLEAN, c9 DECIMAL(3, 2) + |) USING delta + |TBLPROPERTIES( + |'delta.dataSkippingStatsColumns' = 'c1,c2,c3,c4,c5,c6,c9', + |'delta.columnMapping.mode' = 'name', + |'delta.minReaderVersion' = '2', + |'delta.minWriterVersion' = '5' + |) + |""".stripMargin) + (1 to 9).foreach { i => + sql(s"alter table table RENAME COLUMN c$i to cc$i") + } + val newConfiguration = sql("SHOW TBLPROPERTIES table ") + .collect() + .map { row => + row.getString(0) -> row.getString(1) + } + .filter(_._1 == "delta.dataSkippingStatsColumns") + .toSeq + assert( + newConfiguration == Seq( + ("delta.dataSkippingStatsColumns", "cc1,cc2,cc3,cc4,cc5,cc6,cc9")) + ) + sql( + """insert into table values + |(1, '1', 1.0, 1.0, TIMESTAMP'2001-01-01 01:00', DATE'2001-01-01', '1111', true, 1.0), + |(2, '2', 2.0, 2.0, TIMESTAMP'2002-02-02 02:00', DATE'2002-02-02', '2222', false, 2.0) + |""".stripMargin).count() + val hits = Seq( + "cc1 = 1", + "cc2 = \'2\'", + "cc3 < 1.5", + "cc4 > 1.0", + "cc5 >= \"2001-01-01 01:00:00\"", + "cc6 = \"2002-02-02\"", + "cc7 = HEX(\"1111\")", // Binary Column doesn't support delta statistics. + "cc7 = HEX(\"3333\")", // Binary Column doesn't support delta statistics. 
+ "cc8 = true", + "cc8 = false", + "cc9 > 1.5" + ) + val misses = Seq( + "cc1 = 10", + "cc2 = \'4\'", + "cc3 < 0.5", + "cc4 > 5.0", + "cc5 >= \"2003-01-01 01:00:00\"", + "cc6 = \"2003-02-02\"", + "cc9 > 2.5" + ) + val dataSeq = Seq( + (1L, "1", 1.0f, 1.0d, "2002-01-01 01:00", "2001-01-01", "1111", true, 1.0f), + (2L, "2", 2.0f, 2.0d, "2002-02-02 02:00", "2002-02-02", "2222", false, 2.0f) + ) + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + checkSkipping(r, hits, misses, dataSeq.toString(), false) + } + } + + test(s"Data skipping with delta statistic column drop column") { + withTable("table") { + sql( + s"""CREATE TABLE table( + |c1 long, c2 STRING, c3 FLOAT, c4 DOUBLE, c5 TIMESTAMP, c6 DATE, + |c7 BINARY, c8 BOOLEAN, c9 DECIMAL(3, 2) + |) USING delta + |TBLPROPERTIES( + |'delta.dataSkippingStatsColumns' = 'c1,c2,c3,c4,c5,c6,c9', + |'delta.columnMapping.mode' = 'name', + |'delta.minReaderVersion' = '2', + |'delta.minWriterVersion' = '5' + |) + |""".stripMargin) + sql(s"alter table table drop COLUMN c2") + sql(s"alter table table drop COLUMN c7") + sql(s"alter table table drop COLUMN c8") + val newConfiguration = sql("SHOW TBLPROPERTIES table ") + .collect() + .map { row => + row.getString(0) -> row.getString(1) + } + .filter(_._1 == "delta.dataSkippingStatsColumns") + .toSeq + assert(newConfiguration == Seq(("delta.dataSkippingStatsColumns", "c1,c3,c4,c5,c6,c9"))) + sql( + """insert into table values + |(1, 1.0, 1.0, TIMESTAMP'2001-01-01 01:00', DATE'2001-01-01', 1.0), + |(2, 2.0, 2.0, TIMESTAMP'2002-02-02 02:00', DATE'2002-02-02', 2.0) + |""".stripMargin).count() + val hits = Seq( + "c1 = 1", + "c3 < 1.5", + "c4 > 1.0", + "c5 >= \"2001-01-01 01:00:00\"", + "c6 = \"2002-02-02\"", + "c9 > 1.5" + ) + val misses = Seq( + "c1 = 10", + "c3 < 0.5", + "c4 > 5.0", + "c5 >= \"2003-01-01 01:00:00\"", + "c6 = \"2003-02-02\"", + "c9 > 2.5" + ) + val dataSeq = Seq( + (1L, 1.0f, 1.0d, "2002-01-01 01:00", "2001-01-01", 1.0f), + (2L, 2.0f, 2.0d, "2002-02-02 02:00", "2002-02-02", 2.0f) + ) + val r = DeltaLog.forTable(spark, new TableIdentifier("table")) + checkSkipping(r, hits, misses, dataSeq.toString(), false) + } + } + + protected def expectedStatsForFile(index: Int, colName: String, deltaLog: DeltaLog): String = { + s"""{"numRecords":1,"minValues":{"$colName":$index},"maxValues":{"$colName":$index},""" + + s""""nullCount":{"$colName":0}}""".stripMargin + } + + test("data skipping get specific files with Stats API") { + withTempDir { tempDir => + val tableDirPath = tempDir.getCanonicalPath + + val fileCount = 5 + // Create 5 files each having 1 row - x=1/x=2/x=3/x=4/x=5 + val data = spark.range(1, fileCount).toDF("x").repartition(fileCount, col("x")) + data.write.format("delta").save(tableDirPath) + + var deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + // Get name of file corresponding to row x=1 + val file1 = getFilesRead(deltaLog, "x = 1").head.path + // Get name of file corresponding to row x=2 + val file2 = getFilesRead(deltaLog, "x = 2").head.path + // Get name of file corresponding to row x=3 + val file3 = getFilesRead(deltaLog, "x = 3").head.path + + deltaLog = checkpointAndCreateNewLogIfNecessary(deltaLog) + // Delete rows/files for x >= 3 from snapshot + sql(s"DELETE FROM delta.`$tableDirPath` WHERE x >= 3") + // Add another file with just one row x=6 in snapshot + sql(s"INSERT INTO delta.`$tableDirPath` VALUES (6)") + + // We want the file from the INSERT VALUES (6) stmt. 
However, this `getFilesRead` call might + // also return the AddFile (due to data file re-writes) from the DELETE stmt above. Since + // they were committed in different commits, we can select the addFile with the higher + // version + val addPathToCommitVersion = deltaLog.getChanges(0).flatMap { + case (version, actions) => actions + .collect { case a: AddFile => a } + .map(a => (a.path, version)) + }.toMap + + val file6 = getFilesRead(deltaLog, "x = 6") + .map(_.path) + .maxBy(path => addPathToCommitVersion(path)) + + // At this point, our latest snapshot has only 3 rows: x=1, x=2, x=6 - all in + // different files + + // Case-1: all passes files to the API exists in the snapshot + val result1 = deltaLog.snapshot.getSpecificFilesWithStats(Seq(file1, file2)) + .map(addFile => (addFile.path, addFile)).toMap + assert(result1.size == 2) + assert(result1.keySet == Set(file1, file2)) + assert(result1(file1).stats === expectedStatsForFile(1, "x", deltaLog)) + assert(result1(file2).stats === expectedStatsForFile(2, "x", deltaLog)) + + // Case-2: few passes files exists in the snapshot and few don't exists + val result2 = deltaLog.snapshot.getSpecificFilesWithStats(Seq(file1, file2, file3)) + .map(addFile => (addFile.path, addFile)).toMap + assert(result1 == result2) + + // Case-3: all passed files don't exists in the snapshot + val result3 = deltaLog.snapshot.getSpecificFilesWithStats(Seq(file3, "xyz")) + assert(result3.isEmpty) + + // Case-4: file3 doesn't exist and file6 exists in the latest commit + val result4 = deltaLog.snapshot.getSpecificFilesWithStats(Seq(file3, file6)) + .map(addFile => (addFile.path, addFile)).toMap + assert(result4.size == 1) + assert(result4(file6).stats == expectedStatsForFile(6, "x", deltaLog)) + } + } + + protected def parse(deltaLog: DeltaLog, predicate: String): Seq[Expression] = { + + // We produce a wrong filter in this case otherwise + if (predicate == "True") return Seq(Literal.TrueLiteral) + + val filtered = spark.read.format("delta").load(deltaLog.dataPath.toString).where(predicate) + filtered + .queryExecution + .optimizedPlan + .expressions + .flatMap(splitConjunctivePredicates) + } + + /** + * Returns the number of files that should be included in a scan after applying the given + * predicate on a snapshot of the Delta log. + * + * @param deltaLog Delta log for a table. + * @param predicate Predicate to run on the Delta table. + * @param checkEmptyUnusedFilters If true, check if there were no unused filters, meaning + * the given predicate was used as data or partition filters. + * @return The number of files that should be included in a scan after applying the predicate. + */ + protected def filesRead( + deltaLog: DeltaLog, + predicate: String, + checkEmptyUnusedFilters: Boolean = false): Int = + getFilesRead(deltaLog, predicate, checkEmptyUnusedFilters).size + + /** + * Returns the files that should be included in a scan after applying the given predicate on + * a snapshot of the Delta log. + * @param deltaLog Delta log for a table. + * @param predicate Predicate to run on the Delta table. + * @param checkEmptyUnusedFilters If true, check if there were no unused filters, meaning + * the given predicate was used as data or partition filters. + * @return The files that should be included in a scan after applying the predicate. 
+ */ + protected def getFilesRead( + deltaLog: DeltaLog, + predicate: String, + checkEmptyUnusedFilters: Boolean = false): Seq[AddFile] = { + val parsed = parse(deltaLog, predicate) + val res = deltaLog.snapshot.filesForScan(parsed) + assert(res.total.files.get == deltaLog.snapshot.numOfFiles) + assert(res.total.bytesCompressed.get == deltaLog.snapshot.sizeInBytes) + assert(res.scanned.files.get == res.files.size) + assert(res.scanned.bytesCompressed.get == res.files.map(_.size).sum) + assert(!checkEmptyUnusedFilters || res.unusedFilters.isEmpty) + res.files + } + + protected def checkResultsWithPartitions( + tableDir: String, + predicate: String, + expResults: Seq[(String, String)], + expNumPartitions: Int, + expNumFiles: Long): Unit = { + Given(predicate) + val df = spark.read.format("delta").load(tableDir).where(predicate) + checkAnswer(df, expResults.toDF()) + + val files = getFilesRead(DeltaLog.forTable(spark, tableDir), predicate) + assert(files.size == expNumFiles, "# files incorrect:\n\t" + files.mkString("\n\t")) + + val partitionValues = files.map(_.partitionValues).distinct + assert(partitionValues.size == expNumPartitions, + "# partitions incorrect:\n\t" + partitionValues.mkString("\n\t")) + } + + protected def getStatsDf(deltaLog: DeltaLog, columns: Column*): DataFrame = { + deltaLog.snapshot.withStats.select("stats.*").select(columns: _*) + } + + protected def failPretty(error: String, predicate: String, data: String) = { + fail( + s"""$error + | + |== Data == + |$data + """.stripMargin) + } + + protected def setNumIndexedColumns(path: String, numIndexedCols: Int): Unit = { + sql(s""" + |ALTER TABLE delta.`$path` + |SET TBLPROPERTIES ( + | 'delta.dataSkippingNumIndexedCols' = '$numIndexedCols' + |)""".stripMargin) + } + + protected def setDeltaStatsColumns(path: String, deltaStatsColumns: String): Unit = { + sql(s""" + |ALTER TABLE delta.`$path` + |SET TBLPROPERTIES ( + | 'delta.dataSkippingStatsColumns' = '$deltaStatsColumns' + |)""".stripMargin) + } + + private def isFullScan(report: ScanReport): Boolean = { + report.size("scanned").bytesCompressed === report.size("total").bytesCompressed + } + + protected def checkSkipping( + log: DeltaLog, + hits: Seq[String], + misses: Seq[String], + data: String, + checkEmptyUnusedFiltersForHits: Boolean): Unit = { + hits.foreach { predicate => + Given(predicate) + if (filesRead(log, predicate, checkEmptyUnusedFiltersForHits) == 0) { + failPretty(s"Expected hit but got miss for $predicate", predicate, data) + } + } + + misses.foreach { predicate => + Given(predicate) + if (filesRead(log, predicate) != 0) { + failPretty(s"Expected miss but got hit for $predicate", predicate, data) + } + } + val schemaDiff = SchemaUtils.reportDifferences( + log.snapshot.statsSchema.asNullable, + log.snapshot.statsSchema) + if (schemaDiff.nonEmpty) { + fail(s"The stats schema should be nullable. 
Differences:\n${schemaDiff.mkString("\n")}") + } + } + protected def getDataSkippingConfs( + indexedCols: Int, + deltaStatsColNamesOpt: Option[String]): TraversableOnce[(String, String)] = { + val numIndexedColsConfOpt = Option(indexedCols) + .filter(_ != defaultNumIndexedCols) + .map(DeltaConfigs.DATA_SKIPPING_NUM_INDEXED_COLS.defaultTablePropertyKey -> _.toString) + val indexedColNamesConfOpt = deltaStatsColNamesOpt + .map(DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS.defaultTablePropertyKey -> _) + numIndexedColsConfOpt ++ indexedColNamesConfOpt + } + + protected def testSkipping( + name: String, + data: String, + schema: StructType = null, + hits: Seq[String], + misses: Seq[String], + sqlConfs: Seq[(String, String)] = Nil, + indexedCols: Int = defaultNumIndexedCols, + deltaStatsColNamesOpt: Option[String] = None, + checkEmptyUnusedFiltersForHits: Boolean = false, + exceptionOpt: Option[Throwable] = None): Unit = { + test(s"data skipping by stats - $name") { + val allSQLConfs = sqlConfs ++ getDataSkippingConfs(indexedCols, deltaStatsColNamesOpt) + withSQLConf(allSQLConfs: _*) { + val jsonRecords = data.split("\n").toSeq + val reader = spark.read + if (schema != null) { reader.schema(schema) } + val df = reader.json(jsonRecords.toDS()) + + val tempDir = Utils.createTempDir() + val r = DeltaLog.forTable(spark, tempDir) + df.coalesce(1).write.format("delta").save(r.dataPath.toString) + + exceptionOpt.map { exception => + val except = intercept[Throwable] { + deltaStatsColNamesOpt.foreach { deltaStatsColNames => + setDeltaStatsColumns(r.dataPath.toString, deltaStatsColNames) + df.coalesce(1).write.format("delta").mode("overwrite").save(r.dataPath.toString) + if (indexedCols != defaultNumIndexedCols) { + setNumIndexedColumns(r.dataPath.toString, indexedCols) + df.coalesce(1).write.format("delta").mode("overwrite").save(r.dataPath.toString) + } + checkSkipping(r, hits, misses, data, checkEmptyUnusedFiltersForHits) + } + } + assert(except.getClass == exception.getClass && + except.getMessage.contains(exception.getMessage)) + }.getOrElse { + if (indexedCols != defaultNumIndexedCols) { + setNumIndexedColumns(r.dataPath.toString, indexedCols) + df.coalesce(1).write.format("delta").mode("overwrite").save(r.dataPath.toString) + } + deltaStatsColNamesOpt.foreach { deltaStatsColNames => + setDeltaStatsColumns(r.dataPath.toString, deltaStatsColNames) + df.coalesce(1).write.format("delta").mode("overwrite").save(r.dataPath.toString) + } + checkSkipping(r, hits, misses, data, checkEmptyUnusedFiltersForHits) + } + } + } + } +} + +trait DataSkippingDeltaTests extends DataSkippingDeltaTestsBase +/** Tests code paths within DataSkippingReader.scala */ +class DataSkippingDeltaV1Suite extends DataSkippingDeltaTests +{ + import testImplicits._ + + test("data skipping flags") { + val tempDir = Utils.createTempDir() + val r = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + def rStats: DataFrame = + getStatsDf(r, $"numRecords", $"minValues.id".as("id_min"), $"maxValues.id".as("id_max")) + + val data = spark.range(10).repartition(2) + + Given("appending data without collecting stats") + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + data.write.format("delta").save(r.dataPath.toString) + checkAnswer(rStats, Seq(Row(null, null, null), Row(null, null, null))) + } + + Given("appending data and collecting stats") + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "true") { + data.write.format("delta").mode("append").save(r.dataPath.toString) + checkAnswer(rStats, + Seq(Row(null, null, 
null), Row(null, null, null), Row(4, 0, 8), Row(6, 1, 9))) + } + + Given("querying reservoir without using stats") + withSQLConf(DeltaSQLConf.DELTA_STATS_SKIPPING.key -> "false") { + assert(filesRead(r, "id = 0") == 4) + } + + Given("querying reservoir using stats") + withSQLConf(DeltaSQLConf.DELTA_STATS_SKIPPING.key -> "true") { + assert(filesRead(r, "id = 0") == 3) + } + } +} + +/** + * Used to disable the tests with the old stats collection behavior on long-running suites to + * avoid time-out + * TODO(lin): remove this after we remove the DELTA_COLLECT_STATS_USING_TABLE_SCHEMA flag + */ +trait DataSkippingDisableOldStatsSchemaTests extends DataSkippingDeltaTests { + + protected override def test(testName: String, testTags: org.scalatest.Tag*) + (testFun: => Any) + (implicit pos: org.scalactic.source.Position): Unit = { + // Adding the null check in case tableSchemaOnlyTag has not been initialized in base traits + val newTestTags = if (tableSchemaOnlyTag == null) testTags else tableSchemaOnlyTag +: testTags + super.test(testName, newTestTags: _*)(testFun)(pos) + } +} + +/** DataSkipping tests under id column mapping */ +trait DataSkippingDeltaIdColumnMappingTests extends DataSkippingDeltaTests + with DeltaColumnMappingTestUtils { + + override def expectedStatsForFile(index: Int, colName: String, deltaLog: DeltaLog): String = { + val x = colName.phy(deltaLog) + s"""{"numRecords":1,"minValues":{"$x":$index},"maxValues":{"$x":$index},""" + + s""""nullCount":{"$x":0}}""".stripMargin + } +} + +trait DataSkippingDeltaTestV1ColumnMappingMode extends DataSkippingDeltaIdColumnMappingTests { + override protected def getStatsDf(deltaLog: DeltaLog, columns: Column*): DataFrame = { + deltaLog.snapshot.withStats.select("stats.*") + .select(convertToPhysicalColumns(columns, deltaLog): _*) + } +} + +class DataSkippingDeltaV1NameColumnMappingSuite + extends DataSkippingDeltaV1Suite + with DeltaColumnMappingEnableNameMode + with DataSkippingDeltaTestV1ColumnMappingMode { + override protected def runAllTests: Boolean = true +} + +class DataSkippingDeltaV1JsonCheckpointV2Suite extends DataSkippingDeltaV1Suite { + override def sparkConf: SparkConf = { + super.sparkConf.setAll( + Seq( + DeltaConfigs.CHECKPOINT_POLICY.defaultTablePropertyKey -> CheckpointPolicy.V2.name, + DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key -> V2Checkpoint.Format.JSON.name + ) + ) + } +} + +class DataSkippingDeltaV1ParquetCheckpointV2Suite extends DataSkippingDeltaV1Suite { + override def sparkConf: SparkConf = { + super.sparkConf.setAll( + Seq( + DeltaConfigs.CHECKPOINT_POLICY.defaultTablePropertyKey -> CheckpointPolicy.V2.name, + DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key -> V2Checkpoint.Format.PARQUET.name + ) + ) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/stats/StatsCollectionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/stats/StatsCollectionSuite.scala new file mode 100644 index 00000000000..44a47db92b9 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/stats/StatsCollectionSuite.scala @@ -0,0 +1,841 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.stats + +import java.math.BigDecimal +import java.sql.Date +import java.time.LocalDateTime + +// scalastyle:off import.ordering.noEmptyLine +import org.apache.spark.sql.delta._ +import org.apache.spark.sql.delta.actions.Protocol +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.{DeltaSQLCommandTest, TestsStatistics} +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ +import org.apache.spark.sql.delta.util.JsonUtils +import org.apache.hadoop.fs.Path +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.SparkException +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.expressions.{GenericRow, GenericRowWithSchema} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, StringType, StructType} + +class StatsCollectionSuite + extends QueryTest + with SharedSparkSession + with DeltaColumnMappingTestUtils + with TestsStatistics + with DeltaSQLCommandTest + with DeletionVectorsTestUtils { + + import testImplicits._ + + + test("on write") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + + val data = Seq(1, 2, 3).toDF().coalesce(1) + data.write.format("delta").save(dir.getAbsolutePath) + val snapshot = deltaLog.update() + val statsJson = deltaLog.update().allFiles.head().stats + + // convert data schema to physical name if possible + val dataRenamed = data.toDF( + data.columns.map(name => getPhysicalName(name, deltaLog.snapshot.schema)): _*) + + val skipping = new StatisticsCollection { + override val spark = StatsCollectionSuite.this.spark + override def tableSchema: StructType = dataRenamed.schema + override def outputTableStatsSchema: StructType = dataRenamed.schema + override def outputAttributeSchema: StructType = dataRenamed.schema + override val statsColumnSpec = DeltaStatsColumnSpec( + None, + Some( + DeltaConfigs.DATA_SKIPPING_NUM_INDEXED_COLS.fromString( + DeltaConfigs.DATA_SKIPPING_NUM_INDEXED_COLS.defaultValue) + ) + ) + override def columnMappingMode: DeltaColumnMappingMode = deltaLog.snapshot.columnMappingMode + override val protocol: Protocol = snapshot.protocol + } + + val correctAnswer = dataRenamed + .select(skipping.statsCollector) + .select(to_json($"stats").as[String]) + .collect() + .head + + assert(statsJson === correctAnswer) + } + } + + test("gather stats") { + withTempDir { dir => + val deltaLog = DeltaLog.forTable(spark, dir) + + val data = spark.range(1, 10, 1, 10).withColumn("odd", $"id" % 2 === 1) + data.write.partitionBy("odd").format("delta").save(dir.getAbsolutePath) + + val df = spark.read.format("delta").load(dir.getAbsolutePath) + withSQLConf("spark.sql.parquet.filterPushdown" -> "false") { + assert(recordsScanned(df) == 9) + assert(recordsScanned(df.where("id = 1")) == 1) + } + } + } + + test("statistics re-computation throws error on Delta tables with DVs") { + withDeletionVectorsEnabled() { + 
withTempDir { dir => + val df = spark.range(start = 0, end = 20).toDF().repartition(numPartitions = 4) + df.write.format("delta").save(dir.toString()) + + spark.sql(s"DELETE FROM delta.`${dir.toString}` WHERE id in (2, 15)") + val e = intercept[DeltaCommandUnsupportedWithDeletionVectorsException] { + val deltaLog = DeltaLog.forTable(spark, dir) + StatisticsCollection.recompute(spark, deltaLog) + } + assert(e.getErrorClass == "DELTA_UNSUPPORTED_STATS_RECOMPUTE_WITH_DELETION_VECTORS") + assert(e.getSqlState == "0AKDD") + assert(e.getMessage == + "[DELTA_UNSUPPORTED_STATS_RECOMPUTE_WITH_DELETION_VECTORS] " + + "Statistics re-computation on a Delta table with deletion " + + "vectors is not yet supported.") + } + } + } + + statsTest("recompute stats basic") { + withTempDir { tempDir => + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + val df = spark.range(2).coalesce(1).toDF() + df.write.format("delta").save(tempDir.toString()) + val deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + assert(statsDF(deltaLog).where('numRecords.isNotNull).count() == 0) + + { + StatisticsCollection.recompute(spark, deltaLog) + } + checkAnswer( + spark.read.format("delta").load(tempDir.getCanonicalPath), + df + ) + val statsDf = statsDF(deltaLog) + assert(statsDf.where('numRecords.isNotNull).count() > 0) + // Make sure stats indicate 2 rows, min [0], max [1] + checkAnswer(statsDf, Row(2, Row(0), Row(1))) + } + } + } + + statsTest("recompute stats multiple columns and files") { + withTempDir { tempDir => + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + val df = spark.range(10, 20).withColumn("x", 'id + 10).repartition(3) + + df.write.format("delta").save(tempDir.toString()) + val deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + assert(statsDF(deltaLog).where('numRecords.isNotNull).count() == 0) + + { + StatisticsCollection.recompute(spark, deltaLog) + } + + checkAnswer( + spark.read.format("delta").load(tempDir.getCanonicalPath), + df + ) + val statsDf = statsDF(deltaLog) + assert(statsDf.where('numRecords.isNotNull).count() > 0) + // scalastyle:off line.size.limit + val expectedStats = Seq(Row(3, Row(10, 20), Row(19, 29)), Row(4, Row(12, 22), Row(17, 27)), Row(3, Row(11, 21), Row(18, 28))) + // scalastyle:on line.size.limit + checkAnswer(statsDf, expectedStats) + } + } + } + + statsTest("recompute stats on partitioned table") { + withTempDir { tempDir => + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + val df = spark.range(15).toDF("a") + .withColumn("b", 'a % 3) + .withColumn("c", 'a % 2) + .repartition(3, 'b) + + df.write.format("delta").partitionBy("b").save(tempDir.toString()) + val deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + assert(statsDF(deltaLog).where('numRecords.isNotNull).count() == 0) + + { + StatisticsCollection.recompute(spark, deltaLog) + } + checkAnswer( + spark.read.format("delta").load(tempDir.getCanonicalPath), + df + ) + val statsDf = statsDF(deltaLog) + assert(statsDf.where('numRecords.isNotNull).count() > 0) + checkAnswer(statsDf, Seq( + Row(5, Row(1, 0), Row(13, 1)), + Row(5, Row(0, 0), Row(12, 1)), + Row(5, Row(2, 0), Row(14, 1)))) + } + } + } + + statsTest("recompute stats with partition predicates") { + withTempDir { tempDir => + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + val df = Seq( + (1, 0, 10), (1, 2, 20), (1, 4, 30), (2, 6, 40), (2, 8, 50), (3, 10, 60), (4, 12, 70)) + .toDF("a", "b", "c") + + 
df.write.format("delta").partitionBy("a").save(tempDir.toString()) + val deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + assert(statsDF(deltaLog).where('numRecords.isNotNull).count() == 0) + + { + StatisticsCollection.recompute(spark, deltaLog, Seq(('a > 1).expr, ('a < 4).expr)) + } + checkAnswer( + spark.read.format("delta").load(tempDir.getCanonicalPath), + df + ) + val statsDf = statsDF(deltaLog) + assert(statsDf.where('numRecords.isNotNull).count() == 2) + checkAnswer(statsDf, Seq( + Row(null, Row(null, null), Row(null, null)), + Row(2, Row(6, 40), Row(8, 50)), + Row(1, Row(10, 60), Row(10, 60)), + Row(null, Row(null, null), Row(null, null)))) + } + } + } + + statsTest("recompute stats with invalid partition predicates") { + withTempDir { tempDir => + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + Seq((1, 0, 10), (1, 2, 20), (1, 4, 30), (2, 6, 40), (2, 8, 50), (3, 10, 60), (4, 12, 70)) + .toDF("a", "b", "c") + .write.format("delta").partitionBy("a").save(tempDir.toString()) + val deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + assert(statsDF(deltaLog).where('numRecords.isNotNull).count() == 0) + + { + intercept[AnalysisException] { + StatisticsCollection.recompute(spark, deltaLog, Seq(('b > 1).expr)) + } + intercept[AnalysisException] { + StatisticsCollection.recompute(spark, deltaLog, Seq(('a > 1).expr, ('c > 1).expr)) + } + } + assert(statsDF(deltaLog).where('numRecords.isNotNull).count() == 0) + } + } + } + + statsTest("recompute stats on a table with corrupted stats") { + withTempDir { tempDir => + val df = Seq( + (1, 0, 10), (1, 2, 20), (1, 4, 30), (2, 6, 40), (2, 8, 50), (3, 10, 60), (4, 12, 70)) + .toDF("a", "b", "c") + + df.write.format("delta").partitionBy("a").save(tempDir.toString()) + val deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + val correctStats = statsDF(deltaLog) + assert(correctStats.where('numRecords.isNotNull).count() == 4) + + // use physical names if possible + val (a, b, c) = ( + getPhysicalName("a", deltaLog.snapshot.schema), + getPhysicalName("b", deltaLog.snapshot.schema), + getPhysicalName("c", deltaLog.snapshot.schema) + ) + + { + // Corrupt stats on one of the files + val txn = deltaLog.startTransaction() + val f = deltaLog.snapshot.allFiles.filter(_.partitionValues(a) == "1").first() + val corrupted = f.copy(stats = f.stats.replace( + s"""maxValues":{"$b":4,"$c":30}""", + s"""maxValues":{"$b":-100,"$c":100}""")) + txn.commit(Seq(corrupted), DeltaOperations.ComputeStats(Nil)) + intercept[TestFailedException] { + checkAnswer(statsDF(deltaLog), correctStats) + } + + // Recompute stats and verify they match the original ones + StatisticsCollection.recompute(spark, deltaLog) + checkAnswer( + spark.read.format("delta").load(tempDir.getCanonicalPath), + df + ) + checkAnswer(statsDF(deltaLog), correctStats) + } + } + } + + statsTest("recompute stats with file filter") { + withTempDir { tempDir => + withSQLConf(DeltaSQLConf.DELTA_COLLECT_STATS.key -> "false") { + val df = Seq( + (1, 0, 10), (1, 2, 20), (1, 4, 30), (2, 6, 40), (2, 8, 50), (3, 10, 60), (4, 12, 70)) + .toDF("a", "b", "c") + + df.write.format("delta").partitionBy("a").save(tempDir.toString()) + val deltaLog = DeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + assert(statsDF(deltaLog).where('numRecords.isNotNull).count() == 0) + + val biggest = deltaLog.snapshot.allFiles.agg(max('size)).first().getLong(0) + + { + StatisticsCollection.recompute( + spark, deltaLog, catalogTable = None, 
fileFilter = _.size == biggest) + } + + checkAnswer( + spark.read.format("delta").load(tempDir.getCanonicalPath), + df + ) + val statsDf = statsDF(deltaLog) + assert(statsDf.where('numRecords.isNotNull).count() == 1) + checkAnswer(statsDf, Seq( + Row(null, Row(null, null), Row(null, null)), + Row(null, Row(null, null), Row(null, null)), + Row(null, Row(null, null), Row(null, null)), + Row(3, Row(0, 10), Row(4, 30)))) + } + } + } + + test("Truncate max string") { + // scalastyle:off nonascii + val prefixLen = 6 + // � is the max unicode character with value \ufffd + val inputToExpected = Seq( + (s"abcd", s"abcd"), + (s"abcdef", s"abcdef"), + (s"abcde�", s"abcde�"), + (s"abcd�abcd", s"abcd�a�"), + (s"�abcd", s"�abcd"), + (s"abcdef�", s"abcdef��"), + (s"abcdef-abcdef�", s"abcdef�"), + (s"abcdef�abcdef", s"abcdef��"), + (s"abcdef��abcdef", s"abcdef���"), + (s"abcdef�abcdef�abcdef�abcdef", s"abcdef��") + ) + inputToExpected.foreach { + case (input, expected) => + val actual = StatisticsCollection.truncateMaxStringAgg(prefixLen)(input) + assert(actual == expected, s"input:$input, actual:$actual, expected:$expected") + } + // scalastyle:off nonascii + } + + + test(s"Optimize Zorder for delta statistics column: table creation") { + val tableName = "delta_table" + withTable(tableName) { + sql("create table delta_table (c1 long, c2 long) " + + "using delta " + + "TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c1,c2', " + + "'delta.dataSkippingNumIndexedCols' = 0)") + for (_ <- 1 to 10) { + sql("insert into delta_table values(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8)") + } + sql("optimize delta_table zorder by (c1)") + sql("optimize delta_table zorder by (c2)") + sql("optimize delta_table zorder by (c1,c2)") + } + } + + test(s"Optimize Zorder for delta statistics column: alter TBLPROPERTIES") { + val tableName = "delta_table" + withTable(tableName) { + sql("create table delta_table (c1 long, c2 long) " + + "using delta TBLPROPERTIES('delta.dataSkippingNumIndexedCols' = 0)") + intercept[DeltaAnalysisException] { sql("optimize delta_table zorder by (c1)") } + intercept[DeltaAnalysisException] { sql("optimize delta_table zorder by (c2)") } + intercept[DeltaAnalysisException] { sql("optimize delta_table zorder by (c1,c2)") } + for (_ <- 1 to 10) { + sql("insert into delta_table values(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8)") + } + sql("ALTER TABLE delta_table SET TBLPROPERTIES ('delta.dataSkippingStatsColumns' = 'c1,c2')") + sql("optimize delta_table zorder by (c1)") + sql("optimize delta_table zorder by (c2)") + sql("optimize delta_table zorder by (c1,c2)") + } + } + + test(s"Delta statistic column: special characters") { + val tableName = "delta_table_1" + withTable(tableName) { + sql( + s"create table $tableName (`c1.` long, `c2*` long, `c3,` long, `c-4` long) using delta " + + s"TBLPROPERTIES(" + + s"'delta.dataSkippingStatsColumns'='`c1.`,`c2*`,`c3,`,`c-4`'," + + s"'delta.columnMapping.mode' = 'name')" + ) + val dataSkippingStatsColumns = sql(s"SHOW TBLPROPERTIES $tableName") + .collect() + .map { row => row.getString(0) -> row.getString(1) } + .filter(_._1 == "delta.dataSkippingStatsColumns") + .toSeq + val result1 = Seq(("delta.dataSkippingStatsColumns", "`c1.`,`c2*`,`c3,`,`c-4`")) + assert(dataSkippingStatsColumns == result1) + } + } + + Seq("c1.", "c2*", "c3,", "c-4").foreach { col => + test(s"Delta statistic column: invalid special characters $col") { + val tableName = "delta_table_1" + withTable(tableName) { + val except = intercept[Exception] { + sql( + s"create table 
$tableName (`c1.` long, `c2*` long, `c3,` long, c4 long) using delta " + + s"TBLPROPERTIES(" + + s"'delta.dataSkippingStatsColumns'='$col'," + + s"'delta.columnMapping.mode' = 'name')" + ) + } + } + } + } + + Seq( + ("BINARY", "BinaryType"), + ("BOOLEAN", "BooleanType"), + ("ARRAY", "ArrayType(ByteType,true)"), + ("MAP", "MapType(DateType,IntegerType,true)"), + ("STRUCT>", "ArrayType(IntegerType,true)") + ).foreach { case (invalidType, typename) => + val tableName1 = "delta_table_1" + val tableName2 = "delta_table_2" + test(s"Delta statistic column: invalid data type $invalidType") { + withTable(tableName1, tableName2) { + val columnName = if (typename.equals("ArrayType(IntegerType,true)")) "c2.c61" else "c2" + val exceptOne = intercept[DeltaIllegalArgumentException] { + sql( + s"create table $tableName1 (c1 long, c2 $invalidType) using delta " + + s"TBLPROPERTIES('delta.dataSkippingStatsColumns'='c2')" + ) + } + assert( + exceptOne.getErrorClass == "DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_TYPE" && + exceptOne.getMessageParametersArray.toSeq == Seq(columnName, typename) + ) + sql(s"create table $tableName2 (c1 long, c2 $invalidType) using delta") + val exceptTwo = intercept[Throwable] { + sql(s"ALTER TABLE $tableName2 SET TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c2')") + }.getCause.asInstanceOf[DeltaIllegalArgumentException] + assert( + exceptTwo.getErrorClass == "DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_TYPE" && + exceptTwo.getMessageParametersArray.toSeq == Seq(columnName, typename) + ) + } + } + + test(s"Delta statistic column: invalid data type $invalidType in nested column") { + withTable(tableName1, tableName2) { + val columnName = if (typename == "ArrayType(IntegerType,true)") "c2.c21.c61" else "c2.c21" + val exceptOne = intercept[DeltaIllegalArgumentException] { + sql( + s"create table $tableName1 (c1 long, c2 STRUCT) " + + s"using delta TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c2.c21')" + ) + } + assert( + exceptOne.getErrorClass == "DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_TYPE" && + exceptOne.getMessageParametersArray.toSeq == Seq(columnName, typename) + ) + val exceptTwo = intercept[DeltaIllegalArgumentException] { + sql( + s"create table $tableName1 (c1 long, c2 STRUCT) " + + s"using delta TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c2')" + ) + } + assert( + exceptTwo.getErrorClass == "DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_TYPE" && + exceptTwo.getMessageParametersArray.toSeq == Seq(columnName, typename) + ) + sql(s"create table $tableName2 (c1 long, c2 STRUCT) using delta") + val exceptThree = intercept[Throwable] { + sql( + s"ALTER TABLE $tableName2 SET TBLPROPERTIES('delta.dataSkippingStatsColumns'='c2.c21')" + ) + }.getCause.asInstanceOf[DeltaIllegalArgumentException] + assert( + exceptThree.getErrorClass == "DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_TYPE" && + exceptThree.getMessageParametersArray.toSeq == Seq(columnName, typename) + ) + val exceptFour = intercept[Throwable] { + sql(s"ALTER TABLE $tableName2 SET TBLPROPERTIES('delta.dataSkippingStatsColumns'='c2')") + }.getCause.asInstanceOf[DeltaIllegalArgumentException] + assert( + exceptFour.getErrorClass == "DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_TYPE" && + exceptFour.getMessageParametersArray.toSeq == Seq(columnName, typename) + ) + } + } + } + + test(s"Delta statistic column: mix case column name") { + val tableName = "delta_table_1" + withTable(tableName) { + sql( + s"create table $tableName (col1 LONG, col2 struct, col3 LONG) " + + s"using delta 
TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'coL1, COL2.Col20, cOl3');" + ) + (1 to 10).foreach { _ => + sql( + s"""insert into $tableName values + |(1, struct(1, 1), 1), (2, struct(2, 2), 2), (3, struct(3, 3), 3), + |(4, struct(4, 4), 4), (5, struct(5, 5), 5), (6, struct(6, 6), 6), + |(7, struct(7, 7), 7), (8, struct(8, 8), 8), (9, struct(9, 9), 9), + |(10, struct(10, 10), 10), (null, struct(null, null), null), (-1, struct(-1, -1), -1), + |(null, struct(null, null), null);""".stripMargin + ) + } + sql(s"optimize $tableName") + val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tableName)) + val df = deltaLog.update().withStatsDeduplicated + val analyzedDfPlan = df.queryExecution.analyzed.toString + val stats = if (analyzedDfPlan.indexOf("stats_parsed") > 0) "stats_parsed" else "stats" + df.select(s"$stats.numRecords", s"$stats.nullCount", s"$stats.minValues", s"$stats.maxValues") + .collect() + .foreach { row => + assert(row(0) == 130) + assert(row(1).asInstanceOf[GenericRow] == Row(20, Row(20), 20)) + assert(row(2) == Row(-1, Row(-1), -1)) + assert(row(3) == Row(10, Row(10), 10)) + } + } + } + + Seq( + "BIGINT", "DATE", "DECIMAL(3, 2)", "DOUBLE", "FLOAT", "INT", "SMALLINT", "STRING", + "TIMESTAMP", "TIMESTAMP_NTZ", "TINYINT" + ).foreach { validType => + val tableName1 = "delta_table_1" + val tableName2 = "delta_table_2" + test(s"Delta statistic column: valid data type $validType") { + withTable(tableName1, tableName2) { + sql( + s"create table $tableName1 (c1 long, c2 $validType) using delta " + + s"TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c2')" + ) + sql(s"create table $tableName2 (c1 long, c2 $validType) using delta") + sql(s"ALTER TABLE $tableName2 SET TBLPROPERTIES('delta.dataSkippingStatsColumns'='c2')") + } + } + + test(s"Delta statistic column: valid data type $validType in nested column") { + val tableName3 = "delta_table_3" + val tableName4 = "delta_table_4" + withTable(tableName1, tableName2, tableName3, tableName4) { + sql( + s"create table $tableName1 (c1 long, c2 STRUCT) " + + s"using delta TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c2.c21')" + ) + sql( + s"create table $tableName2 (c1 long, c2 STRUCT) " + + s"using delta TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c2')" + ) + sql(s"create table $tableName3 (c1 long, c2 STRUCT) using delta") + sql(s"ALTER TABLE $tableName3 SET TBLPROPERTIES('delta.dataSkippingStatsColumns'='c2.c21')") + sql(s"create table $tableName4 (c1 long, c2 STRUCT) using delta") + sql(s"ALTER TABLE $tableName4 SET TBLPROPERTIES('delta.dataSkippingStatsColumns'='c2')") + } + } + } + + Seq("create", "alter").foreach { label => + val tableName = "delta_table" + val propertyName = "delta.dataSkippingStatsColumns" + test(s"Delta statistics column with partition column: $label") { + withTable(tableName) { + if (label == "create") { + val except = intercept[DeltaIllegalArgumentException] { + sql( + "create table delta_table(c0 int, c1 int) using delta partitioned by(c1) " + + "TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c1')" + ) + } + assert( + except.getErrorClass == "DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_PARTITIONED_COLUMN" && + except.getMessageParametersArray.toSeq == Seq("c1") + ) + } else { + sql("create table delta_table(c0 int, c1 int) using delta partitioned by(c1)") + val except = intercept[Throwable] { + sql( + "ALTER TABLE delta_table SET TBLPROPERTIES ('delta.dataSkippingStatsColumns' = 'c1')" + ) + }.getCause.asInstanceOf[DeltaIllegalArgumentException] + assert( + except.getErrorClass == 
"DELTA_COLUMN_DATA_SKIPPING_NOT_SUPPORTED_PARTITIONED_COLUMN" && + except.getMessageParametersArray.toSeq == Seq("c1") + ) + } + } + } + + test(s"Rename Nested Columns with delta statistics column: $label") { + withTable(tableName) { + if (label == "create") { + sql( + "create table delta_table (" + + " id long," + + " info STRUCT >, " + + " prev_job STRUCT >)" + + " using delta TBLPROPERTIES(" + + s"'$propertyName' = 'info.title,info.depart.org,info.depart.perf'," + + "'delta.columnMapping.mode' = 'name', " + + "'delta.minReaderVersion' = '2', " + + "'delta.minWriterVersion' = '5')" + ) + } else { + sql( + "create table delta_table (" + + " id long," + + " info STRUCT >, " + + " prev_job STRUCT >)" + + " using delta TBLPROPERTIES(" + + "'delta.columnMapping.mode' = 'name', " + + "'delta.minReaderVersion' = '2', " + + "'delta.minWriterVersion' = '5')" + ) + } + if (label == "alter") { + sql(s"alter table delta_table set TBLPROPERTIES(" + + s"'$propertyName' = 'info.title,info.depart.org,info.depart.perf')") + } + // Rename nested column leaf. + sql("ALTER TABLE delta_table RENAME COLUMN info.title TO title_name;") + var dataSkippingStatsColumns = sql("SHOW TBLPROPERTIES delta_table") + .collect() + .map { row => row.getString(0) -> row.getString(1) } + .filter(_._1 == propertyName) + .toSeq + val result1 = Seq((propertyName, "info.title_name,info.depart.org,info.depart.perf")) + assert(dataSkippingStatsColumns == result1) + // Rename nested column root. + sql("ALTER TABLE delta_table RENAME COLUMN info TO detail") + dataSkippingStatsColumns = sql("SHOW TBLPROPERTIES delta_table") + .collect() + .map { row => row.getString(0) -> row.getString(1) } + .filter(_._1 == propertyName) + .toSeq + val result2 = Seq( + (propertyName, "detail.title_name,detail.depart.org,detail.depart.perf") + ) + assert(dataSkippingStatsColumns == result2) + // Rename nested column intermediate node. + sql("ALTER TABLE delta_table RENAME COLUMN detail.DEPART TO organization") + dataSkippingStatsColumns = sql("SHOW TBLPROPERTIES delta_table") + .collect() + .map { row => row.getString(0) -> row.getString(1) } + .filter(_._1 == propertyName) + .toSeq + val result3 = Seq( + (propertyName, "detail.title_name,detail.organization.org,detail.organization.perf") + ) + assert(dataSkippingStatsColumns == result3) + } + } + + test(s"Drop Nested Columns with delta statistics column: $label") { + withTable(tableName) { + if (label == "create") { + sql( + "create table delta_table (" + + " id long, " + + " info STRUCT >, " + + " prev_job STRUCT >)" + + " using delta TBLPROPERTIES(" + + s"'$propertyName' = " + + "'info.title,info.depart.org,info.depart.perf,prev_job.title,prev_job.depart.perf', " + + "'delta.columnMapping.mode' = 'name', " + + "'delta.minReaderVersion' = '2', " + + "'delta.minWriterVersion' = '5')" + ) + } else { + sql( + "create table delta_table (" + + " id long," + + " info STRUCT>, " + + " prev_job STRUCT>)" + + " using delta TBLPROPERTIES(" + + "'delta.columnMapping.mode' = 'name', " + + "'delta.minReaderVersion' = '2', " + + "'delta.minWriterVersion' = '5')" + ) + } + if (label == "alter") { + sql( + s"alter table delta_table set TBLPROPERTIES(" + + s"'$propertyName' = " + + s"'info.title,info.depart.org,info.depart.perf,prev_job.title,prev_job.depart.perf')" + ) + } + // Drop nested column leaf. 
+ sql("ALTER TABLE delta_table DROP COLUMN info.title;") + var dataSkippingStatsColumns = sql("SHOW TBLPROPERTIES delta_table") + .collect() + .map { row => row.getString(0) -> row.getString(1) } + .filter(_._1 == propertyName) + .toSeq + val result1 = Seq( + (propertyName, "info.depart.org,info.depart.perf,prev_job.title,prev_job.depart.perf") + ) + assert(dataSkippingStatsColumns == result1) + // Drop nested column intermediate node. + sql("ALTER TABLE delta_table DROP COLUMN info.depart;") + dataSkippingStatsColumns = sql("SHOW TBLPROPERTIES delta_table") + .collect() + .map { row => row.getString(0) -> row.getString(1) } + .filter(_._1 == propertyName) + .toSeq + val result3 = Seq((propertyName, "prev_job.title,prev_job.depart.perf")) + assert(dataSkippingStatsColumns == result3) + + // Rename nested column root node. + sql("ALTER TABLE delta_table DROP COLUMN prev_job;") + dataSkippingStatsColumns = sql("SHOW TBLPROPERTIES delta_table") + .collect() + .map { row => row.getString(0) -> row.getString(1) } + .filter(_._1 == propertyName) + .toSeq + val result2 = Seq((propertyName, "")) + assert(dataSkippingStatsColumns == result2) + } + } + } + + test("Change Columns with delta statistics column") { + Seq( + "BIGINT", "DATE", "DECIMAL(3, 2)", "DOUBLE", "FLOAT", "INT", "SMALLINT", "STRING", + "TIMESTAMP", "TIMESTAMP_NTZ", "TINYINT" + ).foreach { validType => + Seq( + "BINARY", "BOOLEAN", "ARRAY", "MAP", "STRUCT>" + ).foreach { invalidType => + withTable("delta_table") { + sql( + s"create table delta_table (c0 long, c1 long, c2 $validType) using delta " + + s"TBLPROPERTIES('delta.dataSkippingStatsColumns' = 'c1,c2', " + + "'delta.columnMapping.mode' = 'name', " + + "'delta.minReaderVersion' = '2', " + + "'delta.minWriterVersion' = '5')" + ) + intercept[AnalysisException] { + sql(s"ALTER TABLE delta_table Change c2 TYPE $invalidType;") + } + } + } + } + } + + test("Duplicated delta statistic columns: create") { + Seq( + ("'c0,c0'", "c0"), + ("'c1,c1.c11'", "c1.c11"), + ("'c1.c11,c1.c11'", "c1.c11"), + ("'c1,c1'", "c1.c11,c1.c12") + ).foreach { case (statsColumns, duplicatedColumns) => + val exception = intercept[DeltaIllegalArgumentException] { + sql( + s"create table delta_table (c0 long, c1 struct) using delta " + + s"TBLPROPERTIES('delta.dataSkippingStatsColumns' = $statsColumns, " + + "'delta.columnMapping.mode' = 'name')" + ) + } + assert( + exception.getErrorClass == "DELTA_DUPLICATE_DATA_SKIPPING_COLUMNS" && + exception.getMessageParametersArray.toSeq == Seq(duplicatedColumns) + ) + } + } + + test("Duplicated delta statistic columns: alter") { + sql( + s"create table delta_table_t1 (c0 long, c1 struct) using delta " + + s"TBLPROPERTIES('delta.columnMapping.mode' = 'name')" + ) + Seq( + ("'c0,c0'", "c0"), + ("'c1,c1.c11'", "c1.c11"), + ("'c1.c11,c1.c11'", "c1.c11"), + ("'c1,c1'", "c1.c11,c1.c12") + ).foreach { case (statsColumns, duplicatedColumns) => + val exception = intercept[SparkException] { + sql( + s"ALTER TABLE delta_table_t1 " + + s"SET TBLPROPERTIES('delta.dataSkippingStatsColumns'=$statsColumns)" + ) + }.getCause.asInstanceOf[DeltaIllegalArgumentException] + assert( + exception.getErrorClass == "DELTA_DUPLICATE_DATA_SKIPPING_COLUMNS" && + exception.getMessageParametersArray.toSeq == Seq(duplicatedColumns) + ) + } + } + + private def recordsScanned(df: DataFrame): Long = { + val scan = df.queryExecution.executedPlan.find { + case FileScanExecNode(_) => true + case _ => false + }.get + + var executedScan = false + + if (!executedScan) { + if (scan.supportsColumnar) { + 
scan.executeColumnar().count() + } else { + scan.execute().count() + } + } + scan.metrics.get("numOutputRows").get.value + } + + private def statsDF(deltaLog: DeltaLog): DataFrame = { + // use physical name if possible + val dataColumns = deltaLog.snapshot.metadata.dataSchema.map(DeltaColumnMapping.getPhysicalName) + val minValues = struct(dataColumns.map(c => $"minValues.$c"): _*) + val maxValues = struct(dataColumns.map(c => $"maxValues.$c"): _*) + val df = getStatsDf(deltaLog, Seq($"numRecords", minValues, maxValues)) + val numRecordsCol = df.schema.head.name + df.withColumnRenamed(numRecordsCol, "numRecords") + } +} + +class StatsCollectionNameColumnMappingSuite extends StatsCollectionSuite + with DeltaColumnMappingEnableNameMode { + + override protected def runOnlyTests = Seq( + "on write", + "recompute stats with partition predicates" + ) +} + diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/stats/StatsUtils.scala b/spark/src/test/scala/org/apache/spark/sql/delta/stats/StatsUtils.scala new file mode 100644 index 00000000000..617bd607881 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/stats/StatsUtils.scala @@ -0,0 +1,32 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.stats + +import org.apache.spark.sql.delta.DeltaTable + +import org.apache.spark.sql.DataFrame + +trait StatsUtils { + protected def getStats(df: DataFrame): DeltaScan = { + val stats = df.queryExecution.optimizedPlan.collect { + case DeltaTable(prepared: PreparedDeltaFileIndex) => + prepared.preparedScan + } + if (stats.size != 1) sys.error(s"Found ${stats.size} scans!") + stats.head + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/storage/LineClosableIteratorSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/storage/LineClosableIteratorSuite.scala new file mode 100644 index 00000000000..de9a9b1af67 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/storage/LineClosableIteratorSuite.scala @@ -0,0 +1,148 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.storage + +import java.io.{Reader, StringReader} + +import org.apache.spark.SparkFunSuite + +abstract class LineClosableIteratorSuiteBase extends SparkFunSuite { + + protected def createIter(_reader: Reader): ClosableIterator[String] + + test("empty") { + var iter = createIter(new StringReader("")) + assert(!iter.hasNext) + intercept[NoSuchElementException] { iter.next() } + + iter = createIter(new StringReader("")) + intercept[NoSuchElementException] { iter.next() } + + iter = createIter(new StringReader("")) + iter.close() + intercept[IllegalStateException] { iter.hasNext } + intercept[IllegalStateException] { iter.next() } + } + + test("one elem") { + var iter = createIter(new StringReader("foo")) + assert(iter.hasNext) + assert(iter.next() == "foo") + assert(!iter.hasNext) + intercept[NoSuchElementException] { iter.next() } + + iter = createIter(new StringReader("foo")) + assert(iter.next() == "foo") + intercept[NoSuchElementException] { iter.next() } + + iter = createIter(new StringReader("foo")) + iter.close() + intercept[IllegalStateException] { iter.hasNext } + intercept[IllegalStateException] { iter.next() } + } + + test("two elems") { + var iter = createIter(new StringReader("foo\nbar")) + assert(iter.hasNext) + assert(iter.next() == "foo") + assert(iter.hasNext) + assert(iter.next() == "bar") + assert(!iter.hasNext) + intercept[NoSuchElementException] { iter.next() } + + iter = createIter(new StringReader("foo\nbar")) + assert(iter.next() == "foo") + assert(iter.next() == "bar") + intercept[NoSuchElementException] { iter.next() } + + iter = createIter(new StringReader("foo\nbar")) + assert(iter.next() == "foo") + iter.close() + intercept[IllegalStateException] { iter.hasNext } + intercept[IllegalStateException] { iter.next() } + + iter = createIter(new StringReader("foo\nbar")) + assert(iter.hasNext) // Cache `nextValue` + iter.close() + // We should throw `IllegalStateException` even if there is a cached `nextValue`. 
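+ // (Once close() has been called the iterator is invalid, so even the value buffered by the
+ // hasNext call above must not be returned; both calls below are expected to fail.)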
+ intercept[IllegalStateException] { iter.hasNext } + intercept[IllegalStateException] { iter.next() } + } + + test("close should be called when the iterator reaches the end") { + var closed = false + val reader = new StringReader("foo") { + override def close(): Unit = { + super.close() + closed = true + } + } + val iter = createIter(reader) + assert(iter.toList == "foo" :: Nil) + assert(closed) + } + + test("close should be called when the iterator is closed") { + var closed = false + val reader = new StringReader("foo") { + override def close(): Unit = { + super.close() + closed = true + } + } + val iter = createIter(reader) + iter.close() + assert(closed) + } + + test("close should be called only once") { + var closed = 0 + val reader = new StringReader("foo") { + override def close(): Unit = { + super.close() + closed += 1 + } + } + val iter = createIter(reader) + assert(iter.toList == "foo" :: Nil) + iter.close() + assert(closed == 1) + } +} + +class InternalLineClosableIteratorSuite extends LineClosableIteratorSuiteBase { + override protected def createIter(_reader: Reader): ClosableIterator[String] = { + new LineClosableIterator(_reader) + } +} + +class PublicLineClosableIteratorSuite extends LineClosableIteratorSuiteBase { + override protected def createIter(_reader: Reader): ClosableIterator[String] = { + val impl = new io.delta.storage.LineCloseableIterator(_reader) + new LineClosableIteratorAdaptor(impl) + } +} + +private class LineClosableIteratorAdaptor( + impl: io.delta.storage.LineCloseableIterator) extends ClosableIterator[String] { + + override def hasNext(): Boolean = impl.hasNext + + override def next(): String = impl.next() + + override def close(): Unit = impl.close() +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/storage/dv/DeletionVectorStoreSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/storage/dv/DeletionVectorStoreSuite.scala new file mode 100644 index 00000000000..be7819139a8 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/storage/dv/DeletionVectorStoreSuite.scala @@ -0,0 +1,219 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.storage.dv + +import java.io.{DataInputStream, DataOutputStream, File} + +import org.apache.spark.sql.delta.{DeltaChecksumException, DeltaConfigs, DeltaLog} +import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArray, RoaringBitmapArrayFormat} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore.{getTotalSizeOfDVFieldsInFile, CHECKSUM_LEN} +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.util.PathWithFileSystem +import com.google.common.primitives.Ints +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +trait DeletionVectorStoreSuiteBase + extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest { + + lazy val dvStore: DeletionVectorStore = + DeletionVectorStore.createInstance(newHadoopConf) + + protected def newHadoopConf: Configuration = { + // scalastyle:off deltahadoopconfiguration + spark.sessionState.newHadoopConf() + // scalastyle:on deltahadoopconfiguration + } + + // Test bitmaps + protected lazy val simpleBitmap = { + val data = Seq(1L, 5L, 6L, 7L, 1000L, 8000000L, 8000001L) + RoaringBitmapArray(data: _*) + } + + protected lazy val simpleBitmap2 = { + val data = Seq(78L, 256L, 998L, 1000002L, 22623423L) + RoaringBitmapArray(data: _*) + } + + + def withTempHadoopFileSystemPath[T](f: Path => T): T = { + val dir: File = Utils.createTempDir() + dir.delete() + val tempPath = DeletionVectorStore.unescapedStringToPath(dir.toString) + try f(tempPath) finally Utils.deleteRecursively(dir) + } + + testWithAllSerializationFormats("Write simple DV directly to disk") { serializationFormat => + val readDV = + withTempHadoopFileSystemPath { tableDir => + val tableWithFS = PathWithFileSystem.withConf(tableDir, newHadoopConf) + val dvPath = dvStore.generateUniqueNameInTable(tableWithFS) + val serializedBitmap = simpleBitmap.serializeAsByteArray(serializationFormat) + val dvRange = Utils.tryWithResource(dvStore.createWriter(dvPath)) { writer => + writer.write(serializedBitmap) + } + assert(dvRange.offset === 1) // there's a version id at byte 0 + assert(dvRange.length === serializedBitmap.length) + dvStore.read(dvPath.path, dvRange.offset, dvRange.length) + } + assert(simpleBitmap === readDV) + } + + + testWithAllSerializationFormats("Detect corrupted DV checksum ") { serializationFormat => + withTempHadoopFileSystemPath { tableDir => + val tableWithFS = PathWithFileSystem.withConf(tableDir, newHadoopConf) + val dvPath = dvStore.generateUniqueNameInTable(tableWithFS) + val dvBytes = simpleBitmap.serializeAsByteArray(serializationFormat) + val dvRange = Utils.tryWithResource(dvStore.createWriter(dvPath)) { + writer => writer.write(dvBytes) + } + assert(dvRange.offset === 1) // there's a version id at byte 0 + assert(dvRange.length === dvBytes.length) + // corrupt 1 byte in the middle of the stored DV (after the checksum) + corruptByte(dvPath, byteToCorrupt = DeletionVectorStore.CHECKSUM_LEN + dvRange.length / 2) + val e = intercept[DeltaChecksumException] { + dvStore.read(dvPath.path, dvRange.offset, dvRange.length) + } + // make sure this is our exception not ChecksumFileSystem's + assert(e.getErrorClass == "DELTA_DELETION_VECTOR_CHECKSUM_MISMATCH") + assert(e.getSqlState == "XXKDS") + assert(e.getMessage == "[DELTA_DELETION_VECTOR_CHECKSUM_MISMATCH] " + + 
"Could not verify deletion vector integrity, CRC checksum verification failed.") + } + } + + testWithAllSerializationFormats("Detect corrupted DV size") { serializationFormat => + withTempHadoopFileSystemPath { tableDir => + val tableWithFS = PathWithFileSystem.withConf(tableDir, newHadoopConf) + val dvPath = dvStore.generateUniqueNameInTable(tableWithFS) + val dvBytes = simpleBitmap.serializeAsByteArray(serializationFormat) + val dvRange = Utils.tryWithResource(dvStore.createWriter(dvPath)) { + writer => writer.write(dvBytes) + } + assert(dvRange.offset === 1) // there's a version id at byte 0 + assert(dvRange.length === dvBytes.length) + + // Corrupt 1 byte in the part where the serialized DV size is stored. + // Format: + corruptByte(dvPath, byteToCorrupt = 2) + val e = intercept[DeltaChecksumException] { + dvStore.read(dvPath.path, dvRange.offset, dvRange.length) + } + assert(e.getErrorClass == "DELTA_DELETION_VECTOR_SIZE_MISMATCH") + assert(e.getSqlState == "XXKDS") + assert(e.getMessage == "[DELTA_DELETION_VECTOR_SIZE_MISMATCH] " + + "Deletion vector integrity check failed. Encountered a size mismatch.") + } + } + + testWithAllSerializationFormats("Multiple DVs in one file") { serializationFormat => + withTempHadoopFileSystemPath { tableDir => + val tableWithFS = PathWithFileSystem.withConf(tableDir, newHadoopConf) + val dvPath = dvStore.generateUniqueNameInTable(tableWithFS) + val dvBytes1 = simpleBitmap.serializeAsByteArray(serializationFormat) + val dvBytes2 = simpleBitmap2.serializeAsByteArray(serializationFormat) + val (dvRange1, dvRange2) = Utils.tryWithResource(dvStore.createWriter(dvPath)) { + writer => + (writer.write(dvBytes1), writer.write(dvBytes2)) + } + assert(dvRange1.offset === 1) // there's a version id at byte 0 + assert(dvRange1.length === dvBytes1.length) + + // DV2 should be written immediately after the DV1 + val totalDV1Size = getTotalSizeOfDVFieldsInFile(dvBytes1.length) + assert(dvRange2.offset === 1 + totalDV1Size) // 1byte for file format version + assert(dvRange2.length === dvBytes2.length) + + // Read back DVs from the file and verify + assert(dvStore.read(dvPath.path, dvRange1.offset, dvRange1.length) === simpleBitmap) + assert(dvStore.read(dvPath.path, dvRange2.offset, dvRange2.length) === simpleBitmap2) + } + } + + test("Exception is thrown for DVDescriptors with invalid maxRowIndex") { + withSQLConf( + DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.defaultTablePropertyKey -> "true", + DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS.key -> true.toString) { + withTempDir { dir => + val path = dir.toString + spark.range(0, 50, 1, 1).write.format("delta").save(path) + val targetTable = io.delta.tables.DeltaTable.forPath(path) + val deltaLog = DeltaLog.forTable(spark, path) + val tableName = s"delta.`$path`" + spark.sql(s"DELETE FROM $tableName WHERE id = 3") + val file = deltaLog.update().allFiles.first() + val dvDescriptorWithInvalidRowIndex = file.deletionVector.copy(maxRowIndex = Some(50)) + + val e = intercept[DeltaChecksumException] { + file.removeRows( + dvDescriptorWithInvalidRowIndex, + updateStats = false + ) + } + assert(e.getErrorClass == "DELTA_DELETION_VECTOR_INVALID_ROW_INDEX") + assert(e.getSqlState == "XXKDS") + assert(e.getMessage == "[DELTA_DELETION_VECTOR_INVALID_ROW_INDEX] " + + "Deletion vector integrity check failed. 
Encountered an invalid row index.")
+ }
+ }
+ }
+
+ /** Helper method to run the test using all DV serialization formats */
+ protected def testWithAllSerializationFormats(name: String)
+ (func: RoaringBitmapArrayFormat.Value => Unit): Unit = {
+ for (serializationFormat <- RoaringBitmapArrayFormat.values) {
+ test(s"$name - $serializationFormat") {
+ func(serializationFormat)
+ }
+ }
+ }
+
+ /** Helper method to simulate data corruption in an on-disk DV */
+ private def corruptByte(pathWithFS: PathWithFileSystem, byteToCorrupt: Int): Unit = {
+ val fs = pathWithFS.fs
+ val path = pathWithFS.path
+ val status = fs.getFileStatus(path)
+ val len = Ints.checkedCast(status.getLen)
+
+ val bytes = Utils.tryWithResource(fs.open(path)) { stream =>
+ val reader = new DataInputStream(stream)
+ // readAllBytes is not available in Java 8, so read into a pre-sized buffer
+ val buffer = new Array[Byte](len)
+ reader.readFully(buffer)
+ buffer
+ }
+ bytes(byteToCorrupt) = (bytes(byteToCorrupt) + 1).toByte
+ val overwrite = true
+ Utils.tryWithResource(fs.create(path, overwrite)) { stream =>
+ val writer = new DataOutputStream(stream)
+ writer.write(bytes)
+ writer.flush()
+ }
+ }
+}
+
+class DeletionVectorStoreSuite
+ extends DeletionVectorStoreSuiteBase
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaColumnMappingSelectedTestMixin.scala b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaColumnMappingSelectedTestMixin.scala
new file mode 100644
index 00000000000..5e060266679
--- /dev/null
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaColumnMappingSelectedTestMixin.scala
@@ -0,0 +1,74 @@
+/*
+ * Copyright (2021) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta.test
+
+import scala.collection.mutable
+
+import org.apache.spark.sql.delta.{DeltaColumnMappingTestUtils, DeltaConfigs, NoMapping}
+import org.scalactic.source.Position
+import org.scalatest.Tag
+import org.scalatest.exceptions.TestFailedException
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.test.SQLTestUtils
+
+/**
+ * A trait for selectively enabling certain tests to run for column mapping modes
+ */
+trait DeltaColumnMappingSelectedTestMixin extends SparkFunSuite
+ with SQLTestUtils with DeltaColumnMappingTestUtils {
+
+ protected def runOnlyTests: Seq[String] = Seq()
+
+ /**
+ * If true, will run all tests.
+ * Requires that `runOnlyTests` is empty.
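+ * (A subclass typically either overrides this to true, or instead lists specific test
+ * names via `runOnlyTests`, e.g. `override protected def runOnlyTests = Seq("on write")`;
+ * the example override shown here is illustrative.)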
+ */ + protected def runAllTests: Boolean = false + + private val testsRun: mutable.Set[String] = mutable.Set.empty + + override protected def test( + testName: String, + testTags: Tag*)(testFun: => Any)(implicit pos: Position): Unit = { + require(!runAllTests || runOnlyTests.isEmpty, + "If `runAllTests` is true then `runOnlyTests` must be empty") + + if (runAllTests || runOnlyTests.contains(testName)) { + super.test(s"$testName - column mapping $columnMappingMode mode", testTags: _*) { + testsRun.add(testName) + withSQLConf( + DeltaConfigs.COLUMN_MAPPING_MODE.defaultTablePropertyKey -> columnMappingMode) { + testFun + } + } + } else { + super.ignore(s"$testName - ignored by DeltaColumnMappingSelectedTestMixin")(testFun) + } + } + + override def afterAll(): Unit = { + super.afterAll() + val missingTests = runOnlyTests.toSet diff testsRun + if (missingTests.nonEmpty) { + throw new TestFailedException( + Some("Not all selected column mapping tests were run. Missing: " + + missingTests.mkString(", ")), None, 0) + } + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaExcludedTestMixin.scala b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaExcludedTestMixin.scala new file mode 100644 index 00000000000..b441fd2c112 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaExcludedTestMixin.scala @@ -0,0 +1,38 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.test + +import org.apache.spark.sql.QueryTest + +import org.scalactic.source.Position +import org.scalatest.Tag + +trait DeltaExcludedTestMixin extends QueryTest { + + /** Tests to be ignored by the runner. */ + override def excluded: Seq[String] = Seq.empty + + protected override def test(testName: String, testTags: Tag*) + (testFun: => Any) + (implicit pos: Position): Unit = { + if (excluded.contains(testName)) { + super.ignore(testName, testTags: _*)(testFun) + } else { + super.test(testName, testTags: _*)(testFun) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaHiveTest.scala b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaHiveTest.scala new file mode 100644 index 00000000000..0b8c5565c95 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaHiveTest.scala @@ -0,0 +1,62 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.test + +import org.apache.spark.sql.delta.catalog.DeltaCatalog +import io.delta.sql.DeltaSparkSessionExtension +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.{SparkContext, SparkFunSuite} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext} +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} +import org.apache.spark.sql.test.SQLTestUtils + +/** + * Test utility for initializing a SparkSession with a Hive Client and a Hive Catalog for testing + * DDL operations. Typical tests leverage an in-memory catalog with a mock catalog client. Here we + * use real Hive classes. + */ +trait DeltaHiveTest extends SparkFunSuite with BeforeAndAfterAll { self: SQLTestUtils => + + private var _session: SparkSession = _ + private var _hiveContext: TestHiveContext = _ + private var _sc: SparkContext = _ + + override def beforeAll(): Unit = { + val conf = TestHive.sparkSession.sparkContext.getConf.clone() + TestHive.sparkSession.stop() + conf.set(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[DeltaCatalog].getName) + conf.set(StaticSQLConf.SPARK_SESSION_EXTENSIONS.key, + classOf[DeltaSparkSessionExtension].getName) + _sc = new SparkContext("local", this.getClass.getName, conf) + _hiveContext = new TestHiveContext(_sc) + _session = _hiveContext.sparkSession + SparkSession.setActiveSession(_session) + super.beforeAll() + } + + override protected def spark: SparkSession = _session + + override def afterAll(): Unit = { + try { + _hiveContext.reset() + } finally { + _sc.stop() + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaSQLCommandTest.scala b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaSQLCommandTest.scala new file mode 100644 index 00000000000..cb28a4f7123 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaSQLCommandTest.scala @@ -0,0 +1,39 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.test + +import org.apache.spark.sql.delta.catalog.DeltaCatalog +import io.delta.sql.DeltaSparkSessionExtension + +import org.apache.spark.SparkConf +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} +import org.apache.spark.sql.test.SharedSparkSession + +/** + * A trait for tests that are testing a fully set up SparkSession with all of Delta's requirements, + * such as the configuration of the DeltaCatalog and the addition of all Delta extensions. 
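+ *
+ * A minimal, hypothetical usage sketch (suite, test, and table names are illustrative):
+ * {{{
+ * class MyDeltaSuite extends QueryTest with DeltaSQLCommandTest {
+ *   test("create a Delta table") {
+ *     sql("CREATE TABLE tbl (id LONG) USING delta")
+ *   }
+ * }
+ * }}}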
+ */ +trait DeltaSQLCommandTest extends SharedSparkSession { + + override protected def sparkConf: SparkConf = { + super.sparkConf + .set(StaticSQLConf.SPARK_SESSION_EXTENSIONS.key, + classOf[DeltaSparkSessionExtension].getName) + .set(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, + classOf[DeltaCatalog].getName) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaTestImplicits.scala b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaTestImplicits.scala new file mode 100644 index 00000000000..4363ec6368e --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaTestImplicits.scala @@ -0,0 +1,166 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.test + +import java.io.File + +import org.apache.spark.sql.delta.{DeltaLog, OptimisticTransaction, Snapshot} +import org.apache.spark.sql.delta.DeltaOperations.{ManualUpdate, Operation, Write} +import org.apache.spark.sql.delta.actions.{Action, AddFile, Metadata, Protocol} +import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.commands.optimize.OptimizeMetrics +import org.apache.spark.sql.delta.hooks.AutoCompact +import org.apache.spark.sql.delta.stats.StatisticsCollection +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.{SaveMode, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} +import org.apache.spark.util.Clock + +/** + * Additional method definitions for Delta classes that are intended for use only in testing. + */ +object DeltaTestImplicits { + implicit class OptimisticTxnTestHelper(txn: OptimisticTransaction) { + + /** Ensure that the initial commit of a Delta table always contains a Metadata action */ + def commitActions(op: Operation, actions: Action*): Long = { + if (txn.readVersion == -1) { + val metadataOpt = actions.collectFirst { case m: Metadata => m } + val protocolOpt = actions.collectFirst { case p: Protocol => p } + val otherActions = + actions.filterNot(a => a.isInstanceOf[Metadata] || a.isInstanceOf[Protocol]) + (metadataOpt, protocolOpt) match { + case (Some(metadata), Some(protocol)) => + // When both metadata and protocol are explicitly passed, use them. + txn.updateProtocol(protocol) + // This will auto upgrade any required table features in the passed protocol as per + // given metadata. + txn.updateMetadataForNewTable(metadata) + case (Some(metadata), None) => + // When just metadata is passed, use it. + // This will auto generate protocol as per metadata. + txn.updateMetadataForNewTable(metadata) + case (None, Some(protocol)) => + txn.updateProtocol(protocol) + txn.updateMetadataForNewTable(Metadata()) + case (None, None) => + // If neither metadata nor protocol is explicitly passed, then use default Metadata and + // with the maximum protocol. 
+ txn.updateMetadataForNewTable(Metadata()) + txn.updateProtocol(Action.supportedProtocolVersion()) + } + txn.commit(otherActions, op) + } else { + txn.commit(actions, op) + } + } + + def commitManually(actions: Action*): Long = { + commitActions(ManualUpdate, actions: _*) + } + + def commitWriteAppend(actions: Action*): Long = { + commitActions(Write(SaveMode.Append), actions: _*) + } + } + + /** Add test-only File overloads for DeltaTable.forPath */ + implicit class DeltaLogObjectTestHelper(deltaLog: DeltaLog.type) { + def forTable(spark: SparkSession, dataPath: File): DeltaLog = { + DeltaLog.forTable(spark, new Path(dataPath.getCanonicalPath)) + } + + def forTable(spark: SparkSession, dataPath: File, clock: Clock): DeltaLog = { + DeltaLog.forTable(spark, new Path(dataPath.getCanonicalPath), clock) + } + } + + /** + * Helper class for working with the most recent snapshot in the deltaLog + */ + implicit class DeltaLogTestHelper(deltaLog: DeltaLog) { + def snapshot: Snapshot = { + deltaLog.unsafeVolatileSnapshot + } + + def checkpoint(): Unit = { + deltaLog.checkpoint(snapshot) + } + + def checkpointInterval(): Int = { + deltaLog.checkpointInterval(snapshot.metadata) + } + + def deltaRetentionMillis(): Long = { + deltaLog.deltaRetentionMillis(snapshot.metadata) + } + + def enableExpiredLogCleanup(): Boolean = { + deltaLog.enableExpiredLogCleanup(snapshot.metadata) + } + + def upgradeProtocol(newVersion: Protocol): Unit = { + upgradeProtocol(deltaLog.unsafeVolatileSnapshot, newVersion) + } + + def upgradeProtocol(snapshot: Snapshot, newVersion: Protocol): Unit = { + deltaLog.upgradeProtocol(None, snapshot, newVersion) + } + } + + implicit class DeltaTableV2ObjectTestHelper(dt: DeltaTableV2.type) { + /** Convenience overload that omits the cmd arg (which is not helpful in tests). */ + def apply(spark: SparkSession, id: TableIdentifier): DeltaTableV2 = + dt.apply(spark, id, "test") + } + + implicit class DeltaTableV2TestHelper(deltaTable: DeltaTableV2) { + /** For backward compatibility with existing unit tests */ + def snapshot: Snapshot = deltaTable.initialSnapshot + } + + implicit class AutoCompactObjectTestHelper(ac: AutoCompact.type) { + private[delta] def compact( + spark: SparkSession, + deltaLog: DeltaLog, + partitionPredicates: Seq[Expression] = Nil, + opType: String = AutoCompact.OP_TYPE): Seq[OptimizeMetrics] = { + AutoCompact.compact( + spark, deltaLog, catalogTable = None, + partitionPredicates, opType) + } + } + + implicit class StatisticsCollectionObjectTestHelper(sc: StatisticsCollection.type) { + + /** + * This is an implicit helper required for backward compatibility with existing + * unit tests. It allows to call [[StatisticsCollection.recompute]] without a + * catalog table and in the actual call, sets it to [[None]]. + */ + def recompute( + spark: SparkSession, + deltaLog: DeltaLog, + predicates: Seq[Expression] = Seq(Literal(true)), + fileFilter: AddFile => Boolean = af => true): Unit = { + StatisticsCollection.recompute( + spark, deltaLog, catalogTable = None, predicates, fileFilter) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/test/ScanReportHelper.scala b/spark/src/test/scala/org/apache/spark/sql/delta/test/ScanReportHelper.scala new file mode 100644 index 00000000000..0a9f607a03a --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/test/ScanReportHelper.scala @@ -0,0 +1,163 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
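
Since `DeltaTestImplicits` only adds extension methods, a short usage sketch may help; the object name, file path, and `AddFile` values below are made up for illustration, and this is a hedged sketch rather than code from the change itself:

    import java.io.File

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.delta.DeltaLog
    import org.apache.spark.sql.delta.actions.{AddFile, Metadata}
    import org.apache.spark.sql.delta.test.DeltaTestImplicits._

    object DeltaTestImplicitsExample {
      def seedTable(spark: SparkSession, dir: File): Long = {
        // File-based overload contributed by DeltaLogObjectTestHelper.
        val log = DeltaLog.forTable(spark, dir)
        val file = AddFile("part-00000", Map.empty, size = 1L, modificationTime = 1L, dataChange = true)
        // On an empty table, commitManually fills in Metadata (and Protocol) as described above.
        val version = log.startTransaction().commitManually(Metadata(), file)
        // Test-only snapshot accessor from DeltaLogTestHelper.
        assert(log.snapshot.version == version)
        version
      }
    }
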
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.test + +import scala.util.control.NonFatal + +import org.apache.spark.sql.delta.files.TahoeFileIndex +import org.apache.spark.sql.delta.metering.ScanReport +import org.apache.spark.sql.delta.stats.{DataSize, PreparedDeltaFileIndex} +import org.apache.spark.sql.execution.{FileSourceScanExec, QueryExecution, SparkPlan} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.util.QueryExecutionListener + +/** + * A helper trait used by test classes that want to collect the scans (i.e. [[FileSourceScanExec]]) + * generated by a given input query during query planning. + * + * This trait exposes a single public API [[getScanReport]]. + */ +trait ScanReportHelper extends SharedSparkSession with AdaptiveSparkPlanHelper { + + import ScanReportHelper._ + + /** + * Collect the scan leaves in the given SparkPlan. + */ + private def collectScans(plan: SparkPlan): Seq[FileSourceScanExec] = { + collectWithSubqueries(plan)({ + case fs: FileSourceScanExec => Seq(fs) + case cached: InMemoryTableScanExec => collectScans(cached.relation.cacheBuilder.cachedPlan) + }).flatten + } + + /** + * Returns a new [[QueryExecutionListener]] that can be registered to the Spark listener bus + * to analyse and collect metrics during query execution. + * + * Specifically, this listener will check for any [[FileSourceScanExec]] generated during query + * planning, cast them into [[ScanReport]] (helper class to hold useful info about the scan), and + * append to the singleton [[ScanReportHelper.scans]] + */ + private def getListener(): QueryExecutionListener = { + new QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { + try qe.assertAnalyzed() catch { + case NonFatal(e) => + logDebug("Not running Delta Metering because the query failed during analysis.", e) + return + } + + val fileScans = collectScans(qe.executedPlan) + + for (scanExec <- fileScans) { + scanExec.relation.location match { + case deltaTable: PreparedDeltaFileIndex => + val preparedScan = deltaTable.preparedScan + // The names of the partition columns that were used as filters in this scan. + // Convert this to a set first to avoid double-counting partition columns that might + // appear multiple times. 
+ val usedPartitionColumns = + preparedScan.partitionFilters.map(_.references.map(_.name)).flatten.toSet.toSeq + val report = ScanReport( + tableId = deltaTable.metadata.id, + path = deltaTable.path.toString, + scanType = "delta-query", + deltaDataSkippingType = preparedScan.dataSkippingType.toString, + partitionFilters = preparedScan.partitionFilters.map(_.sql).toSeq, + dataFilters = preparedScan.dataFilters.map(_.sql).toSeq, + unusedFilters = preparedScan.unusedFilters.map(_.sql).toSeq, + size = Map( + "total" -> preparedScan.total, + "partition" -> preparedScan.partition, + "scanned" -> preparedScan.scanned), + metrics = scanExec.metrics.mapValues(_.value).toMap + + ("scanDurationMs" -> preparedScan.scanDurationMs), + annotations = Map.empty, + versionScanned = deltaTable.versionScanned, + usedPartitionColumns = usedPartitionColumns, + numUsedPartitionColumns = usedPartitionColumns.size, + allPartitionColumns = deltaTable.metadata.partitionColumns, + numAllPartitionColumns = deltaTable.metadata.partitionColumns.size, + parentFilterOutputRows = None + ) + + scans += report + + case deltaTable: TahoeFileIndex => + val report = ScanReport( + tableId = deltaTable.metadata.id, + path = deltaTable.path.toString, + scanType = "delta-unknown", + partitionFilters = Nil, + dataFilters = Nil, + unusedFilters = Nil, + size = Map( + "total" -> DataSize( + bytesCompressed = Some(deltaTable.deltaLog.unsafeVolatileSnapshot.sizeInBytes)), + "scanned" -> DataSize(bytesCompressed = Some(deltaTable.sizeInBytes)) + ), + metrics = scanExec.metrics.mapValues(_.value).toMap, + versionScanned = None, + annotations = Map.empty + ) + + scans += report + + case _ => // ignore + } + } + } + + override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = { } + } + } + + /** + * Execute function `f` and return the scans generated during query planning + */ + def getScanReport(f: => Unit): Seq[ScanReport] = { + synchronized { + assert(scans == null, "getScanReport does not support nested invocation.") + scans = scala.collection.mutable.ArrayBuffer.empty[ScanReport] + } + + val listener = getListener() + spark.listenerManager.register(listener) + + var result: scala.collection.mutable.ArrayBuffer[ScanReport] = null + try { + f + } finally { + spark.sparkContext.listenerBus.waitUntilEmpty(15000) + spark.listenerManager.unregister(listener) + + result = scans + synchronized { + scans = null + } + } + + result.toSeq + } +} + +object ScanReportHelper { + @volatile var scans: scala.collection.mutable.ArrayBuffer[ScanReport] = null +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/test/TestsStatistics.scala b/spark/src/test/scala/org/apache/spark/sql/delta/test/TestsStatistics.scala new file mode 100644 index 00000000000..b6527a279a8 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/test/TestsStatistics.scala @@ -0,0 +1,64 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
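
To make the intended call pattern of `getScanReport` concrete, here is a hedged sketch of a consuming suite; the suite name, temp-table setup, and assertions are illustrative only:

    import org.apache.spark.sql.QueryTest
    import org.apache.spark.sql.delta.test.ScanReportHelper

    class MyDataSkippingSuite extends QueryTest with ScanReportHelper {
      test("filtered read plans a delta scan") {
        withTempDir { dir =>
          spark.range(10).write.format("delta").save(dir.getCanonicalPath)
          // One ScanReport per FileSourceScanExec planned while the block runs.
          val reports = getScanReport {
            spark.read.format("delta").load(dir.getCanonicalPath).where("id = 1").collect()
          }
          assert(reports.nonEmpty)
          assert(reports.forall(_.scanType.startsWith("delta")))
        }
      }
    }
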
+ */ + +package org.apache.spark.sql.delta.test + +import org.apache.spark.sql.delta.DeltaLog +import org.apache.spark.sql.delta.test.DeltaTestImplicits._ + +import org.apache.spark.sql.execution.{ColumnarToRowExec, FileSourceScanExec, InputAdapter, SparkPlan} +import org.apache.spark.sql.functions.from_json +import org.apache.spark.sql.{Column, DataFrame} +import org.apache.spark.sql.test.SQLTestUtils + +/** + * Provides utilities for testing StatisticsCollection. + */ +trait TestsStatistics { self: SQLTestUtils => + + /** A function to get the reconciled statistics DataFrame from the DeltaLog */ + protected var getStatsDf: (DeltaLog, Seq[Column]) => DataFrame = _ + + /** + * Creates the correct `getStatsDf` to be used by the `testFun` and executes the `testFun`. + */ + protected def statsTest(testName: String, testTags: org.scalatest.Tag*)(testFun: => Any): Unit = { + import testImplicits._ + + test(testName, testTags: _*) { + getStatsDf = (deltaLog, columns) => { + val snapshot = deltaLog.snapshot + snapshot.allFiles + .withColumn("stats", from_json($"stats", snapshot.statsSchema)) + .select("stats.*") + .select(columns: _*) + } + testFun + } + } + + /** + * A util to match a physical file scan node. + */ + object FileScanExecNode { + def unapply(plan: SparkPlan): Option[FileSourceScanExec] = plan match { + case f: FileSourceScanExec => Some(f) + case InputAdapter(f: FileSourceScanExec) => Some(f) + case ColumnarToRowExec(InputAdapter(f: FileSourceScanExec)) => Some(f) + case _ => None + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/uniform/SparkSessionSwitch.scala b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/SparkSessionSwitch.scala new file mode 100644 index 00000000000..07d2b3cc2c1 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/SparkSessionSwitch.scala @@ -0,0 +1,112 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import org.apache.spark.sql.SparkSession + +/** + * Helper for easily switch between multiple sessions in test + */ +trait SparkSessionSwitch { + + private val knownSessions = + collection.mutable.HashMap[SparkSession, (Option[SparkContext], SparkEnv)]() + + /** + * Create a SparkSession and save its context. Calling this will not change + * the current active SparkSession. Use [[withSession]] when you want to use + * the newly created session. + * + * @param factory used to create the session + * @return the newly created session + */ + def newSession(factory: => SparkSession): SparkSession = { + registerActiveSession() + val old = SparkSession.getActiveSession + clear() + val created = factory + registerActiveSession() + old.foreach(restore) + created + } + + /** + * Execute code with the given session. 
+ * @param session session to use + * @param thunk code to execute within the specified session + */ + def withSession[T](session: SparkSession)(thunk: SparkSession => T): T = { + val oldSession = SparkSession.getActiveSession + restore(session) + val result = thunk(session) + oldSession.foreach(restore) + result + } + + /** + * Record the SparkContext/SparkEnv for current active session + */ + private def registerActiveSession(): Unit = { + SparkSession.getActiveSession + .foreach(knownSessions.put(_, (SparkContext.getActive, SparkEnv.get))) + } + + /** + * Restore the snapshot made for the given session + * @param session the session to be restore + */ + private def restore(session: SparkSession): Unit = { + val (restoreContext, restoreEnv) = knownSessions.getOrElse( + session, throw new IllegalArgumentException("Unknown Session to restore")) + SparkSession.setActiveSession(session) + SparkSession.setDefaultSession(session) + + val oldContext = SparkContext.getActive + SparkContext.clearActiveContext() + restoreContext.foreach(SparkContext.setActiveContext) + // Synchronize the context + (oldContext, restoreContext) match { + case (Some(off), Some(on)) => syncContext(off, on) + case _ => + } + + SparkEnv.set(restoreEnv) + } + + /** + * Clear the session related context. Necessary before creating new sessions + */ + private def clear(): Unit = { + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + SparkContext.clearActiveContext() + SparkEnv.set(null) + } + + /** + * Synchronize local properties when switch SparkContext by merging + * and overwriting from off to on + * @param off the context to be deactivated + * @param on the context to be activated + */ + private def syncContext(off: SparkContext, on: SparkContext): Unit = { + // NOTE: cannot use putAll due to a problem of Scala2 + JDK9+ + // See https://github.com/scala/bug/issues/10418 for detail + val onProperties = on.localProperties.get() + off.localProperties.get().forEach((k, v) => onProperties.put(k, v)) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/uniform/UniFormE2ESuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/UniFormE2ESuite.scala new file mode 100644 index 00000000000..4f51ac3a82e --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/UniFormE2ESuite.scala @@ -0,0 +1,100 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
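
A small sketch of the intended `newSession`/`withSession` flow, with placeholder builder settings; nothing here is prescribed by the trait itself:

    import org.apache.spark.SparkSessionSwitch
    import org.apache.spark.sql.SparkSession

    trait TwoSessionExample extends SparkSessionSwitch {
      // Lazily create a second, isolated session; newSession snapshots and restores
      // the currently active session around the factory call.
      lazy val readerSession: SparkSession = newSession {
        SparkSession.builder()
          .master("local[*]")
          .appName("reader")
          .getOrCreate()
      }

      def readCount(table: String): Long =
        withSession(readerSession) { session =>
          // Runs with readerSession active; the previous session is restored afterwards.
          session.sql(s"SELECT COUNT(*) FROM $table").head().getLong(0)
        }
    }
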
+ */
+
+package org.apache.spark.sql.delta.uniform
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.types._
+
+abstract class UniFormE2EIcebergSuiteBase extends UniFormE2ETest {
+
+  val testTableName = "delta_table"
+
+  test("Basic Insert") {
+    withTable(testTableName) {
+      write(
+        s"""CREATE TABLE $testTableName (col1 INT) USING DELTA
+           |TBLPROPERTIES (
+           |  'delta.columnMapping.mode' = 'name',
+           |  'delta.enableIcebergCompatV1' = 'true',
+           |  'delta.universalFormat.enabledFormats' = 'iceberg'
+           |)""".stripMargin)
+      write(s"INSERT INTO $testTableName VALUES (123)")
+      readAndVerify(testTableName, "col1", "col1", Seq(Row(123)))
+    }
+  }
+
+  test("CIUD") {
+    withTable(testTableName) {
+      write(
+        s"""CREATE TABLE `$testTableName` (col1 INT) USING DELTA
+           |TBLPROPERTIES (
+           |  'delta.columnMapping.mode' = 'name',
+           |  'delta.enableIcebergCompatV1' = 'true',
+           |  'delta.universalFormat.enabledFormats' = 'iceberg'
+           |)""".stripMargin)
+      write(s"INSERT INTO `$testTableName` VALUES (123),(456),(567),(331)")
+      write(s"UPDATE `$testTableName` SET col1 = 191 WHERE col1 = 567")
+      write(s"DELETE FROM `$testTableName` WHERE col1 = 456")
+
+      readAndVerify(testTableName, "col1", "col1", Seq(Row(123), Row(191), Row(331)))
+    }
+  }
+
+  test("Nested struct schema test") {
+    withTable(testTableName) {
+      write(s"""CREATE TABLE $testTableName
+           | (col1 INT, col2 STRUCT<f1: STRUCT<f2: INT, f3: STRUCT<f4: INT, f5: INT>
+           | , f6: INT>, f7: INT>) USING DELTA
+           |TBLPROPERTIES (
+           |  'delta.columnMapping.mode' = 'name',
+           |  'delta.enableIcebergCompatV1' = 'true',
+           |  'delta.universalFormat.enabledFormats' = 'iceberg'
+           |)""".stripMargin)
+
+      val data = Seq(
+        Row(1, Row(Row(2, Row(3, 4), 5), 6))
+      )
+
+      val innerStruct3 = StructType(
+        StructField("f4", IntegerType) ::
+        StructField("f5", IntegerType) :: Nil)
+
+      val innerStruct2 = StructType(
+        StructField("f2", IntegerType) ::
+        StructField("f3", innerStruct3) ::
+        StructField("f6", IntegerType) :: Nil)
+
+      val innerStruct = StructType(
+        StructField("f1", innerStruct2) ::
+        StructField("f7", IntegerType) :: Nil)
+
+      val schema = StructType(
+        StructField("col1", IntegerType) ::
+        StructField("col2", innerStruct) :: Nil)
+
+      val tableFullName = tableNameForRead(testTableName)
+
+      spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+        .write.format("delta").mode("append")
+        .saveAsTable(testTableName)
+
+      val result = read(s"SELECT * FROM $tableFullName")
+
+      assert(result.head === data.head)
+    }
+  }
+}
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/uniform/UniFormE2ETest.scala b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/UniFormE2ETest.scala
new file mode 100644
index 00000000000..112f7a6408b
--- /dev/null
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/UniFormE2ETest.scala
@@ -0,0 +1,106 @@
+/*
+ * Copyright (2021) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.sql.delta.uniform + +import org.apache.spark.SparkSessionSwitch +import org.apache.spark.sql.{QueryTest, Row, SparkSession} +import org.apache.spark.sql.test.SharedSparkSession + +/** + * Base classes for all UniForm end-to-end test cases. Provides support to + * write data with one SparkSession and read data from another for verification. + * + * People who need to write a new test suite should extend this class and + * implement their test cases with [[write]] and [[read]]/[[readAndVerify]], which execute + * with the writer session and reader session respectively. + * + * Implementing classes need to correctly set up the reader and writer environments. + * See [[UniFormE2EIcebergSuiteBase]] for existing examples. + */ +trait UniFormE2ETest + extends QueryTest + with SharedSparkSession + with SparkSessionSwitch { + + private var _readerSparkSession: Option[SparkSession] = None + + /** + * Execute write operations through the writer SparkSession + * + * @param sqlText write query to the UniForm table + */ + protected def write(sqlText: String): Unit = spark.sql(sqlText) + + /** + * Execute a sql with reader SparkSession and return the result. + * NOTE. + * 1. The caller should use the correct table name. See [[tableNameForRead]] + * 2. We eagerly collect the results because we will switch back to the + * writer session after read. + * @param sqlText the read query against the UniForm table + * @return the read result + */ + protected def read(sqlText: String): Array[Row] = { + withSession(readerSparkSession) { session => + session.sql(sqlText).collect() + } + } + + /** + * Verify the result by reading from the reader session and compare the result to the expected. + * + * @param table write table name + * @param fields fields to verify, separated by comma. E.g., "col1, col2" + * @param orderBy fields to order the results, separated by comma. + * @param expect expected result + */ + protected def readAndVerify( + table: String, fields: String, orderBy: String, expect: Seq[Row]): Unit = { + val translated = tableNameForRead(table) + withSession(readerSparkSession) { session => + checkAnswer(session.sql(s"SELECT $fields FROM $translated ORDER BY $orderBy"), expect) + } + } + + protected def readerSparkSession: SparkSession = { + if (_readerSparkSession.isEmpty) { + // call to newSession makes sure + // [[SparkSession.getOrCreate]] gives a new session + // and [[SparkContext.getOrCreate]] uses a new context + _readerSparkSession = Some(newSession(createReaderSparkSession)) + } + _readerSparkSession.get + } + + /** + * Child classes should extend this to create reader SparkSession. + * @return sparkSession for reading data and verify result. + */ + protected def createReaderSparkSession: SparkSession + + /** + * Subclasses should override this method when the table name for reading + * is different from the table name used for writing. For example, when we + * write a table using the name `table1`, and then read it from another catalog + * `catalog_read`, this method should return `catalog_read.default.table1` + * for the input `table1`. 
+ * + * @param tableName table name for writing (name only) + * @return table name for reading, default is no translation + */ + protected def tableNameForRead(tableName: String): String = tableName +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/uniform/hms/EmbeddedHMS.scala b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/hms/EmbeddedHMS.scala new file mode 100644 index 00000000000..e64af3c6c2b --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/hms/EmbeddedHMS.scala @@ -0,0 +1,174 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.uniform.hms + +import java.io.{BufferedReader, File, InputStreamReader, IOException} +import java.net.ServerSocket +import java.nio.file.Files +import java.sql.{Connection, DriverManager} + +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.conf.HiveConf.ConfVars + + +/** + * EmbeddedHMS is an embedded Hive MetaStore for testing purposes. + * Multiple EmbeddedHMS instances can be started in parallel on the same host + * (see [[HMSTest]] for how to use it in the code). + */ +class EmbeddedHMS { + private var server: HMSServer = _ + private var whFolder: String = _ + private var dbName: String = _ + private var started = false + private var port: Int = 0 + + /** + * Start an EmbeddedHMS instance + */ + def start(): Unit = { + if (started) return + port = EmbeddedHMS.firstAvailablePort() + val dbFolder = Files.createTempDirectory("ehms_metastore") + Files.delete(dbFolder) // Derby needs the folder to be non-existent + dbName = dbFolder.toString + whFolder = Files.createTempDirectory("ehms_warehouse").toString + + initDatabase(dbName) + + val innerConf = new HiveConf() + innerConf.set(ConfVars.HIVE_IN_TEST.varname, "false") + innerConf.set(ConfVars.METASTOREWAREHOUSE.varname, whFolder) + innerConf.set(ConfVars.METASTORECONNECTURLKEY.varname, s"jdbc:derby:$dbName;create=true") + server = new HMSServer(innerConf, port) + server.start() + + started = true + } + + /** + * Stop the instance and cleanup its resources + */ + def stop(): Unit = { + if (!started) return + server.stop() + // Cleanup on exit + FileUtils.deleteDirectory(new File(dbName)) + FileUtils.deleteDirectory(new File(whFolder)) + started = false + } + + /** + * Fetch the configuration used for clients to connect to the MetaStore + * @return conf containing thrift uri and warehouse location + */ + def conf(): Configuration = { + if (!started) throw new IllegalStateException("Not started") + val conf = new Configuration() + conf.set(ConfVars.METASTOREWAREHOUSE.varname, whFolder) + conf.set(ConfVars.METASTOREURIS.varname, s"thrift://localhost:$port") + conf + } + + /** + * Load SQL scripts into Apache Derby instance to initialize the metastore + * schema. The script used here is copied from HMS official repo. 
+ * @param dbFolder the folder to create the database, also the database name + */ + private def initDatabase(dbFolder: String): Unit = { + // scalastyle:off classforname + // Register the Derby JDBC Driver + Class.forName("org.apache.derby.jdbc.EmbeddedDriver").getConstructor().newInstance() + // scalastyle:on classforname + val con = DriverManager.getConnection(s"jdbc:derby:$dbFolder;create=true") + // May need to use another version when upgrading Hive dependencies + executeScript(con, "hms/hive-schema-3.1.0.derby.sql") + con.close() + // Shutdown the Derby instance properly, allowing it to clean up. + try { + DriverManager.getConnection(s"jdbc:derby:$dbFolder;shutdown=true") + } catch { + // From Derby doc: + // "A successful shutdown always results in an SQLException to indicate + // that Derby has shut down and that there is no other exception." + // We thus ignore the exception here. + case _: java.sql.SQLException => + } + } + + /** + * Execute sql scripts in the given resource file + * @param con database connection + * @param scriptFile the name of the resource location of the sql script + */ + private def executeScript(con: Connection, scriptFile: String): Unit = { + val scriptIs = Thread.currentThread().getContextClassLoader.getResourceAsStream(scriptFile) + if (scriptIs == null) { + throw new RuntimeException("Make sure derby init script is in the classpath") + } + val reader = new BufferedReader(new InputStreamReader(scriptIs)) + var line: String = reader.readLine + val buffer: StringBuilder = new StringBuilder() + val stmt = con.createStatement() + while (line != null) { + line match { + case comment if comment.startsWith("--") => + case eos if eos.endsWith(";") => + if (buffer.nonEmpty) buffer.append("\n") + buffer.append(eos) + buffer.deleteCharAt(buffer.length - 1) // Remove semicolon + stmt.addBatch(buffer.toString) + buffer.clear + case piece => + if (buffer.nonEmpty) buffer.append("\n") + buffer.append(piece) + } + line = reader.readLine() + } + reader.close() + stmt.executeBatch() + stmt.close() + } +} + +object EmbeddedHMS { + var start = 9083 + + def firstAvailablePort(): Integer = this.synchronized { + for (port <- start until 65536) { + var ss: ServerSocket = null + try { + ss = new ServerSocket(port) + ss.setReuseAddress(true) + start = port + 1 + return port + } catch { + case e: IOException => + } finally { + if (ss != null) { + try ss.close() + catch { + case e: IOException => + } + } + } + } + throw new RuntimeException("No port is available") + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/uniform/hms/HMSServer.scala b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/hms/HMSServer.scala new file mode 100644 index 00000000000..7fa12a2490c --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/hms/HMSServer.scala @@ -0,0 +1,123 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
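
A minimal sketch of driving `EmbeddedHMS` directly, showing the start/conf/stop lifecycle that the `HMSTest` trait later wraps; the object name and printed settings are illustrative only:

    import org.apache.hadoop.hive.conf.HiveConf.ConfVars

    import org.apache.spark.sql.delta.uniform.hms.EmbeddedHMS

    object EmbeddedHmsExample {
      def main(args: Array[String]): Unit = {
        val hms = new EmbeddedHMS()
        hms.start()
        try {
          val conf = hms.conf()
          // A thrift://localhost:<port> URI plus a temporary warehouse directory.
          println(conf.get(ConfVars.METASTOREURIS.varname))
          println(conf.get(ConfVars.METASTOREWAREHOUSE.varname))
        } finally {
          hms.stop()
        }
      }
    }
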
+ */ + +package org.apache.spark.sql.delta.uniform.hms + +import java.net.InetSocketAddress + +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler +import org.apache.hadoop.hive.metastore.RetryingHMSHandler +import org.apache.hadoop.hive.metastore.TSetIpAddressProcessor +import org.apache.thrift.protocol.{TBinaryProtocol, TProtocol, TProtocolFactory} +import org.apache.thrift.server.{ServerContext, TServer, TServerEventHandler, TThreadPoolServer} +import org.apache.thrift.transport.{TServerSocket, TTransport, TTransportFactory} + +/** + * Start a Thrift Server that accepts standard HMS thrift client. + * + * @param conf including database connection and warehouse location + * @param port the port this thrift server listens + */ +class HMSServer(val conf: HiveConf, val port: Int) { + + private var tServer: TServer = _ + private var serverThread: MetastoreThread = _ + + def start(): Unit = { + val maxMessageSize = 100L * 1024 * 1024 + + val protocolFactory: TProtocolFactory = new TBinaryProtocol.Factory + val inputProtoFactory: TProtocolFactory = new TBinaryProtocol.Factory( + true, true, maxMessageSize, maxMessageSize) + val hmsHandler = new HMSHandler("default", conf) + val handler = RetryingHMSHandler.getProxy(conf, hmsHandler, false) + val transFactory = new TTransportFactory + val processor = new TSetIpAddressProcessor(handler) + val serverSocket = new TServerSocket(new InetSocketAddress(port)) + + val args = new TThreadPoolServer.Args(serverSocket) + .processor(processor) + .transportFactory(transFactory) + .protocolFactory(protocolFactory) + .inputProtocolFactory(inputProtoFactory) + .minWorkerThreads(5) + .maxWorkerThreads(5); + + tServer = new TThreadPoolServer(args); + + val tServerEventHandler = new TServerEventHandler() { + override def preServe(): Unit = () + + override def createContext(tProtocol: TProtocol, tProtocol1: TProtocol): ServerContext = null + + override def deleteContext( + serverContext: ServerContext, tProtocol: TProtocol, tProtocol1: TProtocol): Unit = { + // If the IMetaStoreClient#close was called, HMSHandler#shutdown would have already + // cleaned up thread local RawStore. Otherwise, do it now. 
+ HMSServer.cleanupRawStore() + } + + override def processContext( + serverContext: ServerContext, tTransport: TTransport, tTransport1: TTransport): Unit = () + } + tServer.setServerEventHandler(tServerEventHandler) + + serverThread = new MetastoreThread + serverThread.start() + + // Wait till the server is up + while (!tServer.isServing) { + Thread.sleep(100) + } + } + + def stop(): Unit = { + HMSServer.cleanupRawStore() + tServer.stop() + } + + /** + * The metastore thrift server will run in this thread + */ + private class MetastoreThread extends Thread { + super.setDaemon(true) + super.setName("EmbeddedHMS Metastore Thread") + + override def run(): Unit = { + tServer.serve() + } + } +} + +object HMSServer { + + private val localConfField = classOf[HMSHandler].getDeclaredField("threadLocalConf") + localConfField.setAccessible(true) + private val localConf = localConfField.get().asInstanceOf[ThreadLocal[HiveConf]] + + private def cleanupRawStore(): Unit = { + try { + val rs = HMSHandler.getRawStore + if (rs != null) { + rs.shutdown() + } + } finally { + HMSHandler.removeRawStore() + localConf.remove() + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/uniform/hms/HMSTest.scala b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/hms/HMSTest.scala new file mode 100644 index 00000000000..79ce0733e3a --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/uniform/hms/HMSTest.scala @@ -0,0 +1,91 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql.delta.uniform.hms
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars._
+import org.scalatest.{BeforeAndAfterAll, Suite}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Provides support to test cases that need to use HMS.
+ */
+trait HMSTest extends Suite with BeforeAndAfterAll {
+
+  def withMetaStore(thunk: (Configuration) => Unit): Unit = {
+    val conf = sharedHMS.conf()
+    thunk(conf)
+  }
+
+  private var sharedHMS: EmbeddedHMS = _
+
+  protected override def beforeAll(): Unit = {
+    startHMS()
+    super.beforeAll()
+  }
+
+  protected override def afterAll(): Unit = {
+    super.afterAll()
+    stopHMS()
+  }
+
+  protected def startHMS(): Unit = {
+    sharedHMS = new EmbeddedHMS()
+    sharedHMS.start()
+  }
+
+  protected def stopHMS(): Unit = sharedHMS.stop()
+
+  protected def setupSparkConfWithHMS(in: SparkConf): SparkConf = {
+    val conf = sharedHMS.conf()
+    in.set("spark.sql.warehouse.dir", conf.get(METASTOREWAREHOUSE.varname))
+      .set("hive.metastore.uris", conf.get(METASTOREURIS.varname))
+      .set("spark.sql.catalogImplementation", "hive")
+  }
+
+  protected def createDeltaSparkSession: SparkSession = {
+    val conf = sharedHMS.conf()
+    val sparkSession = SparkSession.builder()
+      .master("local[*]")
+      .appName("DeltaSession")
+      .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
+      .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
+      .config("spark.sql.warehouse.dir", conf.get(METASTOREWAREHOUSE.varname))
+      .config("hive.metastore.uris", conf.get(METASTOREURIS.varname))
+      .config("spark.sql.catalogImplementation", "hive")
+      .getOrCreate()
+    sparkSession
+  }
+
+  protected def createIcebergSparkSession: SparkSession = {
+    val conf = sharedHMS.conf()
+    val sparkSession = SparkSession.builder()
+      .master("local[*]")
+      .appName("IcebergSession")
+      .config("spark.sql.extensions",
+        "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
+      .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog")
+      .config("spark.sql.catalog.spark_catalog.cache-enabled", "false")
+      .config("spark.sql.warehouse.dir", conf.get(METASTOREWAREHOUSE.varname))
+      .config("hive.metastore.uris", conf.get(METASTOREURIS.varname))
+      .config("spark.sql.catalogImplementation", "hive")
+      .getOrCreate()
+    sparkSession
+  }
+}
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/util/AnalysisHelperSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/util/AnalysisHelperSuite.scala
new file mode 100644
index 00000000000..bec90eba753
--- /dev/null
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/util/AnalysisHelperSuite.scala
@@ -0,0 +1,36 @@
+/*
+ * Copyright (2021) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
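
One plausible way to wire `HMSTest` into the UniForm harness above is sketched here; the concrete suite name and the extra Delta settings on the writer conf are assumptions for illustration, not part of this change:

    import org.apache.spark.SparkConf
    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.delta.uniform.UniFormE2EIcebergSuiteBase
    import org.apache.spark.sql.delta.uniform.hms.HMSTest

    class UniFormE2EIcebergHmsSuite extends UniFormE2EIcebergSuiteBase with HMSTest {
      // Writer side: the shared session, pointed at the embedded metastore (HMSTest.beforeAll
      // starts the metastore before SharedSparkSession builds the session) and given the
      // Delta extension and catalog it needs.
      override protected def sparkConf: SparkConf =
        setupSparkConfWithHMS(super.sparkConf)
          .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
          .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")

      // Reader side: a separate Iceberg-enabled session against the same metastore.
      override protected def createReaderSparkSession: SparkSession = createIcebergSparkSession
    }
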
+ */ + +package org.apache.spark.sql.delta.util + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSparkSession + +class AnalysisHelperSuite extends QueryTest with SharedSparkSession { + + test("should not throw NullPointerException when Exception has null description") { + class FakeAnalysisHelper extends AnalysisHelper { + def throwInterruptedException(): Unit = super.improveUnsupportedOpError { + throw new InterruptedException() + } + } + + // Should throw original exception + assertThrows[InterruptedException] { + new FakeAnalysisHelper {}.throwInterruptedException() + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/util/BinPackingUtilsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/util/BinPackingUtilsSuite.scala new file mode 100644 index 00000000000..feab4552811 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/util/BinPackingUtilsSuite.scala @@ -0,0 +1,36 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import org.apache.spark.SparkFunSuite + +class BinPackingUtilsSuite extends SparkFunSuite { + test("test bin-packing") { + val binSize = 5 + val cases = Seq[(Seq[Int], Seq[Seq[Int]])]( + (Seq(1, 2, 3, 4, 5), Seq(Seq(1, 2), Seq(3), Seq(4), Seq(5))), + (Seq(5, 4, 3, 2, 1), Seq(Seq(1, 2), Seq(3), Seq(4), Seq(5))), + // Naive coalescing returns 5 bins where sort-then-coalesce gets 4. + (Seq(4, 2, 4, 2, 5), Seq(Seq(2, 2), Seq(4), Seq(4), Seq(5))), + // The last element exceeds binSize and it's in its own bin. + (Seq(1, 2, 4, 5, 6), Seq(Seq(1, 2), Seq(4), Seq(5), Seq(6)))) + + for ((input, expect) <- cases) { + assert(BinPackingUtils.binPackBySize(input, (x: Int) => x, (x: Int) => x, binSize) == expect) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/util/BitmapAggregatorE2ESuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/util/BitmapAggregatorE2ESuite.scala new file mode 100644 index 00000000000..c19e386e891 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/util/BitmapAggregatorE2ESuite.scala @@ -0,0 +1,203 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
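
The expected bins in the test above encode a sort-then-coalesce strategy. As a sanity check, a throwaway sketch of that strategy (a stand-in for intuition only, not the actual `BinPackingUtils` code) reproduces every case in the test:

    object BinPackSketch {
      /** Sort ascending, then greedily close a bin whenever the next item would overflow it. */
      def binPackSorted(sizes: Seq[Int], binSize: Int): Seq[Seq[Int]] = {
        val bins = scala.collection.mutable.ArrayBuffer(scala.collection.mutable.ArrayBuffer.empty[Int])
        for (size <- sizes.sorted) {
          if (bins.last.nonEmpty && bins.last.sum + size > binSize) {
            bins += scala.collection.mutable.ArrayBuffer.empty[Int]
          }
          bins.last += size
        }
        bins.map(_.toSeq).toSeq
      }
    }

    // BinPackSketch.binPackSorted(Seq(4, 2, 4, 2, 5), 5) == Seq(Seq(2, 2), Seq(4), Seq(4), Seq(5)):
    // four bins, whereas coalescing in input order needs five, which is exactly the
    // "sort-then-coalesce" distinction called out in the test comment.
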
+ */ + +package org.apache.spark.sql.delta.util + +import java.io.{File, IOException} +import java.net.URI +import java.nio.{ByteBuffer, ByteOrder} +import java.nio.file.Files + +import org.apache.spark.sql.catalyst.expressions.aggregation.BitmapAggregator +import org.apache.spark.sql.delta.deletionvectors.{PortableRoaringBitmapArraySerializationFormat, RoaringBitmapArray, RoaringBitmapArrayFormat} + +import org.apache.spark.sql.{Column, QueryTest} +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + +class BitmapAggregatorE2ESuite extends QueryTest + with SharedSparkSession + with SQLTestUtils { + + import BitmapAggregatorE2ESuite._ + import testImplicits._ + import org.apache.spark.sql.functions._ + + for (serializationFormat <- RoaringBitmapArrayFormat.values) { + test(s"DataFrame bitmap groupBy aggregate no duplicates - $serializationFormat") { + dataFrameBitmapGroupByAggregateWithoutDuplicates(format = serializationFormat) + } + } + + for (serializationFormat <- RoaringBitmapArrayFormat.values) { + test("DataFrame bitmap groupBy aggregate no duplicates - invalid Int ids" + + s" - $serializationFormat") { + dataFrameBitmapGroupByAggregateWithoutDuplicates( + offset = INVALID_INT_OFFSET, + format = serializationFormat) + } + } + + for (serializationFormat <- RoaringBitmapArrayFormat.values) { + test("DataFrame bitmap groupBy aggregate no duplicates - invalid unsigned Int ids" + + s" - $serializationFormat") { + dataFrameBitmapGroupByAggregateWithoutDuplicates( + offset = UNSIGNED_INT_OFFSET, + format = serializationFormat) + } + } + + private def dataFrameBitmapGroupByAggregateWithoutDuplicates( + offset: Long = 0L, + format: RoaringBitmapArrayFormat.Value): Unit = { + val baseDF = spark + .range(DATASET_SIZE) + .map { id => + val newId = id + offset + // put 2 adjacent and one with gap + (newId % 6) match { + case 0 | 1 | 4 => ("file1" -> newId) + case 2 | 3 | 5 => ("file2" -> newId) + } + } + .toDF("file", "id") + .cache() + + val bitmapAgg = bitmapAggColumn(baseDF("id"), format) + val aggregationOutput = baseDF + .groupBy("file") + .agg(bitmapAgg) + .as[(String, (Long, Long, Array[Byte]))] + .collect() + .toMap + .mapValues(v => RoaringBitmapArray.readFrom(v._3)) + + val dfFile1 = baseDF + .select("id") + .where("file = 'file1'") + .as[Long] + .collect() + val dfFile2 = baseDF + .select("id") + .where("file = 'file2'") + .as[Long] + .collect() + + assertEqualContents(aggregationOutput("file1"), dfFile1) + assertEqualContents(aggregationOutput("file2"), dfFile2) + baseDF.unpersist() + } + + for (serializationFormat <- RoaringBitmapArrayFormat.values) { + test("DataFrame bitmap groupBy aggregate with duplicates" + + s" - $serializationFormat") { + dataFrameBitmapGroupAggregateWithDuplicates(format = serializationFormat) + } + } + + for (serializationFormat <- RoaringBitmapArrayFormat.values) { + test("DataFrame bitmap groupBy aggregate with duplicates - invalid Int ids" + + s" - $serializationFormat") { + dataFrameBitmapGroupAggregateWithDuplicates( + offset = INVALID_INT_OFFSET, + format = serializationFormat) + } + } + + for (serializationFormat <- RoaringBitmapArrayFormat.values) { + test("DataFrame bitmap groupBy aggregate with duplicates - invalid unsigned Int ids" + + s" - $serializationFormat") { + dataFrameBitmapGroupAggregateWithDuplicates( + offset = UNSIGNED_INT_OFFSET, + format = serializationFormat) + } + } + + def dataFrameBitmapGroupAggregateWithDuplicates( + offset: Long = 0L, + format: RoaringBitmapArrayFormat.Value) { + val baseDF = spark + 
.range(DATASET_SIZE) + .flatMap { id => + val newId = id + offset + // put two adjacent and duplicate the one after a gap + (newId % 6) match { + case 0 | 1 => Seq("file1" -> newId) + case 2 | 3 => Seq("file2" -> newId) + case 4 => Seq("file1" -> newId, "file1" -> newId) // duplicate in file1 + case 5 => Seq("file2" -> newId, "file2" -> newId) // duplicate in file2 + } + } + .toDF("file", "id") + .cache() + + val bitmapAgg = bitmapAggColumn(baseDF("id"), format) + // scalastyle:off countstring + val aggregationOutput = baseDF + .groupBy("file") + .agg(bitmapAgg, count("id")) + .as[(String, (Long, Long, Array[Byte]), Long)] + .collect() + .map(t => (t._1 -> (RoaringBitmapArray.readFrom(t._2._3), t._3))) + .toMap + // scalastyle:on countstring + + val dfFile1 = baseDF + .select("id") + .where("file = 'file1'") + .distinct() + .as[Long] + .collect() + val dfFile2 = baseDF + .select("id") + .where("file = 'file2'") + .distinct() + .as[Long] + .collect() + + val file1Value = aggregationOutput("file1") + assert(file1Value._2 > file1Value._1.cardinality) + val file2Value = aggregationOutput("file2") + assert(file2Value._2 > file2Value._1.cardinality) + + assertEqualContents(file1Value._1, dfFile1) + assertEqualContents(file2Value._1, dfFile2) + } + + // modulo ordering + private def assertEqualContents(aggregator: RoaringBitmapArray, dataset: Array[Long]): Unit = { + // make sure they are in the same order + val aggregatorArray = aggregator.values.sorted + assert(aggregatorArray === dataset.sorted) + } +} + +object BitmapAggregatorE2ESuite { + // Pick something large enough hat 2 files have at least 64k entries each + final val DATASET_SIZE: Long = 1000000L + + // Cross the `isValidInt` threshold + final val INVALID_INT_OFFSET: Long = Int.MaxValue.toLong - DATASET_SIZE / 2 + + // Cross the 32bit threshold + final val UNSIGNED_INT_OFFSET: Long = (1L << 32) - DATASET_SIZE / 2 + + private[delta] def bitmapAggColumn( + column: Column, + format: RoaringBitmapArrayFormat.Value): Column = { + val func = new BitmapAggregator(column.expr, format); + new Column(func.toAggregateExpression(isDistinct = false)) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/util/CodecSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/util/CodecSuite.scala new file mode 100644 index 00000000000..ee460018c0a --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/util/CodecSuite.scala @@ -0,0 +1,188 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.util + +import java.nio.charset.StandardCharsets.US_ASCII +import java.util.UUID + +import scala.util.Random + +import org.apache.spark.SparkFunSuite + +class CodecSuite extends SparkFunSuite { + + import CodecSuite._ + + // Z85 reference strings are generated by https://cryptii.com/pipes/z85-encoder + val testUuids = Seq[(UUID, String)]( + new UUID(0L, 0L) -> "00000000000000000000", + new UUID(Long.MinValue, Long.MinValue) -> "Fb/MH00000Fb/MH00000", + new UUID(-1L, -1L) -> "%nSc0%nSc0%nSc0%nSc0", + new UUID(0L, Long.MinValue) -> "0000000000Fb/MH00000", + new UUID(0L, -1L) -> "0000000000%nSc0%nSc0", + new UUID(0L, Long.MaxValue) -> "0000000000Fb/MG%nSc0", + new UUID(Long.MinValue, 0L) -> "Fb/MH000000000000000", + new UUID(-1L, 0L) -> "%nSc0%nSc00000000000", + new UUID(Long.MaxValue, 0L) -> "Fb/MG%nSc00000000000", + new UUID(0L, 1L) -> "00000000000000000001", + // Just a few random ones, using literals for test determinism + new UUID(-4124158004264678669L, -6032951921472435211L) -> "-(5oirYA.yTvx6v@H:L>", + new UUID(6453181356142382984L, 8208554093199893996L) -> "s=Mlx-0Pp@AQ6uw@k6=D", + new UUID(6453181356142382984L, -8208554093199893996L) -> "s=Mlx-0Pp@JUL=R13LuL", + new UUID(-4124158004264678669L, 8208554093199893996L) -> "-(5oirYA.yAQ6uw@k6=D") + + // From https://rfc.zeromq.org/spec/32/ - Test Case + test("Z85 spec reference value") { + val inputBytes: Array[Byte] = + Array(0x86, 0x4F, 0xD2, 0x6F, 0xB5, 0x59, 0xF7, 0x5B).map(_.toByte) + val expectedEncodedString = "HelloWorld" + val actualEncodedString = Codec.Base85Codec.encodeBytes(inputBytes) + assert(actualEncodedString === expectedEncodedString) + val outputBytes = Codec.Base85Codec.decodeAlignedBytes(actualEncodedString) + assert(outputBytes sameElements inputBytes) + } + + test("Z85 reference implementation values") { + for ((id, expectedEncodedString) <- testUuids) { + val actualEncodedString = Codec.Base85Codec.encodeUUID(id) + assert(actualEncodedString === expectedEncodedString) + } + } + + test("Z85 spec character map") { + assert(Codec.Base85Codec.ENCODE_MAP.length === 85) + val referenceBytes = Seq( + 0x00, 0x09, 0x98, 0x62, 0x0f, 0xc7, 0x99, 0x43, 0x1f, 0x85, + 0x9a, 0x24, 0x2f, 0x43, 0x9b, 0x05, 0x3f, 0x01, 0x9b, 0xe6, + 0x4e, 0xbf, 0x9c, 0xc7, 0x5e, 0x7d, 0x9d, 0xa8, 0x6e, 0x3b, + 0x9e, 0x89, 0x7d, 0xf9, 0x9f, 0x6a, 0x8d, 0xb7, 0xa0, 0x4b, + 0x9d, 0x75, 0xa1, 0x2c, 0xad, 0x33, 0xa2, 0x0d, 0xbc, 0xf1, + 0xa2, 0xee, 0xcc, 0xaf, 0xa3, 0xcf, 0xdc, 0x6d, 0xa4, 0xb0, + 0xec, 0x2b, 0xa5, 0x91, 0xfb, 0xe9, 0xa6, 0x72) + .map(_.toByte).toArray + val referenceString = new String(Codec.Base85Codec.ENCODE_MAP, US_ASCII) + val encodedString = Codec.Base85Codec.encodeBytes(referenceBytes) + assert(encodedString === referenceString) + val decodedBytes = Codec.Base85Codec.decodeAlignedBytes(encodedString) + assert(decodedBytes sameElements referenceBytes) + } + + test("Reject illegal Z85 input - unaligned string") { + // Minimum string should 5 characters + val illegalEncodedString = "abc" + assertThrows[IllegalArgumentException] { + Codec.Base85Codec.decodeBytes( + illegalEncodedString, + // This value is irrelevant, any value should cause the failure. 
+ outputLength = 3) + } + } + + // scalastyle:off nonascii + test(s"Reject illegal Z85 input - illegal character") { + for (char <- Seq[Char]('î', 'π', '"', 0x7F)) { + val illegalEncodedString = String.valueOf(Array[Char]('a', 'b', char, 'd', 'e')) + val ex = intercept[IllegalArgumentException] { + Codec.Base85Codec.decodeAlignedBytes(illegalEncodedString) + } + assert(ex.getMessage.contains("Input is not valid Z85")) + } + } + // scalastyle:on nonascii + + test("base85 codec uuid roundtrips") { + for ((id, _) <- testUuids) { + val encodedString = Codec.Base85Codec.encodeUUID(id) + // 16 bytes always get encoded into 20 bytes with Base85. + assert(encodedString.length === Codec.Base85Codec.ENCODED_UUID_LENGTH) + val decodedId = Codec.Base85Codec.decodeUUID(encodedString) + assert(id === decodedId, s"encodedString = $encodedString") + } + } + + test("base85 codec empty byte array") { + val empty = Array.empty[Byte] + val encodedString = Codec.Base85Codec.encodeBytes(empty) + assert(encodedString === "") + val decodedArray = Codec.Base85Codec.decodeAlignedBytes(encodedString) + assert(decodedArray.isEmpty) + val decodedArray2 = Codec.Base85Codec.decodeBytes(encodedString, 0) + assert(decodedArray2.isEmpty) + } + + test("base85 codec byte array random roundtrips") { + val rand = new Random(1L) // Fixed seed for determinism + val arrayLengths = (1 to 20) ++ Seq(32, 56, 64, 128, 1022, 11 * 1024 * 1024) + + for (len <- arrayLengths) { + val inputArray: Array[Byte] = Array.ofDim(len) + rand.nextBytes(inputArray) + val encodedString = Codec.Base85Codec.encodeBytes(inputArray) + val decodedArray = Codec.Base85Codec.decodeBytes(encodedString, len) + assert(decodedArray === inputArray, s"encodedString = $encodedString") + } + } + + /** + * Execute `thunk` works for strings containing any of the possible base85 characters at either + * beginning, middle, or end positions. + */ + private def forAllEncodedStrings(thunk: String => Unit): Unit = { + // Basically test that every possible character can occur at any + // position with a 20 character string. + val characterString = new String(Codec.Base85Codec.ENCODE_MAP, US_ASCII) + // Use this to fill in the remaining 17 characters. 
+ val fillerChar = "x" + + var count = 0 + for { + firstChar <- characterString + middleChar <- characterString + finalChar <- characterString + } { + val sb = new StringBuilder + sb += firstChar + sb ++= fillerChar * 9 + sb += middleChar + sb ++= fillerChar * 8 + sb += finalChar + val encodedString = sb.toString() + assert(encodedString.length === 20) + thunk(encodedString) + count += 1 + } + assert(count === 85 * 85 * 85) + } + + test("base85 character set is JSON-safe") { + forAllEncodedStrings { inputString => + val inputObject = JsonRoundTripContainer(inputString) + val jsonString = JsonUtils.toJson(inputObject) + assert(jsonString.contains(inputString), + "Some character from the input had to be escaped to be JSON-safe:" + + s"input = '$inputString' vs JSON = '$jsonString'") + val outputObject = JsonUtils.fromJson[JsonRoundTripContainer](jsonString) + val outputString = outputObject.data + assert(inputString === outputString) + } + } + +} + +object CodecSuite { + final case class JsonRoundTripContainer(data: String) +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/util/DatasetRefCacheSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/util/DatasetRefCacheSuite.scala new file mode 100644 index 00000000000..2b684523f83 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/util/DatasetRefCacheSuite.scala @@ -0,0 +1,33 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util + +import org.apache.spark.sql.{QueryTest, SparkSession} +import org.apache.spark.sql.test.SharedSparkSession + +class DatasetRefCacheSuite extends QueryTest with SharedSparkSession { + + test("should create a new Dataset when the active session is changed") { + val cache = new DatasetRefCache(() => spark.range(1, 10) ) + val ref = cache.get + // Should reuse `Dataset` when the active session is the same + assert(ref eq cache.get) + SparkSession.setActiveSession(spark.newSession()) + // Should create a new `Dataset` when the active session is changed + assert(ref ne cache.get) + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/util/threads/DeltaThreadPoolSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/util/threads/DeltaThreadPoolSuite.scala new file mode 100644 index 00000000000..51d5988ce5f --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/util/threads/DeltaThreadPoolSuite.scala @@ -0,0 +1,69 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
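
The round trips exercised by this suite can be summarized in a short usage sketch; it assumes `Codec` sits in `org.apache.spark.sql.delta.util`, as the suite's unqualified references suggest, and the object name is made up:

    import java.util.UUID

    import org.apache.spark.sql.delta.util.Codec

    object Base85Example {
      def main(args: Array[String]): Unit = {
        // A 16-byte UUID always encodes to a fixed-width 20-character Z85 string.
        val id = UUID.randomUUID()
        val encoded = Codec.Base85Codec.encodeUUID(id)
        assert(encoded.length == Codec.Base85Codec.ENCODED_UUID_LENGTH)
        assert(Codec.Base85Codec.decodeUUID(encoded) == id)

        // For arbitrary bytes, pass the original length back in so any alignment
        // padding introduced by the encoder is dropped on decode.
        val bytes = Array[Byte](1, 2, 3)
        val s = Codec.Base85Codec.encodeBytes(bytes)
        assert(Codec.Base85Codec.decodeBytes(s, bytes.length).sameElements(bytes))
      }
    }
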
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.util.threads + +import java.util.Properties + +import org.apache.spark.{SparkFunSuite, TaskContext, TaskContextImpl} +import org.apache.spark.sql.test.SharedSparkSession + +class DeltaThreadPoolSuite extends SparkFunSuite with SharedSparkSession { + + val threadPool: DeltaThreadPool = DeltaThreadPool("test", 1) + + def makeTaskContext(id: Int): TaskContext = { + new TaskContextImpl(id, 0, 0, 0, attemptNumber = 45613, 0, null, new Properties(), null) + } + + def testForwarding(testName: String, id: Int)(f: => Unit): Unit = { + test(testName) { + val prevTaskContext = TaskContext.get() + TaskContext.setTaskContext(makeTaskContext(id)) + sparkContext.setLocalProperty("test", id.toString) + + try { + f + } finally { + TaskContext.setTaskContext(prevTaskContext) + } + } + } + + def assertTaskAndProperties(id: Int): Unit = { + assert(TaskContext.get() !== null) + assert(TaskContext.get().stageId() === id) + assert(sparkContext.getLocalProperty("test") === id.toString) + } + + testForwarding("parallelMap captures TaskContext", id = 0) { + threadPool.parallelMap(spark, 0 until 1) { _ => + assertTaskAndProperties(id = 0) + } + } + + testForwarding("submit captures TaskContext and local properties", id = 1) { + threadPool.submit(spark) { + assertTaskAndProperties(id = 1) + } + } + + testForwarding("submitNonFateSharing captures TaskContext and local properties", id = 2) { + threadPool.submitNonFateSharing { _ => + assertTaskAndProperties(id = 2) + } + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingSuite.scala new file mode 100644 index 00000000000..50b946a78b7 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingSuite.scala @@ -0,0 +1,152 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
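DeltaThreadPoolSuite verifies that work submitted to the pool observes the caller's TaskContext and Spark local properties. The capture/install/restore pattern it exercises can be sketched with a plain ThreadLocal and a standard executor; the names below are illustrative and are not Delta's `SparkThreadLocalForwardingThreadPoolExecutor`.

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ThreadLocalForwardingSketch {
    static final ThreadLocal<String> CONTEXT = new ThreadLocal<>();

    // Wrap a task so it runs with the submitter's context, then restores
    // whatever the worker thread had before.
    static Runnable forwarding(Runnable task) {
        final String captured = CONTEXT.get();     // captured on the calling thread
        return () -> {
            String previous = CONTEXT.get();
            CONTEXT.set(captured);
            try {
                task.run();
            } finally {
                CONTEXT.set(previous);             // leave the pool thread as we found it
            }
        };
    }

    public static void main(String[] args) throws Exception {
        ExecutorService pool = Executors.newSingleThreadExecutor();
        CONTEXT.set("caller-value");
        pool.submit(forwarding(
            () -> System.out.println("inside task: " + CONTEXT.get()))).get();
        pool.shutdown();
    }
}
```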
+ */ + +package org.apache.spark.sql.delta.util.threads + +import java.util.Properties +import java.util.concurrent.{LinkedBlockingQueue, ThreadPoolExecutor, TimeUnit} + +import scala.concurrent.{ExecutionContext, ExecutionContextExecutor, Future} +import scala.concurrent.duration._ + +import org.apache.spark._ +import org.apache.spark.util.ThreadUtils +import org.apache.spark.util.ThreadUtils.namedThreadFactory + +class SparkThreadLocalForwardingSuite extends SparkFunSuite { + + private def createThreadPool(nThreads: Int, prefix: String): ThreadPoolExecutor = { + val threadFactory = namedThreadFactory(prefix) + val keepAliveTimeSeconds = 60 + val threadPool = new SparkThreadLocalForwardingThreadPoolExecutor( + nThreads, + nThreads, + keepAliveTimeSeconds, + TimeUnit.MILLISECONDS, + new LinkedBlockingQueue[Runnable], + threadFactory) + threadPool.allowCoreThreadTimeOut(true) + threadPool + } + + test("SparkThreadLocalForwardingThreadPoolExecutor properly propagates" + + " TaskContext and Spark Local Properties") { + val sc = SparkContext.getOrCreate(new SparkConf().setAppName("test").setMaster("local")) + val executor = createThreadPool(1, "test-threads") + implicit val executionContext: ExecutionContextExecutor = + ExecutionContext.fromExecutor(executor) + + val prevTaskContext = TaskContext.get() + try { + // assert that each instance of submitting a task to the execution context captures the + // current task context + val futures = (1 to 10) map { i => + setTaskAndProperties(i, sc) + + Future { + checkTaskAndProperties(i, sc) + }(executionContext) + } + + assert(ThreadUtils.awaitResult(Future.sequence(futures), 10.seconds).forall(identity)) + } finally { + ThreadUtils.shutdown(executor) + TaskContext.setTaskContext(prevTaskContext) + sc.stop() + } + } + + def makeTaskContext(id: Int): TaskContext = { + new TaskContextImpl(id, 0, 0, 0, attemptNumber = 45613, 0, null, new Properties(), null) + } + + def setTaskAndProperties(i: Int, sc: SparkContext = SparkContext.getActive.get): Unit = { + val tc = makeTaskContext(i) + TaskContext.setTaskContext(tc) + sc.setLocalProperty("test", i.toString) + } + + def checkTaskAndProperties(i: Int, sc: SparkContext = SparkContext.getActive.get): Boolean = { + TaskContext.get() != null && + TaskContext.get().stageId() == i && + sc.getLocalProperty("test") == i.toString + } + + test("That CapturedSparkThreadLocals properly restores the existing state") { + val sc = SparkContext.getOrCreate(new SparkConf().setAppName("test").setMaster("local")) + val prevTaskContext = TaskContext.get() + try { + setTaskAndProperties(10) + val capturedSparkThreadLocals = CapturedSparkThreadLocals() + setTaskAndProperties(11) + assert(!checkTaskAndProperties(10, sc)) + assert(checkTaskAndProperties(11, sc)) + capturedSparkThreadLocals.runWithCaptured { + assert(checkTaskAndProperties(10, sc)) + } + assert(checkTaskAndProperties(11, sc)) + } finally { + TaskContext.setTaskContext(prevTaskContext) + sc.stop() + } + } + + test("That CapturedSparkThreadLocals properly restores the existing spark properties." 
+ + " Changes to local properties inside a task do not affect the original properties") { + val sc = SparkContext.getOrCreate(new SparkConf().setAppName("test").setMaster("local")) + try { + sc.setLocalProperty("TestProp", "1") + val capturedSparkThreadLocals = CapturedSparkThreadLocals() + assert(sc.getLocalProperty("TestProp") == "1") + capturedSparkThreadLocals.runWithCaptured { + sc.setLocalProperty("TestProp", "2") + assert(sc.getLocalProperty("TestProp") == "2") + } + assert(sc.getLocalProperty("TestProp") == "1") + } finally { + sc.stop() + } + } + + + test("captured spark thread locals are immutable") { + val sc = SparkContext.getOrCreate(new SparkConf().setAppName("test").setMaster("local")) + try { + sc.setLocalProperty("test1", "good") + sc.setLocalProperty("test2", "good") + val threadLocals = CapturedSparkThreadLocals() + sc.setLocalProperty("test2", "bad") + assert(sc.getLocalProperty("test1") == "good") + assert(sc.getLocalProperty("test2") == "bad") + threadLocals.runWithCaptured { + assert(sc.getLocalProperty("test1") == "good") + assert(sc.getLocalProperty("test2") == "good") + sc.setLocalProperty("test1", "bad") + sc.setLocalProperty("test2", "maybe") + assert(sc.getLocalProperty("test1") == "bad") + assert(sc.getLocalProperty("test2") == "maybe") + } + assert(sc.getLocalProperty("test1") == "good") + assert(sc.getLocalProperty("test2") == "bad") + threadLocals.runWithCaptured { + assert(sc.getLocalProperty("test1") == "good") + assert(sc.getLocalProperty("test2") == "good") + } + } finally { + sc.stop() + } + } +} diff --git a/storage-s3-dynamodb/integration_tests/dynamodb_logstore.py b/storage-s3-dynamodb/integration_tests/dynamodb_logstore.py new file mode 100644 index 00000000000..7f55b8b511a --- /dev/null +++ b/storage-s3-dynamodb/integration_tests/dynamodb_logstore.py @@ -0,0 +1,218 @@ +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
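The last two tests above pin down the snapshot semantics of CapturedSparkThreadLocals: properties are copied at capture time, and changes made inside `runWithCaptured` do not leak back out. A rough stand-alone Java illustration of that copy-then-restore discipline (a hypothetical helper, not the Delta class):

```java
import java.util.Properties;

public class CapturedPropertiesSketch {
    private final Properties snapshot;

    CapturedPropertiesSketch(Properties live) {
        this.snapshot = (Properties) live.clone();   // defensive copy at capture time
    }

    void runWithCaptured(Properties live, Runnable body) {
        Properties before = (Properties) live.clone();
        live.clear();
        live.putAll(snapshot);                       // install the captured view
        try {
            body.run();
        } finally {
            live.clear();
            live.putAll(before);                     // restore the caller's view
        }
    }

    public static void main(String[] args) {
        Properties live = new Properties();
        live.setProperty("test", "good");
        CapturedPropertiesSketch captured = new CapturedPropertiesSketch(live);
        live.setProperty("test", "bad");             // mutation after capture
        captured.runWithCaptured(live, () ->
            System.out.println("inside: " + live.getProperty("test")));  // prints "good"
        System.out.println("outside: " + live.getProperty("test"));      // prints "bad"
    }
}
```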
+# + +import os +import sys +import threading + +from pyspark.sql import SparkSession +from multiprocessing.pool import ThreadPool +import time + +""" +Create required dynamodb table with: + +$ aws dynamodb create-table \ + --region \ + --table-name \ + --attribute-definitions AttributeName=tablePath,AttributeType=S \ + AttributeName=fileName,AttributeType=S \ + --key-schema AttributeName=tablePath,KeyType=HASH \ + AttributeName=fileName,KeyType=RANGE \ + --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=5 + +Enable TTL with: + +$ aws dynamodb update-time-to-live \ + --region \ + --table-name \ + --time-to-live-specification "Enabled=true, AttributeName=expireTime" + +Run this script in root dir of repository: + +# ===== Mandatory input from user ===== +export RUN_ID=run001 +export S3_BUCKET=delta-lake-dynamodb-test-00 + +# ===== Optional input from user ===== +export DELTA_CONCURRENT_WRITERS=20 +export DELTA_CONCURRENT_READERS=2 +export DELTA_STORAGE=io.delta.storage.S3DynamoDBLogStore +export DELTA_NUM_ROWS=200 +export DELTA_DYNAMO_REGION=us-west-2 +export DELTA_DYNAMO_ERROR_RATES=0.00 + +# ===== Optional input from user (we calculate defaults using S3_BUCKET and RUN_ID) ===== +export RELATIVE_DELTA_TABLE_PATH=___ +export DELTA_DYNAMO_TABLE_NAME=___ + +./run-integration-tests.py --use-local \ + --run-storage-s3-dynamodb-integration-tests \ + --dbb-packages org.apache.hadoop:hadoop-aws:3.3.1 \ + --dbb-conf io.delta.storage.credentials.provider=com.amazonaws.auth.profile.ProfileCredentialsProvider \ + spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.profile.ProfileCredentialsProvider +""" + +# ===== Mandatory input from user ===== +run_id = os.environ.get("RUN_ID") +s3_bucket = os.environ.get("S3_BUCKET") + +# ===== Optional input from user ===== +concurrent_writers = int(os.environ.get("DELTA_CONCURRENT_WRITERS", 2)) +concurrent_readers = int(os.environ.get("DELTA_CONCURRENT_READERS", 2)) +# className to instantiate. 
io.delta.storage.S3DynamoDBLogStore or .FailingS3DynamoDBLogStore +delta_storage = os.environ.get("DELTA_STORAGE", "io.delta.storage.S3DynamoDBLogStore") +num_rows = int(os.environ.get("DELTA_NUM_ROWS", 16)) +dynamo_region = os.environ.get("DELTA_DYNAMO_REGION", "us-west-2") +# used only by FailingS3DynamoDBLogStore +dynamo_error_rates = os.environ.get("DELTA_DYNAMO_ERROR_RATES", "") + +# ===== Optional input from user (we calculate defaults using RUN_ID) ===== +relative_delta_table_path = os.environ.get("RELATIVE_DELTA_TABLE_PATH", "tables/table_" + run_id)\ + .rstrip("/") +dynamo_table_name = os.environ.get("DELTA_DYNAMO_TABLE_NAME", "ddb_table_" + run_id) + +delta_table_path = "s3a://" + s3_bucket + "/" + relative_delta_table_path +relative_delta_log_path = relative_delta_table_path + "/_delta_log/" + +if delta_table_path is None: + print(f"\nSkipping Python test {os.path.basename(__file__)} due to the missing env variable " + f"`DELTA_TABLE_PATH`\n=====================") + sys.exit(0) + +test_log = f""" +========================================== +run id: {run_id} +delta table path: {delta_table_path} +dynamo table name: {dynamo_table_name} + +concurrent writers: {concurrent_writers} +concurrent readers: {concurrent_readers} +number of rows: {num_rows} +delta storage: {delta_storage} +dynamo_error_rates: {dynamo_error_rates} + +relative_delta_table_path: {relative_delta_table_path} +relative_delta_log_path: {relative_delta_log_path} +========================================== +""" +print(test_log) + +spark = SparkSession \ + .builder \ + .appName("utilities") \ + .master("local[*]") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .config("spark.delta.logStore.s3.impl", delta_storage) \ + .config("spark.delta.logStore.s3a.impl", delta_storage) \ + .config("spark.delta.logStore.s3n.impl", delta_storage) \ + .config("spark.io.delta.storage.S3DynamoDBLogStore.ddb.tableName", dynamo_table_name) \ + .config("spark.io.delta.storage.S3DynamoDBLogStore.ddb.region", dynamo_region) \ + .config("spark.io.delta.storage.S3DynamoDBLogStore.errorRates", dynamo_error_rates) \ + .config("spark.io.delta.storage.S3DynamoDBLogStore.provisionedThroughput.rcu", 12) \ + .config("spark.io.delta.storage.S3DynamoDBLogStore.provisionedThroughput.wcu", 13) \ + .getOrCreate() + +# spark.sparkContext.setLogLevel("INFO") + +data = spark.createDataFrame([], "id: int, a: int") +print("writing:", data.collect()) +data.write.format("delta").mode("overwrite").partitionBy("id").save(delta_table_path) + + +def write_tx(n): + data = spark.createDataFrame([[n, n]], "id: int, a: int") + print("writing:", data.collect()) + data.write.format("delta").mode("append").partitionBy("id").save(delta_table_path) + + +stop_reading = threading.Event() + + +def read_data(): + while not stop_reading.is_set(): + print("Reading {:d} rows ...".format( + spark.read.format("delta").load(delta_table_path).distinct().count()) + ) + time.sleep(1) + + +def start_read_thread(): + thread = threading.Thread(target=read_data) + thread.start() + return thread + + +print("===================== Starting reads and writes =====================") +read_threads = [start_read_thread() for i in range(concurrent_readers)] +pool = ThreadPool(concurrent_writers) +start_t = time.time() +pool.map(write_tx, range(num_rows)) +stop_reading.set() + +for thread in read_threads: + thread.join() + +print("===================== Evaluating 
number of written rows =====================") +actual = spark.read.format("delta").load(delta_table_path).distinct().count() +print("Actual number of written rows:", actual) +print("Expected number of written rows:", num_rows) +assert actual == num_rows + +t = time.time() - start_t +print(f"{num_rows / t:.02f} tx / sec") + +print("===================== Evaluating DDB writes =====================") +import boto3 +from botocore.config import Config +my_config = Config( + region_name=dynamo_region, +) +dynamodb = boto3.resource('dynamodb', config=my_config) +table = dynamodb.Table(dynamo_table_name) # this ensures we actually used/created the input table +response = table.scan() +items = response['Items'] +items = sorted(items, key=lambda x: x['fileName']) + +print("========== All DDB items ==========") +for item in items: + print(item) + +print("===================== Evaluating _delta_log commits =====================") +s3_client = boto3.client("s3") +print(f"querying {s3_bucket}/{relative_delta_log_path}") +response = s3_client.list_objects_v2(Bucket=s3_bucket, Prefix=relative_delta_log_path) +items = response['Contents'] +print("========== Raw _delta_log contents ========== ") +for item in items: + print(item) + +delta_log_commits = filter(lambda x: ".json" in x['Key'] and ".tmp" not in x['Key'], + items) +delta_log_commits = sorted(delta_log_commits, key=lambda x: x['Key']) + +print("========== _delta_log commits in version order ==========") +for commit in delta_log_commits: + print(commit) + +print("========== _delta_log commits in timestamp order ==========") +delta_log_commits_sorted_timestamp = sorted(delta_log_commits, key=lambda x: x['LastModified']) +for commit in delta_log_commits_sorted_timestamp: + print(commit) + +print("========== ASSERT that these orders (version vs timestamp) are the same ==========") +assert(delta_log_commits == delta_log_commits_sorted_timestamp) diff --git a/storage-s3-dynamodb/src/main/java/io/delta/storage/BaseExternalLogStore.java b/storage-s3-dynamodb/src/main/java/io/delta/storage/BaseExternalLogStore.java new file mode 100644 index 00000000000..1897125c784 --- /dev/null +++ b/storage-s3-dynamodb/src/main/java/io/delta/storage/BaseExternalLogStore.java @@ -0,0 +1,474 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
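The integration script's final assertion is that the `_delta_log` commits sorted by file name (version) and sorted by `LastModified` give the same sequence; otherwise some commit N would have been physically written after N+1. A small Java sketch of that invariant check, with a hypothetical `CommitFile` holder standing in for the S3 listing entries:

```java
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;

public class CommitOrderCheck {
    static class CommitFile {
        final String name;             // e.g. 00000000000000000001.json
        final long lastModifiedMillis;
        CommitFile(String name, long lastModifiedMillis) {
            this.name = name;
            this.lastModifiedMillis = lastModifiedMillis;
        }
    }

    static boolean versionOrderMatchesTimestampOrder(List<CommitFile> commits) {
        List<String> byName = commits.stream()
            .sorted(Comparator.comparing((CommitFile c) -> c.name))
            .map(c -> c.name)
            .collect(Collectors.toList());
        List<String> byTime = commits.stream()
            .sorted(Comparator.comparingLong((CommitFile c) -> c.lastModifiedMillis))
            .map(c -> c.name)
            .collect(Collectors.toList());
        return byName.equals(byTime);
    }

    public static void main(String[] args) {
        List<CommitFile> commits = Arrays.asList(
            new CommitFile("00000000000000000000.json", 1000L),
            new CommitFile("00000000000000000001.json", 2500L));
        System.out.println(versionOrderMatchesTimestampOrder(commits)); // true
    }
}
```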
+ */ + +package io.delta.storage; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.Iterator; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import com.google.common.annotations.VisibleForTesting; +import io.delta.storage.internal.FileNameUtils; +import io.delta.storage.internal.PathLock; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A base {@link LogStore} implementation for cloud stores (e.g. Amazon S3) that do not provide + * mutual exclusion. + *

+ * This implementation depends on child methods, particularly `putExternalEntry`, to provide + * the mutual exclusion that the cloud store is lacking. + * + * Notation: + * - N: the target commit version we are writing. e.g. 10 for 0000010.json + * - N.json: the actual target commit we want to write. + * - T(N): the temp file path for commit N used during the prepare-commit-acknowledge `write` + * algorithm below. We will eventually copy T(N) into N.json + * - E(N, T(N), complete=true/false): the entry we will atomically commit into the external + * cache. + */ +public abstract class BaseExternalLogStore extends HadoopFileSystemLogStore { + private static final Logger LOG = LoggerFactory.getLogger(BaseExternalLogStore.class); + + /** + * A global path lock to ensure that no two writers/readers are copying a given T(N) into N.json + * at the same time within the same JVM. This can occur + * - while a writer is performing a normal write operation AND a reader happens to see an + * external entry E(complete=false) and so starts a recovery operation + * - while two readers see E(complete=false) and so both start a recovery operation + */ + private static final PathLock pathLock = new PathLock(); + + /** + * The delay, in seconds, after an external entry has been committed to the delta log at which + * point it is safe to be deleted from the external store. + * + * We want a delay long enough such that, after the external entry has been deleted, another + * write attempt for the SAME delta log commit can FAIL using ONLY the FileSystem's existence + * check (e.g. `fs.exists(path)`). Recall we assume that the FileSystem does not provide mutual + * exclusion. + * + * We use a value of 1 day. + * + * If we choose too small of a value, like 0 seconds, then the following scenario is possible: + * - t0: Writers W1 and W2 start writing data files + * - t1: W1 begins to try and write into the _delta_log. + * - t2: W1 checks if N.json exists in FileSystem. It doesn't. + * - t3: W1 writes actions into temp file T1(N) + * - t4: W1 writes to external store entry E1(N, complete=false) + * - t5: W1 copies (with overwrite=false) T1(N) into N.json. + * - t6: W1 overwrites entry in external store E1(N, complete=true, expireTime=now+0) + * - t7: E1 is safe to be deleted, and some external store TTL mechanism deletes E1 + * - t8: W2 begins to try and write into the _delta_log. + * - t9: W1 checks if N.json exists in FileSystem, but too little time has transpired between + * t5 and t9 that the FileSystem check (fs.exists(path)) returns FALSE. + * Note: This isn't possible on S3 (which provides strong consistency) but could be + * possible on eventually-consistent systems. + * - t10: W2 writes actions into temp file T2(N) + * - t11: W2 writes to external store entry E2(N, complete=false) + * - t12: W2 successfully copies (with overwrite=false) T2(N) into N.json. FileSystem didn't + * provide the necessary mutual exclusion, so the copy succeeded. Thus, DATA LOSS HAS + * OCCURRED. + * + * By using an expiration delay of 1 day, we ensure one of the steps at t9 or t12 will fail. + */ + protected static final long DEFAULT_EXTERNAL_ENTRY_EXPIRATION_DELAY_SECONDS = + TimeUnit.DAYS.toSeconds(1); + + /** + * Completed external commit entries will be created with a value of + * NOW_EPOCH_SECONDS + getExpirationDelaySeconds(). 
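The data-loss scenario above comes down to one number: the epoch second after which a completed entry may be dropped by the external store's TTL mechanism. A trivial sketch of that arithmetic, mirroring the one-day default; the helper name itself is made up.

```java
import java.util.concurrent.TimeUnit;

public class ExpirationSketch {
    // Same one-day default as DEFAULT_EXTERNAL_ENTRY_EXPIRATION_DELAY_SECONDS above.
    static final long DEFAULT_DELAY_SECONDS = TimeUnit.DAYS.toSeconds(1); // 86,400 seconds

    static long expireTimeFor(long nowEpochSeconds, long delaySeconds) {
        // A completed entry may only be TTL-deleted once this epoch second has passed.
        return nowEpochSeconds + delaySeconds;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis() / 1000L;
        System.out.println("safe to delete after epoch second "
            + expireTimeFor(now, DEFAULT_DELAY_SECONDS));
    }
}
```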
+ */ + protected long getExpirationDelaySeconds() { + return DEFAULT_EXTERNAL_ENTRY_EXPIRATION_DELAY_SECONDS; + } + + //////////////////////// + // Public API Methods // + //////////////////////// + + public BaseExternalLogStore(Configuration hadoopConf) { + super(hadoopConf); + } + + /** + * First checks if there is any incomplete entry in the external store. If so, tries to perform + * a recovery/fix. + * + * Then, performs a normal listFrom user the `super` implementation. + */ + @Override + public Iterator listFrom(Path path, Configuration hadoopConf) throws IOException { + final FileSystem fs = path.getFileSystem(hadoopConf); + final Path resolvedPath = stripUserInfo(fs.makeQualified(path)); + final Path tablePath = getTablePath(resolvedPath); + final Optional entry = getLatestExternalEntry(tablePath); + + if (entry.isPresent() && !entry.get().complete) { + // Note: `fixDeltaLog` will apply per-JVM mutual exclusion via a lock to help reduce + // the chance of many reader threads in a single JVM doing duplicate copies of + // T(N) -> N.json. + fixDeltaLog(fs, entry.get()); + } + + // This is predicated on the storage system providing consistent listing + // If there was a recovery performed in the `fixDeltaLog` call, then some temp file + // was just copied into some N.json in the delta log. Because of consistent listing, + // the `super.listFrom` is guaranteed to see N.json. + return super.listFrom(path, hadoopConf); + } + + /** + * If overwrite=true, then write normally without any interaction with external store. + * Else, to commit for delta version N: + * - Step 0: Fail if N.json already exists in FileSystem. + * - Step 1: Ensure that N-1.json exists. If not, perform a recovery. + * - Step 2: PREPARE the commit. + * - Write `actions` into temp file T(N) + * - Write with mutual exclusion to external store and entry E(N, T(N), complete=false) + * - Step 3: COMMIT the commit to the delta log. + * - Copy T(N) into N.json + * - Step 4: ACKNOWLEDGE the commit. + * - Overwrite entry E in external store and set complete=true + */ + @Override + public void write( + Path path, + Iterator actions, + Boolean overwrite, + Configuration hadoopConf) throws IOException { + final FileSystem fs = path.getFileSystem(hadoopConf); + final Path resolvedPath = stripUserInfo(fs.makeQualified(path)); + try { + // Prevent concurrent writers in this JVM from either + // a) concurrently overwriting N.json if overwrite=true + // b) both checking if N-1.json exists and performing a "recovery" where they both + // copy T(N-1) into N-1.json + // + // Note that the mutual exclusion on writing into N.json with overwrite=false from + // different JVMs (which is the entire point of BaseExternalLogStore) is provided by the + // external cache, not by this lock, of course. + // + // Also note that this lock path (resolvedPath) is for N.json, while the lock path used + // below in the recovery `fixDeltaLog` path is for N-1.json. Thus, no deadlock. + pathLock.acquire(resolvedPath); + + if (overwrite) { + writeActions(fs, path, actions); + return; + } else if (fs.exists(path)) { + // Step 0: Fail if N.json already exists in FileSystem and overwrite=false. 
+ throw new java.nio.file.FileAlreadyExistsException(path.toString()); + } + + // Step 1: Ensure that N-1.json exists + final Path tablePath = getTablePath(resolvedPath); + if (FileNameUtils.isDeltaFile(path)) { + final long version = FileNameUtils.deltaVersion(path); + if (version > 0) { + final long prevVersion = version - 1; + final Path deltaLogPath = new Path(tablePath, "_delta_log"); + final Path prevPath = FileNameUtils.deltaFile(deltaLogPath, prevVersion); + final String prevFileName = prevPath.getName(); + final Optional prevEntry = getExternalEntry( + tablePath.toString(), + prevFileName + ); + if (prevEntry.isPresent() && !prevEntry.get().complete) { + fixDeltaLog(fs, prevEntry.get()); + } else { + if (!fs.exists(prevPath)) { + throw new java.nio.file.FileSystemException( + String.format("previous commit %s doesn't exist on the file system but does in the external log store", prevPath) + ); + } + } + } else { + final String fileName = path.getName(); + final Optional entry = getExternalEntry( + tablePath.toString(), + fileName + ); + if (entry.isPresent()) { + if (entry.get().complete && !fs.exists(path)) { + throw new java.nio.file.FileSystemException( + String.format( + "Old entries for table %s still exist in the external log store", + tablePath + ) + ); + } + } + } + } + + // Step 2: PREPARE the commit + final String tempPath = createTemporaryPath(resolvedPath); + final ExternalCommitEntry entry = new ExternalCommitEntry( + tablePath, + resolvedPath.getName(), + tempPath, + false, // not complete + null // no expireTime + ); + + // Step 2.1: Create temp file T(N) + writeActions(fs, entry.absoluteTempPath(), actions); + + // Step 2.2: Create externals store entry E(N, T(N), complete=false) + putExternalEntry(entry, false); // overwrite=false + + try { + // Step 3: COMMIT the commit to the delta log. + // Copy T(N) -> N.json with overwrite=false + writeCopyTempFile(fs, entry.absoluteTempPath(), resolvedPath); + + // Step 4: ACKNOWLEDGE the commit + writePutCompleteDbEntry(entry); + } catch (Throwable e) { + LOG.info( + "{}: ignoring recoverable error", e.getClass().getSimpleName(), e + ); + } + } catch (java.lang.InterruptedException e) { + throw new InterruptedIOException(e.getMessage()); + } finally { + pathLock.release(resolvedPath); + } + } + + @Override + public Boolean isPartialWriteVisible(Path path, Configuration hadoopConf) { + return false; + } + + ///////////////////////////////////////////////////////////// + // Protected Members (for interaction with external store) // + ///////////////////////////////////////////////////////////// + + /** + * Write file with actions under a specific path. + */ + protected void writeActions( + FileSystem fs, + Path path, + Iterator actions + ) throws IOException { + LOG.debug("writeActions to: {}", path); + FSDataOutputStream stream = fs.create(path, true); + while (actions.hasNext()) { + byte[] line = String.format("%s\n", actions.next()).getBytes(StandardCharsets.UTF_8); + stream.write(line); + } + stream.close(); + } + + /** + * Generate temporary path for TransactionLog. + */ + protected String createTemporaryPath(Path path) { + String uuid = java.util.UUID.randomUUID().toString(); + return String.format(".tmp/%s.%s", path.getName(), uuid); + } + + /** + * Returns the base table path for a given Delta log entry located in + * e.g. 
input path of $tablePath/_delta_log/00000N.json would return $tablePath + */ + protected Path getTablePath(Path path) { + return path.getParent().getParent(); + } + + /** + * Write to external store in exclusive way. + * + * @throws java.nio.file.FileAlreadyExistsException if path exists in cache and `overwrite` is + * false + */ + abstract protected void putExternalEntry( + ExternalCommitEntry entry, + boolean overwrite) throws IOException; + + /** + * Return external store entry corresponding to delta log file with given `tablePath` and + * `fileName`, or `Optional.empty()` if it doesn't exist. + */ + abstract protected Optional getExternalEntry( + String tablePath, + String fileName) throws IOException; + + /** + * Return the latest external store entry corresponding to the delta log for given `tablePath`, + * or `Optional.empty()` if it doesn't exist. + */ + abstract protected Optional getLatestExternalEntry( + Path tablePath) throws IOException; + + ////////////////////////////////////////////////////////// + // Protected Members (for error injection during tests) // + ////////////////////////////////////////////////////////// + + /** + * Wrapper for `copyFile`, called by the `write` method. + */ + @VisibleForTesting + protected void writeCopyTempFile(FileSystem fs, Path src, Path dst) throws IOException { + copyFile(fs, src, dst); + } + + /** + * Wrapper for `putExternalEntry`, called by the `write` method. + */ + @VisibleForTesting + protected void writePutCompleteDbEntry(ExternalCommitEntry entry) throws IOException { + putExternalEntry(entry.asComplete(getExpirationDelaySeconds()), true); // overwrite=true + } + + /** + * Wrapper for `copyFile`, called by the `fixDeltaLog` method. + */ + @VisibleForTesting + protected void fixDeltaLogCopyTempFile(FileSystem fs, Path src, Path dst) throws IOException { + copyFile(fs, src, dst); + } + + /** + * Wrapper for `putExternalEntry`, called by the `fixDeltaLog` method. + */ + @VisibleForTesting + protected void fixDeltaLogPutCompleteDbEntry(ExternalCommitEntry entry) throws IOException { + putExternalEntry(entry.asComplete(getExpirationDelaySeconds()), true); // overwrite=true + } + + //////////////////// + // Helper Methods // + //////////////////// + + /** + * Method for assuring consistency on filesystem according to the external cache. + * Method tries to rewrite TransactionLog entry from temporary path if it does not exist. + * + * Should never throw a FileAlreadyExistsException. + * - If we see one when copying the temp file, we can assume the target file N.json already + * exists and a concurrent writer has already copied the contents of T(N). + * - We will never see one when writing to the external cache since overwrite=true. + */ + private void fixDeltaLog(FileSystem fs, ExternalCommitEntry entry) throws IOException { + if (entry.complete) { + return; + } + + final Path targetPath = entry.absoluteFilePath(); + try { + pathLock.acquire(targetPath); + + int retry = 0; + boolean copied = false; + while (true) { + LOG.info("trying to fix: {}", entry.fileName); + try { + if (!copied && !fs.exists(targetPath)) { + fixDeltaLogCopyTempFile(fs, entry.absoluteTempPath(), targetPath); + copied = true; + } + fixDeltaLogPutCompleteDbEntry(entry); + LOG.info("fixed file {}", entry.fileName); + return; + } catch (java.nio.file.FileAlreadyExistsException e) { + LOG.info("file {} already copied: {}:", + entry.fileName, e.getClass().getSimpleName(), e); + copied = true; + // Don't return since we still need to mark the DB entry as complete. 
This will + // happen when we execute the main try block on the next while loop iteration + } catch (Throwable e) { + LOG.info("{}:", e.getClass().getSimpleName(), e); + if (retry >= 3) { + throw e; + } + } + retry += 1; + } + } catch (java.lang.InterruptedException e) { + throw new InterruptedIOException(e.getMessage()); + } finally { + pathLock.release(targetPath); + } + } + + /** + * Copies file within filesystem. + * + * @param fs reference to [[FileSystem]] + * @param src path to source file + * @param dst path to destination file + */ + private void copyFile(FileSystem fs, Path src, Path dst) throws IOException { + LOG.info("copy file: {} -> {}", src, dst); + final FSDataInputStream inputStream = fs.open(src); + try { + final FSDataOutputStream outputStream = fs.create(dst, false); // overwrite=false + IOUtils.copy(inputStream, outputStream); + + // We don't close `outputStream` if an exception happens because it may create a partial + // file. + outputStream.close(); + } catch (org.apache.hadoop.fs.FileAlreadyExistsException e) { + throw new java.nio.file.FileAlreadyExistsException(dst.toString()); + } finally { + inputStream.close(); + } + } + + /** + * Returns path stripped user info. + */ + private Path stripUserInfo(Path path) { + final URI uri = path.toUri(); + + try { + final URI newUri = new URI( + uri.getScheme(), + null, // userInfo + uri.getHost(), + uri.getPort(), + uri.getPath(), + uri.getQuery(), + uri.getFragment() + ); + + return new Path(newUri); + } catch (URISyntaxException e) { + // Propagating this URISyntaxException to callers would mean we would have to either + // include it in the public LogStore.java interface or wrap it in an + // IllegalArgumentException somewhere else. Instead, catch and wrap it here. + throw new IllegalArgumentException(e); + } + } +} diff --git a/storage-s3-dynamodb/src/main/java/io/delta/storage/ExternalCommitEntry.java b/storage-s3-dynamodb/src/main/java/io/delta/storage/ExternalCommitEntry.java new file mode 100644 index 00000000000..c5aeab381ae --- /dev/null +++ b/storage-s3-dynamodb/src/main/java/io/delta/storage/ExternalCommitEntry.java @@ -0,0 +1,94 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import org.apache.hadoop.fs.Path; + +/** + * Wrapper class representing an entry in an external store for a given commit into the Delta log. + * + * Contains relevant fields and helper methods. + */ +public final class ExternalCommitEntry { + + /** + * Absolute path to this delta table + */ + public final Path tablePath; + + /** + * File name of this commit, e.g. 
"000000N.json" + */ + public final String fileName; + + /** + * Path to temp file for this commit, relative to the `_delta_log + */ + public final String tempPath; + + /** + * true if delta json file is successfully copied to its destination location, else false + */ + public final boolean complete; + + /** + * If complete = true, epoch seconds at which this external commit entry is safe to be deleted. + * Else, null. + */ + public final Long expireTime; + + public ExternalCommitEntry( + Path tablePath, + String fileName, + String tempPath, + boolean complete, + Long expireTime) { + this.tablePath = tablePath; + this.fileName = fileName; + this.tempPath = tempPath; + this.complete = complete; + this.expireTime = expireTime; + } + + /** + * @return this entry with `complete=true` and a valid `expireTime` + */ + public ExternalCommitEntry asComplete(long expirationDelaySeconds) { + return new ExternalCommitEntry( + this.tablePath, + this.fileName, + this.tempPath, + true, + System.currentTimeMillis() / 1000L + expirationDelaySeconds + ); + } + + /** + * @return the absolute path to the file for this entry. + * e.g. $tablePath/_delta_log/0000000N.json + */ + public Path absoluteFilePath() { + return new Path(new Path(tablePath, "_delta_log"), fileName); + } + + /** + * @return the absolute path to the temp file for this entry + */ + public Path absoluteTempPath() { + return new Path(new Path(tablePath, "_delta_log"), tempPath); + } +} diff --git a/storage-s3-dynamodb/src/main/java/io/delta/storage/RetryableCloseableIterator.java b/storage-s3-dynamodb/src/main/java/io/delta/storage/RetryableCloseableIterator.java new file mode 100644 index 00000000000..a244254b62c --- /dev/null +++ b/storage-s3-dynamodb/src/main/java/io/delta/storage/RetryableCloseableIterator.java @@ -0,0 +1,230 @@ +package io.delta.storage; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.function.Supplier; + +import io.delta.storage.utils.ThrowingSupplier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class presents an iterator view over the iterator supplier in the constructor. + * + * This class assumes that the iterator supplied by the supplier can throw, and that subsequent + * supplier.get() calls will return an iterator over the same data. + * + * If there are any RemoteFileChangedException during `next` and `hasNext` calls, will retry + * at most `MAX_RETRIES` times. If there are similar exceptions during the retry, those are handled + * and count towards the MAX_RETRIES. + * + * Internally, keeps track of the last-successfully-returned index. Upon retry, will iterate back + * to that same position. + */ +public class RetryableCloseableIterator implements CloseableIterator { + private static final Logger LOG = LoggerFactory.getLogger(RetryableCloseableIterator.class); + + public static final int DEFAULT_MAX_RETRIES = 3; + + private final ThrowingSupplier, IOException> iterSupplier; + + private final int maxRetries; + + /** + * Index of the last element successfully returned without an exception. A value of -1 means + * that no element has ever been returned yet. 
+ */ + private int lastSuccessfullIndex; + + private int numRetries = 0; + + private CloseableIterator currentIter; + + public RetryableCloseableIterator( + ThrowingSupplier, IOException> iterSupplier, + int maxRetries) throws IOException { + if (maxRetries < 0) throw new IllegalArgumentException("maxRetries can't be negative"); + + this.iterSupplier = Objects.requireNonNull(iterSupplier); + this.maxRetries = maxRetries; + this.lastSuccessfullIndex = -1; + this.currentIter = this.iterSupplier.get(); + } + + public RetryableCloseableIterator( + ThrowingSupplier, IOException> iterSupplier) + throws IOException { + + this(iterSupplier, DEFAULT_MAX_RETRIES); + } + + ///////////////// + // Public APIs // + ///////////////// + + @Override + public void close() throws IOException { + currentIter.close(); + } + + /** + * `hasNext` must be idempotent. It does not change the `lastSuccessfulIndex` variable. + */ + @Override + public boolean hasNext() { + try { + return hasNextInternal(); + } catch (IOException ex) { + if (isRemoteFileChangedException(ex)) { + try { + replayIterToLastSuccessfulIndex(ex); + } catch (IOException ex2) { + throw new UncheckedIOException(ex2); + } + return hasNext(); + } else { + throw new UncheckedIOException(ex); + } + + } + } + + @Override + public String next() { + if (!hasNext()) throw new NoSuchElementException(); + + try { + final String ret = nextInternal(); + lastSuccessfullIndex++; + return ret; + } catch (IOException ex) { + if (isRemoteFileChangedException(ex)) { + try { + replayIterToLastSuccessfulIndex(ex); + } catch (IOException ex2) { + throw new UncheckedIOException(ex2); + } + + if (!hasNext()) { + throw new IllegalStateException( + String.format( + "A retried iterator doesn't have enough data " + + "(hasNext=false, lastSuccessfullIndex=%s)", + lastSuccessfullIndex + ) + ); + } + + return next(); + } else { + throw new UncheckedIOException(ex); + } + } + } + + ////////////////////////////////////// + // Package-private APIs for testing // + ////////////////////////////////////// + + /** Visible for testing. */ + int getLastSuccessfullIndex() { + return lastSuccessfullIndex; + } + + /** Visible for testing. */ + int getNumRetries() { + return numRetries; + } + + //////////////////// + // Helper Methods // + //////////////////// + + /** Throw a checked exception so we can catch this in the caller. */ + private boolean hasNextInternal() throws IOException { + return currentIter.hasNext(); + } + + /** Throw a checked exception so we can catch this in the caller. */ + private String nextInternal() throws IOException { + return currentIter.next(); + } + + /** + * Called after a RemoteFileChangedException was thrown. Tries to replay the underlying + * iter implementation (supplied by the `implSupplier`) to the last successful index, so that + * the previous error open (hasNext, or next) can be retried. If a RemoteFileChangedException + * is thrown while replaying the iter, we just increment the `numRetries` counter and try again. + */ + private void replayIterToLastSuccessfulIndex(IOException topLevelEx) throws IOException { + LOG.warn( + "Caught a RemoteFileChangedException. NumRetries is {} / {}.\n{}", + numRetries + 1, maxRetries, topLevelEx + ); + currentIter.close(); + + while (numRetries < maxRetries) { + numRetries++; + LOG.info( + "Replaying until (inclusive) index {}. NumRetries is {} / {}.", + lastSuccessfullIndex, numRetries + 1, maxRetries + ); + currentIter = iterSupplier.get(); + + // Last successful index replayed. 
Starts at -1, and not 0, because 0 means we've + // already replayed the 1st element! + int replayIndex = -1; + try { + while (replayIndex < lastSuccessfullIndex) { + if (currentIter.hasNext()) { + currentIter.next(); // Disregard data that has been read + replayIndex++; + } else { + throw new IllegalStateException( + String.format( + "A retried iterator doesn't have enough data " + + "(replayIndex=%s, lastSuccessfullIndex=%s)", + replayIndex, + lastSuccessfullIndex + ) + ); + } + } + + // Just like how in RetryableCloseableIterator::next we have to handle + // RemoteFileChangedException, we must also hadnle that here during the replay. + // `currentIter.next()` isn't declared to throw a RemoteFileChangedException, so we + // trick the compiler into thinking this block can throw RemoteFileChangedException + // via `fakeIOException`. That way, we can catch it, and retry replaying the iter. + fakeIOException(); + + LOG.info("Successfully replayed until (inclusive) index {}", lastSuccessfullIndex); + + return; + } catch (IOException ex) { + if (isRemoteFileChangedException(ex)) { + // Ignore and try replaying the iter again at the top of the while loop + LOG.warn("Caught a RemoteFileChangedException while replaying the iterator"); + } else { + throw ex; + } + } + } + + throw topLevelEx; + } + + private boolean isRemoteFileChangedException(IOException ex) { + // `endsWith` should still work if the class is shaded. + final String exClassName = ex.getClass().getName(); + return exClassName.endsWith("org.apache.hadoop.fs.s3a.RemoteFileChangedException"); + } + + private void fakeIOException() throws IOException { + if (false) { + throw new IOException(); + } + } +} diff --git a/storage-s3-dynamodb/src/main/java/io/delta/storage/S3DynamoDBLogStore.java b/storage-s3-dynamodb/src/main/java/io/delta/storage/S3DynamoDBLogStore.java new file mode 100644 index 00000000000..69cfe9a8767 --- /dev/null +++ b/storage-s3-dynamodb/src/main/java/io/delta/storage/S3DynamoDBLogStore.java @@ -0,0 +1,365 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
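One small detail in the retry logic above: the RemoteFileChangedException check matches on the class-name suffix rather than using instanceof, so it keeps working if hadoop-aws is shaded under a relocated package prefix. An isolated illustration of that check:

```java
import java.io.IOException;

public class ShadedClassNameCheck {
    // A suffix match also covers a relocated name such as
    // "shadedelta.org.apache.hadoop.fs.s3a.RemoteFileChangedException".
    static boolean isRemoteFileChanged(IOException ex) {
        return ex.getClass().getName()
            .endsWith("org.apache.hadoop.fs.s3a.RemoteFileChangedException");
    }

    public static void main(String[] args) {
        // A plain IOException obviously does not match.
        System.out.println(isRemoteFileChanged(new IOException("boom"))); // false
    }
}
```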
+ */ + +package io.delta.storage; + +import io.delta.storage.utils.ReflectionUtils; +import org.apache.hadoop.fs.Path; + +import java.io.InterruptedIOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; + +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient; +import com.amazonaws.services.dynamodbv2.model.AttributeDefinition; +import com.amazonaws.services.dynamodbv2.model.AttributeValue; +import com.amazonaws.services.dynamodbv2.model.ComparisonOperator; +import com.amazonaws.services.dynamodbv2.model.Condition; +import com.amazonaws.services.dynamodbv2.model.ConditionalCheckFailedException; +import com.amazonaws.services.dynamodbv2.model.DescribeTableResult; +import com.amazonaws.services.dynamodbv2.model.TableDescription; +import com.amazonaws.services.dynamodbv2.model.ExpectedAttributeValue; +import com.amazonaws.services.dynamodbv2.model.GetItemRequest; +import com.amazonaws.services.dynamodbv2.model.KeySchemaElement; +import com.amazonaws.services.dynamodbv2.model.KeyType; +import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput; +import com.amazonaws.services.dynamodbv2.model.PutItemRequest; +import com.amazonaws.services.dynamodbv2.model.QueryRequest; +import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException; +import com.amazonaws.services.dynamodbv2.model.ResourceInUseException; +import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType; +import com.amazonaws.regions.Region; +import com.amazonaws.regions.Regions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A concrete implementation of {@link BaseExternalLogStore} that uses an external DynamoDB table + * to provide the mutual exclusion during calls to `putExternalEntry`. + * + * DynamoDB entries are of form + * - key + * -- tablePath (HASH, STRING) + * -- filename (RANGE, STRING) + * + * - attributes + * -- tempPath (STRING, relative to _delta_log) + * -- complete (STRING, representing boolean, "true" or "false") + * -- commitTime (NUMBER, epoch seconds) + */ +public class S3DynamoDBLogStore extends BaseExternalLogStore { + private static final Logger LOG = LoggerFactory.getLogger(S3DynamoDBLogStore.class); + + /** + * Configuration keys for the DynamoDB client. + * + * Keys are either of the form $SPARK_CONF_PREFIX.$CONF or $BASE_CONF_PREFIX.$CONF, + * e.g. spark.io.delta.storage.S3DynamoDBLogStore.ddb.tableName + * or io.delta.storage.S3DynamoDBLogStore.ddb.tableName + */ + public static final String SPARK_CONF_PREFIX = "spark.io.delta.storage.S3DynamoDBLogStore"; + public static final String BASE_CONF_PREFIX = "io.delta.storage.S3DynamoDBLogStore"; + public static final String READ_RETRIES = "read.retries"; + public static final String DDB_CLIENT_TABLE = "ddb.tableName"; + public static final String DDB_CLIENT_REGION = "ddb.region"; + public static final String DDB_CLIENT_CREDENTIALS_PROVIDER = "credentials.provider"; + public static final String DDB_CREATE_TABLE_RCU = "provisionedThroughput.rcu"; + public static final String DDB_CREATE_TABLE_WCU = "provisionedThroughput.wcu"; + + // WARNING: setting this value too low can cause data loss. Defaults to a duration of 1 day. 
+ public static final String TTL_SECONDS = "ddb.ttl"; + + /** + * DynamoDB table attribute keys + */ + private static final String ATTR_TABLE_PATH = "tablePath"; + private static final String ATTR_FILE_NAME = "fileName"; + private static final String ATTR_TEMP_PATH = "tempPath"; + private static final String ATTR_COMPLETE = "complete"; + private static final String ATTR_EXPIRE_TIME = "expireTime"; + + /** + * Member fields + */ + private final AmazonDynamoDBClient client; + private final String tableName; + private final String credentialsProviderName; + private final String regionName; + private final long expirationDelaySeconds; + + public S3DynamoDBLogStore(Configuration hadoopConf) throws IOException { + super(hadoopConf); + + tableName = getParam(hadoopConf, DDB_CLIENT_TABLE, "delta_log"); + credentialsProviderName = getParam( + hadoopConf, + DDB_CLIENT_CREDENTIALS_PROVIDER, + "com.amazonaws.auth.DefaultAWSCredentialsProviderChain" + ); + regionName = getParam(hadoopConf, DDB_CLIENT_REGION, "us-east-1"); + + final String ttl = getParam(hadoopConf, TTL_SECONDS, null); + expirationDelaySeconds = ttl == null ? + BaseExternalLogStore.DEFAULT_EXTERNAL_ENTRY_EXPIRATION_DELAY_SECONDS : + Long.parseLong(ttl); + if (expirationDelaySeconds < 0) { + throw new IllegalArgumentException( + String.format( + "Can't use negative `%s` value of %s", TTL_SECONDS, expirationDelaySeconds)); + } + + LOG.info("using tableName {}", tableName); + LOG.info("using credentialsProviderName {}", credentialsProviderName); + LOG.info("using regionName {}", regionName); + LOG.info("using ttl (seconds) {}", expirationDelaySeconds); + + client = getClient(); + tryEnsureTableExists(hadoopConf); + } + + @Override + public CloseableIterator read(Path path, Configuration hadoopConf) throws IOException { + // With many concurrent readers/writers, there's a chance that concurrent 'recovery' + // operations occur on the same file, i.e. the same temp file T(N) is copied into the target + // N.json file more than once. Though data loss will *NOT* occur, readers of N.json may + // receive a RemoteFileChangedException from S3 as the ETag of N.json was changed. This is + // safe to retry, so we do so here. + final int maxRetries = Integer.parseInt( + getParam( + hadoopConf, + READ_RETRIES, + Integer.toString(RetryableCloseableIterator.DEFAULT_MAX_RETRIES) + ) + ); + + return new RetryableCloseableIterator(() -> super.read(path, hadoopConf), maxRetries); + } + + @Override + protected long getExpirationDelaySeconds() { + return expirationDelaySeconds; + } + + @Override + protected void putExternalEntry( + ExternalCommitEntry entry, + boolean overwrite) throws IOException { + try { + LOG.debug(String.format("putItem %s, overwrite: %s", entry, overwrite)); + client.putItem(createPutItemRequest(entry, overwrite)); + } catch (ConditionalCheckFailedException e) { + LOG.debug(e.toString()); + throw new java.nio.file.FileAlreadyExistsException( + entry.absoluteFilePath().toString() + ); + } + } + + @Override + protected Optional getExternalEntry( + String tablePath, + String fileName) { + final Map attributes = new ConcurrentHashMap<>(); + attributes.put(ATTR_TABLE_PATH, new AttributeValue(tablePath)); + attributes.put(ATTR_FILE_NAME, new AttributeValue(fileName)); + + Map item = client.getItem( + new GetItemRequest(tableName, attributes).withConsistentRead(true) + ).getItem(); + + return item != null ? 
Optional.of(dbResultToCommitEntry(item)) : Optional.empty(); + } + + @Override + protected Optional getLatestExternalEntry(Path tablePath) { + final Map conditions = new ConcurrentHashMap<>(); + conditions.put( + ATTR_TABLE_PATH, + new Condition() + .withComparisonOperator(ComparisonOperator.EQ) + .withAttributeValueList(new AttributeValue(tablePath.toString())) + ); + + final List> items = client.query( + new QueryRequest(tableName) + .withConsistentRead(true) + .withScanIndexForward(false) + .withLimit(1) + .withKeyConditions(conditions) + ).getItems(); + + if (items.isEmpty()) { + return Optional.empty(); + } else { + return Optional.of(dbResultToCommitEntry(items.get(0))); + } + } + + /** + * Map a DBB query result item to an {@link ExternalCommitEntry}. + */ + private ExternalCommitEntry dbResultToCommitEntry(Map item) { + final AttributeValue expireTimeAttr = item.get(ATTR_EXPIRE_TIME); + return new ExternalCommitEntry( + new Path(item.get(ATTR_TABLE_PATH).getS()), + item.get(ATTR_FILE_NAME).getS(), + item.get(ATTR_TEMP_PATH).getS(), + item.get(ATTR_COMPLETE).getS().equals("true"), + expireTimeAttr != null ? Long.parseLong(expireTimeAttr.getN()) : null + ); + } + + private PutItemRequest createPutItemRequest(ExternalCommitEntry entry, boolean overwrite) { + final Map attributes = new ConcurrentHashMap<>(); + attributes.put(ATTR_TABLE_PATH, new AttributeValue(entry.tablePath.toString())); + attributes.put(ATTR_FILE_NAME, new AttributeValue(entry.fileName)); + attributes.put(ATTR_TEMP_PATH, new AttributeValue(entry.tempPath)); + attributes.put( + ATTR_COMPLETE, + new AttributeValue().withS(Boolean.toString(entry.complete)) + ); + + if (entry.expireTime != null) { + attributes.put( + ATTR_EXPIRE_TIME, + new AttributeValue().withN(entry.expireTime.toString()) + ); + } + + final PutItemRequest pr = new PutItemRequest(tableName, attributes); + + if (!overwrite) { + Map expected = new ConcurrentHashMap<>(); + expected.put(ATTR_FILE_NAME, new ExpectedAttributeValue(false)); + pr.withExpected(expected); + } + + return pr; + } + + private void tryEnsureTableExists(Configuration hadoopConf) throws IOException { + int retries = 0; + boolean created = false; + while(retries < 20) { + String status = "CREATING"; + try { + // https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/dynamodbv2/model/TableDescription.html#getTableStatus-- + DescribeTableResult result = client.describeTable(tableName); + TableDescription descr = result.getTable(); + status = descr.getTableStatus(); + } catch (ResourceNotFoundException e) { + final long rcu = Long.parseLong(getParam(hadoopConf, DDB_CREATE_TABLE_RCU, "5")); + final long wcu = Long.parseLong(getParam(hadoopConf, DDB_CREATE_TABLE_WCU, "5")); + + LOG.info( + "DynamoDB table `{}` in region `{}` does not exist. 
" + + "Creating it now with provisioned throughput of {} RCUs and {} WCUs.", + tableName, regionName, rcu, wcu); + try { + client.createTable( + // attributeDefinitions + java.util.Arrays.asList( + new AttributeDefinition(ATTR_TABLE_PATH, ScalarAttributeType.S), + new AttributeDefinition(ATTR_FILE_NAME, ScalarAttributeType.S) + ), + tableName, + // keySchema + Arrays.asList( + new KeySchemaElement(ATTR_TABLE_PATH, KeyType.HASH), + new KeySchemaElement(ATTR_FILE_NAME, KeyType.RANGE) + ), + new ProvisionedThroughput(rcu, wcu) + ); + created = true; + } catch (ResourceInUseException e3) { + // race condition - table just created by concurrent process + } + } + if (status.equals("ACTIVE")) { + if (created) { + LOG.info("Successfully created DynamoDB table `{}`", tableName); + } else { + LOG.info("Table `{}` already exists", tableName); + } + break; + } else if (status.equals("CREATING")) { + retries += 1; + LOG.info("Waiting for `{}` table creation", tableName); + try { + Thread.sleep(1000); + } catch(InterruptedException e) { + throw new InterruptedIOException(e.getMessage()); + } + } else { + LOG.error("table `{}` status: {}", tableName, status); + break; // TODO - raise exception? + } + }; + } + + private AmazonDynamoDBClient getClient() throws java.io.IOException { + try { + final AWSCredentialsProvider awsCredentialsProvider = + ReflectionUtils.createAwsCredentialsProvider(credentialsProviderName, initHadoopConf()); + final AmazonDynamoDBClient client = new AmazonDynamoDBClient(awsCredentialsProvider); + client.setRegion(Region.getRegion(Regions.fromName(regionName))); + return client; + } catch (ReflectiveOperationException e) { + throw new java.io.IOException(e); + } + } + + /** + * Get the hadoopConf param $name that is prefixed either with $SPARK_CONF_PREFIX or + * $BASE_CONF_PREFIX. + * + * If two parameters exist, one for each prefix, then an IllegalArgumentException is thrown. + * + * If no parameters exist, then the $defaultValue is returned. + */ + protected static String getParam(Configuration hadoopConf, String name, String defaultValue) { + final String sparkPrefixKey = String.format("%s.%s", SPARK_CONF_PREFIX, name); + final String basePrefixKey = String.format("%s.%s", BASE_CONF_PREFIX, name); + + final String sparkPrefixVal = hadoopConf.get(sparkPrefixKey); + final String basePrefixVal = hadoopConf.get(basePrefixKey); + + if (sparkPrefixVal != null && + basePrefixVal != null && + !sparkPrefixVal.equals(basePrefixVal)) { + throw new IllegalArgumentException( + String.format( + "Configuration properties `%s=%s` and `%s=%s` have different values. " + + "Please set only one.", + sparkPrefixKey, sparkPrefixVal, basePrefixKey, basePrefixVal + ) + ); + } + + if (sparkPrefixVal != null) return sparkPrefixVal; + if (basePrefixVal != null) return basePrefixVal; + return defaultValue; + } +} diff --git a/storage-s3-dynamodb/src/main/java/io/delta/storage/utils/ReflectionUtils.java b/storage-s3-dynamodb/src/main/java/io/delta/storage/utils/ReflectionUtils.java new file mode 100644 index 00000000000..bd72c1c83bf --- /dev/null +++ b/storage-s3-dynamodb/src/main/java/io/delta/storage/utils/ReflectionUtils.java @@ -0,0 +1,59 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage.utils; + +import com.amazonaws.auth.AWSCredentialsProvider; +import org.apache.hadoop.conf.Configuration; + +import java.util.Arrays; + +public class ReflectionUtils { + + private static boolean readsCredsFromHadoopConf(Class awsCredentialsProviderClass) { + return Arrays.stream(awsCredentialsProviderClass.getConstructors()) + .anyMatch(constructor -> constructor.getParameterCount() == 1 && + Arrays.equals(constructor.getParameterTypes(), new Class[]{Configuration.class})); + } + + /** + * Create AWS credentials provider from given provider classname and {@link Configuration}. + * + * It first check if AWS Credentials Provider class has constructor Hadoop configuration as parameter. + * If yes - create instance of class using this constructor. + * If no - create instance with empty parameters constructor. + * + * @param credentialsProviderClassName Fully qualified name of the desired credentials provider class. + * @param hadoopConf Hadoop configuration, used to create instance of AWS credentials + * provider, if supported. + * @return {@link AWSCredentialsProvider} object, instantiated from the class @see {credentialsProviderClassName} + * @throws ReflectiveOperationException When AWS credentials provider constrictor do not matched. + * Means class has neither an constructor with no args as input + * nor constructor with only Hadoop configuration as argument. + */ + public static AWSCredentialsProvider createAwsCredentialsProvider( + String credentialsProviderClassName, + Configuration hadoopConf) throws ReflectiveOperationException { + Class awsCredentialsProviderClass = Class.forName(credentialsProviderClassName); + if (readsCredsFromHadoopConf(awsCredentialsProviderClass)) + return (AWSCredentialsProvider) awsCredentialsProviderClass + .getConstructor(Configuration.class) + .newInstance(hadoopConf); + else + return (AWSCredentialsProvider) awsCredentialsProviderClass.getConstructor().newInstance(); + } + +} diff --git a/storage-s3-dynamodb/src/main/java/io/delta/storage/utils/ThrowingSupplier.java b/storage-s3-dynamodb/src/main/java/io/delta/storage/utils/ThrowingSupplier.java new file mode 100644 index 00000000000..b5ea4fdfa2c --- /dev/null +++ b/storage-s3-dynamodb/src/main/java/io/delta/storage/utils/ThrowingSupplier.java @@ -0,0 +1,6 @@ +package io.delta.storage.utils; + +@FunctionalInterface +public interface ThrowingSupplier { + T get() throws E; +} diff --git a/storage-s3-dynamodb/src/test/java/io/delta/storage/FailingS3DynamoDBLogStore.java b/storage-s3-dynamodb/src/test/java/io/delta/storage/FailingS3DynamoDBLogStore.java new file mode 100644 index 00000000000..d9cd04d68b7 --- /dev/null +++ b/storage-s3-dynamodb/src/test/java/io/delta/storage/FailingS3DynamoDBLogStore.java @@ -0,0 +1,86 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
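ReflectionUtils above prefers a credentials-provider constructor that accepts a Hadoop Configuration and otherwise falls back to the no-arg constructor. The same reflection pattern in isolation, with a placeholder `ConfigLike` type standing in for Hadoop's Configuration:

```java
import java.lang.reflect.Constructor;

public class ReflectiveFactorySketch {
    static class ConfigLike {}

    static Object instantiate(String className, ConfigLike conf)
            throws ReflectiveOperationException {
        Class<?> clazz = Class.forName(className);
        for (Constructor<?> c : clazz.getConstructors()) {
            // Prefer a single-argument constructor taking the config type.
            if (c.getParameterCount() == 1 && c.getParameterTypes()[0] == ConfigLike.class) {
                return c.newInstance(conf);
            }
        }
        // Otherwise fall back to the no-arg constructor.
        return clazz.getConstructor().newInstance();
    }

    public static void main(String[] args) throws Exception {
        // StringBuilder has no ConfigLike constructor, so the no-arg fallback is used.
        System.out.println(instantiate("java.lang.StringBuilder", new ConfigLike()).getClass());
    }
}
```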
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import java.io.IOException; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * An ExternalLogStore implementation that allows for easy, probability-based error injection during + * runtime. + * + * This is used to test the error-handling capabilities of S3DynamoDBLogStore during integration + * tests. + */ +public class FailingS3DynamoDBLogStore extends S3DynamoDBLogStore { + + private static java.util.Random rng = new java.util.Random(); + private final ConcurrentHashMap errorRates; + + public FailingS3DynamoDBLogStore(Configuration hadoopConf) throws IOException { + super(hadoopConf); + errorRates = new ConcurrentHashMap<>(); + + // for each optional key in set { write_copy_temp_file, write_put_db_entry, + // fix_delta_log_copy_temp_file, fix_delta_log_put_db_entry }, `errorRates` string is + // expected to be of form key1=value1,key2=value2 etc where each value is a fraction + // indicating how often that method should fail (e.g. 0.10 ==> 10% failure rate). + String errorRatesDef = getParam(hadoopConf, "errorRates", ""); + for (String s: errorRatesDef.split(",")) { + if (!s.contains("=")) continue; + String[] parts = s.split("=", 2); + if (parts.length == 2) { + errorRates.put(parts[0], Float.parseFloat(parts[1])); + } + } + } + + @Override + protected void writeCopyTempFile(FileSystem fs, Path src, Path dst) throws IOException { + injectError("write_copy_temp_file"); + super.writeCopyTempFile(fs, src, dst); + } + + @Override + protected void writePutCompleteDbEntry(ExternalCommitEntry entry) throws IOException { + injectError("write_put_db_entry"); + super.writePutCompleteDbEntry(entry); + } + + @Override + protected void fixDeltaLogCopyTempFile(FileSystem fs, Path src, Path dst) throws IOException { + injectError("fix_delta_log_copy_temp_file"); + super.fixDeltaLogCopyTempFile(fs, src, dst); + } + + @Override + protected void fixDeltaLogPutCompleteDbEntry(ExternalCommitEntry entry) throws IOException { + injectError("fix_delta_log_put_db_entry"); + super.fixDeltaLogPutCompleteDbEntry(entry); + } + + private void injectError(String name) throws IOException { + float rate = errorRates.getOrDefault(name, 0.1f); + if (rng.nextFloat() < rate) { + throw new IOException(String.format("injected failure: %s", name)); + } + } +} diff --git a/storage-s3-dynamodb/src/test/java/io/delta/storage/MemoryLogStore.java b/storage-s3-dynamodb/src/test/java/io/delta/storage/MemoryLogStore.java new file mode 100644 index 00000000000..7cb58f62a78 --- /dev/null +++ b/storage-s3-dynamodb/src/test/java/io/delta/storage/MemoryLogStore.java @@ -0,0 +1,108 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
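The `errorRates` comment above describes a key1=value1,key2=value2 string; here is a hedged sketch of what setting it could look like, assuming the spark prefix shown later in S3DynamoDBLogStoreSuite and arbitrary probabilities:

```java
import org.apache.hadoop.conf.Configuration;

// Hypothetical illustration; the hook names are among the four listed in the comment above.
public class ErrorRatesExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Each value is the probability that the corresponding hook throws an injected IOException.
        conf.set(
            "spark.io.delta.storage.S3DynamoDBLogStore.errorRates",
            "write_copy_temp_file=0.10,write_put_db_entry=0.25,fix_delta_log_copy_temp_file=0.50");
    }
}
```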
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import java.io.IOException; +import java.util.Comparator; +import java.util.concurrent.ConcurrentHashMap; + +import java.util.Optional; + +/** + * Simple ExternalLogStore implementation using an in-memory hashmap (as opposed to an actual + * database) + */ +public class MemoryLogStore extends BaseExternalLogStore { + public MemoryLogStore(Configuration hadoopConf) { + super(hadoopConf); + } + + @Override + protected void putExternalEntry( + ExternalCommitEntry entry, + boolean overwrite) throws IOException { + final String key = createKey(entry.tablePath.toString(), entry.fileName); + final ExternalCommitEntry correctedEntry = new ExternalCommitEntry( + // some tests use "failing:" scheme to inject errors, but we want to store normal paths + new Path(fixPathSchema(entry.tablePath.toString())), + entry.fileName, + entry.tempPath, + entry.complete, + entry.expireTime + ); + + if (overwrite) { + hashMap.put(key, correctedEntry); + } else if (hashMap.containsKey(key)) { // and overwrite=false + throw new java.nio.file.FileAlreadyExistsException("already exists"); + } else { + hashMap.put(key, correctedEntry); + } + } + + @Override + protected Optional getExternalEntry( + String tablePath, + String fileName) { + final String key = createKey(tablePath, fileName); + if (hashMap.containsKey(key)) { + return Optional.of(hashMap.get(key)); + } + return Optional.empty(); + } + + @Override + protected Optional getLatestExternalEntry(Path tablePath) { + final Path fixedTablePath = new Path(fixPathSchema(tablePath.toString())); + return hashMap + .values() + .stream() + .filter(item -> item.tablePath.equals(fixedTablePath)) + .max(Comparator.comparing(ExternalCommitEntry::absoluteFilePath)); + } + + /** + * ExternalLogStoreSuite sometimes uses "failing:" scheme prefix to inject errors during tests + * However, we want lookups for the same $tablePath to return the same result, regardless of + * scheme. 
+ */ + static String fixPathSchema(String tablePath) { + return tablePath.replace("failing:", "file:"); + } + + static String createKey(String tablePath, String fileName) { + return String.format("%s-%s", fixPathSchema(tablePath), fileName); + } + + static ExternalCommitEntry get(Path path) { + final String tablePath = path.getParent().getParent().toString(); + final String fileName = path.getName(); + final String key = createKey(tablePath, fileName); + return hashMap.get(key); + } + + static boolean containsKey(Path path) { + final String tablePath = path.getParent().getParent().toString(); + final String fileName = path.getName(); + final String key = createKey(tablePath, fileName); + return hashMap.containsKey(key); + } + + static ConcurrentHashMap hashMap = new ConcurrentHashMap<>(); +} diff --git a/storage-s3-dynamodb/src/test/java/io/delta/storage/utils/ReflectionsUtilsSuiteHelper.java b/storage-s3-dynamodb/src/test/java/io/delta/storage/utils/ReflectionsUtilsSuiteHelper.java new file mode 100644 index 00000000000..c5788c6fb76 --- /dev/null +++ b/storage-s3-dynamodb/src/test/java/io/delta/storage/utils/ReflectionsUtilsSuiteHelper.java @@ -0,0 +1,55 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage.utils; + +import com.amazonaws.auth.AWSCredentials; +import com.amazonaws.auth.AWSCredentialsProvider; +import org.apache.hadoop.conf.Configuration; + +public class ReflectionsUtilsSuiteHelper { + // this class only purpose to test DynamoDBLogStore logic to create AWS credentials provider with reflection. + public static class TestOnlyAWSCredentialsProviderWithHadoopConf implements AWSCredentialsProvider { + + public TestOnlyAWSCredentialsProviderWithHadoopConf(Configuration hadoopConf) {} + + @Override + public AWSCredentials getCredentials() { + return null; + } + + @Override + public void refresh() { + + } + } + + // this class only purpose to test DynamoDBLogStore logic to create AWS credentials provider with reflection. + public static class TestOnlyAWSCredentialsProviderWithUnexpectedConstructor implements AWSCredentialsProvider { + + public TestOnlyAWSCredentialsProviderWithUnexpectedConstructor(String hadoopConf) {} + + @Override + public AWSCredentials getCredentials() { + return null; + } + + @Override + public void refresh() { + + } + } +} diff --git a/storage-s3-dynamodb/src/test/scala/io/delta/storage/ExternalLogStoreSuite.scala b/storage-s3-dynamodb/src/test/scala/io/delta/storage/ExternalLogStoreSuite.scala new file mode 100644 index 00000000000..945618f355b --- /dev/null +++ b/storage-s3-dynamodb/src/test/scala/io/delta/storage/ExternalLogStoreSuite.scala @@ -0,0 +1,355 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage + +import java.io.File +import java.net.URI + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs._ +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.spark.sql.delta.FakeFileSystem +import org.apache.spark.sql.delta.util.FileNames + +///////////////////// +// Base Test Suite // +///////////////////// + +class ExternalLogStoreSuite extends org.apache.spark.sql.delta.PublicLogStoreSuite { + override protected val publicLogStoreClassName: String = + classOf[MemoryLogStore].getName + + testHadoopConf( + expectedErrMsg = "No FileSystem for scheme \"fake\"", + "fs.fake.impl" -> classOf[FakeFileSystem].getName, + "fs.fake.impl.disable.cache" -> "true" + ) + + def getDeltaVersionPath(logDir: File, version: Int): Path = { + FileNames.deltaFile(new Path(logDir.toURI), version) + } + + def getFailingDeltaVersionPath(logDir: File, version: Int): Path = { + FileNames.deltaFile(new Path(s"failing:${logDir.getCanonicalPath}"), version) + } + + test("single write") { + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + val path = getDeltaVersionPath(tempLogDir, 0) + store.write(path, Iterator("foo", "bar"), overwrite = false, sessionHadoopConf) + val entry = MemoryLogStore.get(path); + assert(entry != null) + assert(entry.complete); + } + } + + test("double write") { + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + val path = getDeltaVersionPath(tempLogDir, 0) + store.write(path, Iterator("foo", "bar"), overwrite = false, sessionHadoopConf) + assert(MemoryLogStore.containsKey(path)) + assertThrows[java.nio.file.FileSystemException] { + store.write(path, Iterator("foo", "bar"), overwrite = false, sessionHadoopConf) + } + } + } + + test("overwrite") { + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + val path = getDeltaVersionPath(tempLogDir, 0) + store.write(path, Iterator("foo", "bar"), overwrite = false, sessionHadoopConf) + assert(MemoryLogStore.containsKey(path)) + store.write(path, Iterator("foo", "bar"), overwrite = true, sessionHadoopConf) + assert(MemoryLogStore.containsKey(path)) + } + } + + test("write N fails if overwrite=false and N already exists in FileSystem " + + "and N does not exist in external store") { + withTempLogDir { tempLogDir => + val delta0 = getDeltaVersionPath(tempLogDir, 0) + val delta1_a = getDeltaVersionPath(tempLogDir, 1) + val delta1_b = getDeltaVersionPath(tempLogDir, 1) + + val store = createLogStore(spark) + store.write(delta0, Iterator("zero"), overwrite = false, sessionHadoopConf) + store.write(delta1_a, Iterator("one_a"), overwrite = false, sessionHadoopConf) + + // Pretend that BaseExternalLogStore.getExpirationDelaySeconds() seconds have + // transpired and that the external store has run TTL cleanup. 
+ MemoryLogStore.hashMap.clear(); + + val e = intercept[java.nio.file.FileAlreadyExistsException] { + store.write(delta1_b, Iterator("one_b"), overwrite = false, sessionHadoopConf) + } + + assert(e.getMessage.contains(delta1_b.toString)) + } + } + + test("write N fails and does not write to external store if overwrite=false and N " + + "already exists in FileSystem and N already exists in external store") { + withTempLogDir { tempLogDir => + val delta0 = getDeltaVersionPath(tempLogDir, 0) + val delta1_a = getDeltaVersionPath(tempLogDir, 1) + val delta1_b = getDeltaVersionPath(tempLogDir, 1) + + val store = createLogStore(spark) + store.write(delta0, Iterator("zero"), overwrite = false, sessionHadoopConf) + store.write(delta1_a, Iterator("one_a"), overwrite = false, sessionHadoopConf) + + assert(MemoryLogStore.hashMap.size() == 2) + + val e = intercept[java.nio.file.FileAlreadyExistsException] { + store.write(delta1_b, Iterator("one_b"), overwrite = false, sessionHadoopConf) + } + + assert(e.getMessage.contains(delta1_b.toString)) + assert(MemoryLogStore.hashMap.size() == 2) + } + } + + test("write N+1 fails if N doesn't exist in external store or FileSystem") { + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + + val delta0 = getDeltaVersionPath(tempLogDir, 0) + val delta1 = getDeltaVersionPath(tempLogDir, 1) + val e = intercept[java.nio.file.FileSystemException] { + store.write(delta1, Iterator("one"), overwrite = false, sessionHadoopConf) + } + assert(e.getMessage == s"previous commit $delta0 doesn't exist on the file system but does in the external log store") + } + } + + // scalastyle:off line.size.limit + test("write N+1 fails if N is marked as complete in external store but doesn't exist in FileSystem") { + // scalastyle:on line.size.limit + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + + val delta0 = getDeltaVersionPath(tempLogDir, 0) + val delta1 = getDeltaVersionPath(tempLogDir, 1) + + store.write(delta0, Iterator("one"), overwrite = false, sessionHadoopConf) + delta0.getFileSystem(sessionHadoopConf).delete(delta0, true) + val e = intercept[java.nio.file.FileSystemException] { + store.write(delta1, Iterator("one"), overwrite = false, sessionHadoopConf) + } + assert(e.getMessage == s"previous commit $delta0 doesn't exist on the file system but does in the external log store") + } + } + + test("write N+1 succeeds and recovers version N if N is incomplete in external store") { + withSQLConf( + "fs.failing.impl" -> classOf[FailingFileSystem].getName, + "fs.failing.impl.disable.cache" -> "true" + ) { + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + + val delta0_normal = getDeltaVersionPath(tempLogDir, 0) + val delta0_fail = getFailingDeltaVersionPath(tempLogDir, 0) + val delta1 = getDeltaVersionPath(tempLogDir, 1) + + // Create N (incomplete) in external store, with no N in FileSystem + FailingFileSystem.failOnSuffix = Some(delta0_fail.getName) + store.write(delta0_fail, Iterator("zero"), overwrite = false, sessionHadoopConf) + assert(!delta0_fail.getFileSystem(sessionHadoopConf).exists(delta0_fail)) + assert(!MemoryLogStore.get(delta0_fail).complete) + + // Write N + 1 and check that recovery was performed + store.write(delta1, Iterator("one"), overwrite = false, sessionHadoopConf) + assert(delta0_fail.getFileSystem(sessionHadoopConf).exists(delta0_fail)) + assert(MemoryLogStore.get(delta0_fail).complete) + assert(MemoryLogStore.get(delta1).complete) + } + } + } + + test("listFrom performs recovery") { + withSQLConf( + 
"fs.failing.impl" -> classOf[FailingFileSystem].getName, + "fs.failing.impl.disable.cache" -> "true" + ) { + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + val delta0_normal = getDeltaVersionPath(tempLogDir, 0) + val delta0_fail = getFailingDeltaVersionPath(tempLogDir, 0) + + // fail to write to FileSystem when we try to commit 0000.json + FailingFileSystem.failOnSuffix = Some(delta0_fail.getName) + + // try and commit 0000.json + store.write(delta0_fail, Iterator("foo", "bar"), overwrite = false, sessionHadoopConf) + + // check that entry was written to external store and that it doesn't exist in FileSystem + val entry = MemoryLogStore.get(delta0_fail) + assert(!entry.complete) + assert(!delta0_fail.getFileSystem(sessionHadoopConf).exists(delta0_fail)) + + // Now perform a `listFrom` read, which should fix the transaction log + val contents = store.read(entry.absoluteTempPath(), sessionHadoopConf).toList + FailingFileSystem.failOnSuffix = None + store.listFrom(delta0_normal, sessionHadoopConf) + + val entry2 = MemoryLogStore.get(delta0_normal) + assert(entry2.complete) + assert(store.read(entry2.absoluteFilePath(), sessionHadoopConf).toList == contents) + } + } + } + + test("write to new Delta table but a DynamoDB entry for it already exists") { + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + + // write 0000.json + val path = getDeltaVersionPath(tempLogDir, 0) + store.write(path, Iterator("foo"), overwrite = false, sessionHadoopConf) + + // delete 0000.json from FileSystem + val fs = path.getFileSystem(sessionHadoopConf) + fs.delete(path, false) + + // try and write a new 0000.json, while the external store entry still exists + val e = intercept[java.nio.file.FileSystemException] { + store.write(path, Iterator("bar"), overwrite = false, sessionHadoopConf) + }.getMessage + + val tablePath = path.getParent.getParent + assert(e == s"Old entries for table $tablePath still exist in the external log store") + } + } + + test("listFrom exceptions") { + val store = createLogStore(spark) + assertThrows[java.io.FileNotFoundException] { + store.listFrom("/non-existing-path/with-parent") + } + } + + test("MemoryLogStore ignores failing scheme") { + withSQLConf( + "fs.failing.impl" -> classOf[FailingFileSystem].getName, + "fs.failing.impl.disable.cache" -> "true" + ) { + withTempLogDir { tempLogDir => + val store = createLogStore(spark) + val delta0_normal = getDeltaVersionPath(tempLogDir, 0) + val delta0_fail = getFailingDeltaVersionPath(tempLogDir, 0) + + store.write(delta0_fail, Iterator("zero"), overwrite = false, sessionHadoopConf) + assert(MemoryLogStore.get(delta0_fail) eq MemoryLogStore.get(delta0_normal)) + } + } + } + + protected def shouldUseRenameToWriteCheckpoint: Boolean = false +} + +/////////////////////////////////// +// S3DynamoDBLogStore Test Suite // +/////////////////////////////////// + +class S3DynamoDBLogStoreSuite extends AnyFunSuite { + test("getParam") { + import S3DynamoDBLogStore._ + + val sparkPrefixKey = "spark.io.delta.storage.S3DynamoDBLogStore.ddb.tableName" + val basePrefixKey = "io.delta.storage.S3DynamoDBLogStore.ddb.tableName" + + // Sanity check + require(sparkPrefixKey == SPARK_CONF_PREFIX + "." + DDB_CLIENT_TABLE) + require(basePrefixKey == BASE_CONF_PREFIX + "." 
+ DDB_CLIENT_TABLE) + + // Case 1: no parameters exist, should use default + assert(getParam(new Configuration(), DDB_CLIENT_TABLE, "default_table") == "default_table") + + // Case 2: spark-prefix param only + { + val hadoopConf = new Configuration() + hadoopConf.set(sparkPrefixKey, "some_other_table_2") + assert(getParam(hadoopConf, DDB_CLIENT_TABLE, "default_table") == "some_other_table_2") + } + + // Case 3: base-prefix param only + { + val hadoopConf = new Configuration() + hadoopConf.set(basePrefixKey, "some_other_table_3") + assert(getParam(hadoopConf, DDB_CLIENT_TABLE, "default_table") == "some_other_table_3") + } + + // Case 4: both params set, same value + { + val hadoopConf = new Configuration() + hadoopConf.set(sparkPrefixKey, "some_other_table_4") + hadoopConf.set(basePrefixKey, "some_other_table_4") + assert(getParam(hadoopConf, DDB_CLIENT_TABLE, "default_table") == "some_other_table_4") + } + + // Case 5: both param set, different value + { + val hadoopConf = new Configuration() + hadoopConf.set(sparkPrefixKey, "some_other_table_5a") + hadoopConf.set(basePrefixKey, "some_other_table_5b") + val e = intercept[IllegalArgumentException] { + getParam(hadoopConf, DDB_CLIENT_TABLE, "default_table") + }.getMessage + assert(e == (s"Configuration properties `$sparkPrefixKey=some_other_table_5a` and " + + s"`$basePrefixKey=some_other_table_5b` have different values. Please set only one.")) + } + } +} + +//////////////////////////////// +// File System Helper Classes // +//////////////////////////////// + +/** + * This utility enables failure simulation on file system. + * Providing a matching suffix results in an exception being + * thrown that allows to test file system failure scenarios. + */ +class FailingFileSystem extends RawLocalFileSystem { + override def getScheme: String = FailingFileSystem.scheme + + override def getUri: URI = FailingFileSystem.uri + + override def create(path: Path, overwrite: Boolean): FSDataOutputStream = { + + FailingFileSystem.failOnSuffix match { + case Some(suffix) => + if (path.toString.endsWith(suffix)) { + throw new java.nio.file.FileSystemException("fail") + } + case None => ; + } + super.create(path, overwrite) + } +} + +object FailingFileSystem { + private val scheme = "failing" + private val uri: URI = URI.create(s"$scheme:///") + + var failOnSuffix: Option[String] = None +} diff --git a/storage-s3-dynamodb/src/test/scala/io/delta/storage/RetryableCloseableIteratorSuite.scala b/storage-s3-dynamodb/src/test/scala/io/delta/storage/RetryableCloseableIteratorSuite.scala new file mode 100644 index 00000000000..2f0372466f0 --- /dev/null +++ b/storage-s3-dynamodb/src/test/scala/io/delta/storage/RetryableCloseableIteratorSuite.scala @@ -0,0 +1,231 @@ +package io.delta.storage + +import java.io.{FileNotFoundException, IOException} + +import scala.collection.JavaConverters._ + +import io.delta.storage.utils.ThrowingSupplier +import org.apache.hadoop.fs.s3a.RemoteFileChangedException +import org.scalatest.funsuite.AnyFunSuite + +class RetryableCloseableIteratorSuite extends AnyFunSuite { + + private def getIter( + range: Range, + throwAtIndex: Option[Int] = None): CloseableIterator[String] = + new CloseableIterator[String] { + var index = 0 + val impl = range.iterator.asJava + + override def close(): Unit = { } + + override def hasNext: Boolean = { + impl.hasNext + } + + override def next(): String = { + if (throwAtIndex.contains(index)) { + throw new RemoteFileChangedException(s"path -> index $index", "operation", "msg"); + } + + index = index + 1 + + 
impl.next().toString + } + } + + /** + * Fails at indices 25, 50, 75, 110. + * + * Provide a suitable input range to get the # of failures you want. e.g. range 0 to 100 will fail + * 3 times. + */ + def getFailingIterSupplier( + range: Range, + failIndices: Seq[Int] = Seq.empty): ThrowingSupplier[CloseableIterator[String], IOException] = + new ThrowingSupplier[CloseableIterator[String], IOException] { + var numGetCalls = 0 + + override def get(): CloseableIterator[String] = { + if (numGetCalls < failIndices.length) { + val result = getIter(range, Some(failIndices(numGetCalls))) + numGetCalls = numGetCalls + 1 + result + } else { + getIter(range) + } + } + } + + test("simple case - internally keeps track of the correct index") { + val testIter = new RetryableCloseableIterator(() => getIter(0 to 100)) + assert(testIter.getLastSuccessfullIndex == -1) + + for (i <- 0 to 100) { + val elem = testIter.next() + assert(elem.toInt == i) + assert(testIter.getLastSuccessfullIndex == i) + } + + assert(!testIter.hasNext) // this would be index 101 + } + + test("complex case - replays underlying iter back to correct index after error") { + // Here, we just do the simplest verification + val testIter1 = new RetryableCloseableIterator( + getFailingIterSupplier(0 to 100, Seq(25, 50, 75))) + + // this asserts the size, order, and elements of the testIter1 + assert(testIter1.asScala.toList.map(_.toInt) == (0 to 100).toList) + + // Here, we do more complex verification + val testIter2 = new RetryableCloseableIterator( + getFailingIterSupplier(0 to 100, Seq(25, 50, 75))) + + for (_ <- 0 to 24) { testIter2.next() } + assert(testIter2.getLastSuccessfullIndex == 24) + assert(testIter2.getNumRetries == 0) + + assert(testIter2.next().toInt == 25) // this will fail once, and then re-scan + assert(testIter2.getLastSuccessfullIndex == 25) + assert(testIter2.getNumRetries == 1) + + for (_ <- 26 to 49) { testIter2.next() } + assert(testIter2.getLastSuccessfullIndex == 49) + assert(testIter2.getNumRetries == 1) + + assert(testIter2.next().toInt == 50) // this will fail once, and then re-scan + assert(testIter2.getLastSuccessfullIndex == 50) + assert(testIter2.getNumRetries == 2) + + for (_ <- 51 to 74) { testIter2.next() } + assert(testIter2.getLastSuccessfullIndex == 74) + assert(testIter2.getNumRetries == 2) + + assert(testIter2.next().toInt == 75) // this will fail once, and then re-scan + assert(testIter2.getLastSuccessfullIndex == 75) + assert(testIter2.getNumRetries == 3) + + for (_ <- 76 to 100) { testIter2.next() } + assert(testIter2.getLastSuccessfullIndex == 100) + assert(!testIter2.hasNext) + } + + test("handles exceptions while retrying") { + // Iterates normally until index 50 (return [0, 49] successfully). Then fails. + // Tries to replay, but fails at 30 + // Tries to replay again, but fails at 20 + // Successfully replays to 49, starts returning results from index 50 (inclusive) again + val testIter1 = + new RetryableCloseableIterator(getFailingIterSupplier(0 to 100, Seq(50, 30, 20))) + + assert(testIter1.asScala.toList.map(_.toInt) == (0 to 100).toList) + + // Iterates normally until index 50 (return [0, 49] successfully). Then fails. + // Successfully replayed to 49, starts returning results from index 50 (inclusive) + // Fails at index 50 (returned [50, 69]). 
Tries to replay, but fails at 5 + // Successfully replays until 69, then normally returns results from 70 + val testIter2 = + new RetryableCloseableIterator(getFailingIterSupplier(0 to 100, Seq(50, 70, 5))) + assert(testIter2.asScala.toList.map(_.toInt) == (0 to 100).toList) + } + + test("throws after maxRetries exceptions") { + val testIter = + new RetryableCloseableIterator(getFailingIterSupplier(0 to 100, Seq(20, 49, 60, 80))) + + for (i <- 0 to 79) { + assert(testIter.next().toInt == i) + } + assert(testIter.getNumRetries == 3) + val ex = intercept[RuntimeException] { + testIter.next() + } + assert(ex.getCause.isInstanceOf[RemoteFileChangedException]) + } + + test("can specify maxRetries") { + val testIter1 = + new RetryableCloseableIterator( + getFailingIterSupplier(0 to 100, Seq(5, 10, 15, 20, 25, 30, 35, 40, 45, 50)), + 10 // maxRetries + ) + + assert(testIter1.asScala.toList.map(_.toInt) == (0 to 100).toList) + + val testIter2 = + new RetryableCloseableIterator( + getFailingIterSupplier(0 to 100, Seq(5, 10, 15, 20, 25, 30)), + 5 // maxRetries + ) + + for (i <- 0 to 29) { + assert(testIter2.next().toInt == i) + } + assert(testIter2.getNumRetries == 5) + val ex = intercept[RuntimeException] { + testIter2.next() + } + assert(ex.getCause.isInstanceOf[RemoteFileChangedException]) + } + + test("retried iterator doesn't have enough data (underlying data changed!)") { + val testIter = new RetryableCloseableIterator( + new ThrowingSupplier[CloseableIterator[String], IOException] { + var getCount = 0 + + override def get(): CloseableIterator[String] = getCount match { + case 0 => + getCount = getCount + 1 + getIter(0 to 100, Some(50)) // try to iterate 0->100, fail at 50 + + case 1 => + getCount = getCount + 1 + getIter(0 to 30) // try to replay 0 to 50, but no elements after 30! + } + } + ) + + for (_ <- 0 to 49) { testIter.next() } + val e = intercept[IllegalStateException] { + testIter.next() + } + assert(e.getMessage.contains("A retried iterator doesn't have enough data")) + } + + test("after replaying the iter, hasNext is false") { + val testIter = new RetryableCloseableIterator( + new ThrowingSupplier[CloseableIterator[String], IOException] { + var getCount = 0 + + override def get(): CloseableIterator[String] = getCount match { + case 0 => + getCount = getCount + 1 + getIter(0 to 100, Some(50)) // try to iterate 0->100, fail at 50 + + case 1 => + getCount = getCount + 1 + // when we failed at index 50 above, the lastSuccessfulIndex was 49. here, we can + // replay back to index 49, but the `hasNext` call will be false! + getIter(0 to 49) + } + } + ) + + for (_ <- 0 to 49) { testIter.next() } + assert(testIter.getLastSuccessfullIndex == 49) + + val e = intercept[IllegalStateException] { + testIter.next() + } + assert(e.getMessage.contains("A retried iterator doesn't have enough data (hasNext=false, " + + "lastSuccessfullIndex=49)")) + } + + test("throws FileNotFoundException (i.e. not UncheckedIOException) if file not found") { + intercept[FileNotFoundException] { + new RetryableCloseableIterator(() => { throw new FileNotFoundException() }) + } + } + +} diff --git a/storage-s3-dynamodb/src/test/scala/io/delta/storage/utils/ReflectionsUtilsSuite.scala b/storage-s3-dynamodb/src/test/scala/io/delta/storage/utils/ReflectionsUtilsSuite.scala new file mode 100644 index 00000000000..7bb870262ad --- /dev/null +++ b/storage-s3-dynamodb/src/test/scala/io/delta/storage/utils/ReflectionsUtilsSuite.scala @@ -0,0 +1,56 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. 
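Stepping back from the suite above, a minimal sketch of how RetryableCloseableIterator is driven; the constructor shape (a ThrowingSupplier of CloseableIterator) is taken from the tests, while the line-producing supplier here is purely illustrative:

```java
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;

import io.delta.storage.CloseableIterator;
import io.delta.storage.RetryableCloseableIterator;

// Hypothetical illustration class; not part of the patch.
public class RetryableIteratorExample {
    // Illustrative supplier; in S3DynamoDBLogStore the supplier would re-open the underlying read.
    static CloseableIterator<String> openLines() {
        final Iterator<String> lines = Arrays.asList("a", "b", "c").iterator();
        return new CloseableIterator<String>() {
            @Override public boolean hasNext() { return lines.hasNext(); }
            @Override public String next() { return lines.next(); }
            @Override public void close() { }
        };
    }

    public static void main(String[] args) throws IOException {
        // On a retriable failure the wrapper re-invokes the supplier and replays the fresh
        // iterator up to the last successfully returned index before continuing.
        try (RetryableCloseableIterator iter =
                 new RetryableCloseableIterator(RetryableIteratorExample::openLines)) {
            while (iter.hasNext()) {
                System.out.println(iter.next());
            }
        }
    }
}
```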
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage.utils + +import com.amazonaws.auth.EnvironmentVariableCredentialsProvider +import io.delta.storage.utils.ReflectionsUtilsSuiteHelper.TestOnlyAWSCredentialsProviderWithHadoopConf +import org.apache.hadoop.conf.Configuration +import org.scalatest.funsuite.AnyFunSuite + +class ReflectionsUtilsSuite extends AnyFunSuite { + private val emptyHadoopConf = new Configuration() + + test("support AWS credentials provider with hadoop Configuration as constructor parameter") { + val awsProvider = ReflectionUtils.createAwsCredentialsProvider( + "io.delta.storage.utils.ReflectionsUtilsSuiteHelper" + + "$TestOnlyAWSCredentialsProviderWithHadoopConf", + emptyHadoopConf + ) + assert( + awsProvider.isInstanceOf[TestOnlyAWSCredentialsProviderWithHadoopConf] + ) + } + + test("support AWS credentials provider with empty constructor(default from aws lib)") { + val awsProvider = ReflectionUtils.createAwsCredentialsProvider( + classOf[EnvironmentVariableCredentialsProvider].getCanonicalName, + emptyHadoopConf + ) + assert(awsProvider.isInstanceOf[EnvironmentVariableCredentialsProvider]) + } + + test("do not support AWS credentials provider with unexpected constructors parameters") { + assertThrows[NoSuchMethodException] { + ReflectionUtils.createAwsCredentialsProvider( + "io.delta.storage.utils.ReflectionsUtilsSuiteHelper" + + "$TestOnlyAWSCredentialsProviderWithUnexpectedConstructor", + emptyHadoopConf + ) + } + } + +} diff --git a/storage/src/main/java/io/delta/storage/AzureLogStore.java b/storage/src/main/java/io/delta/storage/AzureLogStore.java new file mode 100644 index 00000000000..5422f64e3aa --- /dev/null +++ b/storage/src/main/java/io/delta/storage/AzureLogStore.java @@ -0,0 +1,61 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import java.io.IOException; +import java.util.Iterator; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +/** + * LogStore implementation for Azure. + *

+ * We assume the following from Azure's [[FileSystem]] implementations:
+ * <ul>
+ *   <li>Rename without overwrite is atomic.</li>
+ *   <li>List-after-write is consistent.</li>
+ * </ul>
+ * <p>
+ * Regarding file creation, this implementation:
+ * <ul>
+ *   <li>Uses atomic rename when overwrite is false; if the destination file exists or the rename
+ *       fails, throws an exception.</li>
+ *   <li>Uses create-with-overwrite when overwrite is true. This does not make the file atomically
+ *       visible and therefore the caller must handle partial files.</li>
+ * </ul>
+ */ +public class AzureLogStore extends HadoopFileSystemLogStore { + + public AzureLogStore(Configuration hadoopConf) { + super(hadoopConf); + } + + @Override + public void write( + Path path, + Iterator actions, + Boolean overwrite, + Configuration hadoopConf) throws IOException { + writeWithRename(path, actions, overwrite, hadoopConf); + } + + @Override + public Boolean isPartialWriteVisible(Path path, Configuration hadoopConf) { + return true; + } +} diff --git a/storage/src/main/java/io/delta/storage/CloseableIterator.java b/storage/src/main/java/io/delta/storage/CloseableIterator.java new file mode 100644 index 00000000000..bcd74d9bc18 --- /dev/null +++ b/storage/src/main/java/io/delta/storage/CloseableIterator.java @@ -0,0 +1,30 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import java.io.Closeable; +import java.util.Iterator; + +/** + * :: DeveloperApi :: + * + * An iterator that may contain resources which should be released after use. Users of + * CloseableIterator are responsible for closing the iterator if they are done with it. + * + * @since 1.0.0 + */ +public interface CloseableIterator extends Iterator, Closeable {} diff --git a/storage/src/main/java/io/delta/storage/GCSLogStore.java b/storage/src/main/java/io/delta/storage/GCSLogStore.java new file mode 100644 index 00000000000..84c4f2f5505 --- /dev/null +++ b/storage/src/main/java/io/delta/storage/GCSLogStore.java @@ -0,0 +1,135 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import com.google.common.base.Throwables; +import io.delta.storage.internal.ThreadUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.FileAlreadyExistsException; +import java.util.Iterator; +import java.util.concurrent.Callable; + +/** + * The {@link LogStore} implementation for GCS, which uses gcs-connector to + * provide the necessary atomic and durability guarantees: + * + *
+ * <ol>
+ *   <li>Atomic Visibility: Read/read-after-metadata-update/delete are strongly
+ *       consistent for GCS.</li>
+ *   <li>Consistent Listing: GCS guarantees strong consistency for both object and
+ *       bucket listing operations.
+ *       https://cloud.google.com/storage/docs/consistency</li>
+ *   <li>Mutual Exclusion: Preconditions are used to handle race conditions.</li>
+ * </ol>
+ * <p>
+ * Regarding file creation, this implementation:
+ * <ul>
+ *   <li>Throws [[FileAlreadyExistsException]] if the file exists and overwrite is false.</li>
+ *   <li>Opens a stream to write to GCS otherwise.</li>
+ *   <li>Assumes file writing to be all-or-nothing, irrespective of the overwrite option.</li>
+ * </ul>
+ * <p>
+ * This class is not meant for direct access but for configuration based on storage system. + * See https://docs.delta.io/latest/delta-storage.html for details. + */ +public class GCSLogStore extends HadoopFileSystemLogStore { + + final String preconditionFailedExceptionMessage = "412 Precondition Failed"; + + public GCSLogStore(Configuration hadoopConf) { + super(hadoopConf); + } + + @Override + public void write( + Path path, + Iterator actions, + Boolean overwrite, + Configuration hadoopConf) throws IOException { + final FileSystem fs = path.getFileSystem(hadoopConf); + + // This is needed for the tests to throw error with local file system. + if (fs instanceof LocalFileSystem && !overwrite && fs.exists(path)) { + throw new FileAlreadyExistsException(path.toString()); + } + + // GCS may upload an incomplete file when the current thread is interrupted, hence we move + // the write to a new thread so that the write cannot be interrupted. + // TODO Remove this hack when the GCS Hadoop connector fixes the issue. + // If overwrite=false and path already exists, gcs-connector will throw + // org.apache.hadoop.fs.FileAlreadyExistsException after fs.create is invoked. + // This should be mapped to java.nio.file.FileAlreadyExistsException. + Callable body = () -> { + FSDataOutputStream stream = fs.create(path, overwrite); + while (actions.hasNext()) { + stream.write((actions.next() + "\n").getBytes(StandardCharsets.UTF_8)); + } + stream.close(); + return ""; + }; + + try { + ThreadUtils.runInNewThread("delta-gcs-logstore-write", true, body); + } catch (org.apache.hadoop.fs.FileAlreadyExistsException e) { + throw new FileAlreadyExistsException(path.toString()); + } catch (IOException e) { + // GCS uses preconditions to handle race conditions for multiple writers. + // If path gets created between fs.create and stream.close by an external + // agent or race conditions. Then this block will execute. + // Reference: https://cloud.google.com/storage/docs/generations-preconditions + if (isPreconditionFailure(e)) { + if (!overwrite) { + throw new FileAlreadyExistsException(path.toString()); + } + } else { + throw e; + } + } catch (InterruptedException e) { + InterruptedIOException iio = new InterruptedIOException(e.getMessage()); + iio.initCause(e); + throw iio; + } catch (Error | RuntimeException t) { + throw t; + } catch (Throwable t) { + // Throw RuntimeException to avoid the calling interfaces from throwing Throwable + throw new RuntimeException(t.getMessage(), t); + } + } + + private boolean isPreconditionFailure(Throwable x) { + return Throwables.getCausalChain(x) + .stream() + .filter(p -> p != null) + .filter(p -> p.getMessage() != null) + .anyMatch(p -> p.getMessage().contains(preconditionFailedExceptionMessage)); + } + + @Override + public Boolean isPartialWriteVisible(Path path, Configuration hadoopConf) throws IOException { + return false; + } +} diff --git a/storage/src/main/java/io/delta/storage/HDFSLogStore.java b/storage/src/main/java/io/delta/storage/HDFSLogStore.java new file mode 100644 index 00000000000..af5106bcf2f --- /dev/null +++ b/storage/src/main/java/io/delta/storage/HDFSLogStore.java @@ -0,0 +1,194 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.lang.reflect.Method; +import java.nio.charset.StandardCharsets; +import java.nio.file.FileAlreadyExistsException; +import java.util.EnumSet; +import java.util.Iterator; + +import io.delta.storage.internal.LogStoreErrors; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.CreateFlag; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The {@link LogStore} implementation for HDFS, which uses Hadoop {@link FileContext} API's to + * provide the necessary atomic and durability guarantees: + *

+ * <ol>
+ *   <li>Atomic visibility of files: `FileContext.rename` is used to write files, which is
+ *       atomic for HDFS.</li>
+ *   <li>Consistent file listing: HDFS file listing is consistent.</li>
+ * </ol>
+ */ +public class HDFSLogStore extends HadoopFileSystemLogStore { + private static final Logger LOG = LoggerFactory.getLogger(HDFSLogStore.class); + public static final String NO_ABSTRACT_FILE_SYSTEM_EXCEPTION_MESSAGE = "No AbstractFileSystem"; + + public HDFSLogStore(Configuration hadoopConf) { + super(hadoopConf); + } + + @Override + public void write( + Path path, + Iterator actions, + Boolean overwrite, + Configuration hadoopConf) throws IOException { + final boolean isLocalFs = path.getFileSystem(hadoopConf) instanceof RawLocalFileSystem; + if (isLocalFs) { + // We need to add `synchronized` for RawLocalFileSystem as its rename will not throw an + // exception when the target file exists. Hence we must make sure `exists + rename` in + // `writeInternal` for RawLocalFileSystem is atomic in our tests. + synchronized(this) { + writeInternal(path, actions, overwrite, hadoopConf); + } + } else { + // rename is atomic and also will fail when the target file exists. Not need to add the + // extra `synchronized`. + writeInternal(path, actions, overwrite, hadoopConf); + } + } + + @Override + public Boolean isPartialWriteVisible(Path path, Configuration hadoopConf) { + return true; + } + + /** + * @throws IOException if this HDFSLogStore is used to write into a Delta table on a non-HDFS + * storage system. + * @throws FileAlreadyExistsException if {@code overwrite} is false and the file at {@code path} + * already exists. + */ + private void writeInternal( + Path path, + Iterator actions, + Boolean overwrite, + Configuration hadoopConf) throws IOException { + final FileContext fc; + try { + fc = FileContext.getFileContext(path.toUri(), hadoopConf); + } catch (IOException e) { + if (e.getMessage().contains(NO_ABSTRACT_FILE_SYSTEM_EXCEPTION_MESSAGE)) { + final IOException newException = + LogStoreErrors.incorrectLogStoreImplementationException(e); + LOG.error(newException.getMessage(), newException.getCause()); + throw newException; + } else { + throw e; + } + } + + if (!overwrite && fc.util().exists(path)) { + // This is needed for the tests to throw error with local file system + throw new FileAlreadyExistsException(path.toString()); + } + + final Path tempPath = createTempPath(path); + boolean streamClosed = false; // This flag is to avoid double close + boolean renameDone = false; // This flag is to save the delete operation in most cases. + final FSDataOutputStream stream = fc.create( + tempPath, + EnumSet.of(CreateFlag.CREATE), + Options.CreateOpts.checksumParam(Options.ChecksumOpt.createDisabled()) + ); + + try { + while (actions.hasNext()) { + stream.write((actions.next() + "\n").getBytes(StandardCharsets.UTF_8)); + } + stream.close(); + streamClosed = true; + try { + final Options.Rename renameOpt = + overwrite ? Options.Rename.OVERWRITE : Options.Rename.NONE; + fc.rename(tempPath, path, renameOpt); + renameDone = true; + // TODO: this is a workaround of HADOOP-16255 - remove this when HADOOP-16255 is + // resolved + tryRemoveCrcFile(fc, tempPath); + } catch (org.apache.hadoop.fs.FileAlreadyExistsException e) { + throw new FileAlreadyExistsException(path.toString()); + } + } finally { + if (!streamClosed) { + stream.close(); + } + if (!renameDone) { + fc.delete(tempPath, false); // recursive=false + } + } + + msyncIfSupported(path, hadoopConf); + } + + /** + * Normally when using HDFS with an Observer NameNode setup, there would be read after write + * consistency within a single process, so the write would be guaranteed to be visible on the + * next read. 
However, since we are using the FileContext API for writing (for atomic rename), + * and the FileSystem API for reading (for more compatibility with various file systems), we + * are essentially using two separate clients that are not guaranteed to be kept in sync. + * Therefore we "msync" the FileSystem instance, which is cached across all uses of the same + * protocol/host combination, to make sure the next read through the HDFSLogStore can see this + * write. + * Any underlying FileSystem that is not the DistributedFileSystem will simply throw an + * UnsupportedOperationException, which can be ignored. Additionally, if an older version of + * Hadoop is being used that does not include msync, a NoSuchMethodError will be thrown while + * looking up the method, which can also be safely ignored. + */ + private void msyncIfSupported(Path path, Configuration hadoopConf) throws IOException { + try { + FileSystem fs = path.getFileSystem(hadoopConf); + Method msync = fs.getClass().getMethod("msync"); + msync.invoke(fs); + } catch (InterruptedIOException e) { + throw e; + } catch (Throwable e) { + if (e instanceof InterruptedException) { + // Propagate the interrupt status + Thread.currentThread().interrupt(); + } + // We ignore non fatal errors as calling msync is best effort. + } + } + + /** + * @throws IOException if a fatal exception occurs. Will try to ignore most exceptions. + */ + private void tryRemoveCrcFile(FileContext fc, Path path) throws IOException { + try { + final Path checksumFile = + new Path(path.getParent(), String.format(".%s.crc", path.getName())); + + if (fc.util().exists(checksumFile)) { + // checksum file exists, deleting it + fc.delete(checksumFile, true); // recursive=true + } + } catch (Throwable e) { + if (!LogStoreErrors.isNonFatal(e)) { + throw e; + } + // else, ignore - we are removing crc file as "best-effort" + } + } +} diff --git a/storage/src/main/java/io/delta/storage/HadoopFileSystemLogStore.java b/storage/src/main/java/io/delta/storage/HadoopFileSystemLogStore.java new file mode 100644 index 00000000000..2bf3c87b354 --- /dev/null +++ b/storage/src/main/java/io/delta/storage/HadoopFileSystemLogStore.java @@ -0,0 +1,149 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.nio.file.FileAlreadyExistsException; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.UUID; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * Default implementation of {@link LogStore} for Hadoop {@link FileSystem} implementations. 
+ */ +public abstract class HadoopFileSystemLogStore extends LogStore { + + public HadoopFileSystemLogStore(Configuration hadoopConf) { + super(hadoopConf); + } + + @Override + public CloseableIterator read(Path path, Configuration hadoopConf) throws IOException { + FileSystem fs = path.getFileSystem(hadoopConf); + FSDataInputStream stream = fs.open(path); + Reader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)); + return new LineCloseableIterator(reader); + } + + @Override + public Iterator listFrom(Path path, Configuration hadoopConf) throws IOException { + FileSystem fs = path.getFileSystem(hadoopConf); + if (!fs.exists(path.getParent())) { + throw new FileNotFoundException( + String.format("No such file or directory: %s", path.getParent()) + ); + } + FileStatus[] files = fs.listStatus(path.getParent()); + return Arrays.stream(files) + .filter(f -> f.getPath().getName().compareTo(path.getName()) >= 0) + .sorted(Comparator.comparing(o -> o.getPath().getName())) + .iterator(); + } + + @Override + public Path resolvePathOnPhysicalStorage( + Path path, + Configuration hadoopConf) throws IOException { + return path.getFileSystem(hadoopConf).makeQualified(path); + } + + /** + * An internal write implementation that uses FileSystem.rename(). + *

+ * This implementation should only be used for the underlying file systems that support atomic + * renames, e.g., Azure is OK but HDFS is not. + */ + protected void writeWithRename( + Path path, + Iterator actions, + Boolean overwrite, + Configuration hadoopConf) throws IOException { + FileSystem fs = path.getFileSystem(hadoopConf); + + if (!fs.exists(path.getParent())) { + throw new FileNotFoundException( + String.format("No such file or directory: %s", path.getParent()) + ); + } + if (overwrite) { + final FSDataOutputStream stream = fs.create(path, true); + try { + while (actions.hasNext()) { + stream.write((actions.next() + "\n").getBytes(StandardCharsets.UTF_8)); + } + } finally { + stream.close(); + } + } else { + if (fs.exists(path)) { + throw new FileAlreadyExistsException(path.toString()); + } + Path tempPath = createTempPath(path); + boolean streamClosed = false; // This flag is to avoid double close + boolean renameDone = false; // This flag is to save the delete operation in most cases + final FSDataOutputStream stream = fs.create(tempPath); + try { + while (actions.hasNext()) { + stream.write((actions.next() + "\n").getBytes(StandardCharsets.UTF_8)); + } + stream.close(); + streamClosed = true; + try { + if (fs.rename(tempPath, path)) { + renameDone = true; + } else { + if (fs.exists(path)) { + throw new FileAlreadyExistsException(path.toString()); + } else { + throw new IllegalStateException( + String.format("Cannot rename %s to %s", tempPath, path) + ); + } + } + } catch (org.apache.hadoop.fs.FileAlreadyExistsException e) { + throw new FileAlreadyExistsException(path.toString()); + } + } finally { + if (!streamClosed) { + stream.close(); + } + if (!renameDone) { + fs.delete(tempPath, false); + } + } + } + } + + /** + * Create a temporary path (to be used as a copy) for the input {@code path} + */ + protected Path createTempPath(Path path) { + return new Path( + path.getParent(), + String.format(".%s.%s.tmp", path.getName(), UUID.randomUUID()) + ); + } +} diff --git a/storage/src/main/java/io/delta/storage/LineCloseableIterator.java b/storage/src/main/java/io/delta/storage/LineCloseableIterator.java new file mode 100644 index 00000000000..d1178d2ef25 --- /dev/null +++ b/storage/src/main/java/io/delta/storage/LineCloseableIterator.java @@ -0,0 +1,92 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.Reader; +import java.io.UncheckedIOException; +import java.util.NoSuchElementException; + +/** + * Turn a {@link Reader} to {@link CloseableIterator} which can be read on demand. Each element is + * a trimmed line. + */ +public class LineCloseableIterator implements CloseableIterator { + private final BufferedReader reader; + + // Whether `nextValue` is valid. If it's invalid, we should try to read the next line. + private boolean gotNext = false; + + // The next value to return when `next` is called. 
This is valid only if `getNext` is true. + private String nextValue = null; + + // Whether the reader is closed. + private boolean closed = false; + + // Whether we have consumed all data in the reader. + private boolean finished = false; + + public LineCloseableIterator(Reader reader) { + this.reader = + reader instanceof BufferedReader ? (BufferedReader) reader : new BufferedReader(reader); + } + + @Override + public boolean hasNext() { + try { + if (!finished) { + // Check whether we have closed the reader before reading. Even if `nextValue` is + // valid, we still don't return `nextValue` after a reader is closed. Otherwise, it + // would be confusing. + if (closed) { + throw new IllegalStateException("Iterator is closed"); + } + if (!gotNext) { + String nextLine = reader.readLine(); + if (nextLine == null) { + finished = true; + close(); + } else { + nextValue = nextLine.trim(); + } + gotNext = true; + } + } + return !finished; + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public String next() { + if (!hasNext()) { + throw new NoSuchElementException("End of stream"); + } + gotNext = false; + return nextValue; + } + + @Override + public void close() throws IOException { + if (!closed) { + closed = true; + reader.close(); + } + } +} diff --git a/storage/src/main/java/io/delta/storage/LocalLogStore.java b/storage/src/main/java/io/delta/storage/LocalLogStore.java new file mode 100644 index 00000000000..694955a2ad8 --- /dev/null +++ b/storage/src/main/java/io/delta/storage/LocalLogStore.java @@ -0,0 +1,70 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; + +import java.io.IOException; +import java.util.Iterator; + +/** + * Default {@link LogStore} implementation (should be used for testing only!). + * + * Production users should specify the appropriate {@link LogStore} implementation in Spark properties.

+ *
+ * We assume the following from {@link FileSystem} implementations:
+ * <ul>
+ * <li>Rename without overwrite is atomic.</li>
+ * <li>List-after-write is consistent.</li>
+ * </ul>
+ *
+ * Regarding file creation, this implementation:
+ * <ul>
+ * <li>Uses atomic rename when overwrite is false; if the destination file exists or the rename
+ *     fails, throws an exception.</li>
+ * <li>Uses create-with-overwrite when overwrite is true. This does not make the file atomically
+ *     visible and therefore the caller must handle partial files.</li>
+ * </ul>
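+ * <p>
+ * For illustration only: one way to take the "Spark properties" route mentioned above. The
+ * per-scheme property key shown here follows the convention documented at
+ * https://docs.delta.io/latest/delta-storage.html and is an assumption of this sketch, not
+ * something defined by this class:
+ * <pre>{@code
+ *   SparkSession spark = SparkSession.builder()
+ *       .config("spark.delta.logStore.s3a.impl", "io.delta.storage.S3SingleDriverLogStore")
+ *       .getOrCreate();
+ * }</pre>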
+ */ +public class LocalLogStore extends HadoopFileSystemLogStore{ + public LocalLogStore(Configuration hadoopConf) { + super(hadoopConf); + } + + /** + * This write implementation needs to wrap `writeWithRename` with `synchronized` as rename() + * for {@link RawLocalFileSystem} doesn't throw an exception when the target file + * exists. Hence, we must make sure `exists + rename` in `writeWithRename` is atomic in our tests. + */ + @Override + public void write( + Path path, + Iterator actions, + Boolean overwrite, + Configuration hadoopConf) throws IOException { + synchronized(this) { + writeWithRename(path, actions, overwrite, hadoopConf); + } + } + + @Override + public Boolean isPartialWriteVisible(Path path, Configuration hadoopConf) throws IOException { + return true; + } +} diff --git a/storage/src/main/java/io/delta/storage/LogStore.java b/storage/src/main/java/io/delta/storage/LogStore.java new file mode 100644 index 00000000000..a24790e8995 --- /dev/null +++ b/storage/src/main/java/io/delta/storage/LogStore.java @@ -0,0 +1,143 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.nio.file.FileAlreadyExistsException; +import java.util.Iterator; + +/** + * :: DeveloperApi :: + * + *

+ * General interface for all critical file system operations required to read and write the
+ * Delta logs. The correctness is predicated on the atomicity and durability guarantees of
+ * the implementation of this interface. Specifically,
+ * <ol>
+ * <li>Atomic visibility of files: If isPartialWriteVisible is false, any file written through
+ *     this store must be made visible atomically. In other words, this should not generate
+ *     partial files.</li>
+ * <li>Mutual exclusion: Only one writer must be able to create (or rename) a file at the final
+ *     destination.</li>
+ * <li>Consistent listing: Once a file has been written in a directory, all future listings for
+ *     that directory must return that file.</li>
+ * </ol>
+ * <p>
+ * All subclasses of this interface are required to have a constructor that takes Configuration
+ * as a single parameter. This constructor is used to dynamically create the LogStore.
+ * <p>
+ * LogStore and its implementations are not meant for direct access but for configuration based
+ * on storage system. See https://docs.delta.io/latest/delta-storage.html for details.
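+ * <p>
+ * A minimal, hypothetical skeleton of a custom implementation (the class name and method bodies
+ * are illustrative only; the required single-argument constructor is the point being shown):
+ * <pre>{@code
+ *   public class MyLogStore extends LogStore {
+ *     public MyLogStore(Configuration initHadoopConf) {
+ *       super(initHadoopConf);
+ *     }
+ *     // ... implement read, write, listFrom, resolvePathOnPhysicalStorage and
+ *     // isPartialWriteVisible in terms of the target storage system's primitives ...
+ *   }
+ * }</pre>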

+ * + * @since 1.0.0 + */ +public abstract class LogStore { + + private Configuration initHadoopConf; + + public LogStore(Configuration initHadoopConf) { + this.initHadoopConf = initHadoopConf; + } + + /** + * :: DeveloperApi :: + * + * Hadoop configuration that should only be used during initialization of LogStore. Each method + * should use their `hadoopConf` parameter rather than this (potentially outdated) hadoop + * configuration. + */ + public Configuration initHadoopConf() { return initHadoopConf; } + + /** + * :: DeveloperApi :: + * + * Load the given file and return an `Iterator` of lines, with line breaks removed from each line. + * Callers of this function are responsible to close the iterator if they are done with it. + * + * @throws IOException if there's an issue resolving the FileSystem + * @since 1.0.0 + */ + public abstract CloseableIterator read( + Path path, + Configuration hadoopConf) throws IOException; + + /** + * :: DeveloperApi :: + * + * Write the given `actions` to the given `path` with or without overwrite as indicated. + * Implementation must throw {@link java.nio.file.FileAlreadyExistsException} exception if the + * file already exists and overwrite = false. Furthermore, if isPartialWriteVisible returns false, + * implementation must ensure that the entire file is made visible atomically, that is, + * it should not generate partial files. + * + * @throws IOException if there's an issue resolving the FileSystem + * @throws FileAlreadyExistsException if the file already exists and overwrite is false + * @since 1.0.0 + */ + public abstract void write( + Path path, + Iterator actions, + Boolean overwrite, + Configuration hadoopConf) throws IOException; + + /** + * :: DeveloperApi :: + * + * List the paths in the same directory that are lexicographically greater or equal to + * (UTF-8 sorting) the given `path`. The result should also be sorted by the file name. + * + * @throws IOException if there's an issue resolving the FileSystem + * @throws FileAlreadyExistsException if {@code path} directory can't be found + * @since 1.0.0 + */ + public abstract Iterator listFrom( + Path path, + Configuration hadoopConf) throws IOException; + + /** + * :: DeveloperApi :: + * + * Resolve the fully qualified path for the given `path`. + * + * @throws IOException if there's an issue resolving the FileSystem + * @since 1.0.0 + */ + public abstract Path resolvePathOnPhysicalStorage( + Path path, + Configuration hadoopConf) throws IOException; + + /** + * :: DeveloperApi :: + * + * Whether a partial write is visible for the underlying file system of `path`. + * + * @throws IOException if there's an issue resolving the FileSystem + * @since 1.0.0 + */ + public abstract Boolean isPartialWriteVisible( + Path path, + Configuration hadoopConf) throws IOException; +} diff --git a/storage/src/main/java/io/delta/storage/S3SingleDriverLogStore.java b/storage/src/main/java/io/delta/storage/S3SingleDriverLogStore.java new file mode 100644 index 00000000000..a1c4a296d7b --- /dev/null +++ b/storage/src/main/java/io/delta/storage/S3SingleDriverLogStore.java @@ -0,0 +1,330 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.io.CountingOutputStream; +import io.delta.storage.internal.FileNameUtils; +import io.delta.storage.internal.PathLock; +import io.delta.storage.internal.S3LogStoreUtil; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; + +/** + * Single Spark-driver/JVM LogStore implementation for S3. + *

+ * We assume the following from S3's {@link FileSystem} implementations:
+ * <ul>
+ * <li>File writing on S3 is all-or-nothing, whether overwrite or not.</li>
+ * <li>List-after-write can be inconsistent.</li>
+ * </ul>
+ * <p>
+ * Regarding file creation, this implementation:
+ * <ul>
+ * <li>Opens a stream to write to S3 (regardless of the overwrite option).</li>
+ * <li>Failures during stream write may leak resources, but may never result in partial
+ *     writes.</li>
+ * </ul>
+ * <p>
+ * Regarding directory listing, this implementation:
+ * <ul>
+ * <li>Returns a list by merging the files listed from S3 and recently-written files from the
+ *     cache.</li>
+ * </ul>
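+ * <p>
+ * A small illustrative sketch of constructing this store with the fast-listing flag enabled
+ * (the flag name is the one read by the field below; the surrounding setup is assumed test
+ * scaffolding):
+ * <pre>{@code
+ *   Configuration conf = new Configuration();
+ *   conf.setBoolean("delta.enableFastS3AListFrom", true);
+ *   LogStore logStore = new S3SingleDriverLogStore(conf);
+ * }</pre>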
+ */ +public class S3SingleDriverLogStore extends HadoopFileSystemLogStore { + + /** + * Enables a faster implementation of listFrom by setting the startAfter parameter in S3 list + * requests. The feature is enabled by setting the property delta.enableFastS3AListFrom in the + * Hadoop configuration. + * + * This feature requires the Hadoop file system used for S3 paths to be castable to + * org.apache.hadoop.fs.s3a.S3AFileSystem. + */ + private final boolean enableFastListFrom + = initHadoopConf().getBoolean("delta.enableFastS3AListFrom", false); + + /////////////////////////// + // Static Helper Methods // + /////////////////////////// + + /** + * A global path lock to ensure that no concurrent writers writing to the same path in the same + * JVM. + */ + private static final PathLock pathLock = new PathLock(); + + /** + * A global cache that records the metadata of the files recently written. + * As list-after-write may be inconsistent on S3, we can use the files in the cache + * to fix the inconsistent file listing. + */ + private static final Cache writtenPathCache = + CacheBuilder.newBuilder() + .expireAfterAccess(120, TimeUnit.MINUTES) + .build(); + + ///////////////////////////////////////////// + // Constructor and Instance Helper Methods // + ///////////////////////////////////////////// + + public S3SingleDriverLogStore(Configuration hadoopConf) { + super(hadoopConf); + } + + /** + * Check if the path is an initial version of a Delta log. + */ + private boolean isInitialVersion(Path path) { + return FileNameUtils.isDeltaFile(path) && FileNameUtils.deltaVersion(path) == 0L; + } + + private Path resolvePath(FileSystem fs, Path path) { + return stripUserInfo(fs.makeQualified(path)); + } + + private Path stripUserInfo(Path path) { + final URI uri = path.toUri(); + + try { + final URI newUri = new URI( + uri.getScheme(), + null, // userInfo + uri.getHost(), + uri.getPort(), + uri.getPath(), + uri.getQuery(), + uri.getFragment() + ); + + return new Path(newUri); + } catch (URISyntaxException e) { + // Propagating this URISyntaxException to callers would mean we would have to either + // include it in the public LogStore.java interface or wrap it in an + // IllegalArgumentException somewhere else. Instead, catch and wrap it here. + throw new IllegalArgumentException(e); + } + } + + /** + * Merge two lists of {@link FileStatus} into a single list ordered by file path name. + * In case both lists have {@link FileStatus}'s for the same file path, keep the one from + * `listWithPrecedence` and discard the other from `list`. + */ + private Iterator mergeFileLists( + List list, + List listWithPrecedence) { + final Map fileStatusMap = new HashMap<>(); + + // insert all elements from `listWithPrecedence` (highest priority) + // and then insert elements from `list` if and only if that key doesn't already exist + Stream.concat(listWithPrecedence.stream(), list.stream()) + .forEach(fs -> fileStatusMap.putIfAbsent(fs.getPath(), fs)); + + return fileStatusMap + .values() + .stream() + .sorted(Comparator.comparing(a -> a.getPath().getName())) + .iterator(); + } + + /** + * List files starting from `resolvedPath` (inclusive) in the same directory. 
+ */ + private List listFromCache( + FileSystem fs, + Path resolvedPath) { + final Path pathKey = stripUserInfo(resolvedPath); + + return writtenPathCache + .asMap() + .entrySet() + .stream() + .filter(e -> { + final Path path = e.getKey(); + return path.getParent().equals(pathKey.getParent()) && + path.getName().compareTo(pathKey.getName()) >= 0; + }).map(e -> { + final Path path = e.getKey(); + final FileMetadata fileMetadata = e.getValue(); + return new FileStatus( + fileMetadata.length, + false, // isDir + 1, // block_replication + fs.getDefaultBlockSize(path), + fileMetadata.modificationTime, + path); + }).collect(Collectors.toList()); + } + + /** + * List files starting from `resolvedPath` (inclusive) in the same directory, which merges + * the file system list and the cache list when `useCache` is on, otherwise + * use file system list only. + */ + private Iterator listFromInternal( + FileSystem fs, + Path resolvedPath, + boolean useCache) throws IOException { + final Path parentPath = resolvedPath.getParent(); + if (!fs.exists(parentPath)) { + throw new FileNotFoundException( + String.format("No such file or directory: %s", parentPath) + ); + } + + FileStatus[] statuses; + if ( + // LocalFileSystem and RawLocalFileSystem checks are needed for tests to pass + fs instanceof LocalFileSystem || fs instanceof RawLocalFileSystem || !enableFastListFrom + ) { + statuses = fs.listStatus(parentPath); + } else { + statuses = S3LogStoreUtil.s3ListFromArray(fs, resolvedPath, parentPath); + } + + final List listedFromFs = Arrays + .stream(statuses) + .filter(s -> s.getPath().getName().compareTo(resolvedPath.getName()) >= 0) + .collect(Collectors.toList()); + + final List listedFromCache = useCache ? + listFromCache(fs, resolvedPath) : Collections.emptyList(); + + // File statuses listed from file system take precedence + return mergeFileLists(listedFromCache, listedFromFs); + } + + /** + * Check if a path exists. Normally we check both the file system and the cache, but when the + * path is the first version of a Delta log, we ignore the cache. + */ + private boolean exists( + FileSystem fs, + Path resolvedPath) throws IOException { + final boolean useCache = !isInitialVersion(resolvedPath); + final Iterator iter = listFromInternal(fs, resolvedPath, useCache); + if (!iter.hasNext()) return false; + + return iter.next().getPath().getName().equals(resolvedPath.getName()); + } + + //////////////////////// + // Public API Methods // + //////////////////////// + + @Override + public void write( + Path path, + Iterator actions, + Boolean overwrite, + Configuration hadoopConf) throws IOException { + final FileSystem fs = path.getFileSystem(hadoopConf); + final Path resolvedPath = resolvePath(fs, path); + try { + pathLock.acquire(resolvedPath); + try { + if (exists(fs, resolvedPath) && !overwrite) { + throw new java.nio.file.FileAlreadyExistsException( + resolvedPath.toUri().toString() + ); + } + + final CountingOutputStream stream = + new CountingOutputStream(fs.create(resolvedPath, overwrite)); + + while (actions.hasNext()) { + stream.write((actions.next() + "\n").getBytes(StandardCharsets.UTF_8)); + } + stream.close(); + + // When a Delta log starts afresh, all cached files in that Delta log become + // obsolete, so we remove them from the cache. 
+ if (isInitialVersion(resolvedPath)) { + final List obsoleteFiles = writtenPathCache + .asMap() + .keySet() + .stream() + .filter(p -> p.getParent().equals(resolvedPath.getParent())) + .collect(Collectors.toList()); + + writtenPathCache.invalidateAll(obsoleteFiles); + } + + // Cache the information of written files to help fix the inconsistency in future + // listings + writtenPathCache.put( + resolvedPath, + new FileMetadata(stream.getCount(), System.currentTimeMillis()) + ); + } catch (org.apache.hadoop.fs.FileAlreadyExistsException e) { + // Convert Hadoop's FileAlreadyExistsException to Java's FileAlreadyExistsException + throw new java.nio.file.FileAlreadyExistsException(e.getMessage()); + } + } catch (java.lang.InterruptedException e) { + throw new InterruptedIOException(e.getMessage()); + } finally { + pathLock.release(resolvedPath); + } + } + + @Override + public Iterator listFrom(Path path, Configuration hadoopConf) throws IOException { + final FileSystem fs = path.getFileSystem(hadoopConf); + final Path resolvedPath = resolvePath(fs, path); + return listFromInternal(fs, resolvedPath, true); // useCache=true + } + + @Override + public Boolean isPartialWriteVisible(Path path, Configuration hadoopConf) { + return false; + } + + ////////////////// + // Helper Class // + ////////////////// + + /** + * The file metadata to be stored in the cache. + */ + private class FileMetadata { + private long length; + private long modificationTime; + + public FileMetadata(long length, long modificationTime) { + this.length = length; + this.modificationTime = modificationTime; + } + } +} diff --git a/storage/src/main/java/io/delta/storage/internal/FileNameUtils.java b/storage/src/main/java/io/delta/storage/internal/FileNameUtils.java new file mode 100644 index 00000000000..8dae27584a6 --- /dev/null +++ b/storage/src/main/java/io/delta/storage/internal/FileNameUtils.java @@ -0,0 +1,33 @@ +package io.delta.storage.internal; + +import java.util.regex.Pattern; + +import org.apache.hadoop.fs.Path; + +/** + * Helper for misc functions relating to file names for delta commits. + */ +public final class FileNameUtils { + static Pattern DELTA_FILE_PATTERN = Pattern.compile("\\d+\\.json"); + + /** + * Returns the delta (json format) path for a given delta file. + */ + public static Path deltaFile(Path path, long version) { + return new Path(path, String.format("%020d.json", version)); + } + + /** + * Returns the version for the given delta path. + */ + public static long deltaVersion(Path path) { + return Long.parseLong(path.getName().split("\\.")[0]); + } + + /** + * Returns true if the given path is a delta file, else false. + */ + public static boolean isDeltaFile(Path path) { + return DELTA_FILE_PATTERN.matcher(path.getName()).matches(); + } +} diff --git a/storage/src/main/java/io/delta/storage/internal/LogStoreErrors.java b/storage/src/main/java/io/delta/storage/internal/LogStoreErrors.java new file mode 100644 index 00000000000..6685ce3abfb --- /dev/null +++ b/storage/src/main/java/io/delta/storage/internal/LogStoreErrors.java @@ -0,0 +1,51 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage.internal; + +import java.io.IOException; + +public class LogStoreErrors { + + /** + * Returns true if the provided Throwable is to be considered non-fatal, or false if it is to be + * considered fatal + */ + public static boolean isNonFatal(Throwable t) { + // VirtualMachineError includes OutOfMemoryError and other fatal errors + if (t instanceof VirtualMachineError || + t instanceof ThreadDeath || + t instanceof InterruptedException || + t instanceof LinkageError) { + return false; + } + + return true; + } + + public static IOException incorrectLogStoreImplementationException(Throwable cause) { + return new IOException( + String.join("\n", + "The error typically occurs when the default LogStore implementation, that", + "is, HDFSLogStore, is used to write into a Delta table on a non-HDFS storage system.", + "In order to get the transactional ACID guarantees on table updates, you have to use the", + "correct implementation of LogStore that is appropriate for your storage system.", + "See https://docs.delta.io/latest/delta-storage.html for details." + ), + cause + ); + } +} diff --git a/storage/src/main/java/io/delta/storage/internal/PathLock.java b/storage/src/main/java/io/delta/storage/internal/PathLock.java new file mode 100644 index 00000000000..c11238f42d1 --- /dev/null +++ b/storage/src/main/java/io/delta/storage/internal/PathLock.java @@ -0,0 +1,44 @@ +package io.delta.storage.internal; + +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.hadoop.fs.Path; + +/** + * A lock that provides per-file-path `acquire` and `release` semantics. Can be used to ensure that + * no two writers are creating the same external (e.g. S3) file at the same time. + *
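+ * <p>
+ * A minimal usage sketch, mirroring how {@code S3SingleDriverLogStore.write} uses this class
+ * (the variable names are illustrative):
+ * <pre>{@code
+ *   Path resolvedPath = fs.makeQualified(path);
+ *   pathLock.acquire(resolvedPath);
+ *   try {
+ *     // ... write the file at resolvedPath ...
+ *   } finally {
+ *     pathLock.release(resolvedPath);
+ *   }
+ * }</pre>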

+ * Note: For all APIs, the caller should resolve the path to make sure we are locking the correct + * absolute path. + */ +public class PathLock { + + private final ConcurrentHashMap pathLock; + + public PathLock() { + this.pathLock = new ConcurrentHashMap<>(); + } + + /** Release the lock for the path after writing. */ + public void release(Path resolvedPath) { + final Object lock = pathLock.remove(resolvedPath); + synchronized(lock) { + lock.notifyAll(); + } + } + + /** Acquire a lock for the path before writing. */ + public void acquire(Path resolvedPath) throws InterruptedException { + while (true) { + final Object lock = pathLock.putIfAbsent(resolvedPath, new Object()); + if (lock == null) { + return; + } + synchronized (lock) { + while (pathLock.get(resolvedPath) == lock) { + lock.wait(); + } + } + } + } +} diff --git a/storage/src/main/java/io/delta/storage/internal/S3LogStoreUtil.java b/storage/src/main/java/io/delta/storage/internal/S3LogStoreUtil.java new file mode 100644 index 00000000000..198d18ab9b7 --- /dev/null +++ b/storage/src/main/java/io/delta/storage/internal/S3LogStoreUtil.java @@ -0,0 +1,116 @@ +/* + * Copyright (2022) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage.internal; + +import com.amazonaws.services.s3.model.ListObjectsV2Request; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.s3a.*; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.HashSet; + +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAX_PAGING_KEYS; +import static org.apache.hadoop.fs.s3a.Constants.MAX_PAGING_KEYS; +import static org.apache.hadoop.fs.s3a.S3AUtils.iteratorToStatuses; + + +/** + * Static utility methods for the S3SingleDriverLogStore. + * + * Used to trick the class loader so we can use methods of org.apache.hadoop:hadoop-aws without needing to load this as + * a dependency for tests in core. + */ +public final class S3LogStoreUtil { + private S3LogStoreUtil() {} + + private static PathFilter ACCEPT_ALL = new PathFilter() { + @Override + public boolean accept(Path file) { + return true; + } + + @Override + public String toString() { + return "ACCEPT_ALL"; + } + }; + + /** + * Uses the S3ListRequest.v2 interface with the startAfter parameter to only list files + * which are lexicographically greater than resolvedPath. 
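+ * <p>
+ * A worked example (the key below is purely illustrative):
+ * <pre>{@code
+ *   // keyBefore subtracts one from the last byte, so the startAfter marker sorts just
+ *   // before resolvedPath and the listing therefore includes resolvedPath itself.
+ *   keyBefore("table/_delta_log/00000000000000000010.json");
+ *   // returns "table/_delta_log/00000000000000000010.jsom"
+ * }</pre>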
+ */ + private static RemoteIterator s3ListFrom( + S3AFileSystem s3afs, + Path resolvedPath, + Path parentPath) throws IOException { + int maxKeys = S3AUtils.intOption(s3afs.getConf(), MAX_PAGING_KEYS, DEFAULT_MAX_PAGING_KEYS, 1); + Listing listing = s3afs.getListing(); + // List files lexicographically after resolvedPath inclusive within the same directory + return listing.createFileStatusListingIterator(resolvedPath, + S3ListRequest.v2( + new ListObjectsV2Request() + .withBucketName(s3afs.getBucket()) + .withMaxKeys(maxKeys) + .withPrefix(s3afs.pathToKey(parentPath)) + .withStartAfter(keyBefore(s3afs.pathToKey(resolvedPath))) + ), ACCEPT_ALL, + new Listing.AcceptAllButSelfAndS3nDirs(parentPath), + s3afs.getActiveAuditSpan()); + } + + /** + * Uses the S3ListRequest.v2 interface with the startAfter parameter to only list files + * which are lexicographically greater than resolvedPath. + * + * Wraps s3ListFrom in an array. Contained in this class to avoid contaminating other + * classes with dependencies on recent Hadoop versions. + * + * TODO: Remove this method when iterators are used everywhere. + */ + public static FileStatus[] s3ListFromArray( + FileSystem fs, + Path resolvedPath, + Path parentPath) throws IOException { + S3AFileSystem s3afs; + try { + s3afs = (S3AFileSystem) fs; + } catch (ClassCastException e) { + throw new UnsupportedOperationException( + "The Hadoop file system used for the S3LogStore must be castable to " + + "org.apache.hadoop.fs.s3a.S3AFileSystem.", e); + } + return iteratorToStatuses(S3LogStoreUtil.s3ListFrom(s3afs, resolvedPath, parentPath), new HashSet<>()); + } + + /** + * Get the key which is lexicographically right before key. + * If the key is empty return null. + * If the key ends in a null byte, remove the last byte. + * Otherwise, subtract one from the last byte. + */ + static String keyBefore(String key) { + byte[] bytes = key.getBytes(StandardCharsets.UTF_8); + if(bytes.length == 0) return null; + if(bytes[bytes.length - 1] > 0) { + bytes[bytes.length - 1] -= 1; + return new String(bytes, StandardCharsets.UTF_8); + } else { + return new String(bytes, 0, bytes.length - 1, StandardCharsets.UTF_8); + } + } +} diff --git a/storage/src/main/java/io/delta/storage/internal/ThreadUtils.java b/storage/src/main/java/io/delta/storage/internal/ThreadUtils.java new file mode 100644 index 00000000000..feea5e7273f --- /dev/null +++ b/storage/src/main/java/io/delta/storage/internal/ThreadUtils.java @@ -0,0 +1,104 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage.internal; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Callable; + +public final class ThreadUtils { + + /** + * Based on Apache Spark's ThreadUtils.runInNewThread + * Run a piece of code in a new thread and return the result. 
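+ * <p>
+ * A minimal usage sketch (the thread name and body are illustrative; the call rethrows whatever
+ * the body throws, so callers must handle {@code Throwable}):
+ * <pre>{@code
+ *   String name = ThreadUtils.runInNewThread(
+ *       "example-thread",
+ *       true, // daemon
+ *       () -> Thread.currentThread().getName());
+ * }</pre>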
+ */ + public static T runInNewThread( + String threadName, + boolean isDaemon, + Callable body) throws Throwable { + // Using a single-element list to hold the throwable and result, + // since values used in static method must be final + List exceptionHolder = new ArrayList<>(1); + List resultHolder = new ArrayList<>(1); + Thread thread = new Thread(threadName) { + @Override + public void run() { + try { + resultHolder.add(body.call()); + } catch (Throwable t) { + exceptionHolder.add(t); + } + } + }; + thread.setDaemon(isDaemon); + thread.start(); + thread.join(); + + if (!exceptionHolder.isEmpty()) { + Throwable realException = exceptionHolder.get(0); + + // Remove the part of the stack that shows method calls into this helper method + // This means drop everything from the top until the stack element + // ThreadUtils.runInNewThread(), and then drop that as well (hence the `drop(1)`). + List baseStackTrace = new ArrayList<>(); + boolean shouldDrop = true; + for (StackTraceElement st : Thread.currentThread().getStackTrace()) { + if (!shouldDrop) { + baseStackTrace.add(st); + } else if (st.getClassName().contains(ThreadUtils.class.getSimpleName())){ + shouldDrop = false; + } + } + + // Remove the part of the new thread stack that shows methods call from this helper + // method. This means take everything from the top until the stack element + List extraStackTrace = new ArrayList<>(); + for (StackTraceElement st : realException.getStackTrace()) { + if (!st.getClassName().contains(ThreadUtils.class.getSimpleName())) { + extraStackTrace.add(st); + } else { + break; + } + } + + // Combine the two stack traces, with a placeholder just specifying that there + // was a helper method used, without any further details of the helper + StackTraceElement placeHolderStackElem = new StackTraceElement( + String.format( // Providing the helper class info. + "... run in separate thread using %s static method runInNewThread", + ThreadUtils.class.getSimpleName() + ), + " ", // method name containing the execution point, not required here. + "", // filename containing the execution point, not required here. + -1); // source line number also not required. -1 indicates unavailable. 
+ List finalStackTrace = new ArrayList<>(); + finalStackTrace.addAll(extraStackTrace); + finalStackTrace.add(placeHolderStackElem); + finalStackTrace.addAll(baseStackTrace); + + // Update the stack trace and rethrow the exception in the caller thread + realException.setStackTrace( + finalStackTrace.toArray(new StackTraceElement[0]) + ); + throw realException; + } else { + return resultHolder.get(0); + } + } +} diff --git a/storage/src/test/scala/io/delta/storage/ThreadUtilsSuite.scala b/storage/src/test/scala/io/delta/storage/ThreadUtilsSuite.scala new file mode 100644 index 00000000000..ee9cd8ed590 --- /dev/null +++ b/storage/src/test/scala/io/delta/storage/ThreadUtilsSuite.scala @@ -0,0 +1,45 @@ +package io.delta.storage + +import java.io.IOException + +import scala.util.Random + +import org.scalatest.funsuite.AnyFunSuite + +class ThreadUtilsSuite extends AnyFunSuite { + test("runInNewThread") { + import io.delta.storage.internal.ThreadUtils.runInNewThread + + assert(runInNewThread("thread-name", + true, + () => { + Thread.currentThread().getName + }) === "thread-name" + ) + assert(runInNewThread("thread-name", + true, + () => { + Thread.currentThread().isDaemon + }) + ) + assert(runInNewThread("thread-name", + false, + () => { + Thread.currentThread().isDaemon + } === false) + ) + + val ioExceptionMessage = "test" + Random.nextInt() + val ioException = intercept[IOException] { + runInNewThread("thread-name", + true, + () => { + throw new IOException(ioExceptionMessage) + }) + } + assert(ioException.getMessage === ioExceptionMessage) + assert(ioException.getStackTrace.mkString("\n") + .contains("... run in separate thread using ThreadUtils")) + assert(!ioException.getStackTrace.mkString("\n").contains("ThreadUtils.java")) + } +} diff --git a/storage/src/test/scala/io/delta/storage/integration/S3LogStoreUtilIntegrationTest.scala b/storage/src/test/scala/io/delta/storage/integration/S3LogStoreUtilIntegrationTest.scala new file mode 100644 index 00000000000..9d34d30c05a --- /dev/null +++ b/storage/src/test/scala/io/delta/storage/integration/S3LogStoreUtilIntegrationTest.scala @@ -0,0 +1,104 @@ +package io.delta.storage.integration + +import io.delta.storage.internal.{FileNameUtils, S3LogStoreUtil} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.s3a.S3AFileSystem +import org.scalatest.Tag +import org.scalatest.funsuite.AnyFunSuite + +import java.net.URI +import scala.math.max +import scala.math.ceil + +/** + * These integration tests are executed by setting the + * environment variables + * S3_LOG_STORE_UTIL_TEST_BUCKET=some-s3-bucket-name + * S3_LOG_STORE_UTIL_TEST_RUN_UID=some-uuid-for-test-run + * and running + * python run-integration-tests.py --s3-log-store-util-only + * + * Alternatively you can set the environment variables + * S3_LOG_STORE_UTIL_TEST_ENABLED=true + * S3_LOG_STORE_UTIL_TEST_BUCKET=some-s3-bucket-name + * S3_LOG_STORE_UTIL_TEST_RUN_UID=some-uuid-for-test-run + * and run the tests in this suite using your preferred + * test execution mechanism (e.g., the IDE or sbt) + * + * S3_LOG_STORE_UTIL_TEST_BUCKET is the name of the S3 bucket used for the test. + * S3_LOG_STORE_UTIL_TEST_RUN_UID is a prefix for all keys used in the test. + * This is useful for isolating multiple test runs. 
+ */ +class S3LogStoreUtilIntegrationTest extends AnyFunSuite { + private val runIntegrationTests: Boolean = + Option(System.getenv("S3_LOG_STORE_UTIL_TEST_ENABLED")).exists(_.toBoolean) + private val bucket = System.getenv("S3_LOG_STORE_UTIL_TEST_BUCKET") + private val testRunUID = + System.getenv("S3_LOG_STORE_UTIL_TEST_RUN_UID") // Prefix for all S3 keys in the current run + private lazy val fs: S3AFileSystem = { + val fs = new S3AFileSystem() + fs.initialize(new URI(s"s3a://$bucket"), configuration) + fs + } + private val maxKeys = 2 + private val configuration = new Configuration() + configuration.set( // for local testing only + "fs.s3a.aws.credentials.provider", + "com.amazonaws.auth.profile.ProfileCredentialsProvider", + ) + configuration.set("fs.s3a.paging.maximum", maxKeys.toString) + + private def touch(key: String) { + fs.create(new Path(s"s3a://$bucket/$key")).close() + } + + private def key(table: String, version: Int): String = + s"$testRunUID/$table/_delta_log/%020d.json".format(version) + + private def path(table: String, version: Int): Path = + new Path(s"s3a://$bucket/${key(table, version)}") + + private def version(path: Path): Long = FileNameUtils.deltaVersion(path) + + private val integrationTestTag = Tag("IntegrationTest") + + def integrationTest(name: String)(testFun: => Any): Unit = + if (runIntegrationTests) test(name, integrationTestTag)(testFun) + + def testCase(testName: String, numKeys: Int): Unit = integrationTest(testName) { + // Setup delta log + (1 to numKeys).foreach(v => touch(s"$testRunUID/$testName/_delta_log/%020d.json".format(v))) + + // Check number of S3 requests and correct listing + (1 to numKeys + 2).foreach(v => { + val startCount = fs.getIOStatistics.counters().get("object_list_request") + + fs.getIOStatistics.counters().get("object_continue_list_request") + val resolvedPath = path(testName, v) + val response = S3LogStoreUtil.s3ListFromArray(fs, resolvedPath, resolvedPath.getParent) + val endCount = fs.getIOStatistics.counters().get("object_list_request") + + fs.getIOStatistics.counters().get("object_continue_list_request") + // Check that we don't do more S3 list requests than necessary + val numberOfKeysToList = numKeys - (v - 1) + val optimalNumberOfListRequests = + max(ceil(numberOfKeysToList / maxKeys.toDouble).toInt, 1) + val actualNumberOfListRequests = endCount - startCount + assert(optimalNumberOfListRequests == actualNumberOfListRequests) + // Check that we get consecutive versions from v to the max version. 
The smallest version is 1 + assert((max(1, v) to numKeys) == response.map(r => version(r.getPath)).toSeq) + }) + } + + integrationTest("setup empty delta log") { + touch(s"$testRunUID/empty/some.json") + } + + testCase("empty", 0) + + testCase("small", 1) + + testCase("medium", maxKeys) + + testCase("large", 10 * maxKeys) + +} diff --git a/storage/src/test/scala/io/delta/storage/internal/S3LogStoreUtilTest.scala b/storage/src/test/scala/io/delta/storage/internal/S3LogStoreUtilTest.scala new file mode 100644 index 00000000000..1092bae0568 --- /dev/null +++ b/storage/src/test/scala/io/delta/storage/internal/S3LogStoreUtilTest.scala @@ -0,0 +1,24 @@ +package io.delta.storage.internal + +import org.scalatest.funsuite.AnyFunSuite + +class S3LogStoreUtilTest extends AnyFunSuite { + test("keyBefore") { + assert("a" == S3LogStoreUtil.keyBefore("b")) + assert("aa/aa" == S3LogStoreUtil.keyBefore("aa/ab")) + assert(Seq(1.toByte, 1.toByte) + == S3LogStoreUtil.keyBefore(new String(Seq(1.toByte, 2.toByte).toArray)).getBytes.toList) + } + + test("keyBefore with emojis") { + assert("♥a" == S3LogStoreUtil.keyBefore("♥b")) + } + + test("keyBefore with zero bytes") { + assert("abc" == S3LogStoreUtil.keyBefore("abc\u0000")) + } + + test("keyBefore with empty key") { + assert(null == S3LogStoreUtil.keyBefore("")) + } +} diff --git a/testDeltaIcebergJar/src/test/scala/JarSuite.scala b/testDeltaIcebergJar/src/test/scala/JarSuite.scala new file mode 100644 index 00000000000..9e2edf80d31 --- /dev/null +++ b/testDeltaIcebergJar/src/test/scala/JarSuite.scala @@ -0,0 +1,106 @@ +/* + * Copyright (2023-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File +import java.net.JarURLConnection +import java.util.jar.JarFile + +import scala.collection.JavaConverters._ + +import org.scalatest.funsuite.AnyFunSuite + +class JarSuite extends AnyFunSuite { + + val allowedClassPrefixes = Seq( + // e.g. shadedForDelta/org/apache/iceberg/BaseTable.class + "shadedForDelta/", + // e.g. scala/collection/compat/immutable/ArraySeq.class + // e.g. scala/jdk/CollectionConverters.class + "scala/", + // e.g. org/apache/spark/sql/delta/icebergShaded/IcebergTransactionUtils.class + "org/apache/spark/sql/delta/icebergShaded/", + // We explicitly include all the /delta/commands/convert classes we want, to ensure we don't + // accidentally pull in some from delta-spark package. + "org/apache/spark/sql/delta/commands/convert/IcebergFileManifest", + "org/apache/spark/sql/delta/commands/convert/IcebergSchemaUtils", + "org/apache/spark/sql/delta/commands/convert/IcebergTable", + // e.g. 
org/apache/iceberg/transforms/IcebergPartitionUtil.class + "org/apache/iceberg/", + "com/github/benmanes/caffeine/" + ) + + test("audit files in assembly jar") { + // Step 1: load the jar (and make sure it exists) + // scalastyle:off classforname + val classUrl = Class.forName("org.apache.spark.sql.delta.icebergShaded.IcebergConverter").getResource("IcebergConverter.class") + // scalastyle:on classforname + assert(classUrl != null, "Could not find delta-iceberg jar") + val connection = classUrl.openConnection().asInstanceOf[JarURLConnection] + val url = connection.getJarFileURL + val jarFile = new JarFile(new File(url.toURI)) + + // Step 2: Verify the JAR has the classes we want it to have + try { + val jarClasses = jarFile + .entries() + .asScala + .filter(!_.isDirectory) + .map(_.toString) + .filter(_.endsWith(".class")) // let's ignore any .properties or META-INF files for now + .toSet + + // 2.1: Verify there are no prohibited classes (e.g. io/delta/storage/...) + // + // You can test this code path by commenting out the "io/delta" match case of the + // assemblyMergeStrategy config in build.sbt. + val prohibitedJarClasses = jarClasses + .filter { clazz => !allowedClassPrefixes.exists(prefix => clazz.startsWith(prefix)) } + + if (prohibitedJarClasses.nonEmpty) { + throw new Exception( + s"Prohibited jar class(es) found:\n- ${prohibitedJarClasses.mkString("\n- ")}" + ) + } + + // 2.2: Verify that, for each allowed class prefix, we actually loaded a class for it (instead + // of, say, loading an empty jar). + // + // You can test this code path by adding the following code snippet to the delta-iceberg + // assemblyMergeStrategy config in build.sbt: + // case PathList("shadedForDelta", xs @ _*) => MergeStrategy.discard + + // Map of prefix -> # classes with that prefix + val allowedClassesCounts = scala.collection.mutable.Map( + allowedClassPrefixes.map(prefix => (prefix, 0)) : _* + ) + jarClasses.foreach { clazz => + allowedClassPrefixes.foreach { prefix => + if (clazz.startsWith(prefix)) { + allowedClassesCounts(prefix) += 1 + } + } + } + val missingClasses = allowedClassesCounts.filter(_._2 == 0).keys + if (missingClasses.nonEmpty) { + throw new Exception( + s"No classes found for the following prefix(es):\n- ${missingClasses.mkString("\n- ")}" + ) + } + } finally { + jarFile.close() + } + } +} diff --git a/version.sbt b/version.sbt new file mode 100644 index 00000000000..54616825f9b --- /dev/null +++ b/version.sbt @@ -0,0 +1 @@ +ThisBuild / version := "3.1.0-SNAPSHOT"